Last Updated: March 08, 2017
·
21.3K
· vs4vijay

Extract Data from XPath via Google Apps Script

You can extract data from a web page with Google Apps Script: just pass the XPath of the element and the URL of the page to monitor.

I have set up a script that continuously monitors Y Combinator's top post (Hacker News). When it changes, I get an email with the link.

/**
 * Fetches `url`, parses the response with the Apps Script `Xml` service and
 * walks a simple absolute XPath down to a single element.
 *
 * Only two kinds of step are understood: a plain child name ("td"), which
 * selects the first child with that name, and a 1-based positional step
 * ("tr[12]"). Other XPath predicates are not supported and make the lookup
 * fail with an Error.
 *
 * @param {string} path - Absolute XPath such as "/html/body/table/tbody/tr[3]/td/a".
 * @param {string} url - Address of the page to fetch.
 * @returns {string} The element's text followed by its href, formatted as
 *     "text [ href ] ". The href part is empty when the element has no
 *     href attribute.
 * @throws {Error} When a step of the path matches no element.
 */
function getDataFromXpath(path, url) {
  var data = UrlFetchApp.fetch(url);
  var text = data.getContentText();
  // NOTE(review): the second argument presumably enables lenient (HTML-tolerant)
  // parsing - confirm against the Xml service documentation.
  var xmlDoc = Xml.parse(text, true);

  // The walk starts at the document's root element, so the leading "/html/"
  // is dropped. Every tbody step is dropped as well because Apps Script does
  // not understand it (browsers report tbody in copied XPaths). Filtering
  // whole segments removes ALL tbody steps, indexed ones included, and also
  // skips empty segments caused by stray slashes.
  var tags = path.replace("/html/", "").split("/").filter(function (segment) {
    return segment !== "" && segment.replace(/\[.*$/, "") !== "tbody";
  });
  Logger.log("tags : " + tags);

  // Root element of the parsed document.
  var element = xmlDoc.getElement();

  for (var i = 0; i < tags.length; i++) {
    var tag = tags[i];
    Logger.log("Tag : " + tag);
    var index = tag.indexOf("[");
    if (index !== -1) {
      // Parse the whole number after "[" so multi-digit positions such as
      // tr[12] work; XPath positions are 1-based.
      var position = parseInt(tag.substring(index + 1), 10);
      element = element.getElements(tag.substring(0, index))[position - 1];
    } else {
      element = element.getElement(tag);
    }
    if (!element) {
      throw new Error("XPath step '" + tag + "' matched no element in " + url);
    }
  }

  var href = element.getAttribute("href");
  return element.getText() + ' [ ' + (href ? href.getValue() : "") + ' ] ';
}


/**
 * Checks the element at a fixed XPath on the Hacker News front page and sends
 * an email when its value differs from the one remembered in the cache.
 *
 * The latest value is stored in the cache under the page URL, so a run that
 * sees the same value again does not send another mail.
 */
function checkUpdateAndSendEmail() {
  // NOTE(review): getPrivateCache() is believed to be deprecated in favour of
  // getUserCache() - confirm against the CacheService docs before switching.
  var cache = CacheService.getPrivateCache();

  var url = "https://news.ycombinator.com/news";
  var path = "/html/body/center/table/tbody/tr[3]/td/table/tbody/tr/td[3]/a";
  var cached = cache.get(url);
  var text = getDataFromXpath(path, url);

  // A missing cache entry (null) never equals the freshly fetched text, so
  // the first run also counts as a change.
  if (cached !== text) {
    // Store the NEW value; storing the stale one would make every later run
    // look like a change and send a mail each time.
    cache.put(url, text, 3666);
    MailApp.sendEmail("<email>", "YCombinator Top", text);
    Logger.log("Mail Sent!!! ");
  }
  Logger.log("text : " + text);
  Logger.log("cached : " + cached);
}

https://gist.github.com/vs4vijay/6724868