Last Updated: February 25, 2016
·
1.749K
· goliardico

Build a Node.js spider to test and time requests (and prove the benefits of async)

In Node.js it is easy to make things async (sometimes it is difficult not to). I'll test this with a simple spider that reads URLs from a file and makes requests until it reaches the end; finally, it prints the time spent with parallel versus serial execution.

The Test

You need:

  • a web server to call (www.google.it?)
  • some URLs to test, written one per line in an external file (url.txt)

then:

$ node get.js

3 urls loaded from url.txt

/doodles/ - 301 - 48 ms
/intl/it/about/ - 200 - 55 ms
/nowhere - 404 - 50 ms

Completed in 62 ms (parallel) versus 153 ms (serial).

Well done!

The Code

Now the code and an example url.txt

url.txt:

/intl/it/about/
/doodles/
/nowhere

I think the code is pretty self-explanatory (and commented), so here it is:

get.js

var fs = require('fs');
var http = require('http');

// Configuration: change these if you want.
// urlFile   - text file holding one request path per line.
// serverWeb - host that every path is requested from.
var urlFile = 'url.txt';
var serverWeb = 'www.google.it';

// Sum of the individual request durations (reported as the "serial" time).
var totalElapsedTime = 0;
// Timestamp (ms) taken at script start, used for the overall wall-clock time.
var startRun = +new Date();

// Read the paths from the file, one per line. Lines are trimmed and blank
// lines dropped so that a trailing newline or Windows (CRLF) line endings
// neither inflate the count nor produce a request with an empty path.
var arrUrl = fs.readFileSync(urlFile, 'utf8')
  .split(/\r?\n/)
  .map(function(line) { return line.trim(); })
  .filter(function(line) { return line.length > 0; });
console.log(arrUrl.length + " urls loaded from " + urlFile);

// Set up an array to hold one entry per request. All requests are issued
// up front, without waiting for earlier ones to answer, so they are in
// flight at the same time.
var url = [];

for ( var i = 0; i < arrUrl.length; i++ ) {
  url[i] = {};
  url[i].httpGet = http.get({ hostname: serverWeb, port: 80, path: arrUrl[i], headers: {'user-agent': 'node.js'}},
    function(res) {
      // The callback is registered as a 'response' listener on the request,
      // so `this` is the request object carrying `path` and `start`.
      var end = +new Date();
      console.log(this.path + " - " + res.statusCode + " - " + (end-this.start) + " ms");
      totalElapsedTime += (end-this.start);
      // Only the status line is of interest; discard the body so the
      // response is fully consumed and its resources are released.
      res.resume();
    }).on('error', function(e) {
      // No response object exists when the request itself fails, so only
      // the path, the elapsed time and the error message are reported.
      var end = +new Date();
      console.log("ERR " + this.path + " - " + (end-this.start) + " ms / " + e.message);
      totalElapsedTime += (end-this.start);
    });
  // Stored on the request so the handlers above can compute its duration;
  // they run asynchronously, after this assignment has happened.
  url[i].httpGet.start = +new Date();
}

// Print the summary when the process exits: wall-clock time since startRun
// (the "parallel" figure) next to the accumulated per-request durations in
// totalElapsedTime (the "serial" figure).
process.on('exit', function() {
  var wallClockMs = Date.now() - startRun;
  console.log("Completed in " + wallClockMs + " ms (parallel) versus " + totalElapsedTime + " ms (serial).");
});