Last Updated: February 25, 2016
·
2.685K
· MidnightLightning

Node and binary data

When working with raw binary data, Node.js has a Buffer object built for that purpose, which is specific to Node. However, JavaScript itself has since gained Typed Arrays, which are effectively the same thing. You'd think that because Node's Buffer objects were custom-made for that platform, they'd be optimized for it, and that the more abstract Typed Arrays structure (built for JavaScript in general, not the Node/V8 engine under it) would be less efficient. But are they? Let's test!

So, the question becomes, if you have a set of binary data in a regular Javascript Array, is it faster to convert it to a Buffer and work with it, or a Typed Array DataView?

Both have a way of absorbing an array of existing data as an initialization option, so let's speed-compare them:

var crypto = require('crypto');
// Benchmark parameters: the number of iterations each timed loop performs,
// and the number of random bytes placed in every generated test array.
const rounds = 5000;
const arraySize = 10000;

{
  // Baseline: times only the creation of the source arrays, i.e. the overhead
  // that every other benchmark in this file also pays on each iteration.
  const label = 'Random array generation';
  console.time(label);
  for (let round = 0; round < rounds; round++) {
    getRandomArray(); // result is intentionally discarded
  }
  console.timeEnd(label);
}

{
  // Times building a Buffer directly from a plain array of byte values.
  // Buffer.from(array) is used instead of `new Buffer(array)`, which is
  // deprecated in Node.js.
  const label = 'Create and load Buffer';
  console.time(label);
  for (let round = 0; round < rounds; round++) {
    Buffer.from(getRandomArray()); // result is intentionally discarded
  }
  console.timeEnd(label);
}

{
  // Times building a Uint8Array directly from a plain array of byte values,
  // mirroring the one-step Buffer construction benchmark.
  const label = 'Create and load ArrayBufferView';
  console.time(label);
  for (let round = 0; round < rounds; round++) {
    const view = new Uint8Array(getRandomArray());
    void view; // only construction is being timed; the view is never read
  }
  console.timeEnd(label);
}

/**
 * Build a plain JavaScript Array filled with random byte values.
 *
 * @param {number} [size=arraySize] - Number of bytes to generate. Defaults to
 *   the file-level `arraySize`, so existing zero-argument calls are unchanged.
 * @returns {number[]} A regular Array (not a Buffer) of integers in 0..255.
 */
function getRandomArray(size = arraySize) {
  // randomBytes returns a Buffer; copy it into a regular Array so each
  // benchmark starts from a non-binary source representation.
  return Array.from(crypto.randomBytes(size));
}

In this setup, the "Random array generation" is the overhead added to create the data arrays to be tested, and I find that the Typed Arrays are much slower than the Buffers when treated this way. One could simply walk away and say, "Aha, yes, the Buffer objects are optimized for Node use!" But take a look at this: If we separate out the initialization and the loading of the data, we get:

{
  // Times allocating an empty Buffer and then copying the array into it one
  // index at a time. Buffer.allocUnsafe(size) replaces the deprecated
  // `new Buffer(size)`; skipping the zero-fill is safe here because every
  // byte is overwritten by the load loop before the Buffer is used.
  const label = 'Create, then load Buffer';
  console.time(label);
  for (let round = 0; round < rounds; round++) {
    const data = getRandomArray();
    const buff = Buffer.allocUnsafe(data.length);
    for (let x = 0; x < data.length; x++) {
      buff[x] = data[x];
    }
  }
  console.timeEnd(label);
}

{
  // Same as the index-accessor benchmark, but each byte is stored through
  // Buffer#writeUInt8 instead. Buffer.allocUnsafe(size) replaces the
  // deprecated `new Buffer(size)`; skipping the zero-fill is safe because
  // every byte is written by the load loop.
  const label = 'Create, then load Buffer (writeUInt8)';
  console.time(label);
  for (let round = 0; round < rounds; round++) {
    const data = getRandomArray();
    const buff = Buffer.allocUnsafe(data.length);
    for (let x = 0; x < data.length; x++) {
      buff.writeUInt8(data[x], x);
    }
  }
  console.timeEnd(label);
}

{
  // Times allocating an empty Uint8Array and then copying the array into it
  // one index at a time, mirroring the two-step Buffer benchmark.
  const label = 'Create, then load ArrayBufferView';
  console.time(label);
  for (let round = 0; round < rounds; round++) {
    const source = getRandomArray();
    const bytes = new Uint8Array(source.length);
    for (let pos = 0; pos < source.length; pos++) {
      bytes[pos] = source[pos];
    }
  }
  console.timeEnd(label);
}

In my tests, loading a Buffer object via the array index accessor is faster than feeding it an array to begin with, and faster than using the writeUInt8() method (by a hair). However, I see massive improvements in the ArrayBufferView object, putting it back on par with a Buffer object in terms of loading speed. So, Buffer objects are faster when accessed by index, but what if you're dealing with binary values larger than 8 bits? Buffers have read/write methods for larger bit sizes, but how do they stack up, given that we just saw that the writeUInt8() method is not quite as fast as array access?

console.log('32-bit Big-endian numbers:');

{
  // Times creating and loading a Buffer, then shuffling 32-bit big-endian
  // values around inside it with the Buffer read/write methods.
  const label = 'Node Buffer object';
  console.time(label);
  for (let round = 0; round < rounds; round++) {
    const data = getRandomArray();
    // Create. Buffer.allocUnsafe(size) replaces the deprecated
    // `new Buffer(size)`; every byte is overwritten by the load loop below,
    // so uninitialized memory is never read.
    const buff = Buffer.allocUnsafe(data.length);
    for (let x = 0; x < data.length; x++) {
      buff[x] = data[x]; // Load
    }

    // Manipulate: copy the 32-bit value found 5 bytes ahead into position x.
    // Stopping 10 bytes short of the end keeps the 4-byte read at x + 5
    // inside the Buffer.
    for (let x = 0; x < buff.length - 10; x++) {
      buff.writeUInt32BE(buff.readUInt32BE(x + 5), x);
    }
  }
  console.timeEnd(label);
}

{
  // Times creating and loading a Uint8Array, then shuffling 32-bit big-endian
  // values around through a DataView over the same underlying ArrayBuffer.
  const label = 'DataView';
  console.time(label);
  for (let round = 0; round < rounds; round++) {
    const source = getRandomArray();
    const bytes = new Uint8Array(source.length); // Create
    for (let pos = 0; pos < source.length; pos++) {
      bytes[pos] = source[pos]; // Load
    }

    // Manipulate: copy the 32-bit value found 5 bytes ahead into position pos.
    // Stopping 10 bytes short of the end keeps the 4-byte read at pos + 5
    // inside the view.
    const view = new DataView(bytes.buffer);
    for (let pos = 0; pos < view.byteLength - 10; pos++) {
      const ahead = view.getUint32(pos + 5);
      view.setUint32(pos, ahead, false);
    }
  }
  console.timeEnd(label);
}

Those tests come out quite differently for me on my test workstation, with the DataView taking over twice as long as the Buffer object. So it looks like in creation and loading, Typed Arrays are pretty much on par with Buffer objects, but once you start manipulating the data, they slow down a whole lot. So overall, Buffer objects are the better choice when working with binary data in the Node.js environment, unless someone can find a flaw in my test cases here? Or is there a way around the bottleneck in the data manipulation I ran into?