Thursday, July 14, 2011

Following a bug fix to map/reduce (M/R) processing in mongo 1.8.1, I am posting a follow-up to the logdb collection analysis.

Software versions:
mongod 1.8.2
node 1.4.5
node-mongodb-native

A node.js script node_mongodb_log_app.js is shown below.

// Set the DEBUG flag on the global object.
GLOBAL.DEBUG = true;

var sys = require('sys');
var mongodb = require('mongodb');

// Driver classes, pulled off the single required module object.
var Db = mongodb.Db;
var Connection = mongodb.Connection;
var Server = mongodb.Server;
var BSON = mongodb.BSONNative;

// Handle for the 'logdb' database on the local mongod (not opened yet).
var server = new Server('127.0.0.1', 27017, {});
var db = new Db('logdb', server, {native_parser: true});

// Optional command-line arguments: longitude first, then latitude.
var lon = null;
if(process.ARGV[2]){
    lon = Number(process.ARGV[2]);
}

var lat = null;
if(process.ARGV[3]){
    lat = Number(process.ARGV[3]);
}

// With both coordinates present, restrict the map/reduce input to entries
// near that point; otherwise filter stays null and all entries are used.
var filter = null;
if(lon != null && lat != null){
    filter = { query: {  loc : { $near: [lon, lat]}}};
    sys.puts(sys.inspect(filter));
}

// Open the connection, run the map/reduce over the 'logentry' collection and
// print the reduced value(s). The connection is closed once the result cursor
// has been fully read, or as soon as an error is reported.
db.open(function(error, db) {
    if(error){
        sys.puts('Could not open database: ' + sys.inspect(error));
        return;
    }

    // Map step: emit every entry's size under one shared key (1), so that
    // all sizes end up in a single reduced value.
    var map_request_size = function(){
        emit(1, this.size);
    }

    // Reduce step: add up all values emitted for a key.
    var reduce = function (k, vals) {
        var sum = 0;
        vals.forEach(function (v) {sum += v;});
        return sum;
    }

    // Report a failure and close the connection.
    var fail = function(error){
        sys.puts('Error: ' + sys.inspect(error));
        db.close();
    }

    // Shared mapReduce callback: print the value of every result document.
    var print_results = function(error, results){
        if(error){
            return fail(error);
        }
        results.find(function(error, cursor){
            if(error){
                return fail(error);
            }
            cursor.each(function(error, mrdoc){
                if(error){
                    return fail(error);
                }
                if(mrdoc != null){
                    sys.puts(mrdoc.value);
                    return;
                }
                // A null document signals the end of the cursor; only now is
                // it safe to close the connection (closing it per document
                // would cut the iteration short and close twice).
                db.close();
            });
        });
    }

    // query all results, total bandwidth
    db.collection('logentry', function(error, collection){
        if(error){
            return fail(error);
        }
        if(filter != null){
            // restrict the input documents with the geospatial query
            collection.mapReduce(map_request_size, reduce, filter, print_results);
        }
        else{
            collection.mapReduce(map_request_size, reduce, print_results);
        }
    });

});

We want to add up the sizes of all file downloads into a single total number of bytes. To do that, we run the script without parameters:

$ node node_mongodb_log_app.js
204612741925

If we want to analyze consumption near a geographic location, we pass the longitude and then the latitude as arguments, like this:

$ node node_mongodb_log_app.js -108 32.6
{ query: { loc: { '$near': [Object] } } }
30435795