The log entries (logentry documents) were originally inserted by a Pylons application using
the mongokit package. Each GET or POST call to a controller is recorded using the following document model:
class LogEntry(Document):
    """One GET or POST call to a controller, stored in the 'logentry' collection."""

    collection_name = 'logentry'

    structure = {
        'host_url': unicode,
        'controller': unicode,
        'url': unicode,
        'query_string': unicode,
        'request_start': datetime.datetime,
        'request_end': datetime.datetime,
        'size': float,
        'is_xhr': bool,
        'method': unicode,
        'user_agent': unicode,
        'referrer': unicode,
        'session': unicode,
        'post_vars': dict,
        'ip': unicode,
    }

    # Pass the callable, not its result: datetime.datetime.now() would be
    # evaluated once when the class body runs, and every document would then
    # share that single frozen timestamp. MongoKit invokes callable defaults
    # when a document is created.
    default_values = {
        'request_start': datetime.datetime.now,
        'request_end': datetime.datetime.now,
        # 0.0 rather than 0 so the default matches the declared float type.
        'size': 0.0,
    }
The database in mongo is called "logdb" and the collection "logentry".
The code shown below illustrates the use of the node.js http client to asynchronously fetch a JSON response from the freegeoip.net REST API service, containing the geographic location of the ip value stored in the logentry document. Using the mongodb node.js driver, the record is then updated with a point containing longitude and latitude information.
Software versions:
mongod 1.8.1
node 0.4.5
node-mongodb-native
// Geocodes logentry documents that have no `loc` field yet: each document's
// ip is looked up through the freegeoip.net JSON API and the result is stored
// as loc: {lon, lat}. At most 10 documents are processed per run. Documents
// whose lookup fails keep no `loc` and are selected again on the next run.
GLOBAL.DEBUG = true;

var sys = require('sys');
var http = require('http');
var Db = require('mongodb').Db;
var Server = require('mongodb').Server;
var BSON = require('mongodb').BSONNative;

var db = new Db('logdb', new Server('127.0.0.1', 27017, {}), {native_parser: true});

// Make sure spatial index exists http://www.mongodb.org/display/DOCS/Geospatial+Indexing
// db.logentry.ensureIndex({loc: "2d"});

/**
 * Look up the geographic location of an ip address on freegeoip.net.
 * https://github.com/fiorix/freegeoip
 *
 * @param {string} ip - Address appended to the /json/ path.
 * @param {function} callback - Called exactly once as callback(error) or
 *     callback(null, geoip), where geoip is the parsed JSON response.
 */
function fetchGeoip(ip, callback) {
  var finished = false;

  // A request can emit 'error' after a partial response; report only once.
  function done(error, geoip) {
    if (finished) {
      return;
    }
    finished = true;
    callback(error, geoip);
  }

  var options = {
    port: 80,
    host: 'freegeoip.net',
    path: '/json/' + ip
  };

  var request = http.request(options, function(response) {
    response.setEncoding('utf8');
    var jsonResponse = '';

    response.on('data', function(chunk) {
      jsonResponse += chunk;
    });

    response.on('end', function() {
      if (response.statusCode !== 200) {
        done(new Error('freegeoip lookup for ' + ip + ' returned HTTP ' + response.statusCode));
        return;
      }

      var geoip;
      try {
        geoip = JSON.parse(jsonResponse);
      } catch (parseError) {
        done(parseError);
        return;
      }

      if (geoip === null || typeof geoip !== 'object') {
        done(new Error('freegeoip lookup for ' + ip + ' returned no JSON object'));
        return;
      }
      done(null, geoip);
    });
  });

  // Without this handler a DNS or connection failure would crash the process.
  request.on('error', done);
  request.end();
}

db.open(function(error, db) {
  if (error) {
    sys.puts(error);
    return;
  }

  // get the logentry collection
  db.collection('logentry', function(error, collection) {
    if (error) {
      sys.puts(error);
      db.close();
      return;
    }

    var pending = 0;         // lookups/updates still in flight
    var cursorDone = false;  // true once the cursor is exhausted or failed
    var closed = false;

    // Close the connection only after the cursor is drained AND every
    // outstanding lookup has finished; closing earlier would abort the
    // updates that are still running.
    function closeWhenIdle() {
      if (cursorDone && pending === 0 && !closed) {
        closed = true;
        db.close();
      }
    }

    function finishOne(error) {
      if (error) {
        sys.puts(error);
      }
      pending--;
      closeWhenIdle();
    }

    // Find all ips without geo location. (Only 10 at a time)
    // The filter must test `loc`, the field written below; otherwise
    // already-processed documents would be selected again on every run.
    collection.find({loc: {$exists: false}}, {'limit': 10}, function(error, cursor) {
      if (error) {
        sys.puts(error);
        cursorDone = true;
        closeWhenIdle();
        return;
      }

      cursor.each(function(error, logdoc) {
        // A null document marks the end of the cursor.
        if (error || logdoc == null) {
          if (error) {
            sys.puts(error);
          }
          cursorDone = true;
          closeWhenIdle();
          return;
        }

        pending++;
        fetchGeoip(logdoc.ip, function(error, geoip) {
          if (error) {
            finishOne(error);
            return;
          }

          var lon = Number(geoip.longitude);
          var lat = Number(geoip.latitude);
          if (isNaN(lon) || isNaN(lat)) {
            finishOne(new Error('No coordinates returned for ' + logdoc.ip));
            return;
          }

          sys.puts("POINT(" + geoip.longitude + " " + geoip.latitude + ")");
          console.log(geoip.ip);

          var updateCommand = { "$set": { 'loc': { lon: lon, lat: lat } } };
          console.log(sys.inspect(updateCommand));

          // multi: the location belongs to the ip, so set it on every
          // logentry with that ip rather than only the first match.
          collection.update({'ip': logdoc.ip}, updateCommand, {multi: true}, finishOne);
        });
      });
    });
  });
});
From the mongo console we can check that the logentry records now contain longitude/latitude coordinates. I will post a MapReduce analysis procedure later on.
MongoDB shell version: 1.8.1 connecting to: test > use logdb switched to db logdb > db.logentry.find({loc: {$exists: true}}) { "_id" : "LogEntry-000050c5-487a-421e-bf91-b062db988c7d", "controller" : "root", "host_url" : "http://12x.x4.x3.10x:8884", "ip" : "17x.2x.x6.x16", "is_xhr" : true, "loc" : { "lon" : -105.96, "lat" : 35.678 }, "method" : "POST", "post_vars" : { "node" : "Digital Orthophotography_|_2003 Color Infrared (CIR)_|_New Mexico (1m)", "end_date" : "", "filter" : "", "limit" : "25", "offset" : "0", "start_date" : "" }, "query_string" : "", "referrer" : "http://rgis.unm.edu/browsedata", "request_end" : ISODate("2010-01-26T13:59:14.019Z"), "request_start" : ISODate("2010-01-26T13:59:13.938Z"), "session" : "237211756113636361222701966583755313674", "size" : 6495, "url" : "browsedata/json/tree/themes", "user_agent" : "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 (.NET CLR 3.5.30729)" } ...
No comments:
Post a Comment