Tuesday, May 17, 2011

Geotagging records in mongoDB with node.js

We use the popular non-relational MongoDB database to store web traffic in custom logs.

The log entries (logentry documents) were originally inserted from a Pylons framework using
the mongokit package. Each GET or POST call to a controller is recorded following this document model:

class LogEntry(Document):
    """One recorded GET or POST call to a controller.

    Documents are stored in the ``logentry`` collection. ``structure`` maps
    each field name to the type the field must hold.
    """
    collection_name = 'logentry'
    structure = {
        'host_url': unicode,
        'controller' : unicode,
        'url' : unicode,
        'query_string' : unicode,
        'request_start': datetime.datetime,
        'request_end' : datetime.datetime,
        'size' : float,
        'is_xhr' : bool,
        'method': unicode,
        'user_agent': unicode,
        'referrer' : unicode,
        'session' : unicode,
        'post_vars': dict,
        'ip' : unicode
    }
    default_values = {
        # Pass the callable itself, not its result: calling now() here would
        # run once when the class is defined, and every entry would then
        # share that single timestamp.
        'request_start' : datetime.datetime.now,
        'request_end' : datetime.datetime.now,
        # 'size' is declared as float, so the default is a float as well.
        'size' : 0.0
     }

The database in mongo is called "logdb" and the collection "logentry".

The code shown below uses the node.js http client to asynchronously fetch a JSON response from the freegeoip.net REST API service, containing the geographic location of the ip value stored in the logentry document. Using the mongodb node.js driver, the record is then updated with a point containing longitude and latitude information.

Software versions:
mongod 1.8.1
node 0.4.5
node-mongodb-native

GLOBAL.DEBUG = true;

var sys = require('sys');
var http = require('http');

var Db = require('mongodb').Db;
var Server = require('mongodb').Server;
var BSON = require('mongodb').BSONNative;

var db = new Db('logdb', new Server('127.0.0.1', 27017, {}), {native_parser: true});

// Make sure spatial index exists http://www.mongodb.org/display/DOCS/Geospatial+Indexing
// db.logentry.ensureIndex({loc: "2d"});

/**
 * Convert one coordinate taken from the geo-lookup response into a number.
 *
 * @param {*} value - raw longitude or latitude field from the response.
 * @returns {number} the numeric coordinate, or NaN when the field is missing
 *     or empty (Number(null) and Number('') would otherwise yield 0).
 */
function toCoordinate(value) {
    if (value === null || value === undefined || value === '') {
        return NaN;
    }
    return Number(value);
}

/**
 * Build the {loc: {lon, lat}} fragment stored on a log entry from the JSON
 * text returned by the geo-lookup service.
 *
 * @param {string} jsonText - complete response body.
 * @returns {Object|null} the fragment, or null when the body is not valid
 *     JSON or does not carry numeric longitude/latitude values.
 */
function parseLocation(jsonText) {
    var geoip;
    try {
        geoip = JSON.parse(jsonText);
    } catch (parseError) {
        return null;
    }
    if (geoip === null || typeof geoip !== 'object') {
        return null;
    }
    var lon = toCoordinate(geoip.longitude);
    var lat = toCoordinate(geoip.latitude);
    if (isNaN(lon) || isNaN(lat)) {
        return null;
    }
    return {'loc': {lon: lon, lat: lat}};
}

/**
 * Look up the location of one log entry's ip and store it on that entry.
 *
 * @param {Object} collection - the logentry collection.
 * @param {Object} logdoc - the log entry to geotag; its ip and _id are read.
 * @param {Function} done - called exactly once when the lookup and update
 *     have finished, whether they succeeded or failed.
 */
function geotagLogEntry(collection, logdoc, done) {
    var finished = false;

    // Report a failure (if any) and signal completion exactly once, even if
    // both the request and the response emit events for the same failure.
    function finish(error) {
        if (finished) {
            return;
        }
        finished = true;
        if (error) {
            sys.puts(error);
        }
        done();
    }

    if (!logdoc.ip) {
        finish('Log entry ' + logdoc._id + ' has no ip to look up');
        return;
    }

    // https://github.com/fiorix/freegeoip
    var options = {
        port: 80,
        host: 'freegeoip.net',
        path: '/json/' + encodeURIComponent(logdoc.ip)
    };

    var request = http.request(options, function(response) {
        response.setEncoding('utf8');
        var jsonResponse = '';
        response.on('data', function (chunk) {
            jsonResponse += chunk;
        });
        response.on('end', function(){
            if (response.statusCode !== 200) {
                finish('freegeoip.net returned HTTP ' + response.statusCode + ' for ' + logdoc.ip);
                return;
            }
            var loc = parseLocation(jsonResponse);
            if (loc === null) {
                finish('No usable coordinates returned for ' + logdoc.ip);
                return;
            }
            sys.puts("POINT("+ loc.loc.lon + " " + loc.loc.lat + ")");
            console.log(logdoc.ip);
            var updateCommand = { "$set": loc };
            console.log(sys.inspect(updateCommand));
            // Update by _id so the document that was found is the one that
            // gets tagged; matching on ip could hit another entry that shares
            // the address. safe:true asks the driver to wait for the server's
            // acknowledgement before invoking the callback.
            collection.update({'_id': logdoc._id}, updateCommand, {safe: true}, finish);
        });
    });
    request.on('error', finish);
    request.end();
}

db.open(function(error, client) {
    if (error) {
        sys.puts(error);
        return;
    }
    // get the logentry collection
    client.collection('logentry', function(error, collection){
        if (error) {
            sys.puts(error);
            client.close();
            return;
        }

        var pending = 0;         // lookups started but not yet finished
        var cursorDone = false;  // true once the cursor has no more documents
        var closed = false;

        // Close the connection only after the cursor is exhausted AND every
        // in-flight lookup has completed; closing after the first update
        // would cut off the remaining ones.
        function closeWhenIdle() {
            if (!closed && cursorDone && pending === 0) {
                closed = true;
                client.close();
            }
        }

        function lookupFinished() {
            pending -= 1;
            closeWhenIdle();
        }

        // Find all entries without a location (only 10 at a time). The filter
        // uses `loc` because that is the field the update writes.
        collection.find({loc: {$exists: false}}, {'limit': 10}, function(error, cursor){
            if (error) {
                sys.puts(error);
                cursorDone = true;
                closeWhenIdle();
                return;
            }
            cursor.each(function(error, logdoc){
                if (error) {
                    sys.puts(error);
                    cursorDone = true;
                    closeWhenIdle();
                    return;
                }
                if (logdoc == null) {
                    // A null document marks the end of the cursor.
                    cursorDone = true;
                    closeWhenIdle();
                    return;
                }
                pending += 1;
                geotagLogEntry(collection, logdoc, lookupFinished);
            });
        });
    });
});

From the mongo console we can check that the logentry records now contain locatable coordinates. I will post a MapReduce analysis procedure later on.

MongoDB shell version: 1.8.1
connecting to: test
> use logdb                               
switched to db logdb
> db.logentry.find({loc: {$exists: true}})
{ "_id" : "LogEntry-000050c5-487a-421e-bf91-b062db988c7d", 
  "controller" : "root", 
  "host_url" : "http://12x.x4.x3.10x:8884", 
  "ip" : "17x.2x.x6.x16", 
  "is_xhr" : true, 
  "loc" : { "lon" : -105.96, "lat" : 35.678 }, 
  "method" : "POST", 
  "post_vars" : { "node" : "Digital Orthophotography_|_2003 Color Infrared (CIR)_|_New Mexico (1m)", "end_date" : "", "filter" : "", "limit" : "25", "offset" : "0", "start_date" : "" }, 
  "query_string" : "", 
  "referrer" : "http://rgis.unm.edu/browsedata", 
  "request_end" : ISODate("2010-01-26T13:59:14.019Z"), 
  "request_start" : ISODate("2010-01-26T13:59:13.938Z"), 
  "session" : "237211756113636361222701966583755313674", 
  "size" : 6495, 
  "url" : "browsedata/json/tree/themes", 
  "user_agent" : "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 (.NET CLR 3.5.30729)" 
}

...

Saturday, May 14, 2011

Converting an Ontology into a Category

Category Theory is the mathematical theory of structure. We are interested in studying the practical aspects of the categorical completion of ontologies, a process of transforming ontologies into categories. This process is not generic (for reasons not explained here): apart from the main axioms a category must satisfy, we require the presence of other elements, such as all colimits and a terminal object.

The first thing to do is to represent the identity morphism that all objects in a category must have. This can be done simply by adding a reflexive and transitive object property hasIdentityMorphism with range Thing and domain Thing, so that all things have such an identity property.

A second task to accomplish is to endow an ontology O with a terminal object class to represent the terminal object in the categorical completion C of O. I took the Pizza ontology and added a subclass TerminalObject to Thing. Also, in order to ensure "all things" have a terminal object, a transitive object property (terminalProperty) was added.

I use the Java OWLAPI to materialize these operations. These are the relevant lines of code for the first two steps explained above:

// Load the Pizza ontology and obtain the factory used to build OWL entities and axioms.
OWLOntologyManager manager = OWLManager.createOWLOntologyManager();
OWLDataFactory factory = manager.getOWLDataFactory();
IRI iri = IRI.create("http://www.co-ode.org/ontologies/pizza/pizza.owl");
OWLOntology pizzaOntology = manager.loadOntologyFromOntologyDocument(iri);

OWLClass Thing = factory.getOWLClass(IRI.create("http://www.w3.org/2002/07/owl#Thing"));

// New class and object property representing the terminal object.
OWLClass TerminalObject = factory.getOWLClass(IRI.create(iri + "#TerminalObject"));
OWLObjectProperty terminalProperty = factory.getOWLObjectProperty(IRI.create(iri + "#hasTerminalObject"));

// Declare the property transitive.
OWLTransitiveObjectPropertyAxiom TerminalObjectProperty = factory.getOWLTransitiveObjectPropertyAxiom(terminalProperty);
manager.addAxiom(pizzaOntology, TerminalObjectProperty);

// Domain Thing, range TerminalObject, and TerminalObject as a subclass of Thing.
Set<OWLAxiom> terminalAxioms = new HashSet<OWLAxiom>();
terminalAxioms.add(factory.getOWLObjectPropertyDomainAxiom(terminalProperty, Thing));
terminalAxioms.add(factory.getOWLObjectPropertyRangeAxiom(terminalProperty, TerminalObject));
terminalAxioms.add(factory.getOWLSubClassOfAxiom(TerminalObject, Thing));

manager.addAxioms(pizzaOntology, terminalAxioms);

// Save the transformed ontology in OWL/XML format.
File file = new File("/tmp/catcomp_pizza.owl");
OWLXMLOntologyFormat owlxmlFormat = new OWLXMLOntologyFormat();
manager.saveOntology(pizzaOntology, owlxmlFormat, IRI.create(file.toURI()));

This is a screenshot taken from a Protege/OntoGraf view of the newly transformed ontology.