Friday, March 30, 2012

Great circle flights from Beijing, arcpy version

In the previous post, we found a pure Python solution for generating great circles from a common point, specifically from Beijing airports to all other airports in the world that have commercial flight routes from them. This situation is modeled by finding geodesic arcs, i.e. great circles from point to point on the spheroid.


In this post we present a solution implemented only with arcpy (ESRI's ArcGIS 10.0 Python geoprocessing package):



Here you can download the complete script with data. Change the root of your workspace from "C:\pythonGIS" accordingly and unzip the files in the new root defined.

Friday, March 23, 2012

Generating great circles from a common origin with Python

Following Claudia Engel's blog post: Great circles on a recentered worldmap, in ggplot, in which a solution in R is provided, I was asked to build an alternative solution in Python. I used the following components:

Python 2.7
pandas 0.7.0.dev
pyproj 1.9.0



The CSV data files with airport locations and flight information from Beijing (PEK) airport are airports_new.csv and PEK.csv, respectively. These data were derived from openflights.org.

The KML generated is easily visualized in Google Earth:


As expected, the vector layer is displayed poorly in other GIS desktop clients. See for instance how it looks in QGIS:


I decided not to alter the coordinates, either to recenter the world map around Beijing or to fix the split-polygon problem. Google Earth does the trick.

Produce the output KML file from command line (UNIX) like so:
$ python great_circles.py > out.kml

Here is the compressed KML output file.


Alternative partial implementations


Shown next is how I use pyKML (0.1.0) to generate 'almost' the same output above:

Thursday, July 14, 2011

After a bug fix in M/R processing in mongo 1.8.1, I am posting a follow up on the logdb collection analysis.

Software versions:
mongod 1.8.2
node 1.4.5
node-mongodb-native

A node.js script node_mongodb_log_app.js is shown below.

GLOBAL.DEBUG = true;

var sys = require('sys');

var Db = require('mongodb').Db,
    Connection = require('mongodb').Connection,
    Server = require('mongodb').Server,
    BSON = require('mongodb').BSONNative;

var db = new Db('logdb', new Server('127.0.0.1', 27017, {}), {native_parser: true});

// Optional command line arguments: <longitude> <latitude>.
// When both are given, the map/reduce is restricted to the log entries
// whose "loc" field is near that point.
var filter = null;
var lon = (process.ARGV[2]) ? Number(process.ARGV[2]): null;
var lat = (process.ARGV[3]) ? Number(process.ARGV[3]): null;

if(lat != null && lon != null){
    filter = { query: {  loc : { $near: [lon, lat]}}};
    sys.puts(sys.inspect(filter));
}

db.open(function(error, db) {
    if(error){
        // Nothing was opened, so there is nothing to close.
        sys.puts(error);
        return;
    }

    // Close the connection exactly once, whichever path gets here first.
    var closed = false;
    var close_db = function(){
        if(!closed){
            closed = true;
            db.close();
        }
    };

    // Report an error and release the connection so the process can exit.
    var fail = function(error){
        sys.puts(error);
        close_db();
    };

    // Map step: emit the size of every log entry under one shared key,
    // so the reduce step produces a single grand total.
    var map_request_size = function(){
        emit(1, this.size);
    }

    // Reduce step: add up all sizes emitted for a key.
    var reduce = function (k, vals) {
        var sum = 0;
        vals.forEach(function (v) {sum += v;});
        return sum;
    }

    // Print every value of the map/reduce result collection, then close
    // the connection. The connection is closed only once the cursor hands
    // back null (no more documents), not after each document.
    var print_results = function(error, results){
        if(error){
            return fail(error);
        }
        results.find(function(error, cursor){
            if(error){
                return fail(error);
            }
            cursor.each(function(error, mrdoc){
                if(error){
                    return fail(error);
                }
                if(mrdoc != null){
                    sys.puts(mrdoc.value);
                    return;
                }
                close_db();
            });
        });
    };

    // query all results, total bandwidth
    db.collection('logentry', function(error, collection){
        if(error){
            return fail(error);
        }
        if(filter != null){
            collection.mapReduce(map_request_size, reduce, filter, print_results);
        }
        else{
            collection.mapReduce(map_request_size, reduce, print_results);
        }
    });

});

We want to accumulate all file downloads into a single total number of bytes. For that we execute the script without parameters:

$ node node_mongodb_log_app.js
204612741925

If we want to analyze consumption near a geographic location, we issue a call like this:

$ node node_mongodb_log_app.js -108 32.6
{ query: { loc: { '$near': [Object] } } }
30435795

Tuesday, May 17, 2011

Geotagging records in mongoDB with node.js

We use the popular non-relational MongoDB database to store web traffic in custom logs.

The log entries (logentry documents) were originally inserted from a Pylons framework using
the mongokit package. Each GET or POST call to a controller is recorded following this document model:

class LogEntry(Document):
    """One web request (GET or POST call to a controller) recorded in the log.

    ``structure`` declares the type of every field; ``default_values``
    supplies the values used when a field is not set explicitly.
    """
    # Name of the collection these documents are stored in.
    collection_name = 'logentry'
    structure = {
        'host_url': unicode,
        'controller' : unicode,
        'url' : unicode,
        'query_string' : unicode,
        'request_start': datetime.datetime,
        'request_end' : datetime.datetime,
        'size' : float,
        'is_xhr' : bool,
        'method': unicode,
        'user_agent': unicode,
        'referrer' : unicode,
        'session' : unicode,
        'post_vars': dict,
        'ip' : unicode
    }
    default_values = {
        # Pass the callable, not its result: calling now() here would be
        # evaluated once, when the class is defined, and every document
        # would share that same frozen timestamp.
        'request_start' : datetime.datetime.now,
        'request_end' : datetime.datetime.now,
        # A float literal, to agree with the declared type of 'size'.
        'size' : 0.0
    }

The database in mongo is called "logdb" and the collection "logentry".

The code shown below illustrates the use of the node.js http client to asynchronously fetch a JSON response from the freegeoip.net REST API service, containing the geographic location of the ip value stored in the logentry document. Using the mongodb node.js driver, the record is then updated with a point containing longitude and latitude information.

Software versions:
mongod 1.8.1
node 1.4.5
node-mongodb-native

GLOBAL.DEBUG = true;

var sys = require('sys');
var http = require('http');

var Db = require('mongodb').Db;
var Server = require('mongodb').Server;
var BSON = require('mongodb').BSONNative;

var db = new Db('logdb', new Server('127.0.0.1', 27017, {}), {native_parser: true});

// Make sure spatial index exists http://www.mongodb.org/display/DOCS/Geospatial+Indexing
// db.logentry.ensureIndex({loc: "2d"});

db.open(function(error, db) {
    if(error){
        // Nothing was opened, so there is nothing to close.
        sys.puts(error);
        return;
    }
    // get the logentry collection
    db.collection('logentry', function(error, collection){
        if(error){
            sys.puts(error);
            db.close();
            return;
        }

        // Several lookups run concurrently, so the connection may only be
        // closed once the cursor is exhausted AND every lookup has finished.
        var pending = 0;
        var cursorDone = false;
        var closed = false;

        var closeWhenIdle = function(){
            if(cursorDone && pending === 0 && !closed){
                closed = true;
                db.close();
            }
        };

        // Look up the location of one log entry's ip and store it in the
        // entry's "loc" field.
        var geotag = function(logdoc){
            pending += 1;

            // Mark this lookup as finished (at most once), report any error.
            var settled = false;
            var settle = function(error){
                if(settled){
                    return;
                }
                settled = true;
                if(error){
                    sys.puts(error);
                }
                pending -= 1;
                closeWhenIdle();
            };

            // https://github.com/fiorix/freegeoip
            var options = {
                port: 80,
                host: 'freegeoip.net',
                path: '/json/' + logdoc.ip
            };

            var request = http.request(options, function(response) {
                response.setEncoding('utf8');
                var jsonResponse = '';
                response.on('data', function (chunk) {
                    jsonResponse += chunk;
                });
                response.on('end', function(){
                    var geoip;
                    try{
                        geoip = JSON.parse(jsonResponse);
                    }
                    catch(parseError){
                        // e.g. an HTML error page instead of JSON
                        return settle(parseError);
                    }
                    var lon = Number(geoip.longitude);
                    var lat = Number(geoip.latitude);
                    if(isNaN(lon) || isNaN(lat)){
                        // Do not store a point without usable coordinates.
                        return settle('No coordinates returned for ' + logdoc.ip);
                    }
                    sys.puts("POINT("+ geoip.longitude + " " + geoip.latitude + ")");
                    console.log(geoip.ip);
                    var loc = {
                        'loc': {
                            lon: lon,
                            lat: lat
                        }
                    };
                    var updateCommand = { "$set": loc };
                    console.log(sys.inspect(updateCommand));
                    // Select by _id so the document that was fetched is the
                    // one updated; selecting by ip could hit another entry
                    // from the same address that already has a location.
                    collection.update({'_id': logdoc._id}, updateCommand, settle);
                });
            });
            request.on('error', settle);
            request.end();
        };

        // Find all entries without geo location. (Only 10 at a time)
        // The test is on "loc" because that is the field written above;
        // testing a field that is never set would return the same
        // entries on every run.
        collection.find({loc: {$exists: false}}, {'limit': 10}, function(error, cursor){
            if(error){
                sys.puts(error);
                cursorDone = true;
                closeWhenIdle();
                return;
            }
            cursor.each(function(error, logdoc){
                if(error){
                    sys.puts(error);
                    cursorDone = true;
                    closeWhenIdle();
                    return;
                }
                if(logdoc != null){
                    geotag(logdoc);
                    return;
                }
                // null: no more entries to hand out
                cursorDone = true;
                closeWhenIdle();
            });
        });
    });
});

From the mongo console we check that the logentry records now contain locatable coordinates. I will post a MapReduce analysis procedure later on.

MongoDB shell version: 1.8.1
connecting to: test
> use logdb                               
switched to db logdb
> db.logentry.find({loc: {$exists: true}})
{ "_id" : "LogEntry-000050c5-487a-421e-bf91-b062db988c7d", 
  "controller" : "root", 
  "host_url" : "http://12x.x4.x3.10x:8884", 
  "ip" : "17x.2x.x6.x16", 
  "is_xhr" : true, 
  "loc" : { "lon" : -105.96, "lat" : 35.678 }, 
  "method" : "POST", 
  "post_vars" : { "node" : "Digital Orthophotography_|_2003 Color Infrared (CIR)_|_New Mexico (1m)", "end_date" : "", "filter" : "", "limit" : "25", "offset" : "0", "start_date" : "" }, 
  "query_string" : "", 
  "referrer" : "http://rgis.unm.edu/browsedata", 
  "request_end" : ISODate("2010-01-26T13:59:14.019Z"), 
  "request_start" : ISODate("2010-01-26T13:59:13.938Z"), 
  "session" : "237211756113636361222701966583755313674", 
  "size" : 6495, 
  "url" : "browsedata/json/tree/themes", 
  "user_agent" : "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 (.NET CLR 3.5.30729)" 
}

...

Saturday, May 14, 2011

Converting an Ontology into a Category

Category Theory is the mathematical theory of structure. We are interested in studying the practical aspects of categorical completion of ontologies, a process of transforming ontologies into categories. This process is not generic (for reasons not explained here): apart from the main axioms a category must satisfy, we require the presence of other elements such as all colimits and a terminal object.

The first thing to do is to represent the identity morphism that all objects in a category must have. This can be done simply by adding a reflexive and transitive object property hasIdentityMorphism with range Thing and domain Thing, so that all things have such an identity property.

A second task to accomplish is to endow an ontology O with a terminal object class to represent the terminal object in the categorical completion C of O. I took the Pizza ontology and added a subclass TerminalObject to Thing. Also, in order to ensure "all things" have a terminal object, a transitive object property (terminalProperty) was added.

I use the Java OWLAPI to materialize these operations. These are the relevant lines of code for the two first steps explained above:

// Load the Pizza ontology from its published location.
OWLOntologyManager manager = OWLManager.createOWLOntologyManager();
IRI iri = IRI.create("http://www.co-ode.org/ontologies/pizza/pizza.owl");
OWLOntology pizzaOntology = manager.loadOntologyFromOntologyDocument(iri);

// NOTE(review): `factory` is not declared in this excerpt; presumably it is
// the manager's OWLDataFactory obtained elsewhere - confirm.
OWLClass Thing = factory.getOWLClass(IRI.create("http://www.w3.org/2002/07/owl#Thing"));

// New class and object property, both named inside the ontology's own IRI.
OWLClass TerminalObject = factory.getOWLClass(IRI.create(iri + "#TerminalObject"));
OWLObjectProperty terminalProperty = factory.getOWLObjectProperty(IRI.create(iri + "#hasTerminalObject"));
        
// Declare the property transitive.
OWLTransitiveObjectPropertyAxiom TerminalObjectProperty = factory.getOWLTransitiveObjectPropertyAxiom(terminalProperty);
manager.addAxiom(pizzaOntology, TerminalObjectProperty);
// Typed set instead of a raw Set, so only axioms can be collected.
// OWLAxiom lives in org.semanticweb.owlapi.model, like the other OWL types used here.
Set<OWLAxiom> terminalAxioms = new HashSet<OWLAxiom>();
        
// The property goes from Thing to TerminalObject, and TerminalObject is
// itself a subclass of Thing.
terminalAxioms.add(factory.getOWLObjectPropertyDomainAxiom(terminalProperty, Thing));
terminalAxioms.add(factory.getOWLObjectPropertyRangeAxiom(terminalProperty, TerminalObject));
terminalAxioms.add(factory.getOWLSubClassOfAxiom(TerminalObject, Thing));
        
manager.addAxioms(pizzaOntology, terminalAxioms);

// Save the transformed ontology.
// NOTE(review): `owlxmlFormat` is not declared in this excerpt; presumably an
// OWL/XML ontology format instance created elsewhere - confirm.
File file = new File("/tmp/catcomp_pizza.owl");
manager.saveOntology(pizzaOntology, owlxmlFormat, IRI.create(file.toURI()));

This is a screenshot taken from a Protege/OntoGraf view of the new transformed ontology.