diff --git a/analyze.py b/analyze.py
index ffdd7f8..0f37d88 100644
--- a/analyze.py
+++ b/analyze.py
@@ -96,8 +96,37 @@ def get_categories(article_body):
     return found_categories or ['other']
 
 def get_geoloc(query):
-    encoded_query = urlencode(query + ", Berlin")
-    url = "http://nominatim.openstreetmap.org/search?q=" + encoded_query + " + "&countrycodes=de&format=json&limit=1"
-    r = requests.get(url)
+    """Geocode a Berlin place name with the Google Geocoding API.
+
+    Returns a list of dicts with the keys "lat", "lng" and "confidence",
+    one per result; the list is empty when the API returns no results.
+    """
+    # Maps Google's location_type to a confidence score (higher is more confident)
+    confidence_map = {
+        "ROOFTOP": 10,
+        "RANGE_INTERPOLATED": 7,
+        "GEOMETRIC_CENTER": 4,
+        "APPROXIMATE": 1
+    }
 
-    return(r.json()[0]["lat"], r.json()[0]["lon"])
+    params = {
+        "address": query + ", Berlin",
+        "components": "country:DE"
+    }
+
+    url = "https://maps.googleapis.com/maps/api/geocode/json?" + urlencode(params)
+    # A timeout keeps a stalled connection from blocking the caller forever
+    response = requests.get(url, timeout=10)
+    response.raise_for_status()
+
+    locations = []
+    for location in response.json().get("results", []):
+        geometry = location["geometry"]
+        locations.append({
+            "lat": geometry["location"]["lat"],
+            "lng": geometry["location"]["lng"],
+            # Unknown location types score 0 instead of raising KeyError
+            "confidence": confidence_map.get(geometry["location_type"], 0)
+        })
+
+    return locations
diff --git a/locator.py b/locator.py
new file mode 100644
index 0000000..3f06c6a
--- /dev/null
+++ b/locator.py
@@ -0,0 +1,31 @@
+"""Geocode the places found in stored articles and save them as Locations."""
+import time
+
+from analyze import get_geoloc, get_potential_places, improve_potential_places
+from models import Article, Location
+
+
+def main():
+    """Look up every place of the first five articles and store the hits."""
+    for article in Article.select().limit(5):
+        potential = get_potential_places(article.place, article.description)
+        places = improve_potential_places(potential)
+
+        print("Found places: {}".format(places))
+
+        for place in places:
+            # A place is a sequence of (word, tag) pairs; only the words are queried
+            query = " ".join(word for (word, tag) in place)
+            print("Query: {}, Berlin".format(query))
+
+            for location in get_geoloc(query):
+                location["article"] = article
+                location["match"] = query
+                Location.create(**location)
+
+            # Throttling between geocoding queries is currently disabled:
+            # time.sleep(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/models.py b/models.py
index 78aa901..9a2c85f 100644
--- a/models.py
+++ b/models.py
@@ -16,7 +16,19 @@ class Article(BaseModel):
     description = TextField()
     hash = BlobField(index=True)
 
+class Location(BaseModel):
+    """
+    A location describes the place where an incident happened
+    """
+    # Score derived from the geocoder's location_type (higher is more confident)
+    confidence = IntegerField()
+    lat = DoubleField()
+    lng = DoubleField()
+    # The query string that was geocoded to obtain this location
+    match = CharField()
+    article = ForeignKeyField(Article)
+
 # Set up the tables
 def create_tables():
     db.connect()
-    db.create_tables([Article])
+    db.create_tables([Article, Location])
diff --git a/tagger.py b/tagger.py
deleted file mode 100644
index d62efac..0000000
--- a/tagger.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from nltk.tag.stanford import POSTagger
-from models import Article
-from analyze import *
-
-tagger = POSTagger('./stanford-postagger-full-2014-10-26/models/german-fast.tagger',
-                   './stanford-postagger-full-2014-10-26/stanford-postagger-3.5.0.jar',
-                   'UTF-8')
-
-for article in Article.select().limit(100):
-    potential = get_potential_places(article.place, article.description)
-    places = improve_potential_places(potential)
-
-    print(article.place)
-    print(article.description)
-    print()
-    print("Potential: " + str(potential))
-    print("Improved: " + str(places))