berliner-winter/locator.py
2015-02-07 14:53:27 +01:00

38 lines
1.2 KiB
Python

import time
from models import *
from analyze import *
print("Start geocoding...")
start_time = time.time()
articles = Article.select().where(Article.id < 701)
# use our list of german nouns for filtering
with open("german_nouns.txt", "r") as f:
german_nouns = f.read().splitlines()
for article in articles:
potential = get_potential_places(article.place, article.description)
places = improve_potential_places(potential)
print("Found places: {}".format(places))
for place in places:
query = " ".join([word for (word, tag) in place])
if query in german_nouns:
print("Skipping {}".format(query))
else:
print("Query: {}, Berlin".format(query))
locations = get_geoloc(query)
# TODO: Only insert matches that have a higher confidence than current
# ones
for location in locations:
location["article"] = article
location["match"] = query
Location.create(**location)
time.sleep(1)
time_taken = time.time() - start_time
print("Geocoded {} articles in {} seconds".format(articles.count(), time_taken))