mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-07 03:33:39 +02:00
38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
import time
|
|
from models import *
|
|
from analyze import *
|
|
|
|
print("Start geocoding...")
|
|
start_time = time.time()
|
|
articles = Article.select().where(Article.id < 701)
|
|
|
|
# use our list of german nouns for filtering
|
|
with open("german_nouns.txt", "r") as f:
|
|
german_nouns = f.read().splitlines()
|
|
|
|
for article in articles:
|
|
potential = get_potential_places(article.place, article.description)
|
|
places = improve_potential_places(potential)
|
|
|
|
print("Found places: {}".format(places))
|
|
|
|
for place in places:
|
|
query = " ".join([word for (word, tag) in place])
|
|
if query in german_nouns:
|
|
print("Skipping {}".format(query))
|
|
else:
|
|
print("Query: {}, Berlin".format(query))
|
|
|
|
locations = get_geoloc(query)
|
|
|
|
# TODO: Only insert matches that have a higher confidence than current
|
|
# ones
|
|
for location in locations:
|
|
location["article"] = article
|
|
location["match"] = query
|
|
Location.create(**location)
|
|
|
|
time.sleep(1)
|
|
|
|
time_taken = time.time() - start_time
|
|
print("Geocoded {} articles in {} seconds".format(articles.count(), time_taken))
|