mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Use list of German nouns before geocoding
This commit is contained in:
parent
9cbff4aa30
commit
c09a1d78a2
4 changed files with 24735 additions and 11 deletions
27
locator.py
27
locator.py
|
|
@ -4,7 +4,11 @@ from analyze import *
|
|||
|
||||
print("Start geocoding...")
|
||||
start_time = time.time()
|
||||
articles = Article.select()
|
||||
articles = Article.select().where(Article.id < 701)
|
||||
|
||||
# use our list of german nouns for filtering
|
||||
with open("german_nouns.txt", "r") as f:
|
||||
german_nouns = f.read().splitlines()
|
||||
|
||||
for article in articles:
|
||||
potential = get_potential_places(article.place, article.description)
|
||||
|
|
@ -14,18 +18,21 @@ for article in articles:
|
|||
|
||||
for place in places:
|
||||
query = " ".join([word for (word, tag) in place])
|
||||
print("Query: {}, Berlin".format(query))
|
||||
if query in german_nouns:
|
||||
print("Skipping {}".format(query))
|
||||
else:
|
||||
print("Query: {}, Berlin".format(query))
|
||||
|
||||
locations = get_geoloc(query)
|
||||
locations = get_geoloc(query)
|
||||
|
||||
# TODO: Only insert matches that have a higher confidence than current
|
||||
# ones
|
||||
for location in locations:
|
||||
location["article"] = article
|
||||
location["match"] = query
|
||||
Location.create(**location)
|
||||
# TODO: Only insert matches that have a higher confidence than current
|
||||
# ones
|
||||
for location in locations:
|
||||
location["article"] = article
|
||||
location["match"] = query
|
||||
Location.create(**location)
|
||||
|
||||
time.sleep(1)
|
||||
time.sleep(1)
|
||||
|
||||
time_taken = time.time() - start_time
|
||||
print("Geocoded {} articles in {} seconds".format(articles.count(), time_taken))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue