Use list of German nouns before geocoding

This commit is contained in:
Arne Schlüter 2015-02-07 14:53:27 +01:00
commit c09a1d78a2
4 changed files with 24735 additions and 11 deletions

View file

@ -119,7 +119,8 @@ def get_geoloc(query):
locations.append({
"lat": location["geometry"]["location"]["lat"],
"lng": location["geometry"]["location"]["lng"],
"confidence": confidence_map[location["geometry"]["location_type"]]
"confidence": confidence_map[location["geometry"]["location_type"]],
"returned_place": location["formatted_address"]
})
return locations

24715
german_nouns.txt Normal file

File diff suppressed because it is too large Load diff

View file

@ -4,7 +4,11 @@ from analyze import *
print("Start geocoding...")
start_time = time.time()
articles = Article.select()
articles = Article.select().where(Article.id < 701)
# Use our list of German nouns for filtering
with open("german_nouns.txt", "r") as f:
german_nouns = f.read().splitlines()
for article in articles:
potential = get_potential_places(article.place, article.description)
@ -14,18 +18,21 @@ for article in articles:
for place in places:
query = " ".join([word for (word, tag) in place])
print("Query: {}, Berlin".format(query))
if query in german_nouns:
print("Skipping {}".format(query))
else:
print("Query: {}, Berlin".format(query))
locations = get_geoloc(query)
locations = get_geoloc(query)
# TODO: Only insert matches that have a higher confidence than current
# ones
for location in locations:
location["article"] = article
location["match"] = query
Location.create(**location)
# TODO: Only insert matches that have a higher confidence than current
# ones
for location in locations:
location["article"] = article
location["match"] = query
Location.create(**location)
time.sleep(1)
time.sleep(1)
time_taken = time.time() - start_time
print("Geocoded {} articles in {} seconds".format(articles.count(), time_taken))

View file

@ -24,6 +24,7 @@ class Location(BaseModel):
lat = DoubleField()
lng = DoubleField()
match = CharField()
returned_place = CharField()
article = ForeignKeyField(Article)
class Category(BaseModel):