Use list of German nouns before geocoding

This commit is contained in:
Arne Schlüter 2015-02-07 14:53:27 +01:00
commit c09a1d78a2
4 changed files with 24735 additions and 11 deletions

View file

@@ -119,7 +119,8 @@ def get_geoloc(query):
locations.append({
"lat": location["geometry"]["location"]["lat"],
"lng": location["geometry"]["location"]["lng"],
"confidence": confidence_map[location["geometry"]["location_type"]]
"confidence": confidence_map[location["geometry"]["location_type"]],
"returned_place": location["formatted_address"]
})
return locations

24715
german_nouns.txt Normal file

File diff suppressed because it is too large Load diff

View file

@@ -4,7 +4,11 @@ from analyze import *
print("Start geocoding...")
start_time = time.time()
articles = Article.select()
articles = Article.select().where(Article.id < 701)
# use our list of german nouns for filtering
with open("german_nouns.txt", "r") as f:
german_nouns = f.read().splitlines()
for article in articles:
potential = get_potential_places(article.place, article.description)
@@ -14,6 +18,9 @@ for article in articles:
for place in places:
query = " ".join([word for (word, tag) in place])
if query in german_nouns:
print("Skipping {}".format(query))
else:
print("Query: {}, Berlin".format(query))
locations = get_geoloc(query)

View file

@@ -24,6 +24,7 @@ class Location(BaseModel):
lat = DoubleField()
lng = DoubleField()
match = CharField()
returned_place = CharField()
article = ForeignKeyField(Article)
class Category(BaseModel):