mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Use a list of German nouns before geocoding
This commit is contained in:
parent
9cbff4aa30
commit
c09a1d78a2
4 changed files with 24735 additions and 11 deletions
|
|
@ -119,7 +119,8 @@ def get_geoloc(query):
|
|||
locations.append({
|
||||
"lat": location["geometry"]["location"]["lat"],
|
||||
"lng": location["geometry"]["location"]["lng"],
|
||||
"confidence": confidence_map[location["geometry"]["location_type"]]
|
||||
"confidence": confidence_map[location["geometry"]["location_type"]],
|
||||
"returned_place": location["formatted_address"]
|
||||
})
|
||||
|
||||
return locations
|
||||
|
|
|
|||
24715
german_nouns.txt
Normal file
24715
german_nouns.txt
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -4,7 +4,11 @@ from analyze import *
|
|||
|
||||
print("Start geocoding...")
|
||||
start_time = time.time()
|
||||
articles = Article.select()
|
||||
articles = Article.select().where(Article.id < 701)
|
||||
|
||||
# Use our list of German nouns for filtering
|
||||
with open("german_nouns.txt", "r") as f:
|
||||
german_nouns = f.read().splitlines()
|
||||
|
||||
for article in articles:
|
||||
potential = get_potential_places(article.place, article.description)
|
||||
|
|
@ -14,6 +18,9 @@ for article in articles:
|
|||
|
||||
for place in places:
|
||||
query = " ".join([word for (word, tag) in place])
|
||||
if query in german_nouns:
|
||||
print("Skipping {}".format(query))
|
||||
else:
|
||||
print("Query: {}, Berlin".format(query))
|
||||
|
||||
locations = get_geoloc(query)
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ class Location(BaseModel):
|
|||
lat = DoubleField()
|
||||
lng = DoubleField()
|
||||
match = CharField()
|
||||
returned_place = CharField()
|
||||
article = ForeignKeyField(Article)
|
||||
|
||||
class Category(BaseModel):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue