mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Use list of German nouns before geocoding
This commit is contained in:
parent
9cbff4aa30
commit
c09a1d78a2
4 changed files with 24735 additions and 11 deletions
|
|
@ -119,7 +119,8 @@ def get_geoloc(query):
|
||||||
locations.append({
|
locations.append({
|
||||||
"lat": location["geometry"]["location"]["lat"],
|
"lat": location["geometry"]["location"]["lat"],
|
||||||
"lng": location["geometry"]["location"]["lng"],
|
"lng": location["geometry"]["location"]["lng"],
|
||||||
"confidence": confidence_map[location["geometry"]["location_type"]]
|
"confidence": confidence_map[location["geometry"]["location_type"]],
|
||||||
|
"returned_place": location["formatted_address"]
|
||||||
})
|
})
|
||||||
|
|
||||||
return locations
|
return locations
|
||||||
|
|
|
||||||
24715
german_nouns.txt
Normal file
24715
german_nouns.txt
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -4,7 +4,11 @@ from analyze import *
|
||||||
|
|
||||||
print("Start geocoding...")
|
print("Start geocoding...")
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
articles = Article.select()
|
articles = Article.select().where(Article.id < 701)
|
||||||
|
|
||||||
|
# Use our list of German nouns for filtering
|
||||||
|
with open("german_nouns.txt", "r") as f:
|
||||||
|
german_nouns = f.read().splitlines()
|
||||||
|
|
||||||
for article in articles:
|
for article in articles:
|
||||||
potential = get_potential_places(article.place, article.description)
|
potential = get_potential_places(article.place, article.description)
|
||||||
|
|
@ -14,6 +18,9 @@ for article in articles:
|
||||||
|
|
||||||
for place in places:
|
for place in places:
|
||||||
query = " ".join([word for (word, tag) in place])
|
query = " ".join([word for (word, tag) in place])
|
||||||
|
if query in german_nouns:
|
||||||
|
print("Skipping {}".format(query))
|
||||||
|
else:
|
||||||
print("Query: {}, Berlin".format(query))
|
print("Query: {}, Berlin".format(query))
|
||||||
|
|
||||||
locations = get_geoloc(query)
|
locations = get_geoloc(query)
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ class Location(BaseModel):
|
||||||
lat = DoubleField()
|
lat = DoubleField()
|
||||||
lng = DoubleField()
|
lng = DoubleField()
|
||||||
match = CharField()
|
match = CharField()
|
||||||
|
returned_place = CharField()
|
||||||
article = ForeignKeyField(Article)
|
article = ForeignKeyField(Article)
|
||||||
|
|
||||||
class Category(BaseModel):
|
class Category(BaseModel):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue