mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Move geocoding to Google API
This commit is contained in:
parent
13ac77b575
commit
cb0175a7b7
4 changed files with 57 additions and 22 deletions
28
analyze.py
28
analyze.py
|
|
@@ -96,8 +96,28 @@ def get_categories(article_body):
|
||||||
return found_categories or ['other']
|
return found_categories or ['other']
|
||||||
|
|
||||||
def get_geoloc(query):
|
def get_geoloc(query):
|
||||||
encoded_query = urlencode(query + ", Berlin")
|
confidence_map = {
|
||||||
url = "http://nominatim.openstreetmap.org/search?q=" + encoded_query + " + "&countrycodes=de&format=json&limit=1"
|
"ROOFTOP": 10,
|
||||||
r = requests.get(url)
|
"RANGE_INTERPOLATED": 7,
|
||||||
|
"GEOMETRIC_CENTER": 4,
|
||||||
|
"APPROXIMATE": 1
|
||||||
|
}
|
||||||
|
|
||||||
return(r.json()[0]["lat"], r.json()[0]["lon"])
|
params = {
|
||||||
|
"address": query + ", Berlin",
|
||||||
|
"components": "country:DE"
|
||||||
|
}
|
||||||
|
|
||||||
|
url = "http://maps.googleapis.com/maps/api/geocode/json?" + urlencode(params)
|
||||||
|
r = requests.get(url).json()["results"]
|
||||||
|
|
||||||
|
locations = []
|
||||||
|
for location in r:
|
||||||
|
print(location)
|
||||||
|
locations.append({
|
||||||
|
"lat": location["geometry"]["location"]["lat"],
|
||||||
|
"lng": location["geometry"]["location"]["lng"],
|
||||||
|
"confidence": confidence_map[location["geometry"]["location_type"]]
|
||||||
|
})
|
||||||
|
|
||||||
|
return locations
|
||||||
|
|
|
||||||
22
locator.py
Normal file
22
locator.py
Normal file
|
|
@@ -0,0 +1,22 @@
|
||||||
|
import time
|
||||||
|
from models import *
|
||||||
|
from analyze import *
|
||||||
|
|
||||||
|
for article in Article.select().limit(5):
|
||||||
|
potential = get_potential_places(article.place, article.description)
|
||||||
|
places = improve_potential_places(potential)
|
||||||
|
|
||||||
|
print("Found places: {}".format(places))
|
||||||
|
|
||||||
|
for place in places:
|
||||||
|
query = " ".join([word for (word, tag) in place])
|
||||||
|
print("Query: {}, Berlin".format(query))
|
||||||
|
|
||||||
|
locations = get_geoloc(query)
|
||||||
|
|
||||||
|
for location in locations:
|
||||||
|
location["article"] = article
|
||||||
|
location["match"] = query
|
||||||
|
Location.create(**location)
|
||||||
|
|
||||||
|
# time.sleep(1)
|
||||||
12
models.py
12
models.py
|
|
@@ -16,7 +16,17 @@ class Article(BaseModel):
|
||||||
description = TextField()
|
description = TextField()
|
||||||
hash = BlobField(index=True)
|
hash = BlobField(index=True)
|
||||||
|
|
||||||
|
class Location(BaseModel):
|
||||||
|
"""
|
||||||
|
A location describes the place an incident has happened
|
||||||
|
"""
|
||||||
|
confidence = IntegerField()
|
||||||
|
lat = DoubleField()
|
||||||
|
lng = DoubleField()
|
||||||
|
match = CharField()
|
||||||
|
article = ForeignKeyField(Article)
|
||||||
|
|
||||||
# Set up the tables
|
# Set up the tables
|
||||||
def create_tables():
|
def create_tables():
|
||||||
db.connect()
|
db.connect()
|
||||||
db.create_tables([Article])
|
db.create_tables([Article, Location])
|
||||||
|
|
|
||||||
17
tagger.py
17
tagger.py
|
|
@@ -1,17 +0,0 @@
|
||||||
from nltk.tag.stanford import POSTagger
|
|
||||||
from models import Article
|
|
||||||
from analyze import *
|
|
||||||
|
|
||||||
tagger = POSTagger('./stanford-postagger-full-2014-10-26/models/german-fast.tagger',
|
|
||||||
'./stanford-postagger-full-2014-10-26/stanford-postagger-3.5.0.jar',
|
|
||||||
'UTF-8')
|
|
||||||
|
|
||||||
for article in Article.select().limit(100):
|
|
||||||
potential = get_potential_places(article.place, article.description)
|
|
||||||
places = improve_potential_places(potential)
|
|
||||||
|
|
||||||
print(article.place)
|
|
||||||
print(article.description)
|
|
||||||
print()
|
|
||||||
print("Potential: " + str(potential))
|
|
||||||
print("Improved: " + str(places))
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue