Match latitude and longitude in the description for points which don't have an address

This commit is contained in:
Arne Schlüter 2017-01-29 16:29:19 +01:00
commit 03f715c88e

View file

@ -16,7 +16,8 @@ import sys
import time import time
import random import random
coords_in_content = re.compile('\/@(-?\d+\.\d+),(-?\d+\.\d+),') coords_in_content = re.compile(r'\/@(-?\d+\.\d+),(-?\d+\.\d+),')
coords_in_description = re.compile(r'^(-?\d+\.\d+),(-?\d+\.\d+)$')
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36' user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36'
filename = r'GoogleBookmarks.html' filename = r'GoogleBookmarks.html'
@ -42,37 +43,43 @@ for label in doc.body.iterfind('dl/dt/h3'):
for element, _, url, _ in label.getparent().getnext().iterlinks(): for element, _, url, _ in label.getparent().getnext().iterlinks():
if 'maps.google' in url: if 'maps.google' in url:
description = element.text or '' description = element.text or ''
safe_query = ['{0}={1}'.format(k, quote_plus(v)) latitude = longitude = None
for (k, v) in parse_qsl(urlparse(url).query)]
url = '{0}/?{1}'.format(url.split('/?')[0], '&'.join(safe_query))
print('GET {0} {1}'.format(url, description.encode('UTF8')))
browser = Browser()
# Load map and find coordinates in source of page
sock = False
while not sock:
try:
sock = browser.open(url)
except Exception as e:
print('Connection problem:' + repr(e))
print('Retrying randomly between 15 and 60 seconds.')
time.sleep(random.randint(15, 60))
content = sock.read().decode("utf-8")
sock.close()
try: try:
coords = coords_in_content.search(content).groups() # check if the link itself contains the coordinate
latitude = coords[0] latitude, longitude = coords_in_description.search(
longitude = coords[1] description).groups()
print("Found point {0},{1} in description".format(
latitude, longitude))
except (AttributeError, IndexError): except (AttributeError, IndexError):
print('[Coordinates not found: ' + str(coords) + safe_query = ['{0}={1}'.format(k, quote_plus(v))
'. Try to update "user_agent"]') for (k, v) in parse_qsl(urlparse(url).query)]
continue url = '{0}/?{1}'.format(url.split('/?')
[0], '&'.join(safe_query))
print('GET {0} {1}'.format(url, description.encode('UTF8')))
browser = Browser()
# Load map and find coordinates in source of page
sock = False
while not sock:
try:
sock = browser.open(url)
except Exception as e:
print('Connection problem:' + repr(e))
print('Retrying randomly between 15 and 60 seconds.')
time.sleep(random.randint(15, 60))
content = sock.read().decode("utf-8")
sock.close()
try:
latitude, longitude = coords_in_content.search(
content).groups()
except (AttributeError, IndexError):
print('[Coordinates not found: ({0},{1}).'
'Try to update "user_agent"]'.format(latitude, longitude))
continue
print(latitude, longitude)
try: try:
kml.newpoint(name=description, kml.newpoint(name=description,
coords=[(float(longitude), float(latitude))]) coords=[(float(longitude), float(latitude))])