update and fix

This commit is contained in:
hexmind 2016-04-17 21:48:38 +02:00
commit 2cae694035
4 changed files with 45 additions and 63 deletions

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
.idea
*.html
maps/*

View file

@ -1,10 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Go to Google Bookmarks: https://www.google.com/bookmarks/ See readme.md
On the bottom left, click "Export bookmarks": https://www.google.com/bookmarks/bookmarks.html?hl=en
After downloading the html file, run this script on it to generate a KML.
""" """
@ -13,80 +9,59 @@ import simplekml
from urllib import FancyURLopener from urllib import FancyURLopener
import os
import random
import re import re
import sys import sys
import time import time
# filename = r'GoogleBookmarks.html' coords_in_content = re.compile('\/@(\d+\.\d+),(\d+\.\d+),')
mobile_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36'
filename = r'GoogleBookmarks.html'
if(len(sys.argv) > 1):
filename = sys.argv[1] filename = sys.argv[1]
print 'opening ' + filename
with open(filename) as bookmarks_file: with open(filename) as bookmarks_file:
data = bookmarks_file.read() data = bookmarks_file.read()
# kml = simplekml.Kml()
# Hacky and doesn't work for all of the stars:
lat_re = re.compile('markers:[^\]]*latlng[^}]*lat:([^,]*)')
lon_re = re.compile('markers:[^\]]*latlng[^}]*lng:([^}]*)')
coords_in_url = re.compile('\?q=(-?\d{,3}\.\d*),\s*(-?\d{,3}\.\d*)')
doc = document_fromstring(data) doc = document_fromstring(data)
class Browser(FancyURLopener): class Browser(FancyURLopener):
user_agents = [ version = mobile_agent
'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
'Opera/9.25 (Windows NT 5.1; U; en)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9'
]
version = random.choice(user_agents) for label in doc.body.iterfind('dl/dt/h3'):
for label in doc.body.iterfind('dl/dl/h3'):
labelName = label.text_content() labelName = label.text_content()
#print labelName
kml = simplekml.Kml() kml = simplekml.Kml()
kml.document.name = labelName kml.document.name = labelName
for element, attribute, url, pos in label.getnext().iterlinks(): for element, attribute, url, pos in label.getparent().getnext().iterlinks():
if 'maps.google' in url: if 'maps.google' in url:
print
description = element.text or '' description = element.text or ''
print description.encode('UTF8') print 'GET {0} {1}'.format(url, description.encode('UTF8'))
print "URL: {0}".format(url)
browser = Browser() browser = Browser()
if coords_in_url.search(url):
# Coordinates are in URL itself
latitude = coords_in_url.search(url).groups()[0]
longitude = coords_in_url.search(url).groups()[1]
else:
# Load map and find coordinates in source of page # Load map and find coordinates in source of page
sock = False sock = False
while not sock: while not sock:
try: try:
sock = browser.open(url.replace(' ','+')) sock = browser.open(url.replace(' ','+'))
except Exception, e: except Exception, e:
print 'Connection problem:' print 'Connection problem:' + repr(e)
print repr(e)
print 'Retrying randomly between 15 and 60 seconds.' print 'Retrying randomly between 15 and 60 seconds.'
time.sleep(random.randint(15, 60)) time.sleep(random.randint(15, 60))
content = sock.read() content = sock.read()
sock.close() sock.close()
time.sleep(random.randint(15, 60)) # Don't annoy server
try: try:
latitude = lat_re.findall(content)[0] coords = coords_in_content.search(content)
longitude = lon_re.findall(content)[0] latitude = coords.groups()[0]
except IndexError: longitude = coords.groups()[1]
print '[Coordinates not found]'
print except (AttributeError, IndexError):
print '[Coordinates not found] ' + str(coords) + ' Try to update "mobile_agent"'
continue continue
print latitude, longitude print latitude, longitude
@ -95,6 +70,7 @@ for label in doc.body.iterfind('dl/dl/h3'):
coords=[(float(longitude), float(latitude))]) coords=[(float(longitude), float(latitude))])
except ValueError: except ValueError:
print '[Invalid coordinates]' print '[Invalid coordinates]'
print
kml.save("./maps/" + labelName + ".kml") output = './maps/' + labelName + '.kml'
print 'saving results to ' + output
kml.save(output)

0
maps/.gitignore vendored
View file

View file

@ -1,9 +1,12 @@
To export your Google Maps starred locations: V2016.04.17
Go to Google Bookmarks: https://www.google.com/bookmarks/ Go to Google Bookmarks: https://www.google.com/bookmarks/
On the bottom left, click "Export bookmarks": https://www.google.com/bookmarks/bookmarks.html?hl=en On the bottom left, click "Export bookmarks": https://www.google.com/bookmarks/bookmarks.html?hl=en
Install script dependencies:
pip install simplekml
After downloading the HTML file, run this script on it to generate a KML file per bookmark label. After downloading the HTML file, run this script on it to generate a KML file per bookmark label.
It's hacky and doesn't work on all of them, but it kinda works. It's hacky and doesn't work on all of them, but it kinda works.