diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d435c64 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.idea +*.html +maps/* \ No newline at end of file diff --git a/bookmarkstokml.py b/bookmarkstokml.py index f45db3d..b4b656d 100644 --- a/bookmarkstokml.py +++ b/bookmarkstokml.py @@ -1,10 +1,6 @@ # -*- coding: utf-8 -*- """ -Go to Google Bookmarks: https://www.google.com/bookmarks/ - -On the bottom left, click "Export bookmarks": https://www.google.com/bookmarks/bookmarks.html?hl=en - -After downloading the html file, run this script on it to generate a KML. +See readme.md """ @@ -13,81 +9,60 @@ import simplekml from urllib import FancyURLopener -import os -import random import re import sys import time -# filename = r'GoogleBookmarks.html' -filename = sys.argv[1] +coords_in_content = re.compile('\/@(\d+\.\d+),(\d+\.\d+),') +mobile_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36' +filename = r'GoogleBookmarks.html' +if(len(sys.argv) > 1): + filename = sys.argv[1] + +print 'opening ' + filename with open(filename) as bookmarks_file: data = bookmarks_file.read() -# kml = simplekml.Kml() - -# Hacky and doesn't work for all of the stars: -lat_re = re.compile('markers:[^\]]*latlng[^}]*lat:([^,]*)') -lon_re = re.compile('markers:[^\]]*latlng[^}]*lng:([^}]*)') -coords_in_url = re.compile('\?q=(-?\d{,3}\.\d*),\s*(-?\d{,3}\.\d*)') - doc = document_fromstring(data) class Browser(FancyURLopener): - user_agents = [ - 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11', - 'Opera/9.25 (Windows NT 5.1; U; en)', - 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)', - 'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)', - 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12', - 'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9' - ] + version = mobile_agent - version = random.choice(user_agents) - -for label in doc.body.iterfind('dl/dl/h3'): +for label in doc.body.iterfind('dl/dt/h3'): labelName = label.text_content() - #print labelName kml = simplekml.Kml() kml.document.name = labelName - for element, attribute, url, pos in label.getnext().iterlinks(): + for element, attribute, url, pos in label.getparent().getnext().iterlinks(): if 'maps.google' in url: + print description = element.text or '' - print description.encode('UTF8') - print "URL: {0}".format(url) - + print 'GET {0} {1}'.format(url, description.encode('UTF8')) browser = Browser() - if coords_in_url.search(url): - # Coordinates are in URL itself - latitude = coords_in_url.search(url).groups()[0] - longitude = coords_in_url.search(url).groups()[1] - else: - # Load map and find coordinates in source of page - sock = False - - while not sock: - try: - sock = browser.open(url.replace(' ','+')) - except Exception, e: - print 'Connection problem:' - print repr(e) - print 'Retrying randomly between 15 and 60 seconds.' - time.sleep(random.randint(15, 60)) - - content = sock.read() - sock.close() - time.sleep(random.randint(15, 60)) # Don't annoy server + # Load map and find coordinates in source of page + sock = False + while not sock: try: - latitude = lat_re.findall(content)[0] - longitude = lon_re.findall(content)[0] - except IndexError: - print '[Coordinates not found]' - print - continue + sock = browser.open(url.replace(' ','+')) + except Exception, e: + print 'Connection problem:' + repr(e) + print 'Retrying randomly between 15 and 60 seconds.' + time.sleep(random.randint(15, 60)) + + content = sock.read() + sock.close() + + try: + coords = coords_in_content.search(content) + latitude = coords.groups()[0] + longitude = coords.groups()[1] + + except (AttributeError, IndexError): + print '[Coordinates not found] ' + str(coords) + ' Try to update "mobile_agent"' + continue print latitude, longitude try: @@ -95,6 +70,7 @@ for label in doc.body.iterfind('dl/dl/h3'): coords=[(float(longitude), float(latitude))]) except ValueError: print '[Invalid coordinates]' - print - kml.save("./maps/" + labelName + ".kml") + output = './maps/' + labelName + '.kml' + print 'saving results to ' + output + kml.save(output) \ No newline at end of file diff --git a/maps/.gitignore b/maps/.gitignore deleted file mode 100644 index e69de29..0000000 diff --git a/readme.md b/readme.md index d34cada..71b3dc2 100644 --- a/readme.md +++ b/readme.md @@ -1,9 +1,12 @@ -To export your Google Maps starred locations: +V2016.04.17 Go to Google Bookmarks: https://www.google.com/bookmarks/ On the bottom left, click "Export bookmarks": https://www.google.com/bookmarks/bookmarks.html?hl=en +Install script dependencies: +pip install simplekml + After downloading the html file, run this script on it to generate a KML file per bookmark label. -It's hacky and doesn't work on all of them, but it kinda works. +It's hacky and doesn't work on all of them, but it kinda works. \ No newline at end of file