# -*- coding: utf-8 -*- """ See readme.md """ from lxml.html import document_fromstring import simplekml from urllib import FancyURLopener import re import sys import time coords_in_content = re.compile('\/@(\d+\.\d+),(\d+\.\d+),') mobile_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36' filename = r'GoogleBookmarks.html' if(len(sys.argv) > 1): filename = sys.argv[1] print 'opening ' + filename with open(filename) as bookmarks_file: data = bookmarks_file.read() doc = document_fromstring(data) class Browser(FancyURLopener): version = mobile_agent for label in doc.body.iterfind('dl/dt/h3'): labelName = label.text_content() kml = simplekml.Kml() kml.document.name = labelName for element, attribute, url, pos in label.getparent().getnext().iterlinks(): if 'maps.google' in url: print description = element.text or '' print 'GET {0} {1}'.format(url, description.encode('UTF8')) browser = Browser() # Load map and find coordinates in source of page sock = False while not sock: try: sock = browser.open(url.replace(' ','+')) except Exception, e: print 'Connection problem:' + repr(e) print 'Retrying randomly between 15 and 60 seconds.' time.sleep(random.randint(15, 60)) content = sock.read() sock.close() try: coords = coords_in_content.search(content) latitude = coords.groups()[0] longitude = coords.groups()[1] except (AttributeError, IndexError): print '[Coordinates not found] ' + str(coords) + ' Try to update "mobile_agent"' continue print latitude, longitude try: kml.newpoint(name=description, coords=[(float(longitude), float(latitude))]) except ValueError: print '[Invalid coordinates]' output = './maps/' + labelName + '.kml' print 'saving results to ' + output kml.save(output)