update and fix

This commit is contained in:
hexmind 2016-04-17 21:48:38 +02:00
commit 2cae694035
4 changed files with 45 additions and 63 deletions

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
.idea
*.html
maps/*

View file

@ -1,10 +1,6 @@
# -*- coding: utf-8 -*-
"""
Go to Google Bookmarks: https://www.google.com/bookmarks/
On the bottom left, click "Export bookmarks": https://www.google.com/bookmarks/bookmarks.html?hl=en
After downloading the html file, run this script on it to generate a KML.
See readme.md
"""
@ -13,81 +9,60 @@ import simplekml
from urllib import FancyURLopener
import os
import random
import re
import sys
import time
# filename = r'GoogleBookmarks.html'
filename = sys.argv[1]
coords_in_content = re.compile('\/@(\d+\.\d+),(\d+\.\d+),')
mobile_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36'
filename = r'GoogleBookmarks.html'
if(len(sys.argv) > 1):
filename = sys.argv[1]
print 'opening ' + filename
with open(filename) as bookmarks_file:
data = bookmarks_file.read()
# kml = simplekml.Kml()
# Hacky and doesn't work for all of the stars:
lat_re = re.compile('markers:[^\]]*latlng[^}]*lat:([^,]*)')
lon_re = re.compile('markers:[^\]]*latlng[^}]*lng:([^}]*)')
coords_in_url = re.compile('\?q=(-?\d{,3}\.\d*),\s*(-?\d{,3}\.\d*)')
doc = document_fromstring(data)
class Browser(FancyURLopener):
user_agents = [
'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
'Opera/9.25 (Windows NT 5.1; U; en)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9'
]
version = mobile_agent
version = random.choice(user_agents)
for label in doc.body.iterfind('dl/dl/h3'):
for label in doc.body.iterfind('dl/dt/h3'):
labelName = label.text_content()
#print labelName
kml = simplekml.Kml()
kml.document.name = labelName
for element, attribute, url, pos in label.getnext().iterlinks():
for element, attribute, url, pos in label.getparent().getnext().iterlinks():
if 'maps.google' in url:
print
description = element.text or ''
print description.encode('UTF8')
print "URL: {0}".format(url)
print 'GET {0} {1}'.format(url, description.encode('UTF8'))
browser = Browser()
if coords_in_url.search(url):
# Coordinates are in URL itself
latitude = coords_in_url.search(url).groups()[0]
longitude = coords_in_url.search(url).groups()[1]
else:
# Load map and find coordinates in source of page
sock = False
while not sock:
try:
sock = browser.open(url.replace(' ','+'))
except Exception, e:
print 'Connection problem:'
print repr(e)
print 'Retrying randomly between 15 and 60 seconds.'
time.sleep(random.randint(15, 60))
content = sock.read()
sock.close()
time.sleep(random.randint(15, 60)) # Don't annoy server
# Load map and find coordinates in source of page
sock = False
while not sock:
try:
latitude = lat_re.findall(content)[0]
longitude = lon_re.findall(content)[0]
except IndexError:
print '[Coordinates not found]'
print
continue
sock = browser.open(url.replace(' ','+'))
except Exception, e:
print 'Connection problem:' + repr(e)
print 'Retrying randomly between 15 and 60 seconds.'
time.sleep(random.randint(15, 60))
content = sock.read()
sock.close()
try:
coords = coords_in_content.search(content)
latitude = coords.groups()[0]
longitude = coords.groups()[1]
except (AttributeError, IndexError):
print '[Coordinates not found] ' + str(coords) + ' Try to update "mobile_agent"'
continue
print latitude, longitude
try:
@ -95,6 +70,7 @@ for label in doc.body.iterfind('dl/dl/h3'):
coords=[(float(longitude), float(latitude))])
except ValueError:
print '[Invalid coordinates]'
print
kml.save("./maps/" + labelName + ".kml")
output = './maps/' + labelName + '.kml'
print 'saving results to ' + output
kml.save(output)

0
maps/.gitignore vendored
View file

View file

@ -1,9 +1,12 @@
To export your Google Maps starred locations:
V2016.04.17
Go to Google Bookmarks: https://www.google.com/bookmarks/
On the bottom left, click "Export bookmarks": https://www.google.com/bookmarks/bookmarks.html?hl=en
Install script dependencies:
pip install simplekml
After downloading the html file, run this script on it to generate a KML file per bookmark label.
It's hacky and doesn't work on all of them, but it kinda works.