mirror of
https://github.com/heyarne/bookmarks-to-kml.git
synced 2026-05-06 18:43:40 +02:00
Escape URLs to fix Unicode bug
This commit is contained in:
parent
40082f0975
commit
d167158a6e
1 changed file with 11 additions and 4 deletions
|
|
@ -8,11 +8,13 @@ from lxml.html import document_fromstring
|
||||||
import simplekml
|
import simplekml
|
||||||
|
|
||||||
from urllib.request import FancyURLopener
|
from urllib.request import FancyURLopener
|
||||||
|
from urllib.parse import quote_plus, urlparse, parse_qsl
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import random
|
||||||
|
|
||||||
coords_in_content = re.compile('\/@(-?\d+\.\d+),(-?\d+\.\d+),')
|
coords_in_content = re.compile('\/@(-?\d+\.\d+),(-?\d+\.\d+),')
|
||||||
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36'
|
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36'
|
||||||
|
|
@ -27,6 +29,7 @@ with open(filename) as bookmarks_file:
|
||||||
|
|
||||||
doc = document_fromstring(data)
|
doc = document_fromstring(data)
|
||||||
|
|
||||||
|
|
||||||
class Browser(FancyURLopener):
|
class Browser(FancyURLopener):
|
||||||
version = user_agent
|
version = user_agent
|
||||||
|
|
||||||
|
|
@ -36,10 +39,13 @@ for label in doc.body.iterfind('dl/dt/h3'):
|
||||||
kml = simplekml.Kml()
|
kml = simplekml.Kml()
|
||||||
kml.document.name = labelName
|
kml.document.name = labelName
|
||||||
|
|
||||||
for element, attribute, url, pos in label.getparent().getnext().iterlinks():
|
for element, _, url, _ in label.getparent().getnext().iterlinks():
|
||||||
if 'maps.google' in url:
|
if 'maps.google' in url:
|
||||||
print
|
|
||||||
description = element.text or ''
|
description = element.text or ''
|
||||||
|
safe_query = ['{0}={1}'.format(k, quote_plus(v))
|
||||||
|
for (k, v) in parse_qsl(urlparse(url).query)]
|
||||||
|
url = '{0}/?{1}'.format(url.split('/?')[0], '&'.join(safe_query))
|
||||||
|
|
||||||
print('GET {0} {1}'.format(url, description.encode('UTF8')))
|
print('GET {0} {1}'.format(url, description.encode('UTF8')))
|
||||||
browser = Browser()
|
browser = Browser()
|
||||||
|
|
||||||
|
|
@ -47,7 +53,7 @@ for label in doc.body.iterfind('dl/dt/h3'):
|
||||||
sock = False
|
sock = False
|
||||||
while not sock:
|
while not sock:
|
||||||
try:
|
try:
|
||||||
sock = browser.open(url.replace(' ','+'))
|
sock = browser.open(url)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print('Connection problem:' + repr(e))
|
print('Connection problem:' + repr(e))
|
||||||
print('Retrying randomly between 15 and 60 seconds.')
|
print('Retrying randomly between 15 and 60 seconds.')
|
||||||
|
|
@ -62,7 +68,8 @@ for label in doc.body.iterfind('dl/dt/h3'):
|
||||||
longitude = coords[1]
|
longitude = coords[1]
|
||||||
|
|
||||||
except (AttributeError, IndexError):
|
except (AttributeError, IndexError):
|
||||||
print('[Coordinates not found: ' + str(coords) + '. Try to update "user_agent"]')
|
print('[Coordinates not found: ' + str(coords) +
|
||||||
|
'. Try to update "user_agent"]')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(latitude, longitude)
|
print(latitude, longitude)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue