mirror of
https://github.com/heyarne/bookmarks-to-kml.git
synced 2026-05-06 18:43:40 +02:00
Escape urls to fix unicode bug
This commit is contained in:
parent
40082f0975
commit
d167158a6e
1 changed files with 11 additions and 4 deletions
|
|
@ -8,11 +8,13 @@ from lxml.html import document_fromstring
|
|||
import simplekml
|
||||
|
||||
from urllib.request import FancyURLopener
|
||||
from urllib.parse import quote_plus, urlparse, parse_qsl
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import random
|
||||
|
||||
coords_in_content = re.compile('\/@(-?\d+\.\d+),(-?\d+\.\d+),')
|
||||
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36'
|
||||
|
|
@ -27,6 +29,7 @@ with open(filename) as bookmarks_file:
|
|||
|
||||
doc = document_fromstring(data)
|
||||
|
||||
|
||||
class Browser(FancyURLopener):
|
||||
version = user_agent
|
||||
|
||||
|
|
@ -36,10 +39,13 @@ for label in doc.body.iterfind('dl/dt/h3'):
|
|||
kml = simplekml.Kml()
|
||||
kml.document.name = labelName
|
||||
|
||||
for element, attribute, url, pos in label.getparent().getnext().iterlinks():
|
||||
for element, _, url, _ in label.getparent().getnext().iterlinks():
|
||||
if 'maps.google' in url:
|
||||
print
|
||||
description = element.text or ''
|
||||
safe_query = ['{0}={1}'.format(k, quote_plus(v))
|
||||
for (k, v) in parse_qsl(urlparse(url).query)]
|
||||
url = '{0}/?{1}'.format(url.split('/?')[0], '&'.join(safe_query))
|
||||
|
||||
print('GET {0} {1}'.format(url, description.encode('UTF8')))
|
||||
browser = Browser()
|
||||
|
||||
|
|
@ -47,7 +53,7 @@ for label in doc.body.iterfind('dl/dt/h3'):
|
|||
sock = False
|
||||
while not sock:
|
||||
try:
|
||||
sock = browser.open(url.replace(' ','+'))
|
||||
sock = browser.open(url)
|
||||
except Exception as e:
|
||||
print('Connection problem:' + repr(e))
|
||||
print('Retrying randomly between 15 and 60 seconds.')
|
||||
|
|
@ -62,7 +68,8 @@ for label in doc.body.iterfind('dl/dt/h3'):
|
|||
longitude = coords[1]
|
||||
|
||||
except (AttributeError, IndexError):
|
||||
print('[Coordinates not found: ' + str(coords) + '. Try to update "user_agent"]')
|
||||
print('[Coordinates not found: ' + str(coords) +
|
||||
'. Try to update "user_agent"]')
|
||||
continue
|
||||
|
||||
print(latitude, longitude)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue