mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Clean up code, clarify and remove an unnecessary try-except-block
This commit is contained in:
parent
98d1e21a90
commit
7c4cf8f9f0
2 changed files with 6 additions and 10 deletions
|
|
@@ -34,8 +34,7 @@ for article in articles:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
Article.get(Article.hash == digest)
|
Article.get(Article.hash == digest)
|
||||||
except:
|
except Article.DoesNotExist:
|
||||||
# article not found
|
|
||||||
Article.create(
|
Article.create(
|
||||||
date = article['date'],
|
date = article['date'],
|
||||||
month_only = article['month_only'],
|
month_only = article['month_only'],
|
||||||
|
|
|
||||||
|
|
@@ -31,19 +31,16 @@ class Scraper():
|
||||||
articles = []
|
articles = []
|
||||||
|
|
||||||
for table in article_tables:
|
for table in article_tables:
|
||||||
# headlines are always YYYY-MM-DD? Berlin-DISTRICT (+ sometimes additional info)
|
# headlines are always YYYY-MM-DD? Berlin-DISTRICT
|
||||||
|
# sometimes they use Berlin followed by a space, usually by a dash;
|
||||||
|
# additionally maybe there is some information such as a
|
||||||
|
# train or bus station appended but often there isn't.
|
||||||
headline = table.select('tr:first-child')[0].get_text()
|
headline = table.select('tr:first-child')[0].get_text()
|
||||||
|
|
||||||
date_match = self.date_matcher.match(headline.strip())
|
date_match = self.date_matcher.match(headline.strip())
|
||||||
|
|
||||||
try:
|
|
||||||
year, month, day = date_match.group(1,2,4)
|
year, month, day = date_match.group(1,2,4)
|
||||||
except:
|
|
||||||
print('Failed for headline ' + headline)
|
|
||||||
raise
|
|
||||||
|
|
||||||
place = headline[headline.find(' ') + 1:]
|
place = headline[headline.find(' ') + 1:]
|
||||||
|
|
||||||
text = table.select('tr')[2].select('td')[1].get_text()
|
text = table.select('tr')[2].select('td')[1].get_text()
|
||||||
|
|
||||||
article = {
|
article = {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue