mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Clean up code, clarify and remove an unnecessary try-except-block
This commit is contained in:
parent
98d1e21a90
commit
7c4cf8f9f0
2 changed files with 6 additions and 10 deletions
|
|
@@ -34,8 +34,7 @@ for article in articles:
|
|||
|
||||
try:
|
||||
Article.get(Article.hash == digest)
|
||||
except:
|
||||
# article not found
|
||||
except Article.DoesNotExist:
|
||||
Article.create(
|
||||
date = article['date'],
|
||||
month_only = article['month_only'],
|
||||
|
|
|
|||
|
|
@@ -31,19 +31,16 @@ class Scraper():
|
|||
articles = []
|
||||
|
||||
for table in article_tables:
|
||||
# headlines are always YYYY-MM-DD? Berlin-DISTRICT (+ sometimes additional info)
|
||||
# headlines are always YYYY-MM-DD? Berlin-DISTRICT
|
||||
# sometimes they use Berlin followed by a space, usually by a dash;
|
||||
# additionally maybe there is some information such as a
|
||||
# train or bus station appended but often there isn't.
|
||||
headline = table.select('tr:first-child')[0].get_text()
|
||||
|
||||
date_match = self.date_matcher.match(headline.strip())
|
||||
|
||||
try:
|
||||
year, month, day = date_match.group(1,2,4)
|
||||
except:
|
||||
print('Failed for headline ' + headline)
|
||||
raise
|
||||
|
||||
year, month, day = date_match.group(1,2,4)
|
||||
place = headline[headline.find(' ') + 1:]
|
||||
|
||||
text = table.select('tr')[2].select('td')[1].get_text()
|
||||
|
||||
article = {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue