Clean up code, clarify a comment, and remove an unnecessary try-except block

This commit is contained in:
Arne Schlüter 2014-12-11 00:24:41 +01:00
commit 7c4cf8f9f0
2 changed files with 6 additions and 10 deletions

View file

@ -34,8 +34,7 @@ for article in articles:
try:
Article.get(Article.hash == digest)
except:
# article not found
except Article.DoesNotExist:
Article.create(
date = article['date'],
month_only = article['month_only'],

View file

@ -31,19 +31,16 @@ class Scraper():
articles = []
for table in article_tables:
# headlines are always YYYY-MM-DD? Berlin-DISTRICT (+ sometimes additional info)
# headlines are always YYYY-MM-DD? Berlin-DISTRICT
# sometimes "Berlin" is followed by a space, but usually by a dash;
# additional information such as a train or bus station is
# sometimes appended, but often it is not.
headline = table.select('tr:first-child')[0].get_text()
date_match = self.date_matcher.match(headline.strip())
try:
year, month, day = date_match.group(1,2,4)
except:
print('Failed for headline ' + headline)
raise
place = headline[headline.find(' ') + 1:]
text = table.select('tr')[2].select('td')[1].get_text()
article = {