From 7c4cf8f9f080358b9d25ea2e47134d7419470a0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arne=20Schl=C3=BCter?= Date: Thu, 11 Dec 2014 00:24:41 +0100 Subject: [PATCH] Clean up code, clarify and remove an unnecessary try-except-block --- get_incidents.py | 3 +-- scraper/scraper.py | 13 +++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/get_incidents.py b/get_incidents.py index d208bcc..8960736 100644 --- a/get_incidents.py +++ b/get_incidents.py @@ -34,8 +34,7 @@ for article in articles: try: Article.get(Article.hash == digest) - except: - # article not found + except Article.DoesNotExist: Article.create( date = article['date'], month_only = article['month_only'], diff --git a/scraper/scraper.py b/scraper/scraper.py index 87d9cd4..f22667a 100644 --- a/scraper/scraper.py +++ b/scraper/scraper.py @@ -31,19 +31,16 @@ class Scraper(): articles = [] for table in article_tables: - # headlines are always YYYY-MM-DD? Berlin-DISTRICT (+ sometimes additional info) + # headlines are always YYYY-MM-DD? Berlin-DISTRICT + # sometimes they use Berlin followed by a space, usually by a dash; + # additionally maybe there is some information such as a + # train or bus station appended but often there isn't. headline = table.select('tr:first-child')[0].get_text() date_match = self.date_matcher.match(headline.strip()) - try: - year, month, day = date_match.group(1,2,4) - except: - print('Failed for headline ' + headline) - raise - + year, month, day = date_match.group(1,2,4) place = headline[headline.find(' ') + 1:] - text = table.select('tr')[2].select('td')[1].get_text() article = {