From 98d1e21a90e849a565d737d07dcc807fee483af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arne=20Schl=C3=BCter?= Date: Thu, 11 Dec 2014 00:07:48 +0100 Subject: [PATCH] Remove field 'additional_place' because it can't be reliably parsed --- get_incidents.py | 2 -- models.py | 5 ++--- scraper/scraper.py | 12 ++---------- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/get_incidents.py b/get_incidents.py index b3b4cb5..d208bcc 100644 --- a/get_incidents.py +++ b/get_incidents.py @@ -29,7 +29,6 @@ for article in articles: h = hashlib.sha256() h.update(str(article['date']).encode(encoding)) h.update(article['place'].encode(encoding)) - h.update((article['additional_place'] or '').encode(encoding)) h.update(article['description'].encode(encoding)) digest = h.digest() @@ -41,7 +40,6 @@ for article in articles: date = article['date'], month_only = article['month_only'], place = article['place'], - additional_place = article['additional_place'], description = article['description'], hash = digest ) diff --git a/models.py b/models.py index 07de832..78aa901 100644 --- a/models.py +++ b/models.py @@ -13,11 +13,10 @@ class Article(BaseModel): date = DateField(index=True) month_only = BooleanField(default=False) place = CharField() - additional_place = CharField(null=True) description = TextField() hash = BlobField(index=True) # Set up the tables def create_tables(): - database.connect() - database.create_tables([Article]) + db.connect() + db.create_tables([Article]) diff --git a/scraper/scraper.py b/scraper/scraper.py index 24485b7..87d9cd4 100644 --- a/scraper/scraper.py +++ b/scraper/scraper.py @@ -42,22 +42,14 @@ class Scraper(): print('Failed for headline ' + headline) raise - places = headline[headline.find(' ') + 1:] - - if places.find(' ') == -1: - district = places - additional = None - else: - district = places[:places.find(' ')] - additional = places[places.find(' ') + 1:].strip() + place = headline[headline.find(' ') + 1:] text = 
table.select('tr')[2].select('td')[1].get_text() article = { 'date': date(int(year), int(month), int(day) if day else 1), 'month_only': day is None, - 'place': district.strip(), - 'additional_place': additional, + 'place': place.strip(), 'description': text.strip() } articles.append(article)