diff --git a/get_incidents.py b/get_incidents.py index b3b4cb5..d208bcc 100644 --- a/get_incidents.py +++ b/get_incidents.py @@ -29,7 +29,6 @@ for article in articles: h = hashlib.sha256() h.update(str(article['date']).encode(encoding)) h.update(article['place'].encode(encoding)) - h.update((article['additional_place'] or '').encode(encoding)) h.update(article['description'].encode(encoding)) digest = h.digest() @@ -41,7 +40,6 @@ for article in articles: date = article['date'], month_only = article['month_only'], place = article['place'], - additional_place = article['additional_place'], description = article['description'], hash = digest ) diff --git a/models.py b/models.py index 07de832..78aa901 100644 --- a/models.py +++ b/models.py @@ -13,11 +13,10 @@ class Article(BaseModel): date = DateField(index=True) month_only = BooleanField(default=False) place = CharField() - additional_place = CharField(null=True) description = TextField() hash = BlobField(index=True) # Set up the tables def create_tables(): - database.connect() - database.create_tables([Article]) + db.connect() + db.create_tables([Article]) diff --git a/scraper/scraper.py b/scraper/scraper.py index 24485b7..87d9cd4 100644 --- a/scraper/scraper.py +++ b/scraper/scraper.py @@ -42,22 +42,14 @@ class Scraper(): print('Failed for headline ' + headline) raise - places = headline[headline.find(' ') + 1:] - - if places.find(' ') == -1: - district = places - additional = None - else: - district = places[:places.find(' ')] - additional = places[places.find(' ') + 1:].strip() + place = headline[headline.find(' ') + 1:] text = table.select('tr')[2].select('td')[1].get_text() article = { 'date': date(int(year), int(month), int(day) if day else 1), 'month_only': day is None, - 'place': district.strip(), - 'additional_place': additional, + 'place': place.strip(), 'description': text.strip() } articles.append(article)