mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Remove field 'additional_place' because it can't be reliably parsed
This commit is contained in:
parent
c1ac5e5ed4
commit
98d1e21a90
3 changed files with 4 additions and 15 deletions
|
|
@ -29,7 +29,6 @@ for article in articles:
|
|||
h = hashlib.sha256()
|
||||
h.update(str(article['date']).encode(encoding))
|
||||
h.update(article['place'].encode(encoding))
|
||||
h.update((article['additional_place'] or '').encode(encoding))
|
||||
h.update(article['description'].encode(encoding))
|
||||
digest = h.digest()
|
||||
|
||||
|
|
@ -41,7 +40,6 @@ for article in articles:
|
|||
date = article['date'],
|
||||
month_only = article['month_only'],
|
||||
place = article['place'],
|
||||
additional_place = article['additional_place'],
|
||||
description = article['description'],
|
||||
hash = digest
|
||||
)
|
||||
|
|
|
|||
|
|
@ -13,11 +13,10 @@ class Article(BaseModel):
|
|||
date = DateField(index=True)
|
||||
month_only = BooleanField(default=False)
|
||||
place = CharField()
|
||||
additional_place = CharField(null=True)
|
||||
description = TextField()
|
||||
hash = BlobField(index=True)
|
||||
|
||||
# Set up the tables
|
||||
def create_tables():
|
||||
database.connect()
|
||||
database.create_tables([Article])
|
||||
db.connect()
|
||||
db.create_tables([Article])
|
||||
|
|
|
|||
|
|
@ -42,22 +42,14 @@ class Scraper():
|
|||
print('Failed for headline ' + headline)
|
||||
raise
|
||||
|
||||
places = headline[headline.find(' ') + 1:]
|
||||
|
||||
if places.find(' ') == -1:
|
||||
district = places
|
||||
additional = None
|
||||
else:
|
||||
district = places[:places.find(' ')]
|
||||
additional = places[places.find(' ') + 1:].strip()
|
||||
place = headline[headline.find(' ') + 1:]
|
||||
|
||||
text = table.select('tr')[2].select('td')[1].get_text()
|
||||
|
||||
article = {
|
||||
'date': date(int(year), int(month), int(day) if day else 1),
|
||||
'month_only': day is None,
|
||||
'place': district.strip(),
|
||||
'additional_place': additional,
|
||||
'place': place.strip(),
|
||||
'description': text.strip()
|
||||
}
|
||||
articles.append(article)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue