mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Write insertion logic for articles
This commit is contained in:
parent
e018aead0e
commit
55a599a47b
1 changed files with 39 additions and 6 deletions
|
|
@ -3,10 +3,13 @@
|
||||||
# them if needed.
|
# them if needed.
|
||||||
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import hashlib
|
||||||
from scraper.scraper import Scraper
|
from scraper.scraper import Scraper
|
||||||
|
|
||||||
# scraper = Scraper()
|
encoding = 'utf-8'
|
||||||
# articles = scraper.scrape()
|
|
||||||
|
scraper = Scraper()
|
||||||
|
articles = scraper.scrape()
|
||||||
|
|
||||||
conn = sqlite3.connect('violence.db')
|
conn = sqlite3.connect('violence.db')
|
||||||
c = conn.cursor()
|
c = conn.cursor()
|
||||||
|
|
@ -18,7 +21,8 @@ c.execute('''
|
||||||
date TEXT,
|
date TEXT,
|
||||||
place TEXT,
|
place TEXT,
|
||||||
additional_place TEXT,
|
additional_place TEXT,
|
||||||
description TEXT
|
description TEXT,
|
||||||
|
hash
|
||||||
);
|
);
|
||||||
''')
|
''')
|
||||||
|
|
||||||
|
|
@ -27,6 +31,35 @@ c.execute('''
|
||||||
ON incidents (date);
|
ON incidents (date);
|
||||||
''')
|
''')
|
||||||
|
|
||||||
# insert articles
|
c.execute('''
|
||||||
# for article in articles:
|
CREATE INDEX IF NOT EXISTS incidents_hash
|
||||||
# pass
|
ON incidents (hash);
|
||||||
|
''')
|
||||||
|
|
||||||
|
# insert articles if necessary
|
||||||
|
# Look up an incident by its content hash; the single placeholder is bound to
# the digest computed for each article in the loop below.
select_query = 'SELECT * FROM incidents WHERE hash=?'

# One placeholder per listed column (five in total). The previous single `?`
# made sqlite3 reject the statement when given the five-element tuple.
insert_query = '''
    INSERT INTO incidents (
        date, place, additional_place, description, hash
    ) VALUES (?, ?, ?, ?, ?)
'''

for article in articles:
    # Build a hash so we can more easily find out if we have an article
    # already. The hasher comes from the `hashlib` module; the former
    # `h.sha256()` referenced `h` before assignment and raised NameError.
    # NOTE(review): the fields are fed in back-to-back without a separator,
    # so different splits of the same text across fields yield the same
    # digest -- consider adding a delimiter if that can occur in practice.
    # NOTE(review): assumes all four attributes are `str` (never None) --
    # confirm against the scraper.
    h = hashlib.sha256()
    h.update(article.date.encode(encoding))
    h.update(article.place.encode(encoding))
    h.update(article.additional_place.encode(encoding))
    h.update(article.description.encode(encoding))
    digest = h.digest()

    # Bind the digest to the `hash=?` placeholder; executing the query
    # without parameters raises sqlite3.ProgrammingError.
    c.execute(select_query, (digest,))

    # fetchone() returns None when no stored incident carries this hash.
    if c.fetchone() is None:
        article_tuple = (
            article.date,
            article.place,
            article.additional_place,
            article.description,
            digest,
        )
        c.execute(insert_query, article_tuple)

# Persist the inserted rows; sqlite3 discards uncommitted changes when the
# connection is closed. Harmless if the script commits again later on.
conn.commit()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue