Write insertion logic for articles

This commit is contained in:
Arne Schlüter 2014-12-08 16:19:18 +01:00
commit 55a599a47b

View file

@ -3,10 +3,13 @@
# them if needed. # them if needed.
import sqlite3 import sqlite3
import hashlib
from scraper.scraper import Scraper from scraper.scraper import Scraper
# scraper = Scraper() encoding = 'utf-8'
# articles = scraper.scrape()
scraper = Scraper()
articles = scraper.scrape()
conn = sqlite3.connect('violence.db') conn = sqlite3.connect('violence.db')
c = conn.cursor() c = conn.cursor()
@ -18,7 +21,8 @@ c.execute('''
date TEXT, date TEXT,
place TEXT, place TEXT,
additional_place TEXT, additional_place TEXT,
description TEXT description TEXT,
hash
); );
''') ''')
@ -27,6 +31,35 @@ c.execute('''
ON incidents (date); ON incidents (date);
''') ''')
# insert articles c.execute('''
# for article in articles: CREATE INDEX IF NOT EXISTS incidents_hash
# pass ON incidents (hash);
''')
# Insert scraped articles, skipping any that are already stored.
#
# Each article is identified by a SHA-256 digest over its date, place,
# additional_place and description (in that order, encoded with `encoding`).
# The digest is stored in the `hash` column so that a later run can detect
# an already-known article with a single indexed lookup.
select_query = 'SELECT * FROM incidents WHERE hash=?'
insert_query = '''
    INSERT INTO incidents (
        date, place, additional_place, description, hash
    ) VALUES (?, ?, ?, ?, ?)
'''

for article in articles:
    # Build a hash so we can more easily find out if we have an article
    # already. A fresh hasher is required per article; otherwise digests
    # would accumulate the content of all previous articles.
    h = hashlib.sha256()
    h.update(article.date.encode(encoding))
    h.update(article.place.encode(encoding))
    h.update(article.additional_place.encode(encoding))
    h.update(article.description.encode(encoding))
    digest = h.digest()

    # The placeholder must be bound to the digest; parameters are passed as
    # a one-element tuple.
    c.execute(select_query, (digest,))
    if c.fetchone() is None:
        article_tuple = (
            article.date,
            article.place,
            article.additional_place,
            article.description,
            digest,
        )
        c.execute(insert_query, article_tuple)

# sqlite3 opens a transaction implicitly before INSERT statements; without an
# explicit commit the new rows are rolled back when the connection is closed.
conn.commit()