From e018aead0e950a457d1ae335f047a2e516013f37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arne=20Schl=C3=BCter?=
Date: Mon, 8 Dec 2014 15:40:31 +0100
Subject: [PATCH] Add database setup code

---
 get_incidents.py | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 get_incidents.py

diff --git a/get_incidents.py b/get_incidents.py
new file mode 100644
index 0000000..6dcf0a6
--- /dev/null
+++ b/get_incidents.py
@@ -0,0 +1,40 @@
+# This file contains the logic that periodically fetches all pages on the
+# Reachout Berlin homepage, checks if they're already in the database and inserts
+# them if needed.
+
+import sqlite3
+from scraper.scraper import Scraper
+
+# scraper = Scraper()
+# articles = scraper.scrape()
+
+# Open (or create) the SQLite database file in the working directory.
+conn = sqlite3.connect('violence.db')
+c = conn.cursor()
+
+# Set up the database schema. IF NOT EXISTS keeps this safe to re-run against
+# an already initialised database.
+c.execute('''
+    CREATE TABLE IF NOT EXISTS incidents (
+        incident_id INTEGER PRIMARY KEY,
+        date TEXT,
+        place TEXT,
+        additional_place TEXT,
+        description TEXT
+    );
+''')
+
+# Index the date column of the incidents table.
+c.execute('''
+    CREATE INDEX IF NOT EXISTS incidents_date
+    ON incidents (date);
+''')
+
+# insert articles
+# for article in articles:
+#     pass
+
+# Persist pending changes and release the database handle explicitly; closing
+# a connection does not commit on its own.
+conn.commit()
+conn.close()