mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Start writing some analysis code
This commit is contained in:
parent
a9a096396b
commit
06814ecae8
1 changed files with 25 additions and 0 deletions
25
analyze.py
Normal file
25
analyze.py
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
def get_district(article_headline):
    """
    Placeholder for looking up the district an article is about.

    The intended result is a geo-coded version of the district, derived from
    the article's headline. That lookup is not implemented yet: the headline
    is currently ignored and the function always returns None.
    """
    # Explicit None (rather than a bare `pass`) so the stub's current result
    # is visible to readers and callers.
    return None
|
||||||
|
|
||||||
|
def get_categories(article_body):
    """
    Give the list of categories an article falls into, based on its body.

    The body is searched case-insensitively for the word stems below; every
    stem that occurs as a substring contributes its category:

    - 'antisemit' -> 'antisemitism'
    - 'homophob'  -> 'homophobia'
    - 'sexis'     -> 'sexism'
    - 'rassis'    -> 'racism'

    Categories are returned in the order listed above. If no stem matches
    (which includes an empty or missing body) the result is ['other'], so the
    returned list is never empty.
    """
    # Ordered pairs instead of a dict: the result order is then the same on
    # every Python version, not dependent on dict iteration order.
    bad_words = (
        ('antisemit', 'antisemitism'),
        ('homophob', 'homophobia'),
        ('sexis', 'sexism'),
        ('rassis', 'racism'),
    )
    # Lower-case once rather than once per stem; a missing body (None) is
    # treated like an empty one instead of raising AttributeError.
    haystack = (article_body or '').lower()
    found_categories = [category for stem, category in bad_words
                        if stem in haystack]
    return found_categories or ['other']
|
||||||
Loading…
Add table
Add a link
Reference in a new issue