mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Look at title and text separately
This commit is contained in:
parent
b85c84139b
commit
73dee66d16
1 changed file with 5 additions and 7 deletions
12
analyze.py
12
analyze.py
|
|
@@ -13,16 +13,14 @@ def get_potential_places(article_place, article_body):
|
||||||
Returns a list of potential places as tuples with their part-of-speech tags
|
Returns a list of potential places as tuples with their part-of-speech tags
|
||||||
for later filtering
|
for later filtering
|
||||||
"""
|
"""
|
||||||
# remove punctuation
|
place_pos = tagger.tag(punctuation_regex.sub(" ", article_place).split())
|
||||||
full_text = punctuation_regex.sub(" ", article_place + " " + article_body)
|
text_pos = tagger.tag(punctuation_regex.sub(" ", article_body).split())
|
||||||
|
|
||||||
pos = tagger.tag(full_text.split())
|
# extract the places out of the full text
|
||||||
|
places = [place_pos]
|
||||||
# extract the places
|
|
||||||
places = []
|
|
||||||
is_matching = False
|
is_matching = False
|
||||||
current_match = []
|
current_match = []
|
||||||
for tuple in pos:
|
for tuple in text_pos:
|
||||||
if is_matching:
|
if is_matching:
|
||||||
# when we're matching, the phrases we're looking for look like
|
# when we're matching, the phrases we're looking for look like
|
||||||
# "Im S-Bahnhof Wedding"... the tags below mean
|
# "Im S-Bahnhof Wedding"... the tags below mean
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue