mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Look at title and text separately
This commit is contained in:
parent
b85c84139b
commit
73dee66d16
1 changed file with 5 additions and 7 deletions
12
analyze.py
12
analyze.py
|
|
@@ -13,16 +13,14 @@ def get_potential_places(article_place, article_body):
|
|||
Returns a list of potential places as tuples with their part-of-speech tags
|
||||
for later filtering
|
||||
"""
|
||||
# remove punctuation
|
||||
full_text = punctuation_regex.sub(" ", article_place + " " + article_body)
|
||||
place_pos = tagger.tag(punctuation_regex.sub(" ", article_place).split())
|
||||
text_pos = tagger.tag(punctuation_regex.sub(" ", article_body).split())
|
||||
|
||||
pos = tagger.tag(full_text.split())
|
||||
|
||||
# extract the places
|
||||
places = []
|
||||
# extract the places out of the full text
|
||||
places = [place_pos]
|
||||
is_matching = False
|
||||
current_match = []
|
||||
for tuple in pos:
|
||||
for tuple in text_pos:
|
||||
if is_matching:
|
||||
# when we're matching, the phrases we're looking for look like
|
||||
# "Im S-Bahnhof Wedding"... the tags below mean
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue