Look at title and text separately

This commit is contained in:
Arne Schlüter 2015-01-17 16:27:53 +01:00
commit 73dee66d16

View file

@@ -13,16 +13,14 @@ def get_potential_places(article_place, article_body):
Returns a list of potential places as tuples with their part-of-speech tags Returns a list of potential places as tuples with their part-of-speech tags
for later filtering for later filtering
""" """
# remove punctuation place_pos = tagger.tag(punctuation_regex.sub(" ", article_place).split())
full_text = punctuation_regex.sub(" ", article_place + " " + article_body) text_pos = tagger.tag(punctuation_regex.sub(" ", article_body).split())
pos = tagger.tag(full_text.split()) # extract the places out of the full text
places = [place_pos]
# extract the places
places = []
is_matching = False is_matching = False
current_match = [] current_match = []
for tuple in pos: for tuple in text_pos:
if is_matching: if is_matching:
# when we're matching, the phrases we're looking for look like # when we're matching, the phrases we're looking for look like
# "Im S-Bahnhof Wedding"... the tags below mean # "Im S-Bahnhof Wedding"... the tags below mean