Fix POS-tuple cleanup

This commit is contained in:
Arne Schlüter 2015-01-17 17:36:14 +01:00
commit 492ea184bc

View file

@ -50,26 +50,27 @@ def improve_potential_places(pos_tuples):
for tuple_list in pos_tuples: for tuple_list in pos_tuples:
# first, exclude empty lists # first, exclude empty lists
if tuple_list: if tuple_list:
cleaner = [] cleaner_list = []
index = -1 index = -1
for tuple in tuple_list: for tuple in tuple_list:
index += 1 index += 1
# exclude articles ("the", "a"), they only introduce noise, but # exclude articles ("the", "a"), they only introduce noise, but
# keep the wh # keep the list as a whole
if tuple[1] is "ART": if tuple[1] == "ART":
continue continue
# if we have numbers in the middle of our phrase, it's probably # if we have numbers in the middle of our phrase, probably the
# also not useful (as opposed to Krügerstr. 22) # whole list is not useful (as opposed to e.g. Krügerstr. 22)
if tuple[1] is "CARD" and index < len(tuple_list): if tuple[1] == "CARD" and index < len(tuple_list):
cleaner_tuple = [] cleaner_list = []
break break
cleaner.append(tuple) cleaner_list.append(tuple)
better_tuples.append(cleaner) if cleaner_list:
better_tuples.append(cleaner_list)
return better_tuples return better_tuples