mirror of
https://github.com/heyarne/berliner-winter.git
synced 2026-05-06 19:23:39 +02:00
Fix POS-tuple cleanup
This commit is contained in:
parent
adb0252130
commit
492ea184bc
1 changed files with 10 additions and 9 deletions
19
analyze.py
19
analyze.py
|
|
@@ -50,26 +50,27 @@ def improve_potential_places(pos_tuples):
|
||||||
for tuple_list in pos_tuples:
|
for tuple_list in pos_tuples:
|
||||||
# first, exclude empty lists
|
# first, exclude empty lists
|
||||||
if tuple_list:
|
if tuple_list:
|
||||||
cleaner = []
|
cleaner_list = []
|
||||||
|
|
||||||
index = -1
|
index = -1
|
||||||
for tuple in tuple_list:
|
for tuple in tuple_list:
|
||||||
index += 1
|
index += 1
|
||||||
|
|
||||||
# exclude articles ("the", "a"), they only introduce noise, but
|
# exclude articles ("the", "a"), they only introduce noise, but
|
||||||
# keep the wh
|
# keep the list as a whole
|
||||||
if tuple[1] is "ART":
|
if tuple[1] == "ART":
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# if we have numbers in the middle of our phrase, it's probably
|
# if we have numbers in the middle of our phrase, probably the
|
||||||
# also not useful (as opposed to Krügerstr. 22)
|
# whole list is not useful (as opposed to e.g. Krügerstr. 22)
|
||||||
if tuple[1] is "CARD" and index < len(tuple_list):
|
if tuple[1] == "CARD" and index < len(tuple_list):
|
||||||
cleaner_tuple = []
|
cleaner_list = []
|
||||||
break
|
break
|
||||||
|
|
||||||
cleaner.append(tuple)
|
cleaner_list.append(tuple)
|
||||||
|
|
||||||
better_tuples.append(cleaner)
|
if cleaner_list:
|
||||||
|
better_tuples.append(cleaner_list)
|
||||||
|
|
||||||
return better_tuples
|
return better_tuples
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue