From 492ea184bc5c2fcc5959b761bf2768877ea5e178 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arne=20Schl=C3=BCter?= Date: Sat, 17 Jan 2015 17:36:14 +0100 Subject: [PATCH] Fix POS-tuple cleanup --- analyze.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/analyze.py b/analyze.py index 88fdde6..4691a5d 100644 --- a/analyze.py +++ b/analyze.py @@ -50,26 +50,27 @@ def improve_potential_places(pos_tuples): for tuple_list in pos_tuples: # first, exluce empty lists if tuple_list: - cleaner = [] + cleaner_list = [] index = -1 for tuple in tuple_list: index += 1 # exclude articles ("the", "a"), they only introduce noise, but - # keep the wh - if tuple[1] is "ART": + # keep the list as a whole + if tuple[1] == "ART": continue - # if we have numbers in the middle of our phrase, it's probably - # also not useful (as opposed to Krügerstr. 22) - if tuple[1] is "CARD" and index < len(tuple_list): - cleaner_tuple = [] + # if we have numbers in the middle of our phrase, probably the + # whole list is not useful (as opposed to e.g. Krügerstr. 22) + if tuple[1] == "CARD" and index < len(tuple_list): + cleaner_list = [] break - cleaner.append(tuple) + cleaner_list.append(tuple) - better_tuples.append(cleaner) + if cleaner_list: + better_tuples.append(cleaner_list) return better_tuples