Start with improving the place results

2026-05-06 19:23:39 +02:00 · 2015-01-17 15:57:34 +01:00 · 2015-01-17 15:57:34 +01:00 · b85c84139b
commit b85c84139b
parent 8e6032be3a
2 changed files with 79 additions and 21 deletions
--- a/tagger.py
+++ b/tagger.py
@@ -1,33 +1,17 @@
 from nltk.tag.stanford import POSTagger
 from models import Article
+from analyze import *

 tagger = POSTagger('./stanford-postagger-full-2014-10-26/models/german-fast.tagger',
                   './stanford-postagger-full-2014-10-26/stanford-postagger-3.5.0.jar',
                    'UTF-8')

 for article in Article.select().limit(100):
-    pos = tagger.tag((article.place + " " + article.description).split())
-
-    # extract the places
-    places = []
-    is_matching = False
-    current_match = []
-    for tuple in pos:
-        if is_matching:
-            # when we're matching, the phrases we're looking for look like
-            # "Im S-Bahnhof Wedding"... the tags below mean
-            if tuple[1] in ("ART", "ADJA", "NN", "NE", "CARD"):
-                current_match.append(tuple)
-            else:
-                places.append(current_match)
-                current_match = []
-                is_matching = False
-        else:
-            # start matching when we have a preposition
-            if tuple[1] in ("APPR", "APPRART"):
-                is_matching = True
+    potential = get_potential_places(article.place, article.description)
+    places = improve_potential_places(potential)

    print(article.place)
    print(article.description)
    print()
-    print("Relevant: " + str(places))
+    print("Potential: " + str(potential))
+    print("Improved:  " + str(places))