Skip to content

Commit e671bec

Browse files
committed
Merge pull request #1 from dcalacci/coref
Fixes coreference results, updates jar directory
2 parents 73e6b1d + 8604812 commit e671bec

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

corenlp/corenlp.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -36,9 +36,9 @@
3636
VERBOSE = False
3737
STATE_START, STATE_TEXT, STATE_WORDS, STATE_TREE, STATE_DEPENDENCY, STATE_COREFERENCE = 0, 1, 2, 3, 4, 5
3838
WORD_PATTERN = re.compile('\[([^\]]+)\]')
39-
CR_PATTERN = re.compile(r"\((\d*),(\d)*,\[(\d*),(\d*)\)\) -> \((\d*),(\d)*,\[(\d*),(\d*)\)\), that is: \"(.*)\" -> \"(.*)\"")
39+
CR_PATTERN = re.compile(r"\((\d*),(\d)*,\[(\d*),(\d*)[\]\)]\) -> \((\d*),(\d)*,\[(\d*),(\d*)[\]\)]\), that is: \"(.*)\" -> \"(.*)\"")
4040

41-
DIRECTORY = "stanford-corenlp-full-2013-06-20"
41+
DIRECTORY = "stanford-corenlp-full-2014-01-04"
4242

4343

4444
class bc:
@@ -84,8 +84,8 @@ def init_corenlp_command(corenlp_path, memory, properties):
8484
"""
8585

8686
# TODO: Can edit jar constants
87-
jars = ["stanford-corenlp-3.2.0.jar",
88-
"stanford-corenlp-3.2.0-models.jar",
87+
jars = ["stanford-corenlp-3.3.1.jar",
88+
"stanford-corenlp-3.3.1-models.jar",
8989
"xom.jar",
9090
"joda-time.jar",
9191
"jollyday.jar"]
@@ -152,7 +152,8 @@ def parse_parser_results(text):
152152
"""
153153
results = {"sentences": []}
154154
state = STATE_START
155-
for line in text.split("\n"):
155+
lines = text.split("\n")
156+
for index, line in enumerate(lines):
156157
line = line.strip()
157158

158159
if line.startswith("Sentence #"):
@@ -188,7 +189,7 @@ def parse_parser_results(text):
188189
sentence['dependencies'].append(tuple([rel, left, right]))
189190

190191
elif state == STATE_COREFERENCE:
191-
if "Coreference set" in line:
192+
if 'Coreference set' in line:
192193
if 'coref' not in results:
193194
results['coref'] = []
194195
coref_set = []
@@ -198,7 +199,6 @@ def parse_parser_results(text):
198199
src_i, src_pos, src_l, src_r = int(src_i) - 1, int(src_pos) - 1, int(src_l) - 1, int(src_r) - 1
199200
sink_i, sink_pos, sink_l, sink_r = int(sink_i) - 1, int(sink_pos) - 1, int(sink_l) - 1, int(sink_r) - 1
200201
coref_set.append(((src_word, src_i, src_pos, src_l, src_r), (sink_word, sink_i, sink_pos, sink_l, sink_r)))
201-
202202
return results
203203

204204

0 commit comments

Comments
 (0)