diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py index 765db6d..a9c2bc2 100755 --- a/corenlp/corenlp.py +++ b/corenlp/corenlp.py @@ -36,9 +36,9 @@ VERBOSE = False STATE_START, STATE_TEXT, STATE_WORDS, STATE_TREE, STATE_DEPENDENCY, STATE_COREFERENCE = 0, 1, 2, 3, 4, 5 WORD_PATTERN = re.compile('\[([^\]]+)\]') -CR_PATTERN = re.compile(r"\((\d*),(\d)*,\[(\d*),(\d*)\)\) -> \((\d*),(\d)*,\[(\d*),(\d*)\)\), that is: \"(.*)\" -> \"(.*)\"") +CR_PATTERN = re.compile(r"\((\d*),(\d)*,\[(\d*),(\d*)[\]\)]\) -> \((\d*),(\d)*,\[(\d*),(\d*)[\]\)]\), that is: \"(.*)\" -> \"(.*)\"") -DIRECTORY = "stanford-corenlp-full-2013-06-20" +DIRECTORY = "stanford-corenlp-full-2014-01-04" class bc: @@ -84,8 +84,8 @@ def init_corenlp_command(corenlp_path, memory, properties): """ # TODO: Can edit jar constants - jars = ["stanford-corenlp-3.2.0.jar", - "stanford-corenlp-3.2.0-models.jar", + jars = ["stanford-corenlp-3.3.1.jar", + "stanford-corenlp-3.3.1-models.jar", "xom.jar", "joda-time.jar", "jollyday.jar"] @@ -152,7 +152,8 @@ def parse_parser_results(text): """ results = {"sentences": []} state = STATE_START - for line in text.split("\n"): + lines = text.split("\n") + for index, line in enumerate(lines): line = line.strip() if line.startswith("Sentence #"): @@ -188,7 +189,7 @@ def parse_parser_results(text): sentence['dependencies'].append(tuple([rel, left, right])) elif state == STATE_COREFERENCE: - if "Coreference set" in line: + if 'Coreference set' in line: if 'coref' not in results: results['coref'] = [] coref_set = [] @@ -198,7 +199,6 @@ def parse_parser_results(text): src_i, src_pos, src_l, src_r = int(src_i) - 1, int(src_pos) - 1, int(src_l) - 1, int(src_r) - 1 sink_i, sink_pos, sink_l, sink_r = int(sink_i) - 1, int(sink_pos) - 1, int(sink_l) - 1, int(sink_r) - 1 coref_set.append(((src_word, src_i, src_pos, src_l, src_r), (sink_word, sink_i, sink_pos, sink_l, sink_r))) - return results