# Verbosity flag, off by default; the code that reads it is outside this view.
VERBOSE = False

# States of the line-oriented state machine used by parse_parser_results().
STATE_START, STATE_TEXT, STATE_WORDS, STATE_TREE, STATE_DEPENDENCY, STATE_COREFERENCE = 0, 1, 2, 3, 4, 5

# Matches one square-bracketed group and captures the text between the
# brackets. Written as a raw string so that `\[` and `\]` reach the regex
# engine untouched instead of being parsed as (invalid) string escapes.
WORD_PATTERN = re.compile(r'\[([^\]]+)\]')
39- CR_PATTERN = re .compile (r"\((\d*),(\d)*,\[(\d*),(\d*)\) \) -> \((\d*),(\d)*,\[(\d*),(\d*)\) \), that is: \"(.*)\" -> \"(.*)\"" )
39+ CR_PATTERN = re .compile (r"\((\d*),(\d)*,\[(\d*),(\d*)[\]\)] \) -> \((\d*),(\d)*,\[(\d*),(\d*)[\]\)] \), that is: \"(.*)\" -> \"(.*)\"" )
4040
41- DIRECTORY = "stanford-corenlp-full-2013-06-20 "
41+ DIRECTORY = "stanford-corenlp-full-2014-01-04 "
4242
4343
4444class bc :
@@ -84,8 +84,8 @@ def init_corenlp_command(corenlp_path, memory, properties):
8484 """
8585
8686 # TODO: make the jar file names (and their version numbers) configurable instead of hard-coding them
87- jars = ["stanford-corenlp-3.2.0 .jar" ,
88- "stanford-corenlp-3.2.0 -models.jar" ,
87+ jars = ["stanford-corenlp-3.3.1 .jar" ,
88+ "stanford-corenlp-3.3.1 -models.jar" ,
8989 "xom.jar" ,
9090 "joda-time.jar" ,
9191 "jollyday.jar" ]
@@ -152,7 +152,8 @@ def parse_parser_results(text):
152152 """
153153 results = {"sentences" : []}
154154 state = STATE_START
155- for line in text .split ("\n " ):
155+ lines = text .split ("\n " )
156+ for index , line in enumerate (lines ):
156157 line = line .strip ()
157158
158159 if line .startswith ("Sentence #" ):
@@ -188,7 +189,7 @@ def parse_parser_results(text):
188189 sentence ['dependencies' ].append (tuple ([rel , left , right ]))
189190
190191 elif state == STATE_COREFERENCE :
191- if " Coreference set" in line :
192+ if ' Coreference set' in line :
192193 if 'coref' not in results :
193194 results ['coref' ] = []
194195 coref_set = []
@@ -198,7 +199,6 @@ def parse_parser_results(text):
198199 src_i , src_pos , src_l , src_r = int (src_i ) - 1 , int (src_pos ) - 1 , int (src_l ) - 1 , int (src_r ) - 1
199200 sink_i , sink_pos , sink_l , sink_r = int (sink_i ) - 1 , int (sink_pos ) - 1 , int (sink_l ) - 1 , int (sink_r ) - 1
200201 coref_set .append (((src_word , src_i , src_pos , src_l , src_r ), (sink_word , sink_i , sink_pos , sink_l , sink_r )))
201-
202202 return results
203203
204204
0 commit comments