From 8ed7640388cac8ba6d897739f5c8fe24eb87cc48 Mon Sep 17 00:00:00 2001 From: Abhaya Agarwal Date: Mon, 10 Oct 2011 19:57:50 +0530 Subject: [PATCH] Fix the regex to capture all the words of the sentence. --- corenlp.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/corenlp.py b/corenlp.py index 0b8328e..b624a3e 100644 --- a/corenlp.py +++ b/corenlp.py @@ -48,9 +48,10 @@ def parse_parser_results(text): print line raise Exception("Parse error. Could not find [Text=") tmp['words'] = [] - exp = re.compile('\[([a-zA-Z0-9=. ]+)\]') + exp = re.compile('\[([^\]]+)\]') matches = exp.findall(line) for s in matches: + print s # split into attribute-value list av = re.split("=| ", s) # make [ignore,ignore,a,b,c,d] into [[a,b],[c,d]] @@ -137,7 +138,7 @@ def __init__(self): sys.exit(1) # spawn the server - self._server = pexpect.spawn("%s -Xmx3g -cp %s %s %s" % (java_path, ':'.join(jars), classname, props)) + self._server = pexpect.spawn("%s -Xmx1800m -cp %s %s %s" % (java_path, ':'.join(jars), classname, props)) print "Starting the Stanford Core NLP parser." self.state = "plays hard to get, smiles from time to time"