Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, so as to inject the date and other information into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# vim swap
*.swp

# macOS
.DS_Store
Binary file removed code/__init__.pyc
Binary file not shown.
Binary file removed code/buildtree.pyc
Binary file not shown.
Binary file removed code/data.pyc
Binary file not shown.
Binary file removed code/datastructure.pyc
Binary file not shown.
Binary file removed code/docreader.pyc
Binary file not shown.
Binary file removed code/evalparser.pyc
Binary file not shown.
Binary file removed code/evaluation.pyc
Binary file not shown.
Binary file removed code/featselection.pyc
Binary file not shown.
Binary file removed code/feature.pyc
Binary file not shown.
Binary file removed code/model.pyc
Binary file not shown.
Binary file removed code/parser.pyc
Binary file not shown.
Binary file removed code/readdoc.pyc
Binary file not shown.
Binary file removed code/tree.pyc
Binary file not shown.
Binary file removed code/util.pyc
Binary file not shown.
12 changes: 5 additions & 7 deletions corenlp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,17 @@
#
# Runs Stanford CoreNLP.
# Simple uses for xml and plain text output to files are:
# ./corenlp.sh -file filename
# ./corenlp.sh -file filename -outputFormat text
# ./corenlp.sh 8g /path/to/target_dir

scriptdir=`dirname $0`
scriptdir="stanford-corenlp"

# echo java -mx3g -cp \"$scriptdir/*\" edu.stanford.nlp.pipeline.StanfordCoreNLP $*

# $1 - Java max heap size (e.g. 8g), $2 - path to the directory of files to process

PATH=$1
JAVA_XMX=$1
PATH=$2
for FNAME in $PATH/*
do
/usr/bin/java -mx2g -cp "$scriptdir/*" edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos,lemma,ner,parse -ssplit.eolonly -tokenize.whitespace true -file $FNAME
# /usr/bin/java -mx2g -cp "$scriptdir/*" edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos,lemma,ner,parse -file $FNAME
/bin/mv $(/usr/bin/basename $FNAME.xml) $PATH/
/usr/bin/java -Xmx$JAVA_XMX -cp "$scriptdir/*" edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos,lemma,ner,parse -file $FNAME -outputFormat xml -outputDirectory $PATH
done
Binary file removed discoseg/__init__.pyc
Binary file not shown.
Binary file removed discoseg/buildedu.pyc
Binary file not shown.
Binary file removed discoseg/model/__init__.pyc
Binary file not shown.
Binary file removed discoseg/model/classifier.pyc
Binary file not shown.
Binary file removed discoseg/model/datastruct.pyc
Binary file not shown.
Binary file removed discoseg/model/docreader.pyc
Binary file not shown.
Binary file removed discoseg/model/feature.pyc
Binary file not shown.
Binary file removed discoseg/model/sample.pyc
Binary file not shown.
Binary file removed discoseg/model/util.pyc
Binary file not shown.
Binary file removed preprocess/__init__.pyc
Binary file not shown.
2 changes: 2 additions & 0 deletions preprocess/xmlreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ def combineparse2sent(sent, parse):
partialparse = parselist[tidx].replace(' ','')
partialparse = partialparse.encode("ascii", "ignore")
word = tokenlist[tidx].replace(' ','')
if word == '(' or word == ')':
word = sent.tokenlist[tidx].pos
# print word, partialparse
if (word + ')') in partialparse:
tidx += 1
Expand Down
Binary file removed preprocess/xmlreader.pyc
Binary file not shown.
10 changes: 10 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
click==7.1.2
joblib==0.14.1
nltk==3.4.1
numpy==1.16.6
regex==2021.7.6
scikit-learn==0.20.4
scipy==1.2.3
singledispatch==3.6.2
six==1.16.0
tqdm==4.61.2