PyThaiNLP · wannaphong · Oct 14, 2018 · Oct 14, 2018
diff --git a/.editorconfig b/.editorconfig
diff --git a/docs/วิธีติดตั้ง PyThaiNLP เวชั่นล่าสุดจาก GitHub.md b/docs/วิธีติดตั้ง PyThaiNLP เวชั่นล่าสุดจาก GitHub.md
diff --git a/examples/collation.py b/examples/collation.py
@@ -1,3 +1,5 @@
 # -*- coding: utf-8 -*-
+
 from pythainlp.collation import collation
-print(collation(['ไก่','ไข่','ก','ฮา'])) # ['ก', 'ไก่', 'ไข่', 'ฮา']
+
+print(collation(["ไก่", "ไข่", "ก", "ฮา"]))  # ['ก', 'ไก่', 'ไข่', 'ฮา']
diff --git a/examples/etcc.py b/examples/etcc.py
@@ -1,3 +1,5 @@
 # -*- coding: utf-8 -*-
+
 from pythainlp.tokenize import etcc
-print(etcc.etcc('คืนความสุข')) # /คืน/ความสุข
+
+print(etcc.etcc("คืนความสุข"))  # /คืน/ความสุข
diff --git a/examples/normalize.py b/examples/normalize.py
@@ -1,3 +1,5 @@
 # -*- coding: utf-8 -*-
+
 from pythainlp.util import normalize
-print(normalize("เเปลก")=="แปลก") # เ เ ป ล ก กับ แปลก
+
+print(normalize("เเปลก") == "แปลก")  # เ เ ป ล ก กับ แปลก
diff --git a/examples/romanization.py b/examples/romanization.py
@@ -1,3 +1,5 @@
 # -*- coding: utf-8 -*-
+
 from pythainlp.romanization import romanization
-print(romanization("แมว"))
+
+print(romanization("แมว"))
diff --git a/examples/segment.py b/examples/segment.py
diff --git a/examples/soundex.py b/examples/soundex.py
@@ -1,4 +1,7 @@
 # -*- coding: utf-8 -*-
-from pythainlp.soundex import LK82,Udom83
-print(LK82('รถ')==LK82('รด'))
-print(Udom83('วรร')==Udom83('วัน'))
+
+from pythainlp.soundex import LK82, Udom83
+
+print(LK82("รถ") == LK82("รด"))
+
+print(Udom83("วรร") == Udom83("วัน"))
diff --git a/examples/spell.py b/examples/spell.py
@@ -1,4 +1,8 @@
 # -*- coding: utf-8 -*-
-from pythainlp.spell import *
-a=spell("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม']
+
+from pythainlp.spell import spell
+
+a = spell("สี่เหลียม")
+print(a)  # ['สี่เหลี่ยม']
+
+# a = spell("สี่เหลียม", engine="hunspell")  # available on some Linux systems
diff --git a/examples/tcc.py b/examples/tcc.py
@@ -1,3 +1,10 @@
 # -*- coding: utf-8 -*-
+
 from pythainlp.tokenize import tcc
-print(tcc.tcc('ประเทศไทย')) # ป/ระ/เท/ศ/ไท/ย
+
+print(tcc.tcc("ประเทศไทย"))  # ป/ระ/เท/ศ/ไท/ย
+
+print(tcc.tcc_pos("ประเทศไทย"))  # {1, 3, 5, 6, 8, 9}
+
+for ch in tcc.tcc_gen("ประเทศไทย"):  # ป-ระ-เท-ศ-ไท-ย-
+    print(ch, end='-')
diff --git a/examples/tokenize.py b/examples/tokenize.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+from pythainlp.tokenize import sent_tokenize, word_tokenize
+
+text = "ฉันรักภาษาไทย เพราะฉันใช้ภาษาไทย "
+print(text)
+
+print(sent_tokenize(text))
+# ['ฉันรักภาษาไทย', 'เพราะฉันใช้ภาษาไทย', '']
+
+print(word_tokenize(text))
+# ['ฉัน', 'รัก', 'ภาษาไทย', ' ', 'เพราะ', 'ฉัน', 'ใช้', 'ภาษาไทย', ' ']
+
+print(word_tokenize(text, whitespaces=False))
+# ['ฉัน', 'รัก', 'ภาษาไทย', 'เพราะ', 'ฉัน', 'ใช้', 'ภาษาไทย']
+
+text2 = "กฎหมายแรงงาน"
+print(text2)
+
+print(word_tokenize(text2))
+# ['กฎหมายแรงงาน']
+
+print(word_tokenize(text2, engine="longest-matching"))
+# ['กฎหมาย', 'แรงงาน']
diff --git a/examples/whitespaceTokenizer.py b/examples/whitespaceTokenizer.py
diff --git a/pythainlp/corpus/.ipynb_checkpoints/__init__-checkpoint.py b/pythainlp/corpus/.ipynb_checkpoints/__init__-checkpoint.py
diff --git a/pythainlp/sentiment/.ipynb_checkpoints/__init__-checkpoint.py b/pythainlp/sentiment/.ipynb_checkpoints/__init__-checkpoint.py
diff --git a/pythainlp/sentiment/.ipynb_checkpoints/build_pythainlp-checkpoint.tool b/pythainlp/sentiment/.ipynb_checkpoints/build_pythainlp-checkpoint.tool
diff --git a/pythainlp/sentiment/.ipynb_checkpoints/ulmfit_sent-checkpoint.py b/pythainlp/sentiment/.ipynb_checkpoints/ulmfit_sent-checkpoint.py