From e46f13cf5d84ccc9c95fd6d820692e46246d28c1 Mon Sep 17 00:00:00 2001 From: SONG Ke Date: Sat, 8 Jun 2013 02:38:16 +0200 Subject: [PATCH 1/2] automatically parse the script path and create the absolute path of word.data. No more need to pass the path manually --- pinyin.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pinyin.py b/pinyin.py index 938b727..2a96c19 100644 --- a/pinyin.py +++ b/pinyin.py @@ -13,11 +13,11 @@ class PinYin(object): - def __init__(self, dict_file='word.data'): + def __init__(self): self.word_dict = {} - self.dict_file = dict_file - - + dirname, filename = os.path.split(__file__) + self.dict_file = os.path.join(dirname, 'word.data') + def load_word(self): if not os.path.exists(self.dict_file): raise IOError("NotFoundFile") From fafcde94bac61d5bea7e75d6fa9e4ce049726aff Mon Sep 17 00:00:00 2001 From: SONG Ke Date: Sun, 9 Jun 2013 02:32:04 +0200 Subject: [PATCH 2/2] automatically load dictionary file. explicitly close dictionary file after loading --- README.md | 43 +++++++++++++++++++++---------------------- pinyin.py | 16 +++++++++------- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index c29fedd..21aae21 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,21 @@ -pinyin.py -========= - -汉字转拼音,With Python - - -Example: - - from pinyin import PinYin - - test = PinYin() - test.load_word() - test.hanzi2pinyin(string='钓鱼岛是中国的') - - -Out: - - test.hanzi2pinyin(string='钓鱼岛是中国的') - ['diao', 'yu', 'dao', 'shi', 'zhong', 'guo', 'de'] - test.hanzi2pinyin_split(string='钓鱼岛是中国的', split="-") - diao-yu-dao-shi-zhong-guo-de - +pinyin.py +========= + +汉字转拼音,With Python + + +Example: + + from pinyin import PinYin + + test = PinYin() + test.hanzi2pinyin(string='钓鱼岛是中国的') + + +Out: + + test.hanzi2pinyin(string='钓鱼岛是中国的') + ['diao', 'yu', 'dao', 'shi', 'zhong', 'guo', 'de'] + test.hanzi2pinyin_split(string='钓鱼岛是中国的', split="-") + diao-yu-dao-shi-zhong-guo-de + diff --git a/pinyin.py b/pinyin.py index 2a96c19..c6c0b3b 100644 --- a/pinyin.py +++ b/pinyin.py @@ -16,13 +16,16 @@ class PinYin(object): def __init__(self): self.word_dict = {} dirname, filename = os.path.split(__file__) - self.dict_file = os.path.join(dirname, 'word.data') + dict_file = os.path.join(dirname, 'word.data') + self.load_word(dict_file) - def load_word(self): - if not os.path.exists(self.dict_file): + def load_word(self, dict_file): + if not os.path.exists(dict_file): raise IOError("NotFoundFile") - - with file(self.dict_file) as f_obj: + if len(self.word_dict.keys()) > 0: + print 'Dictionary already loaded' + return + with file(dict_file) as f_obj: for f_line in f_obj.readlines(): try: line = f_line.split(' ') @@ -30,7 +33,7 @@ def load_word(self): except: line = f_line.split(' ') self.word_dict[line[0]] = line[1] - + f_obj.close() def hanzi2pinyin(self, string=""): result = [] @@ -54,7 +57,6 @@ def hanzi2pinyin_split(self, string="", split=""): if __name__ == "__main__": test = PinYin() - test.load_word() string = "钓鱼岛是中国的" print "in: %s" % string print "out: %s" % str(test.hanzi2pinyin(string=string))