diff --git a/README.md b/README.md index f2f4639..77108b7 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,6 @@ [简体中文](https://github.com/go-ego/gpy/blob/master/README_zh.md) - ## Installation ``` @@ -30,20 +29,22 @@ zhong guo hua ``` ## Usage +- If you only want to convert some simple Chinese words to pinyin, you can use the `github.com/go-ego/gpy` package. +- If you want to convert Chinese sentences to pinyin with more accurate results, you can use the `github.com/go-ego/gpy/phrase` package. + +### gpy example +Use the `github.com/go-ego/gpy` package to convert Chinese words to pinyin. ```go package main import ( "fmt" - - "github.com/go-ego/gse" - "github.com/go-ego/gpy" - "github.com/go-ego/gpy/phrase" ) var test = `西雅图都会区; 长夜漫漫, winter is coming!` +// if you just want to get the pinyin of a word without segmenting it, you can use the following code func main() { args := gpy.Args{ Style: gpy.Tone, @@ -54,20 +55,6 @@ func main() { s := gpy.ToString(py) fmt.Println("gpy string:", s) - - phrase.LoadGseDict() - go func() { - fmt.Println("gpy phrase1:", phrase.Paragraph(test)) - }() - fmt.Println("gpy phrase2:", phrase.Paragraph(test)) - - seg := gse.New("zh, dict.txt") - // phrase.DictAdd["都会区"] = "dū huì qū" - phrase.AddDict("都会区", "dū huì qū") - - fmt.Println("gpy phrase:", phrase.Paragraph(test, seg)) - fmt.Println("pinyin: ", phrase.Pinyin(test)) - fmt.Println("Initial: ", phrase.Initial("都会区")) } ``` @@ -118,6 +105,65 @@ func main() { } ``` +### gpy/phrase example +Use the `github.com/go-ego/gpy/phrase` package to convert Chinese sentences to pinyin. 
+- Based on word segmentation: more accurate sentence pinyin conversion +- Support for custom segmentation dictionaries +- Support for custom pinyin of words +```go +package main + +import ( + "fmt" + "github.com/go-ego/gpy/phrase" + "github.com/go-ego/gse" +) + +var test = `西雅图都会区; 长夜漫漫, winter is coming!` + +func main() { + // use default embed segmentation dict + phraseExampleWithEmbedDict() + // use custom file segmentation dict + phraseExampleWithFileDict1() + // use custom file segmentation dict + phraseExampleWithFileDict2() +} + +func phraseExampleWithEmbedDict() { + // load default gse dict + _ = phrase.LoadGseDictEmbed("zh") + // convert a Chinese string paragraph to pinyin + fmt.Println("gpy phrase:", phrase.Paragraph(test)) + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū huì qū" + phrase.AddDict("都会区", "dū huì qū") + // convert a Chinese string paragraph to pinyin with user's dict + fmt.Println("gpy phrase:", phrase.Paragraph(test)) +} + +// if you want to customize the segmentation dict, you can use the following code +func phraseExampleWithFileDict1() { + fmt.Println("gpy phrase 1:", phrase.Paragraph(test)) + // load gse dict from file + seg, _ := gse.New("zh, dict.txt") + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū huì qū" + phrase.AddDict("都会区", "dū huì qū") + fmt.Println("gpy phrase 2:", phrase.Paragraph(test, seg)) +} + +// if you want to customize the segmentation dict, you can also use the following code +func phraseExampleWithFileDict2() { + fmt.Println("gpy phrase 1:", phrase.Paragraph(test)) + phrase.LoadGseDict() + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū huì qū" + phrase.AddDict("都会区", "dū huì qū") + fmt.Println("gpy phrase 2:", phrase.Paragraph(test)) +} +``` + ## Related Projects @@ -125,7 +171,6 @@ func main() { * 
[mozillazg/python-pinyin](https://github.com/mozillazg/python-pinyin): 汉语拼音转换工具 Python 版。 * [mozillazg/rust-pinyin](https://github.com/mozillazg/rust-pinyin): 汉语拼音转换工具 Rust 版。 - ## License Under the MIT License, base on [go-pinyin](https://github.com/mozillazg/go-pinyin). diff --git a/README_zh.md b/README_zh.md index fd6dfa6..e4d1f4e 100755 --- a/README_zh.md +++ b/README_zh.md @@ -28,6 +28,34 @@ zhong guo hua ``` ## Usage +- 简单汉字转换成拼音,可以使用 `github.com/go-ego/gpy` 。 +- 中文词句转换成拼音,结果更准确,可以使用 `github.com/go-ego/gpy/phrase` 。 + +### gpy 使用示例 +使用 `github.com/go-ego/gpy` 将中文单词转换为拼音 +```go +package main + +import ( + "fmt" + "github.com/go-ego/gpy" +) + +var test = `西雅图都会区; 长夜漫漫, winter is coming!` + +// if you just want to get the pinyin of a word without segmenting it, you can use the following code +func main() { + args := gpy.Args{ + Style: gpy.Tone, + Heteronym: true} + + py := gpy.Pinyin(test, args) + fmt.Println("gpy:", py) + + s := gpy.ToString(py) + fmt.Println("gpy string:", s) +} +``` ```go package main @@ -76,6 +104,65 @@ func main() { } ``` +### gpy/phrase 使用示例 +使用 `github.com/go-ego/gpy/phrase` 将中文句子转换为拼音. 
+- 基于分词 - 更准确的句子拼音转换 +- 支持自定义分词词典 +- 支持单词的自定义拼音 +```go +package main + +import ( + "fmt" + "github.com/go-ego/gpy/phrase" + "github.com/go-ego/gse" +) + +var test = `西雅图都会区; 长夜漫漫, winter is coming!` + +func main() { + // use default embed segmentation dict + phraseExampleWithEmbedDict() + // use custom file segmentation dict + phraseExampleWithFileDict1() + // use custom file segmentation dict + phraseExampleWithFileDict2() +} + +func phraseExampleWithEmbedDict() { + // load default gse dict + _ = phrase.LoadGseDictEmbed("zh") + // convert a Chinese string paragraph to pinyin + fmt.Println("gpy phrase:", phrase.Paragraph(test)) + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū huì qū" + phrase.AddDict("都会区", "dū huì qū") + // convert a Chinese string paragraph to pinyin with user's dict + fmt.Println("gpy phrase:", phrase.Paragraph(test)) +} + +// if you want to customize the segmentation dict, you can use the following code +func phraseExampleWithFileDict1() { + fmt.Println("gpy phrase 1:", phrase.Paragraph(test)) + // load gse dict from file + seg, _ := gse.New("zh, dict.txt") + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū huì qū" + phrase.AddDict("都会区", "dū huì qū") + fmt.Println("gpy phrase 2:", phrase.Paragraph(test, seg)) +} + +// if you want to customize the segmentation dict, you can also use the following code +func phraseExampleWithFileDict2() { + fmt.Println("gpy phrase 1:", phrase.Paragraph(test)) + phrase.LoadGseDict() + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū huì qū" + phrase.AddDict("都会区", "dū huì qū") + fmt.Println("gpy phrase 2:", phrase.Paragraph(test)) +} +``` + ## Related Projects diff --git a/examples/example_gpy/main.go b/examples/example_gpy/main.go new file mode 100644 index 0000000..cc28fdb --- /dev/null +++ b/examples/example_gpy/main.go 
@@ -0,0 +1,21 @@ +package main + +import ( + "fmt" + "github.com/go-ego/gpy" +) + +var test = `西雅图都会区; 长夜漫漫, winter is coming!` + +// if you just want to get the pinyin of a word without segmenting it, you can use the following code +func main() { + args := gpy.Args{ + Style: gpy.Tone, + Heteronym: true} + + py := gpy.Pinyin(test, args) + fmt.Println("gpy:", py) + + s := gpy.ToString(py) + fmt.Println("gpy string:", s) +} diff --git a/examples/dict.txt b/examples/example_phrase/dict.txt similarity index 100% rename from examples/dict.txt rename to examples/example_phrase/dict.txt diff --git a/examples/example_phrase/main.go b/examples/example_phrase/main.go new file mode 100644 index 0000000..99b9223 --- /dev/null +++ b/examples/example_phrase/main.go @@ -0,0 +1,51 @@ +package main + +import ( + "fmt" + "github.com/go-ego/gpy/phrase" + "github.com/go-ego/gse" +) + +var test = `西雅图都会区; 长夜漫漫, winter is coming!` + +func main() { + // use default embed segmentation dict + phraseExampleWithEmbedDict() + // use custom file segmentation dict + phraseExampleWithFileDict1() + // use custom file segmentation dict + phraseExampleWithFileDict2() +} + +func phraseExampleWithEmbedDict() { + // load default gse dict + _ = phrase.LoadGseDictEmbed("zh") + // convert a Chinese string paragraph to pinyin + fmt.Println("gpy phrase:", phrase.Paragraph(test)) + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū huì qū" + phrase.AddDict("都会区", "dū huì qū") + // convert a Chinese string paragraph to pinyin with user's dict + fmt.Println("gpy phrase:", phrase.Paragraph(test)) +} + +// if you want to customize the segmentation dict, you can use the following code +func phraseExampleWithFileDict1() { + fmt.Println("gpy phrase 1:", phrase.Paragraph(test)) + // load gse dict from file + seg, _ := gse.New("zh, dict.txt") + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū 
huì qū" + phrase.AddDict("都会区", "dū huì qū") + fmt.Println("gpy phrase 2:", phrase.Paragraph(test, seg)) +} + +// if you want to customize the segmentation dict, you can also use the following code +func phraseExampleWithFileDict2() { + fmt.Println("gpy phrase 1:", phrase.Paragraph(test)) + phrase.LoadGseDict() + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū huì qū" + phrase.AddDict("都会区", "dū huì qū") + fmt.Println("gpy phrase 2:", phrase.Paragraph(test)) +} diff --git a/examples/main.go b/examples/main.go deleted file mode 100644 index 08ec500..0000000 --- a/examples/main.go +++ /dev/null @@ -1,34 +0,0 @@ -package main - -import ( - "fmt" - - "github.com/go-ego/gse" - - "github.com/go-ego/gpy" - "github.com/go-ego/gpy/phrase" -) - -var test = `西雅图都会区; 长夜漫漫, winter is coming!` - -func main() { - args := gpy.Args{ - Style: gpy.Tone, - Heteronym: true} - - py := gpy.Pinyin(test, args) - fmt.Println("gpy:", py) - - s := gpy.ToString(py) - fmt.Println("gpy string:", s) - - phrase.LoadGseDict() - go func() { - fmt.Println("gpy phrase1:", phrase.Paragraph(test)) - }() - fmt.Println("gpy phrase2:", phrase.Paragraph(test)) - - seg, _ := gse.New("zh, dict.txt") - phrase.DictAdd["都会区"] = "dū huì qū" - fmt.Println("gpy phrase:", phrase.Paragraph(test, seg)) -} diff --git a/phrase/README.md b/phrase/README.md index 5506ba7..d77d812 100644 --- a/phrase/README.md +++ b/phrase/README.md @@ -5,32 +5,51 @@ package main import ( "fmt" - - "github.com/go-ego/gse" - "github.com/go-ego/gpy" "github.com/go-ego/gpy/phrase" + "github.com/go-ego/gse" ) -var test = `那里湖面总是澄清, 那里空气充满宁静; 西雅图都会区` +var test = `西雅图都会区; 长夜漫漫, winter is coming!` func main() { - args := gpy.Args{ - Style: gpy.Tone, - Heteronym: true} + // use default embed segmentation dict + phraseExampleWithEmbedDict() + // use custom file segmentation dict + phraseExampleWithFileDict1() + // use custom file segmentation dict + phraseExampleWithFileDict2() +} - 
py := gpy.Pinyin(test, args) - fmt.Println("gpy:", py) +func phraseExampleWithEmbedDict() { + // load default gse dict + _ = phrase.LoadGseDictEmbed("zh") + // convert a Chinese string paragraph to pinyin + fmt.Println("gpy phrase:", phrase.Paragraph(test)) + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū huì qū" + phrase.AddDict("都会区", "dū huì qū") + // convert a Chinese string paragraph to pinyin with user's dict + fmt.Println("gpy phrase:", phrase.Paragraph(test)) +} - s := gpy.ToString(py) - fmt.Println("gpy string:", s) +// if you want to customize the segmentation dict, you can use the following code +func phraseExampleWithFileDict1() { + fmt.Println("gpy phrase 1:", phrase.Paragraph(test)) + // load gse dict from file + seg, _ := gse.New("zh, dict.txt") + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū huì qū" + phrase.AddDict("都会区", "dū huì qū") + fmt.Println("gpy phrase 2:", phrase.Paragraph(test, seg)) +} +// if you want to customize the segmentation dict, you can also use the following code +func phraseExampleWithFileDict2() { + fmt.Println("gpy phrase 1:", phrase.Paragraph(test)) phrase.LoadGseDict() - - seg := gse.New("zh, dict.txt") + // if you want to customize the pinyin of a word, you can use the following code + //phrase.DictAdd["都会区"] = "dū huì qū" phrase.AddDict("都会区", "dū huì qū") - - fmt.Println("gpy phrase:", phrase.Paragraph(test, seg)) - fmt.Println("pinyin: ", phrase.Pinyin(test)) - fmt.Println("Initial: ", phrase.Initial("都会区")) + fmt.Println("gpy phrase 2:", phrase.Paragraph(test)) } ``` diff --git a/phrase/paragraph_test.go b/phrase/paragraph_test.go index ed9c632..f20392f 100644 --- a/phrase/paragraph_test.go +++ b/phrase/paragraph_test.go @@ -33,7 +33,7 @@ func TestParagraph(t *testing.T) { "北京八达岭长城": "bei jing ba da ling chang cheng", } - seg, err := gse.New("zh, ../examples/dict.txt") + seg, err := 
gse.New("zh, ../examples/example_phrase/dict.txt") tt.Nil(t, err) for source, expect := range expects { actual := Paragraph(source, seg) @@ -46,7 +46,7 @@ func TestParagraph(t *testing.T) { } func TestPinyin(t *testing.T) { - seg, _ := gse.New("zh, ../examples/dict.txt") + seg, _ := gse.New("zh, ../examples/example_phrase/dict.txt") WithGse(seg) text := "西雅图都会区, 西雅图太空针"