@@ -14,11 +14,8 @@ namespace Flow.Launcher.Infrastructure
1414{
1515 public class PinyinAlphabet : IAlphabet
1616 {
17- private ConcurrentDictionary < string , ( string translation , TranslationMapping map ) > _pinyinCache =
18- new ( ) ;
19-
17+ private readonly ConcurrentDictionary < string , ( string translation , TranslationMapping map ) > _pinyinCache = new ( ) ;
2018 private readonly Settings _settings ;
21-
2219 private ReadOnlyDictionary < string , string > currentDoublePinyinTable ;
2320
2421 public PinyinAlphabet ( )
@@ -44,105 +41,142 @@ public void Reload()
4441
/// <summary>
/// Deserializes the double pinyin tables from <paramref name="jsonStream"/> and makes the
/// table belonging to the schema selected in settings the active lookup table.
/// </summary>
/// <param name="jsonStream">JSON document keyed by schema name; each value is a string-to-string table.</param>
/// <exception cref="InvalidOperationException">The document deserializes to <c>null</c>.</exception>
/// <exception cref="ArgumentException">
/// The document has no entry for the selected schema, or that entry is <c>null</c>.
/// </exception>
private void CreateDoublePinyinTableFromStream(Stream jsonStream)
{
    var table = JsonSerializer.Deserialize<Dictionary<string, Dictionary<string, string>>>(jsonStream) ??
                throw new InvalidOperationException("Failed to deserialize double pinyin table: result is null");

    var schemaKey = _settings.DoublePinyinSchema.ToString();

    // A schema entry written as JSON null deserializes to a null inner dictionary. Reject it
    // here with the descriptive message instead of letting the ReadOnlyDictionary constructor
    // fail with a generic ArgumentNullException.
    if (!table.TryGetValue(schemaKey, out var schemaDict) || schemaDict is null)
    {
        throw new ArgumentException($"DoublePinyinSchema '{schemaKey}' is invalid or double pinyin table is broken.");
    }

    currentDoublePinyinTable = new ReadOnlyDictionary<string, string>(schemaDict);
}
5555
/// <summary>
/// Sets the active double pinyin table. When double pinyin is enabled the table is read from
/// <c>Resources/double_pinyin.json</c> under the application base directory; when it is
/// disabled, or when loading fails for any reason, an empty table is installed instead.
/// Load failures are logged and never propagate to the caller.
/// </summary>
private void LoadDoublePinyinTable()
{
    if (_settings.UseDoublePinyin)
    {
        var tablePath = Path.Combine(AppContext.BaseDirectory, "Resources", "double_pinyin.json");
        try
        {
            using var fs = File.OpenRead(tablePath);
            CreateDoublePinyinTableFromStream(fs);
            return;
        }
        catch (System.Exception e)
        {
            // Only the log wording depends on the failure type; the fallback is the same for all.
            var message = e switch
            {
                FileNotFoundException => $"Double pinyin table file not found: {tablePath}",
                DirectoryNotFoundException => $"Directory not found for double pinyin table: {tablePath}",
                UnauthorizedAccessException => $"Access denied to double pinyin table: {tablePath}",
                _ => $"Failed to load double pinyin table from file: {tablePath}"
            };
            Log.Exception(nameof(PinyinAlphabet), message, e);
        }
    }

    // Reached when double pinyin is disabled or the table could not be loaded.
    currentDoublePinyinTable = new ReadOnlyDictionary<string, string>(new Dictionary<string, string>());
}
7791
/// <summary>
/// Tells whether pinyin translation should be used for <paramref name="stringToTranslate"/>.
/// </summary>
/// <param name="stringToTranslate">Text to inspect for Chinese characters.</param>
/// <returns>
/// <c>true</c> only when pinyin is enabled in settings and the text contains no Chinese
/// characters; otherwise <c>false</c>.
/// </returns>
public bool ShouldTranslate(string stringToTranslate)
{
    if (!_settings.ShouldUsePinyin)
    {
        return false;
    }

    // Text that already contains Chinese characters is not translated to pinyin.
    return !ContainsChinese(stringToTranslate);
}
8398
/// <summary>
/// Translates <paramref name="content"/> to pinyin, reusing a cached result when one exists.
/// </summary>
/// <param name="content">Text to translate.</param>
/// <returns>
/// The translated text with its index mapping, or <paramref name="content"/> unchanged with a
/// <c>null</c> mapping when pinyin is disabled or the text contains no Chinese characters.
/// </returns>
public (string translation, TranslationMapping map) Translate(string content)
{
    var needsPinyin = _settings.ShouldUsePinyin && ContainsChinese(content);
    if (!needsPinyin)
    {
        return (content, null);
    }

    if (_pinyinCache.TryGetValue(content, out var cached))
    {
        return cached;
    }

    // Cache miss: build the translation, which also stores it in the cache.
    return BuildCacheFromContent(content);
}
93106
/// <summary>
/// Builds the pinyin translation and index mapping for <paramref name="content"/>, stores the
/// pair in the cache under <paramref name="content"/>, and returns it.
/// </summary>
/// <param name="content">Text to translate.</param>
/// <returns>The translated text together with the mapping built while producing it.</returns>
private (string translation, TranslationMapping map) BuildCacheFromContent(string content)
{
    var resultList = WordsHelper.GetPinyinList(content);

    // Read the setting once so every character of this translation uses the same mode.
    var useDoublePinyin = _settings.UseDoublePinyin;

    // Rough per-character estimate (syllable plus separating space), scaled by input length.
    // The previous capacity was a flat 3 or 4 characters regardless of input size.
    var resultBuilder = new StringBuilder(content.Length * (useDoublePinyin ? 3 : 4));
    var map = new TranslationMapping();

    var previousIsChinese = false;

    // NOTE(review): content is indexed with the resultList index, which assumes GetPinyinList
    // returns one entry per char of content - confirm against WordsHelper.
    for (var i = 0; i < resultList.Length; i++)
    {
        if (IsChineseCharacter(content[i]))
        {
            var translated = useDoublePinyin ? ToDoublePinyin(resultList[i]) : resultList[i];

            // Every Chinese character except one at index 0 is preceded by a space.
            if (i > 0)
            {
                resultBuilder.Append(' ');
            }

            map.AddNewIndex(resultBuilder.Length, translated.Length);
            resultBuilder.Append(translated);
            previousIsChinese = true;
        }
        else
        {
            // Insert one space when switching from Chinese to non-Chinese characters.
            if (previousIsChinese)
            {
                previousIsChinese = false;
                resultBuilder.Append(' ');
            }

            map.AddNewIndex(resultBuilder.Length, resultList[i].Length);
            resultBuilder.Append(resultList[i]);
        }
    }

    map.EndConstruct();

    var translation = resultBuilder.ToString();
    var result = (translation, map);

    return _pinyinCache[content] = result;
}
134151
/// <summary>
/// Reports whether <paramref name="text"/> contains at least one character accepted by
/// <see cref="IsChineseCharacter"/>.
/// </summary>
/// <param name="text">Characters to scan; an empty span yields <c>false</c>.</param>
private static bool ContainsChinese(ReadOnlySpan<char> text)
{
    for (var index = 0; index < text.Length; index++)
    {
        if (IsChineseCharacter(text[index]))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Checks whether <paramref name="c"/> lies in U+3400..U+4DBF (CJK Extension A) or
/// U+4E00..U+9FFF (CJK Unified Ideographs).
/// </summary>
private static bool IsChineseCharacter(char c)
{
    // Outside the overall U+3400..U+9FFF window: never a match.
    if (c < 0x3400 || c > 0x9FFF)
    {
        return false;
    }

    // Inside the window, only U+4DC0..U+4DFF (between the two blocks) is excluded.
    return c <= 0x4DBF || c >= 0x4E00;
}
174+
/// <summary>
/// Looks up <paramref name="fullPinyin"/> in the active double pinyin table.
/// </summary>
/// <param name="fullPinyin">Key to look up in the table.</param>
/// <returns>The table's value for the key, or <paramref name="fullPinyin"/> itself when the key is absent.</returns>
private string ToDoublePinyin(string fullPinyin)
{
    if (currentDoublePinyinTable.TryGetValue(fullPinyin, out var mapped))
    {
        return mapped;
    }

    return fullPinyin;
}
147181 }
148182}
0 commit comments