2
2
from __future__ import absolute_import , unicode_literals
3
3
import struct
4
4
import array
5
- import pdb
6
5
7
6
from . import units
8
7
from .compat import int_from_byte
@@ -122,6 +121,7 @@ def start_edges(self, index, prefix=b""):
122
121
index_stack. Otherwise, leave the stack empty, so next_edge() fails"""
123
122
124
123
self .key = bytearray (prefix )
124
+ self .base_key_len = len (self .key )
125
125
self ._parent_index = index
126
126
self ._sib_index = None
127
127
if self ._guide .size ():
@@ -133,6 +133,7 @@ def start_edges(self, index, prefix=b""):
133
133
if index is not None :
134
134
self ._sib_index = next_index
135
135
self .key .append (child_label )
136
+ self .decoded_key = self .key .decode ('utf-8' )
136
137
return True
137
138
138
139
def next_edge (self ):
@@ -146,9 +147,26 @@ def next_edge(self):
146
147
self ._parent_index )
147
148
if not self ._sib_index :
148
149
return False
149
-
150
- self .key . pop ()
150
+
151
+ self .key = self . key [: self . base_key_len ]
151
152
self .key .append (sibling_label )
153
+ try :
154
+ self .decoded_key = self .key .decode ('utf-8' )
155
+ except UnicodeDecodeError :
156
+ # this sibling is only part of a multi-byte UTF-8 character. keep following its children until
157
+ #something is decodable
158
+ cur_index = self ._sib_index
159
+ while True :
160
+ child_label = self ._guide .child (self ._sib_index )
161
+ cur_index = self ._dic .follow_char (child_label , cur_index )
162
+ if not cur_index :
163
+ return False
164
+ self .key .append (child_label )
165
+ try :
166
+ self .decoded_key = self .key .decode ('utf-8' )
167
+ break
168
+ except UnicodeDecodeError :
169
+ pass
152
170
return True
153
171
154
172
def next (self ):
0 commit comments