# Transliteration tables keyed by section number (``codepoint >> 8``).
# A value of None records that importing ``unidecode.xNNN`` failed, so the
# import is not attempted again for that section.
Cache_sections = {}

# Per-character results: character -> replacement string.  Characters that
# have no transliteration are stored as '' so that repeated occurrences take
# the fast path instead of re-running the lookup every time.
Cache_chars = {}


def _transliterate_char(char):
    """Return the replacement string for one character, or '' if there is none."""
    codepoint = ord(char)

    if codepoint < 0x80:  # Basic ASCII is passed through unchanged
        return char

    if codepoint > 0xeffff:
        return ''  # Characters in Private Use Area and above are ignored

    section = codepoint >> 8    # Chop off the last two hex digits
    position = codepoint % 256  # Last two hex digits

    try:
        table = Cache_sections[section]
    except KeyError:
        try:
            mod = __import__('unidecode.x%03x' % (section), [], [], ['data'])
        except ImportError:
            table = None  # No table for this section: remember the miss
        else:
            table = mod.data
        Cache_sections[section] = table

    # A table may be missing, empty, or shorter than 256 entries.
    if table and len(table) > position:
        return table[position]
    return ''


def unidecode(string):
    """Transliterate an Unicode object into an ASCII string.

    Each character of ``string`` is replaced independently:

    * characters below 0x80 are kept as they are;
    * characters above 0xeffff are dropped;
    * any other character is looked up in the ``data`` sequence of the
      ``unidecode.xNNN`` module for its section (``codepoint >> 8``) and is
      dropped when that module cannot be imported or the sequence has no
      entry at the character's position.

    Results are memoized per character in ``Cache_chars`` (including the
    "dropped" outcome, stored as ``''``) and per section in
    ``Cache_sections``.

    :param string: the text to transliterate; iterated character by character.
    :returns: the replacements joined into a single string.
    """
    retval = []

    for char in string:
        try:
            replacement = Cache_chars[char]
        except KeyError:
            # First time this character is seen: resolve it and remember the
            # answer, even when the answer is "nothing".
            replacement = Cache_chars[char] = _transliterate_char(char)
        retval.append(replacement)

    return ''.join(retval)