Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Run the test suite under every interpreter listed in envlist.
[tox]
envlist = {py27,py34}
# The tests are run straight from the checkout via setup.py, so tox does not
# need to build and install an sdist first.
skipsdist = True

[testenv]
# Map each environment factor to the interpreter that runs it.
# Continuation lines must be indented to belong to the key above them.
basepython =
    py27: python2.7
    py34: python3.4

commands =
    python setup.py test
51 changes: 29 additions & 22 deletions unidecode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
>>> unidecode("Κνωσός").encode("ascii")
b'Knosos'
"""
# Transliteration tables keyed by section (code point >> 8).  A value of
# None records that no table module exists for that section.
Cache_sections = {}
# Final replacement text keyed by character; '' means "drop this character".
Cache_chars = {}
# Legacy name for the section cache, kept so existing references keep working.
Cache = Cache_sections


def _transliterate_char(char):
    """Return the ASCII replacement for one character ('' if there is none).

    Section tables are loaded lazily from ``unidecode.xNNN`` modules and
    memoized in ``Cache_sections``.
    """
    codepoint = ord(char)

    if codepoint < 0x80:  # Basic ASCII
        return char

    if codepoint > 0xeffff:
        return ''  # Characters in Private Use Area and above are ignored

    section = codepoint >> 8    # Chop off the last two hex digits
    position = codepoint % 256  # Last two hex digits

    try:
        table = Cache_sections[section]
    except KeyError:
        try:
            mod = __import__('unidecode.x%03x' % (section), [], [], ['data'])
        except ImportError:
            # No table for this section: remember that so the import is
            # not attempted again.
            table = None
        else:
            table = mod.data
        Cache_sections[section] = table

    if table and len(table) > position:
        return table[position]

    return ''  # No match: ignore this character and carry on.


def unidecode(string):
    """Transliterate an Unicode object into an ASCII string

    Each character of ``string`` is replaced by its entry in the
    transliteration tables; characters without an entry are dropped.
    Results are memoized per character in ``Cache_chars`` -- including the
    "no replacement" case -- so repeated characters cost one dict lookup.

    Returns the concatenation of all replacements.
    """
    retval = []

    for char in string:
        try:
            replacement = Cache_chars[char]
        except KeyError:
            replacement = Cache_chars[char] = _transliterate_char(char)
        retval.append(replacement)

    return ''.join(retval)