|  | 
|  | 1 | +""" Modified from https://github.com/keithito/tacotron """ | 
|  | 2 | + | 
|  | 3 | +import inflect | 
|  | 4 | +import re | 
|  | 5 | + | 
|  | 6 | + | 
# Shared inflect engine; the helpers below call its number_to_words().
_inflect = inflect.engine()
# A run of digits and commas that starts and ends with a digit, e.g. "1,234".
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
# Digits, a literal dot, then digits, e.g. "3.14".
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
# A pound sign followed by an amount (commas allowed before the final digits);
# group 1 is the amount without the sign.
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
# A dollar sign followed by an amount (dots and commas allowed before the
# final digits); group 1 is the amount without the sign.
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
# Digits immediately followed by an ordinal suffix: st, nd, rd or th.
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
# Any run of one or more digits.
_number_re = re.compile(r'[0-9]+')
|  | 14 | + | 
|  | 15 | + | 
def _remove_commas(m: re.Match) -> str:
    """Return the first captured group of ``m`` with every comma removed."""
    captured = m.group(1)
    # Splitting on commas and re-joining drops each separator.
    return ''.join(captured.split(','))
|  | 18 | + | 
|  | 19 | + | 
def _expand_decimal_point(m: re.Match) -> str:
    """Return the first captured group of ``m`` with each ``.`` spelled as `` point ``."""
    pieces = m.group(1).split('.')
    return ' point '.join(pieces)
|  | 22 | + | 
|  | 23 | + | 
def _expand_dollars(m: re.Match) -> str:
    """Render a matched dollar amount as digit counts followed by currency units.

    Args:
        m: A match whose first group is the amount text that followed the
            ``$`` sign: digits, optionally containing ``.`` and ``,``.

    Returns:
        ``"<d> dollar(s), <c> cent(s)"`` when both parts are non-zero, only
        the non-zero part when the other is zero, or ``"zero dollars"`` when
        both are zero. Amounts containing more than one ``.`` are returned
        unchanged with ``" dollars"`` appended. Numbers are left as digits.
    """
    match = m.group(1)
    parts = match.split('.')
    if len(parts) > 2:
        return match + ' dollars'  # Unexpected format

    # The dollar pattern admits commas, so strip them before int(): a stray
    # separator such as "$,5" or "$1,.5" would otherwise raise ValueError.
    dollar_digits = parts[0].replace(',', '')
    cent_digits = parts[1].replace(',', '') if len(parts) > 1 else ''
    dollars = int(dollar_digits) if dollar_digits else 0
    cents = int(cent_digits) if cent_digits else 0

    spoken = []
    if dollars:
        dollar_unit = 'dollar' if dollars == 1 else 'dollars'
        spoken.append(f'{dollars} {dollar_unit}')
    if cents:
        cent_unit = 'cent' if cents == 1 else 'cents'
        spoken.append(f'{cents} {cent_unit}')
    # An empty list means both parts were zero.
    return ', '.join(spoken) or 'zero dollars'
|  | 43 | + | 
|  | 44 | + | 
def _expand_ordinal(m: re.Match) -> str:
    """Pass the whole matched text of ``m`` to the inflect engine's ``number_to_words``."""
    ordinal_text = m.group(0)
    return _inflect.number_to_words(ordinal_text)
|  | 47 | + | 
|  | 48 | + | 
def _expand_number(m: re.Match) -> str:
    """Spell out the integer matched by ``m`` in words.

    Values strictly between 1000 and 3000 get special handling: 2000 and
    2001-2009 are read as "two thousand ...", exact multiples of 100 as
    "<n> hundred", and the rest as two-digit groups with zero read as "oh".
    Everything else goes straight to ``number_to_words`` with no "and".
    """
    value = int(m.group(0))

    # Outside the special range: plain reading without the word "and".
    if not 1000 < value < 3000:
        return _inflect.number_to_words(value, andword='')

    if value == 2000:
        return 'two thousand'
    if 2000 < value < 2010:
        return 'two thousand ' + _inflect.number_to_words(value % 100)

    hundreds, remainder = divmod(value, 100)
    if remainder == 0:
        return _inflect.number_to_words(hundreds) + ' hundred'

    # Read as pairs of digits, then drop the comma between the groups.
    paired = _inflect.number_to_words(value, andword='', zero='oh', group=2)
    return paired.replace(', ', ' ')
|  | 62 | + | 
|  | 63 | + | 
def normalize_numbers(text: str) -> str:
    """Apply the module's number-rewriting passes to ``text`` and return the result.

    The passes run in a fixed order, each one working on the output of the
    previous one: commas inside numbers are removed, pound and dollar amounts
    are rewritten, decimal points are spelled out, then ordinals and finally
    any remaining digit runs are expanded.
    """
    passes = (
        (_comma_number_re, _remove_commas),
        (_pounds_re, r'\1 pounds'),
        (_dollars_re, _expand_dollars),
        (_decimal_number_re, _expand_decimal_point),
        (_ordinal_re, _expand_ordinal),
        (_number_re, _expand_number),
    )
    for pattern, replacement in passes:
        text = pattern.sub(replacement, text)
    return text
0 commit comments