2525#include " src/__support/macros/config.h"
2626#include " src/__support/str_to_num_result.h"
2727#include " src/__support/uint128.h"
28+ #include " src/__support/wctype_utils.h"
2829
2930namespace LIBC_NAMESPACE_DECL {
3031namespace internal {
3132
3233// Returns the idx to the first character in src that is not a whitespace
33- // character (as determined by isspace())
34+ // character (as determined by isspace() / iswspace())
35+ template <typename CharType>
3436LIBC_INLINE size_t
35- first_non_whitespace (const char *__restrict src,
37+ first_non_whitespace (const CharType *__restrict src,
3638 size_t src_len = cpp::numeric_limits<size_t >::max()) {
3739 size_t src_cur = 0 ;
38- while (src_cur < src_len && internal::isspace (src[src_cur])) {
40+ while (src_cur < src_len) {
41+ if constexpr (cpp::is_same_v<CharType, char >) {
42+ if (!internal::isspace (src[src_cur]))
43+ break ;
44+ } else {
45+ if (!internal::iswspace (src[src_cur]))
46+ break ;
47+ }
3948 ++src_cur;
4049 }
4150 return src_cur;
4251}
4352
53+ // Returns +1, -1, or 0 if 'src' starts with (respectively)
54+ // plus sign, minus sign, or neither.
55+ template <typename CharType>
56+ LIBC_INLINE static int get_sign (const CharType *__restrict src) {
57+ if constexpr (cpp::is_same_v<CharType, char >) {
58+ return (src[0 ] == ' +' ) ? 1 : (src[0 ] == ' -' ? -1 : 0 );
59+ } else {
60+ return (src[0 ] == L' +' ) ? 1 : (src[0 ] == L' -' ? -1 : 0 );
61+ }
62+ }
63+
4464// checks if the next 3 characters of the string pointer are the start of a
4565// hexadecimal number. Does not advance the string pointer.
46- LIBC_INLINE bool
47- is_hex_start (const char *__restrict src,
48- size_t src_len = cpp::numeric_limits< size_t >::max() ) {
66+ template < typename CharType>
67+ LIBC_INLINE static bool is_hex_start (const CharType *__restrict src,
68+ size_t src_len ) {
4969 if (src_len < 3 )
5070 return false ;
51- return *src == ' 0' && tolower (*(src + 1 )) == ' x' && isalnum (*(src + 2 )) &&
52- b36_char_to_int (*(src + 2 )) < 16 ;
71+ if constexpr (cpp::is_same_v<CharType, char >) {
72+ return src[0 ] == ' 0' && tolower (src[1 ]) == ' x' && isalnum (src[2 ]) &&
73+ b36_char_to_int (src[2 ]) < 16 ;
74+ } else {
75+ return src[0 ] == L' 0' && towlower (src[1 ]) == L' x' && iswalnum (src[2 ]) &&
76+ b36_wchar_to_int (src[2 ]) < 16 ;
77+ }
5378}
5479
5580// Takes the address of the string pointer and parses the base from the start of
5681// it.
57- LIBC_INLINE int infer_base (const char *__restrict src, size_t src_len) {
82+ template <typename CharType>
83+ LIBC_INLINE static int infer_base (const CharType *__restrict src,
84+ size_t src_len) {
5885 // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
5986 // sequence of the decimal digits and the letters a (or A) through f (or F)
6087 // with values 10 through 15 respectively." (C standard 6.4.4.1)
@@ -63,8 +90,15 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
6390 // An octal number is defined as "the prefix 0 optionally followed by a
6491 // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
6592 // number that starts with 0, including just 0, is an octal number.
66- if (src_len > 0 && src[0 ] == ' 0' )
67- return 8 ;
93+ if (src_len > 0 ) {
94+ if constexpr (cpp::is_same_v<CharType, char >) {
95+ if (src[0 ] == ' 0' )
96+ return 8 ;
97+ } else {
98+ if (src[0 ] == L' 0' )
99+ return 8 ;
100+ }
101+ }
68102 // A decimal number is defined as beginning "with a nonzero digit and
69103 // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
70104 return 10 ;
@@ -77,41 +111,34 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
77111// -----------------------------------------------------------------------------
78112// Takes a pointer to a string and the base to convert to. This function is used
79113// as the backend for all of the string to int functions.
80- template <class T >
114+ template <typename T, typename CharType >
81115LIBC_INLINE StrToNumResult<T>
82- strtointeger (const char *__restrict src, int base,
116+ strtointeger (const CharType *__restrict src, int base,
83117 const size_t src_len = cpp::numeric_limits<size_t >::max()) {
84118 using ResultType = make_integral_or_big_int_unsigned_t <T>;
85119
86- ResultType result = 0 ;
87-
88- bool is_number = false ;
89- size_t src_cur = 0 ;
90- int error_val = 0 ;
91-
92120 if (src_len == 0 )
93121 return {0 , 0 , 0 };
94122
95123 if (base < 0 || base == 1 || base > 36 )
96124 return {0 , 0 , EINVAL};
97125
98- src_cur = first_non_whitespace (src, src_len);
99-
100- char result_sign = ' +' ;
101- if (src[src_cur] == ' +' || src[src_cur] == ' -' ) {
102- result_sign = src[src_cur];
103- ++src_cur;
126+ size_t src_cur = first_non_whitespace (src, src_len);
127+ if (src_cur == src_len) {
128+ return {0 , 0 , 0 };
104129 }
105130
131+ int sign = get_sign (src + src_cur);
132+ bool is_positive = (sign >= 0 );
133+ src_cur += (sign != 0 );
134+
106135 if (base == 0 )
107136 base = infer_base (src + src_cur, src_len - src_cur);
108137
109138 if (base == 16 && is_hex_start (src + src_cur, src_len - src_cur))
110139 src_cur = src_cur + 2 ;
111140
112141 constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
113- const bool is_positive = (result_sign == ' +' );
114-
115142 ResultType constexpr NEGATIVE_MAX =
116143 !IS_UNSIGNED ? static_cast <ResultType>(cpp::numeric_limits<T>::max ()) + 1
117144 : cpp::numeric_limits<T>::max ();
@@ -120,8 +147,21 @@ strtointeger(const char *__restrict src, int base,
120147 ResultType const abs_max_div_by_base =
121148 abs_max / static_cast <ResultType>(base);
122149
123- while (src_cur < src_len && isalnum (src[src_cur])) {
124- int cur_digit = b36_char_to_int (src[src_cur]);
150+ bool is_number = false ;
151+ int error_val = 0 ;
152+ ResultType result = 0 ;
153+ while (src_cur < src_len) {
154+ int cur_digit;
155+ if constexpr (cpp::is_same_v<CharType, char >) {
156+ if (!isalnum (src[src_cur]))
157+ break ;
158+ cur_digit = b36_char_to_int (src[src_cur]);
159+ } else {
160+ if (!iswalnum (src[src_cur]))
161+ break ;
162+ cur_digit = b36_wchar_to_int (src[src_cur]);
163+ }
164+
125165 if (cur_digit >= base)
126166 break ;
127167
0 commit comments