@@ -77,54 +77,79 @@ cdef build_count_table_{{dtype}}(const {{dtype}}_t[:] values,
7777@cython.wraparound(False)
7878@cython.boundscheck(False)
7979{{if dtype == 'object'}}
80- cpdef value_count_ {{dtype}}(ndarray[{{dtype}}] values, bint dropna):
80+ cpdef stable_value_count_ {{dtype}}(ndarray[{{dtype}}] values, bint dropna):
8181{{else}}
82- cpdef value_count_ {{dtype}}(const {{dtype}}_t[:] values, bint dropna):
82+ cpdef stable_value_count_ {{dtype}}(const {{dtype}}_t[:] values, bint dropna):
8383{{endif}}
    # Count how often each distinct entry of `values` occurs.
    #
    # Keys are emitted in the order in which they are first encountered while
    # scanning `values` front to back: the hash table maps each key to its
    # position in `result_keys` / `result_counts` rather than to its count.
    #
    # Parameters
    #   values : input array (ndarray for the object template branch, a typed
    #            memoryview otherwise)
    #   dropna : when true, entries for which the null check below is true are
    #            skipped and never added to the result
    #
    # Returns
    #   (result_keys.to_array(), result_counts.to_array())
8484 cdef:
8585 Py_ssize_t i = 0
86+ Py_ssize_t n = len(values)
    # Position of the current key in the result vectors / number of distinct
    # keys stored so far.
87+ size_t unique_key_index = 0
88+ size_t unique_key_count = 0
8689 kh_{{ttype}}_t *table
8790
88- {{if dtype != 'object'}}
89- {{dtype}}_t[:] result_keys
90- int64_t[:] result_counts
91- {{endif}}
92-
9391 # Don't use Py_ssize_t, since table.n_buckets is unsigned
9492 khiter_t k
9593
94+ {{c_type}} val
95+
    # Receives the status written by kh_put; it is not inspected afterwards.
96+ int ret = 0
97+
    # Choose the growable key vector from the dtype name: names starting with
    # 'u' keep an upper-case 'U' followed by the title-cased remainder (a dtype
    # of 'uint64' would render as UInt64Vector); all others are title-cased.
98+ {{if dtype[0]!='u'}}
99+ result_keys = {{dtype.title()}}Vector()
99100 {{else}}
100- build_count_table_{{ dtype}}(values, table, dropna )
101+ result_keys = {{'U'+ dtype[1::].title()}}Vector( )
101102 {{endif}}
102-
103- result_keys = np.empty(table.n_occupied, '{{dtype}}')
104- result_counts = np.zeros(table.n_occupied, dtype=np.int64)
103+ result_counts = Int64Vector()
104+ table = kh_init_{{ttype}}()
105105
106106 {{if dtype == 'object'}}
107- for k in range(table.n_buckets):
108- if kh_exist_{{ttype}}(table, k):
109- result_keys[i] = <{{dtype}}>table.keys[k]
110- result_counts[i] = table.vals[k]
111- i += 1
    # Object branch: the table is pre-sized to a tenth of the input length.
107+ kh_resize_{{ttype}}(table, n // 10)
108+
109+ for i in range(n):
110+ val = values[i]
    # Skip null entries only when dropna is set.
111+ if not checknull(val) or not dropna:
112+ k = kh_get_{{ttype}}(table, <PyObject*>val)
    # A lookup result equal to table.n_buckets is treated as "key not present".
113+ if k != table.n_buckets:
    # Known key: bump the count stored at its recorded position.
114+ unique_key_index = table.vals[k]
115+ result_counts.data.data[unique_key_index] += 1
116+ else:
    # New key: record its position in the table and start its count at 1.
117+ k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
118+ table.vals[k] = unique_key_count
119+ result_keys.append(val)
120+ result_counts.append(1)
121+ unique_key_count+=1
112122 {{else}}
113- with nogil:
114- for k in range(table.n_buckets):
115- if kh_exist_{{ttype}}(table, k):
116- result_keys[i] = {{to_dtype}}(table.keys[k])
117- result_counts[i] = table.vals[k]
118- i += 1
    # Non-object branch: the table is pre-sized to the full input length.
    # NOTE(review): unlike the removed loop, this loop is not wrapped in
    # `with nogil` - presumably because the Vector appends need the GIL; confirm.
123+ kh_resize_{{ttype}}(table, n)
124+
125+ for i in range(n):
126+ val = {{to_c_type}}(values[i])
127+
    # Skip NaN entries only when dropna is set.
128+ if not is_nan_{{c_type}}(val) or not dropna:
129+ k = kh_get_{{ttype}}(table, val)
    # Same found / not-found handling as in the object branch.
130+ if k != table.n_buckets:
131+ unique_key_index = table.vals[k]
132+ result_counts.data.data[unique_key_index] += 1
133+ else:
134+ k = kh_put_{{ttype}}(table, val, &ret)
135+ table.vals[k] = unique_key_count
136+ result_keys.append(val)
137+ result_counts.append(1)
138+ unique_key_count+=1
119139 {{endif}}
120140
    # The table only held key -> position bookkeeping; the results live in the
    # two vectors, so the table is destroyed before returning.
121141 kh_destroy_{{ttype}}(table)
122142
123- {{if dtype == 'object'}}
124- return result_keys, result_counts
125- {{else}}
126- return np.asarray(result_keys), np.asarray(result_counts)
127- {{endif}}
143+ return result_keys.to_array(), result_counts.to_array()
144+
145+
# value_count_{{dtype}} keeps the pre-existing entry-point name and simply
# delegates to stable_value_count_{{dtype}}.
146+ {{if dtype == 'object'}}
147+ cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna):
    # The object variant ignores its `dropna` argument and always passes 1.
    # NOTE(review): the removed implementation likewise passed 1 to
    # build_count_table for object input - presumably callers handle nulls
    # themselves for object dtype; confirm.
148+ return stable_value_count_{{dtype}}(values, 1)
149+ {{else}}
150+ cpdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna):
    # All other dtypes forward `dropna` unchanged.
151+ return stable_value_count_{{dtype}}(values, dropna)
152+ {{endif}}
128153
129154
130155@cython.wraparound(False)
0 commit comments