Merged · Changes from 6 commits
6 changes: 3 additions & 3 deletions ci/lint.sh
@@ -10,21 +10,21 @@ if [ "$LINT" ]; then

# pandas/_libs/src is C code, so no need to search there.
echo "Linting *.py"
-flake8 pandas --filename=*.py --exclude pandas/_libs/src
+flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=W503,E731
if [ $? -ne "0" ]; then
RET=1
fi
echo "Linting *.py DONE"

echo "Linting setup.py"
-flake8 setup.py
+flake8 setup.py --ignore=W503
Contributor (review comment):
you can just add the global ignores to setup.cfg
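A minimal sketch of that suggestion, assuming flake8 picks up a [flake8] section in setup.cfg (contents illustrative, not part of this diff):

    [flake8]
    ignore = W503,E731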

if [ $? -ne "0" ]; then
RET=1
fi
echo "Linting setup.py DONE"

echo "Linting *.pyx"
-flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126
+flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123
if [ $? -ne "0" ]; then
RET=1
fi
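For context on the codes ignored above: W503 flags a line break before a binary operator, and E731 flags assigning a lambda expression to a name. A hedged illustration of code each check would flag (not code from this PR):

    # W503: the line break comes before the binary operator
    total = (first_value
             + second_value)

    # E731: lambda bound to a name; flake8 recommends a def instead
    square = lambda x: x * x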
5 changes: 3 additions & 2 deletions pandas/_libs/algos.pyx
@@ -258,7 +258,7 @@ def min_subseq(ndarray[double_t] arr):

return (s, e, -m)

-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
# Pairwise correlation/covariance


@@ -322,7 +322,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):

return result

-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
# Pairwise Spearman correlation


@@ -386,6 +386,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):

return result


# generated from template
include "algos_common_helper.pxi"
include "algos_rank_helper.pxi"
2 changes: 1 addition & 1 deletion pandas/_libs/groupby.pyx
@@ -25,7 +25,7 @@ cdef double nan = NaN


# TODO: aggregate multiple columns in single pass
-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
# first, nth, last


8 changes: 7 additions & 1 deletion pandas/_libs/hashing.pyx
@@ -93,29 +93,34 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
free(lens)
return result


cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil:
return (x << b) | (x >> (64 - b))


cdef inline void u32to8_le(uint8_t* p, uint32_t v) nogil:
p[0] = <uint8_t>(v)
p[1] = <uint8_t>(v >> 8)
p[2] = <uint8_t>(v >> 16)
p[3] = <uint8_t>(v >> 24)


cdef inline void u64to8_le(uint8_t* p, uint64_t v) nogil:
u32to8_le(p, <uint32_t>v)
u32to8_le(p + 4, <uint32_t>(v >> 32))


cdef inline uint64_t u8to64_le(uint8_t* p) nogil:
return (<uint64_t>p[0] |
<uint64_t>p[1] << 8 |
<uint64_t>p[2] << 16 |
<uint64_t>p[3] << 24 |
<uint64_t>p[4] << 32 |
<uint64_t>p[5] << 40 |
<uint64_t>p[6] << 48 |
<uint64_t>p[7] << 56)


cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
uint64_t* v2, uint64_t* v3) nogil:
v0[0] += v1[0]
@@ -133,6 +138,7 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
v1[0] ^= v2[0]
v2[0] = _rotl(v2[0], 32)


cpdef uint64_t siphash(bytes data, bytes key) except? 0:
if len(key) != 16:
raise ValueError(
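Aside on the hashing helpers above: _rotl implements a 64-bit left rotation and u8to64_le assembles eight little-endian bytes into one 64-bit word. A pure-Python sketch of the same operations (illustration only, not code from this PR):

    MASK64 = 0xFFFFFFFFFFFFFFFF

    def rotl64(x, b):
        # rotate the 64-bit value x left by b bits, wrapping high bits around
        return ((x << b) | (x >> (64 - b))) & MASK64

    def u8to64_le(p):
        # combine bytes p[0]..p[7], least-significant byte first
        return sum(p[i] << (8 * i) for i in range(8))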
7 changes: 3 additions & 4 deletions pandas/_libs/index.pyx
@@ -122,7 +122,7 @@ cdef class IndexEngine:
if not self.is_unique:
return self._get_loc_duplicates(val)
values = self._get_index_values()
-loc = _bin_search(values, val) # .searchsorted(val, side='left')
+loc = _bin_search(values, val)  # .searchsorted(val, side='left')
if loc >= len(values):
raise KeyError(val)
if util.get_value_at(values, loc) != val:
@@ -475,15 +475,14 @@ cdef class DatetimeEngine(Int64Engine):
if other.dtype != self._get_box_dtype():
return np.repeat(-1, len(other)).astype('i4')
other = np.asarray(other).view('i8')
-return algos.pad_int64(self._get_index_values(), other,
-    limit=limit)
+return algos.pad_int64(self._get_index_values(), other, limit=limit)

def get_backfill_indexer(self, other, limit=None):
if other.dtype != self._get_box_dtype():
return np.repeat(-1, len(other)).astype('i4')
other = np.asarray(other).view('i8')
return algos.backfill_int64(self._get_index_values(), other,
limit=limit)


cdef class TimedeltaEngine(DatetimeEngine):
1 change: 1 addition & 0 deletions pandas/_libs/interval.pyx
@@ -13,6 +13,7 @@ from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
import numbers
_VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither'])


cdef class IntervalMixin:
property closed_left:
def __get__(self):
2 changes: 1 addition & 1 deletion pandas/_libs/join.pyx
@@ -147,7 +147,7 @@ def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,


def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,
Py_ssize_t max_groups):
cdef:
Py_ssize_t i, j, k, count = 0
ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
23 changes: 13 additions & 10 deletions pandas/_libs/lib.pyx
@@ -82,6 +82,7 @@ def values_from_object(object o):

return o


cpdef map_indices_list(list index):
"""
Produce a dict mapping the values of the input array to their respective
@@ -116,7 +117,8 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr):
s += arr[i].__sizeof__()
return s

-#----------------------------------------------------------------------
+
+# ----------------------------------------------------------------------
# isnull / notnull related

cdef double INF = <double> np.inf
@@ -125,7 +127,7 @@ cdef double NEGINF = -INF

cpdef bint checknull(object val):
if util.is_float_object(val) or util.is_complex_object(val):
-return val != val # and val != INF and val != NEGINF
+return val != val  # and val != INF and val != NEGINF
elif util.is_datetime64_object(val):
return get_datetime64_value(val) == NPY_NAT
elif val is NaT:
@@ -990,7 +992,7 @@ def convert_json_to_lines(object arr):
in_quotes = ~in_quotes
if v == backslash or is_escaping:
is_escaping = ~is_escaping
-if v == comma: # commas that should be \n
+if v == comma:  # commas that should be \n
if num_open_brackets_seen == 0 and not in_quotes:
narr[i] = newline
elif v == left_bracket:
@@ -1015,7 +1017,7 @@ def write_csv_rows(list data, ndarray data_index,
# In crude testing, N>100 yields little marginal improvement
N=100

# pre-allocate rows
ncols = len(cols)
rows = [[None] * (nlevels + ncols) for x in range(N)]

@@ -1047,12 +1049,13 @@
if j >= N - 1 and j % N == N - 1:
writer.writerows(rows)

if j >= 0 and (j < N - 1 or (j % N) != N - 1):
writer.writerows(rows[:((j + 1) % N)])


-#------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------
# Groupby-related functions

@cython.boundscheck(False)
def arrmap(ndarray[object] index, object func):
cdef int length = index.shape[0]
@@ -1136,7 +1139,7 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
bins = np.empty(lenbin - 1, dtype=np.int64)

j = 0 # index into values
-bc = 0 # bin count
+bc = 0  # bin count

# linear scan
if right_closed:
@@ -1285,9 +1288,9 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
cdef class _PandasNull:

def __richcmp__(_PandasNull self, object other, int op):
-if op == 2: # ==
+if op == 2:  # ==
return isinstance(other, _PandasNull)
-elif op == 3: # !=
+elif op == 3:  # !=
return not isinstance(other, _PandasNull)
else:
return False
@@ -1793,7 +1796,7 @@ cdef class BlockPlacement:
stop += other_int

if ((step > 0 and start < 0) or
(step < 0 and stop < step)):
raise ValueError("iadd causes length change")

if stop < 0:
22 changes: 15 additions & 7 deletions pandas/_libs/parsers.pyx
@@ -138,7 +138,7 @@ cdef extern from "parser/tokenizer.h":

# Store words in (potentially ragged) matrix for now, hmm
char **words
-int64_t *word_starts # where we are in the stream
+int64_t *word_starts  # where we are in the stream
int64_t words_len
int64_t words_cap

@@ -400,7 +400,7 @@ cdef class TextReader:
raise ValueError('only length-1 separators excluded right now')
self.parser.delimiter = ord(delimiter)

-#----------------------------------------
+# ----------------------------------------
# parser options

self.parser.doublequote = doublequote
@@ -519,7 +519,7 @@ cdef class TextReader:

self.index_col = index_col

-#----------------------------------------
+# ----------------------------------------
# header stuff

self.allow_leading_cols = allow_leading_cols
@@ -810,7 +810,7 @@ cdef class TextReader:
if hr == self.header[-1]:
lc = len(this_header)
ic = (len(self.index_col) if self.index_col
is not None else 0)
if lc != unnamed_count and lc - ic > unnamed_count:
hr -= 1
self.parser_start -= 1
@@ -848,7 +848,7 @@ cdef class TextReader:
# Corner case, not enough lines in the file
if self.parser.lines < data_line + 1:
field_count = len(header[0])
-else: # not self.has_usecols:
+else:  # not self.has_usecols:

field_count = self.parser.line_fields[data_line]

@@ -1374,6 +1374,7 @@ def _ensure_encoded(list lst):
result.append(x)
return result


cdef asbytes(object o):
if PY3:
return str(o).encode('utf-8')
@@ -1417,11 +1418,13 @@ def _maybe_upcast(arr):

return arr


cdef enum StringPath:
CSTRING
UTF8
ENCODED


# factored out logic to pick string converter
cdef inline StringPath _string_path(char *encoding):
if encoding != NULL and encoding != b"utf-8":
@@ -1430,9 +1433,12 @@ cdef inline StringPath _string_path(char *encoding):
return UTF8
else:
return CSTRING


# ----------------------------------------------------------------------
# Type conversions / inference support code


cdef _string_box_factorize(parser_t *parser, int64_t col,
int64_t line_start, int64_t line_end,
bint na_filter, kh_str_t *na_hashset):
@@ -1782,7 +1788,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
parser.sci, parser.thousands, 1)
if errno != 0 or p_end[0] or p_end == word:
if (strcasecmp(word, cinf) == 0 or
strcasecmp(word, cposinf) == 0):
data[0] = INF
elif strcasecmp(word, cneginf) == 0:
data[0] = NEGINF
@@ -1803,7 +1809,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
parser.sci, parser.thousands, 1)
if errno != 0 or p_end[0] or p_end == word:
if (strcasecmp(word, cinf) == 0 or
strcasecmp(word, cposinf) == 0):
data[0] = INF
elif strcasecmp(word, cneginf) == 0:
data[0] = NEGINF
@@ -2263,6 +2269,7 @@ def _compute_na_values():
}
return na_values


na_values = _compute_na_values()

for k in list(na_values):
@@ -2362,6 +2369,7 @@ def _to_structured_array(dict columns, object names, object usecols):

return recs


cdef _fill_structured_column(char *dst, char* src, int64_t elsize,
int64_t stride, int64_t length, bint incref):
cdef: