Merged · Changes from 6 commits
6 changes: 3 additions & 3 deletions ci/lint.sh
@@ -10,21 +10,21 @@ if [ "$LINT" ]; then

# pandas/_libs/src is C code, so no need to search there.
echo "Linting *.py"
-flake8 pandas --filename=*.py --exclude pandas/_libs/src
+flake8 pandas --filename=*.py --exclude pandas/_libs/src --ignore=W503,E731
if [ $? -ne "0" ]; then
RET=1
fi
echo "Linting *.py DONE"

echo "Linting setup.py"
-flake8 setup.py
+flake8 setup.py --ignore=W503
Contributor (review comment):
you can just add the global ignores to setup.cfg
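A minimal sketch of that suggestion, assuming flake8 picks up a [flake8] section in setup.cfg (contents illustrative, not part of this diff):

    [flake8]
    ignore = W503,E731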

if [ $? -ne "0" ]; then
RET=1
fi
echo "Linting setup.py DONE"

echo "Linting *.pyx"
-flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126
+flake8 pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123
if [ $? -ne "0" ]; then
RET=1
fi
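For context on the codes ignored above: W503 flags a line break before a binary operator, and E731 flags assigning a lambda expression to a name. A hedged illustration of code each check would flag (not code from this PR):

    # W503: the line break comes before the binary operator
    total = (first_value
             + second_value)

    # E731: lambda bound to a name; flake8 recommends a def instead
    square = lambda x: x * x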
5 changes: 3 additions & 2 deletions pandas/_libs/algos.pyx
@@ -258,7 +258,7 @@ def min_subseq(ndarray[double_t] arr):

return (s, e, -m)

-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
# Pairwise correlation/covariance


@@ -322,7 +322,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):

return result

-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
# Pairwise Spearman correlation


@@ -386,6 +386,7 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):

return result


# generated from template
include "algos_common_helper.pxi"
include "algos_rank_helper.pxi"
2 changes: 1 addition & 1 deletion pandas/_libs/groupby.pyx
@@ -25,7 +25,7 @@ cdef double nan = NaN


# TODO: aggregate multiple columns in single pass
-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
# first, nth, last


8 changes: 7 additions & 1 deletion pandas/_libs/hashing.pyx
@@ -93,29 +93,34 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
free(lens)
return result


cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil:
return (x << b) | (x >> (64 - b))


cdef inline void u32to8_le(uint8_t* p, uint32_t v) nogil:
p[0] = <uint8_t>(v)
p[1] = <uint8_t>(v >> 8)
p[2] = <uint8_t>(v >> 16)
p[3] = <uint8_t>(v >> 24)


cdef inline void u64to8_le(uint8_t* p, uint64_t v) nogil:
u32to8_le(p, <uint32_t>v)
u32to8_le(p + 4, <uint32_t>(v >> 32))


cdef inline uint64_t u8to64_le(uint8_t* p) nogil:
return (<uint64_t>p[0] |
<uint64_t>p[1] << 8 |
<uint64_t>p[2] << 16 |
<uint64_t>p[3] << 24 |
<uint64_t>p[4] << 32 |
<uint64_t>p[5] << 40 |
<uint64_t>p[6] << 48 |
<uint64_t>p[7] << 56)


cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
uint64_t* v2, uint64_t* v3) nogil:
v0[0] += v1[0]
@@ -133,6 +138,7 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
v1[0] ^= v2[0]
v2[0] = _rotl(v2[0], 32)


cpdef uint64_t siphash(bytes data, bytes key) except? 0:
if len(key) != 16:
raise ValueError(
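Aside on the hashing helpers above: _rotl implements a 64-bit left rotation and u8to64_le assembles eight little-endian bytes into one 64-bit word. A pure-Python sketch of the same operations (illustration only, not code from this PR):

    MASK64 = 0xFFFFFFFFFFFFFFFF

    def rotl64(x, b):
        # rotate the 64-bit value x left by b bits, wrapping high bits around
        return ((x << b) | (x >> (64 - b))) & MASK64

    def u8to64_le(p):
        # combine bytes p[0]..p[7], least-significant byte first
        return sum(p[i] << (8 * i) for i in range(8))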
7 changes: 3 additions & 4 deletions pandas/_libs/index.pyx
@@ -122,7 +122,7 @@ cdef class IndexEngine:
if not self.is_unique:
return self._get_loc_duplicates(val)
values = self._get_index_values()
-loc = _bin_search(values, val) # .searchsorted(val, side='left')
+loc = _bin_search(values, val)  # .searchsorted(val, side='left')
if loc >= len(values):
raise KeyError(val)
if util.get_value_at(values, loc) != val:
@@ -475,15 +475,14 @@ cdef class DatetimeEngine(Int64Engine):
if other.dtype != self._get_box_dtype():
return np.repeat(-1, len(other)).astype('i4')
other = np.asarray(other).view('i8')
-return algos.pad_int64(self._get_index_values(), other,
-    limit=limit)
+return algos.pad_int64(self._get_index_values(), other, limit=limit)

def get_backfill_indexer(self, other, limit=None):
if other.dtype != self._get_box_dtype():
return np.repeat(-1, len(other)).astype('i4')
other = np.asarray(other).view('i8')
return algos.backfill_int64(self._get_index_values(), other,
limit=limit)


cdef class TimedeltaEngine(DatetimeEngine):
1 change: 1 addition & 0 deletions pandas/_libs/interval.pyx
@@ -13,6 +13,7 @@ from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
import numbers
_VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither'])


cdef class IntervalMixin:
property closed_left:
def __get__(self):
2 changes: 1 addition & 1 deletion pandas/_libs/join.pyx
@@ -147,7 +147,7 @@ def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,


def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,
Py_ssize_t max_groups):
cdef:
Py_ssize_t i, j, k, count = 0
ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
23 changes: 13 additions & 10 deletions pandas/_libs/lib.pyx
@@ -82,6 +82,7 @@ def values_from_object(object o):

return o


cpdef map_indices_list(list index):
"""
Produce a dict mapping the values of the input array to their respective
@@ -116,7 +117,8 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr):
s += arr[i].__sizeof__()
return s

-#----------------------------------------------------------------------
+
+# ----------------------------------------------------------------------
# isnull / notnull related

cdef double INF = <double> np.inf
@@ -125,7 +127,7 @@ cdef double NEGINF = -INF

cpdef bint checknull(object val):
if util.is_float_object(val) or util.is_complex_object(val):
-return val != val # and val != INF and val != NEGINF
+return val != val  # and val != INF and val != NEGINF
elif util.is_datetime64_object(val):
return get_datetime64_value(val) == NPY_NAT
elif val is NaT:
@@ -990,7 +992,7 @@ def convert_json_to_lines(object arr):
in_quotes = ~in_quotes
if v == backslash or is_escaping:
is_escaping = ~is_escaping
-if v == comma: # commas that should be \n
+if v == comma:  # commas that should be \n
if num_open_brackets_seen == 0 and not in_quotes:
narr[i] = newline
elif v == left_bracket:
@@ -1015,7 +1017,7 @@ def write_csv_rows(list data, ndarray data_index,
# In crude testing, N>100 yields little marginal improvement
N=100

# pre-allocate rows
ncols = len(cols)
rows = [[None] * (nlevels + ncols) for x in range(N)]

@@ -1047,12 +1049,13 @@
if j >= N - 1 and j % N == N - 1:
writer.writerows(rows)

if j >= 0 and (j < N - 1 or (j % N) != N - 1):
writer.writerows(rows[:((j + 1) % N)])


-#------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------
# Groupby-related functions

@cython.boundscheck(False)
def arrmap(ndarray[object] index, object func):
cdef int length = index.shape[0]
@@ -1136,7 +1139,7 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner,
bins = np.empty(lenbin - 1, dtype=np.int64)

j = 0 # index into values
-bc = 0 # bin count
+bc = 0  # bin count

# linear scan
if right_closed:
@@ -1285,9 +1288,9 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
cdef class _PandasNull:

def __richcmp__(_PandasNull self, object other, int op):
-if op == 2: # ==
+if op == 2:  # ==
return isinstance(other, _PandasNull)
-elif op == 3: # !=
+elif op == 3:  # !=
return not isinstance(other, _PandasNull)
else:
return False
@@ -1793,7 +1796,7 @@ cdef class BlockPlacement:
stop += other_int

if ((step > 0 and start < 0) or
(step < 0 and stop < step)):
raise ValueError("iadd causes length change")

if stop < 0:
22 changes: 15 additions & 7 deletions pandas/_libs/parsers.pyx
@@ -138,7 +138,7 @@ cdef extern from "parser/tokenizer.h":

# Store words in (potentially ragged) matrix for now, hmm
char **words
-int64_t *word_starts # where we are in the stream
+int64_t *word_starts  # where we are in the stream
int64_t words_len
int64_t words_cap

@@ -400,7 +400,7 @@ cdef class TextReader:
raise ValueError('only length-1 separators excluded right now')
self.parser.delimiter = ord(delimiter)

-#----------------------------------------
+# ----------------------------------------
# parser options

self.parser.doublequote = doublequote
@@ -519,7 +519,7 @@ cdef class TextReader:

self.index_col = index_col

-#----------------------------------------
+# ----------------------------------------
# header stuff

self.allow_leading_cols = allow_leading_cols
@@ -810,7 +810,7 @@ cdef class TextReader:
if hr == self.header[-1]:
lc = len(this_header)
ic = (len(self.index_col) if self.index_col
is not None else 0)
if lc != unnamed_count and lc - ic > unnamed_count:
hr -= 1
self.parser_start -= 1
@@ -848,7 +848,7 @@ cdef class TextReader:
# Corner case, not enough lines in the file
if self.parser.lines < data_line + 1:
field_count = len(header[0])
-else: # not self.has_usecols:
+else:  # not self.has_usecols:

field_count = self.parser.line_fields[data_line]

@@ -1374,6 +1374,7 @@ def _ensure_encoded(list lst):
result.append(x)
return result


cdef asbytes(object o):
if PY3:
return str(o).encode('utf-8')
@@ -1417,11 +1418,13 @@ def _maybe_upcast(arr):

return arr


cdef enum StringPath:
CSTRING
UTF8
ENCODED


# factored out logic to pick string converter
cdef inline StringPath _string_path(char *encoding):
if encoding != NULL and encoding != b"utf-8":
@@ -1430,9 +1433,12 @@ cdef inline StringPath _string_path(char *encoding):
return UTF8
else:
return CSTRING


# ----------------------------------------------------------------------
# Type conversions / inference support code


cdef _string_box_factorize(parser_t *parser, int64_t col,
int64_t line_start, int64_t line_end,
bint na_filter, kh_str_t *na_hashset):
@@ -1782,7 +1788,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
parser.sci, parser.thousands, 1)
if errno != 0 or p_end[0] or p_end == word:
if (strcasecmp(word, cinf) == 0 or
strcasecmp(word, cposinf) == 0):
data[0] = INF
elif strcasecmp(word, cneginf) == 0:
data[0] = NEGINF
@@ -1803,7 +1809,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
parser.sci, parser.thousands, 1)
if errno != 0 or p_end[0] or p_end == word:
if (strcasecmp(word, cinf) == 0 or
strcasecmp(word, cposinf) == 0):
data[0] = INF
elif strcasecmp(word, cneginf) == 0:
data[0] = NEGINF
@@ -2263,6 +2269,7 @@ def _compute_na_values():
}
return na_values


na_values = _compute_na_values()

for k in list(na_values):
@@ -2362,6 +2369,7 @@ def _to_structured_array(dict columns, object names, object usecols):

return recs


cdef _fill_structured_column(char *dst, char* src, int64_t elsize,
int64_t stride, int64_t length, bint incref):
cdef: