55#  This script is intended for both the CI and to check locally that code standards are
66#  respected. We are currently linting (PEP-8 and similar), looking for patterns of
77#  common mistakes (sphinx directives with missing blank lines, old style classes,
8- #  unwanted imports...), and  we also  run doctests here (currently some files only). 
9- #  In the future we may want to add the validation of  docstrings and other checks here .
8+ #  unwanted imports...), we run doctests here (currently some files only), and we
9+ #  validate formatting errors in docstrings.
1010# 
1111#  Usage:
1212#    $ ./ci/code_checks.sh               # run all checks
1313#    $ ./ci/code_checks.sh lint          # run linting only
1414#    $ ./ci/code_checks.sh patterns      # check for patterns that should not exist
15+ #    $ ./ci/code_checks.sh code          # checks on imported code
1516#    $ ./ci/code_checks.sh doctests      # run doctests
17+ #    $ ./ci/code_checks.sh docstrings    # validate docstring errors
1618#    $ ./ci/code_checks.sh dependencies  # check that dependencies are consistent
1719
18- echo  " inside $0 " 
19- [[ $LINT  ]] ||  { echo  " NOT Linting. To lint use: LINT=true $0  $1 " ;  exit  0;  }
20- [[ -z  " $1 "   ||  " $1 "   ==  " lint"   ||  " $1 "   ==  " patterns"   ||  " $1 "   ==  " doctests"   ||  " $1 "   ==  " dependencies"    ]] \
21-     ||  { echo  " Unknown command $1 . Usage: $0  [lint|patterns|doctests|dependencies]" ;  exit  9999;  }
# Validate the requested check. An empty first argument means "run all checks";
# any other value must be one of the known check names.
case "$1" in
    "" | lint | patterns | code | doctests | docstrings | dependencies)
        ;;
    *)
        echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|dependencies]"
        # NOTE: the shell keeps only the low 8 bits of an exit status, so
        # callers observe 15 (9999 % 256) rather than 9999.
        exit 9999
        ;;
esac

# Repository root, relative to this script. "$0" is quoted so that a checkout
# path containing spaces is not word-split before reaching dirname.
BASE_DIR="$(dirname "$0")/.."
# Running total of the exit statuses of every check executed below.
RET=0
# Name of the single check to run (empty: run all of them).
CHECK=$1
2626
function invgrep {
    # grep with inverse exit status and formatting for azure-pipelines
    #
    # This function takes the same arguments as grep, but with opposite
    # exit status:
    # - 0 (success) when no patterns are found
    # - 1 (fail) when the patterns are found, or when grep itself fails
    #
    # This is useful for the CI, as we want to fail if one of the patterns
    # that we want to avoid is found by grep.
    #
    # Globals:   AZURE (read) - when "true", matches are printed as
    #            azure-pipelines "task.logissue" commands
    # Arguments: passed verbatim to grep
    # Outputs:   the matches found by grep, on stdout
    local grep_status

    if [[ "${AZURE:-}" == "true" ]]; then
        # The whole text after "file:line:" is kept, so a matched line that
        # itself contains ":" is not truncated in the log message.
        grep -n "$@" | awk -F ":" '{
            text = $0
            sub(/^[^:]*:[^:]*:/, "", text)
            print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Found unwanted pattern: " text
        }'
        # Read the status of grep itself instead of enabling pipefail, which
        # would stay switched on for the rest of the script.
        grep_status=${PIPESTATUS[0]}
    else
        grep "$@"
        grep_status=$?
    fi

    # grep exits with 1 only when nothing was selected. Both 0 (match) and
    # 2 (error, possibly with matches already printed) must fail the check.
    return $(( grep_status != 1 ))
}
44+ 
# Output format handed to flake8 through --format. Locally the flake8
# "default" format is used; when AZURE is "true" every violation is emitted as
# an azure-pipelines "task.logissue" logging command instead.
FLAKE8_FORMAT="default"
if [[ "$AZURE" == "true" ]]; then
    FLAKE8_FORMAT="##vso[task.logissue type=error;sourcepath=%(path)s;linenumber=%(row)s;columnnumber=%(col)s;code=%(code)s;]%(text)s"
fi
2750
2851# ## LINTING ###
2952if  [[ -z  " $CHECK "   ||  " $CHECK "   ==  " lint"   ]];  then 
@@ -35,30 +58,30 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
3558
3659    #  pandas/_libs/src is C code, so no need to search there.
3760    MSG=' Linting .py code'   ;  echo  $MSG 
38-     flake8 . 
61+     flake8 --format= " $FLAKE8_FORMAT "   . 
3962    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
4063
4164    MSG=' Linting .pyx code'   ;  echo  $MSG 
42-     flake8 pandas --filename=* .pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
65+     flake8 --format= " $FLAKE8_FORMAT "   pandas --filename=* .pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
4366    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
4467
4568    MSG=' Linting .pxd and .pxi.in'   ;  echo  $MSG 
46-     flake8 pandas/_libs --filename=* .pxi.in,* .pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
69+     flake8 --format= " $FLAKE8_FORMAT "   pandas/_libs --filename=* .pxi.in,* .pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
4770    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
4871
4972    echo  " flake8-rst --version" 
5073    flake8-rst --version
5174
5275    MSG=' Linting code-blocks in .rst documentation'   ;  echo  $MSG 
53-     flake8-rst doc/source --filename=* .rst
76+     flake8-rst doc/source --filename=* .rst --format= " $FLAKE8_FORMAT " 
5477    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
5578
5679    #  Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
5780    #  it doesn't make a difference, but we want to be internally consistent.
5881    #  Note: this grep pattern is (intended to be) equivalent to the python
5982    #  regex r'(?<![ ->])> '
6083    MSG=' Linting .pyx code for spacing conventions in casting'   ;  echo  $MSG 
61-     !  grep  -r -E --include ' *.pyx'   --include ' *.pxi.in'   ' [a-zA-Z0-9*]> '   pandas/_libs
84+     invgrep  -r -E --include ' *.pyx'   --include ' *.pxi.in'   ' [a-zA-Z0-9*]> '   pandas/_libs
6285    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
6386
6487    #  readability/casting: Warnings about C casting instead of C++ casting
@@ -88,43 +111,48 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
88111
89112    #  Check for imports from pandas.core.common instead of `import pandas.core.common as com`
90113    MSG=' Check for non-standard imports'   ;  echo  $MSG 
91-     !  grep  -R --include=" *.py*"   -E " from pandas.core.common import "   pandas
114+     invgrep  -R --include=" *.py*"   -E " from pandas.core.common import "   pandas
92115    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
93116
94117    MSG=' Check for pytest warns'   ;  echo  $MSG 
95-     !  grep  -r -E --include ' *.py'   ' pytest\.warns'   pandas/tests/
118+     invgrep  -r -E --include ' *.py'   ' pytest\.warns'   pandas/tests/
96119    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
97120
98121    #  Check for the following code in testing: `np.testing` and `np.array_equal`
99122    MSG=' Check for invalid testing'   ;  echo  $MSG 
100-     !  grep  -r -E --include ' *.py'   --exclude testing.py ' (numpy|np)(\.testing|\.array_equal)'   pandas/tests/
123+     invgrep  -r -E --include ' *.py'   --exclude testing.py ' (numpy|np)(\.testing|\.array_equal)'   pandas/tests/
101124    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
102125
103126    #  Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal`
104127    MSG=' Check for invalid EA testing'   ;  echo  $MSG 
105-     !  grep  -r -E --include ' *.py'   --exclude base.py ' tm.assert_(series|frame)_equal'   pandas/tests/extension/base
128+     invgrep  -r -E --include ' *.py'   --exclude base.py ' tm.assert_(series|frame)_equal'   pandas/tests/extension/base
106129    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
107130
108131    MSG=' Check for deprecated messages without sphinx directive'   ;  echo  $MSG 
109-     !  grep  -R --include=" *.py"   --include=" *.pyx"   -E " (DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)"   pandas
132+     invgrep  -R --include=" *.py"   --include=" *.pyx"   -E " (DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)"   pandas
110133    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
111134
112135    MSG=' Check for old-style classes'   ;  echo  $MSG 
113-     !  grep  -R --include=" *.py"   -E " class\s\S*[^)]:"   pandas scripts
136+     invgrep  -R --include=" *.py"   -E " class\s\S*[^)]:"   pandas scripts
114137    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
115138
116139    MSG=' Check for backticks incorrectly rendering because of missing spaces'   ;  echo  $MSG 
117-     !  grep  -R --include=" *.rst"   -E " [a-zA-Z0-9]\`\` ?[a-zA-Z0-9]"   doc/source/
140+     invgrep  -R --include=" *.rst"   -E " [a-zA-Z0-9]\`\` ?[a-zA-Z0-9]"   doc/source/
118141    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
119142
120143    MSG=' Check for incorrect sphinx directives'   ;  echo  $MSG 
121-     !  grep  -R --include=" *.py"   --include=" *.pyx"   --include=" *.rst"   -E " \.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]"   ./pandas ./doc/source
144+     invgrep  -R --include=" *.py"   --include=" *.pyx"   --include=" *.rst"   -E " \.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]"   ./pandas ./doc/source
122145    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
123146
124147    MSG=' Check that the deprecated `assert_raises_regex` is not used (`pytest.raises(match=pattern)` should be used instead)'   ;  echo  $MSG 
125-     !  grep  -R --exclude=* .pyc --exclude=testing.py --exclude=test_testing.py assert_raises_regex pandas
148+     invgrep  -R --exclude=* .pyc --exclude=testing.py --exclude=test_testing.py assert_raises_regex pandas
126149    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
127150
151+ fi 
152+ 
153+ # ## CODE ###
154+ if  [[ -z  " $CHECK "   ||  " $CHECK "   ==  " code"   ]];  then 
155+ 
128156    MSG=' Check for modules that pandas should not import'   ;  echo  $MSG 
129157    python -c " 
130158import sys 
@@ -135,7 +163,7 @@ blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2' 'hypothesis',
135163             'tables', 'xlrd', 'xlsxwriter', 'xlwt'} 
136164mods = blacklist & set(m.split('.')[0] for m in sys.modules) 
137165if mods: 
138-     sys.stderr.write('pandas should not import: {}\n'.format(', '.join(mods))) 
166+     sys.stderr.write('err:  pandas should not import: {}\n'.format(', '.join(mods))) 
139167    sys.exit(len(mods)) 
140168    "  
141169    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
@@ -157,7 +185,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
157185
158186    MSG=' Doctests generic.py'   ;  echo  $MSG 
159187    pytest -q --doctest-modules pandas/core/generic.py \
160-         -k" -_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs" 
188+         -k" -_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard " 
161189    RET=$(( $RET  +  $? ))   ;  echo  $MSG  " DONE" 
162190
163191    MSG=' Doctests top-level reshaping functions'   ;  echo  $MSG 
@@ -178,11 +206,22 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
178206
179207fi 
180208
### DOCSTRINGS ###
# Runs when no check was requested (run everything) or when "docstrings" was
# requested explicitly.
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then

    MSG='Validate docstrings (GL06, SS04, PR03, PR05, EX04)' ; echo "$MSG"
    # The script path is quoted as a single word so that a BASE_DIR containing
    # spaces still resolves to the validator script.
    "$BASE_DIR/scripts/validate_docstrings.py" --format=azure --errors=GL06,SS04,PR03,PR05,EX04
    # Add the validator's exit status to the running total.
    RET=$(($RET + $?)) ; echo "$MSG" "DONE"

fi
217+ 
### DEPENDENCIES ###
# Runs when no check was requested (run everything) or when "dependencies" was
# requested explicitly.
if [[ -z "$CHECK" || "$CHECK" == "dependencies" ]]; then

    MSG='Check that requirements-dev.txt has been generated from environment.yml' ; echo "$MSG"
    # The script path is quoted as a single word so that a BASE_DIR containing
    # spaces still resolves to the generator script.
    "$BASE_DIR/scripts/generate_pip_deps_from_conda.py" --compare --azure
    # Add the comparison's exit status to the running total.
    RET=$(($RET + $?)) ; echo "$MSG" "DONE"

fi
187226
# RET is the sum of the exit statuses of all checks. The shell keeps only the
# low 8 bits of an exit status, so a total of exactly 256 (or any multiple)
# would be reported as success; clamp it so any failure stays non-zero.
exit $(( RET > 255 ? 255 : RET ))
0 commit comments