|
29 | 29 | import pandas.io.date_converters as conv |
30 | 30 | import pandas.io.parsers as parsers |
31 | 31 |
|
# Default passed to dateutil's parser (see ``default=_DEFAULT_DATETIME``
# in the hypothesis test below).
_DEFAULT_DATETIME = datetime(year=1, month=1, day=1)

# Hypothesis strategy producing the random dates fed to the parsers.
# The lower bound is kept at 1900 or later because, per the original
# note, "%y" formatting on Windows needs such years.
gen_random_datetime = st.dates(
    min_value=date(year=1900, month=1, day=1),
    max_value=date.max,  # i.e. date(9999, 12, 31)
)
| 39 | + |
32 | 40 |
|
33 | 41 | def test_separator_date_conflict(all_parsers): |
34 | 42 | # Regression test for gh-4678 |
@@ -854,77 +862,79 @@ def test_parse_timezone(all_parsers): |
854 | 862 | tm.assert_frame_equal(result, expected) |
855 | 863 |
|
856 | 864 |
|
@pytest.mark.parametrize("date_string", [
    "32/32/2019",
    "02/30/2019",
    "13/13/2019",
    "13/2019",
    "a3/11/2018",
    "10/11/2o17"
])
def test_invalid_parse_delimited_date(all_parsers, date_string):
    # Strings that only look like delimited dates must not be converted:
    # the column is expected back unchanged, with object dtype.
    unparsed = DataFrame({0: [date_string]}, dtype="object")
    raw = StringIO(date_string)

    parsed = all_parsers.read_csv(raw, header=None, parse_dates=[0])
    tm.assert_frame_equal(parsed, unparsed)
871 | 879 |
|
872 | 880 |
|
873 | | -@pytest.mark.parametrize("date_format, delimiters", [ |
874 | | - ("%m %d %Y", " -.\\/"), |
875 | | - ("%m %Y", " -\\/") |
876 | | -]) |
877 | | -def test_parse_delimited_date(all_parsers, date_format, delimiters): |
878 | | - parser = all_parsers |
879 | | - date = datetime(2019, 4, 1) |
880 | | - data = '\n'.join(date.strftime(date_format.replace(' ', delim)) |
881 | | - for delim in delimiters) |
882 | | - expected = DataFrame({0: [date] * len(delimiters)}, dtype="datetime64[ns]") |
883 | | - result = parser.read_csv(StringIO(data), header=None, parse_dates=[0]) |
884 | | - tm.assert_frame_equal(result, expected) |
885 | | - |
886 | | - |
@pytest.mark.parametrize("date_string,dayfirst,expected", [
    # day-month-year order; the first field (13) cannot be a month, so the
    # same date is expected whatever ``dayfirst`` says
    ("13\\02\\2019", False, datetime(2019, 2, 13)),
    ("13\\02\\2019", True, datetime(2019, 2, 13)),
    # month-day-year order; the second field (13) cannot be a month, so
    # again ``dayfirst`` makes no difference to the expected date
    ("02\\13\\2019", False, datetime(2019, 2, 13)),
    ("02\\13\\2019", True, datetime(2019, 2, 13)),
    # ambiguous fields; dayfirst=True selects day-month-year
    ("04\\02\\2019", True, datetime(2019, 2, 4))
])
def test_parse_delimited_date_swap(all_parsers, date_string,
                                   dayfirst, expected):
    # Wrap the expected scalar in a one-cell datetime64[ns] frame so it can
    # be compared with what read_csv returns for the single-value input.
    expected_frame = DataFrame({0: [expected]}, dtype="datetime64[ns]")

    parsed = all_parsers.read_csv(
        StringIO(date_string),
        header=None,
        dayfirst=dayfirst,
        parse_dates=[0],
    )
    tm.assert_frame_equal(parsed, expected_frame)
903 | 898 |
|
904 | 899 |
|
905 | | -gen_random_datetime = st.dates( |
906 | | - min_value=date(1000, 1, 1), |
907 | | - max_value=date(9999, 12, 31) |
908 | | -) |
909 | | -_DEFAULT_DATETIME = datetime(1, 1, 1) |
| 900 | +def _helper_hypothesis_delimited_date(call, date_string, **kwargs): |
| 901 | + msg, result = None, None |
| 902 | + try: |
| 903 | + result = call(date_string, **kwargs) |
| 904 | + except ValueError as er: |
| 905 | + msg = str(er) |
| 906 | + pass |
| 907 | + return msg, result |
910 | 908 |
|
911 | 909 |
|
@given(gen_random_datetime)
@pytest.mark.parametrize("delimiter", list(" -./"))
@pytest.mark.parametrize("dayfirst", [True, False])
@pytest.mark.parametrize("date_format", [
    "%d %m %Y",
    "%m %d %Y",
    "%m %Y",
    "%Y %m %d",
    "%y %m %d",
    "%Y%m%d",
    "%y%m%d",
])
def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, date):
    # Property test: for a generated date rendered with the given format and
    # delimiter, parse_datetime_string and du_parse must agree, both on the
    # parsed value and on the ValueError message (None when no error).
    if date_format == "%m %Y" and delimiter == ".":
        # State the reason in the skip itself so it shows up in the report.
        pytest.skip("parse_datetime_string cannot reliably tell whether "
                    "e.g. %m.%Y is a float or a date")

    # Formats without spaces (e.g. "%Y%m%d") are unaffected by the
    # replacement, so ``delimiter`` does not change the string for them.
    date_string = date.strftime(date_format.replace(' ', delimiter))

    except_out_dateutil, result = _helper_hypothesis_delimited_date(
        parse_datetime_string, date_string,
        dayfirst=dayfirst)
    except_in_dateutil, expected = _helper_hypothesis_delimited_date(
        du_parse, date_string,
        default=_DEFAULT_DATETIME,
        dayfirst=dayfirst, yearfirst=False)

    assert except_out_dateutil == except_in_dateutil
    assert result == expected
0 commit comments