diff --git a/continuous_integration/README.md b/continuous_integration/README.md
index bd7c28f..11382f5 100644
--- a/continuous_integration/README.md
+++ b/continuous_integration/README.md
@@ -17,7 +17,7 @@ docker build -t daskdev/hdfs3dev continuous_integration/
 - Start the container and wait for it to be ready:
 
 ```bash
-source continuous_integration/startup_hdfs.sh
+source continuous_integration/setup_hdfs.sh
 ```
 
 - Start a bash session in the running container:
@@ -36,3 +36,10 @@ docker exec -it $CONTAINER_ID bash
 python setup.py install
 py.test hdfs3 -s -vv
 ```
+
+To run the tests on Python 2.7:
+
+```bash
+/opt/conda/envs/py27/bin/python setup.py install
+/opt/conda/envs/py27/bin/py.test hdfs3 -s -vv
+```
diff --git a/hdfs3/core.py b/hdfs3/core.py
index c97b9c3..b767c68 100644
--- a/hdfs3/core.py
+++ b/hdfs3/core.py
@@ -39,7 +39,7 @@ class HDFileSystem(object):
     _first_pid = None
 
     def __init__(self, host=MyNone, port=MyNone, connect=True, autoconf=True,
-                 pars=None, **kwargs):
+                 pars=None, crc=True, **kwargs):
         """
         Parameters
         ----------
@@ -52,6 +52,8 @@ def __init__(self, host=MyNone, port=MyNone, connect=True, autoconf=True,
         autoconf: bool (True)
             Whether to use the configuration found in the conf module as the
             set of defaults
+        crc: bool (True)
+            Enable/disable CRC verification
         pars : {str: str}
             any parameters for hadoop, that you can find in hdfs-site.xml,
             https://hadoop.apache.org/docs/r2.6.0/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml
@@ -77,6 +79,8 @@ def __init__(self, host=MyNone, port=MyNone, connect=True, autoconf=True,
             self.conf['host'] = host
         if port is not MyNone:
             self.conf['port'] = port
+        if not crc:
+            self.conf['input.read.default.verify'] = '0'
 
         self._handle = None
 
diff --git a/hdfs3/tests/test_hdfs3.py b/hdfs3/tests/test_hdfs3.py
index 5b6f46d..908dab9 100644
--- a/hdfs3/tests/test_hdfs3.py
+++ b/hdfs3/tests/test_hdfs3.py
@@ -78,6 +78,31 @@ def test_idempotent_connect(hdfs):
     hdfs.connect()
 
 
+def test_disable_crc():
+    hdfs = HDFileSystem(host=test_host, port=test_port,
+                        pars={'rpc.client.connect.retry': '2'}, crc=False)
+
+    assert 'input.read.default.verify' in hdfs.conf
+
+    if hdfs.exists('/tmp/test'):
+        hdfs.rm('/tmp/test')
+    hdfs.mkdir('/tmp/test')
+
+    data = b'a' * (10 * 2**20)
+
+    with hdfs.open(a, 'wb', replication=1) as f:
+        f.write(data)
+
+    with hdfs.open(a, 'rb') as f:
+        out = f.read(len(data))
+        assert len(data) == len(out)
+        assert out == data
+
+    if hdfs.exists('/tmp/test'):
+        hdfs.rm('/tmp/test', recursive=True)
+    hdfs.disconnect()
+
+
 def test_ls_touch(hdfs):
     assert not hdfs.ls('/tmp/test')
     hdfs.touch(a)