diff --git a/docs/reference/index.rst b/docs/reference/index.rst
index 8779fa8ac97..cb83554b30e 100644
--- a/docs/reference/index.rst
+++ b/docs/reference/index.rst
@@ -14,5 +14,4 @@ Reference Guide
    pip_show
    pip_search
    pip_wheel
-
-
+   pip_hash
diff --git a/docs/reference/pip_hash.rst b/docs/reference/pip_hash.rst
new file mode 100644
index 00000000000..72052bc22dc
--- /dev/null
+++ b/docs/reference/pip_hash.rst
@@ -0,0 +1,49 @@
+.. _`pip hash`:
+
+pip hash
+------------
+
+.. contents::
+
+Usage
+*****
+
+.. pip-command-usage:: hash
+
+
+Description
+***********
+
+.. pip-command-description:: hash
+
+
+Overview
+++++++++
+``pip hash`` is a convenient way to get a hash digest for use with
+:ref:`hash-checking mode`, especially for packages with multiple archives. The
+error message from ``pip install --require-hashes ...`` will give you one
+hash, but, if there are multiple archives (like source and binary ones), you
+will need to manually download and compute a hash for the others. Otherwise, a
+spurious hash mismatch could occur when :ref:`pip install` is passed a
+different set of options, like :ref:`--no-binary <install_--no-binary>`.
+
+
+Options
+*******
+
+.. pip-command-options:: hash
+
+
+Example
+********
+
+Compute the hash of a downloaded archive::
+
+    $ pip download SomePackage
+    Collecting SomePackage
+      Downloading SomePackage-2.2.tar.gz
+      Saved ./pip_downloads/SomePackage-2.2.tar.gz
+    Successfully downloaded SomePackage
+    $ pip hash ./pip_downloads/SomePackage-2.2.tar.gz
+    ./pip_downloads/SomePackage-2.2.tar.gz:
+    --hash=sha256:93e62e05c7ad3da1a233def6731e8285156701e3419a5fe279017c429ec67ce0
diff --git a/docs/reference/pip_install.rst b/docs/reference/pip_install.rst
index 489e0b09ff1..1137da64101 100644
--- a/docs/reference/pip_install.rst
+++ b/docs/reference/pip_install.rst
@@ -101,7 +101,7 @@ and the newline following it is effectively ignored.
 
 Comments are stripped *before* line continuations are processed.
 
-Additionally, the following Package Index Options are supported:
+The following options are supported:
 
   * :ref:`-i, --index-url <--index-url>`
   * :ref:`--extra-index-url <--extra-index-url>`
@@ -109,6 +109,7 @@ Additionally, the following Package Index Options are supported:
   * :ref:`-f, --find-links <--find-links>`
   * :ref:`--no-binary <install_--no-binary>`
  * :ref:`--only-binary <install_--only-binary>`
+  * :ref:`--require-hashes <--require-hashes>`
 
 For example, to specify :ref:`--no-index <--no-index>` and 2
 :ref:`--find-links <--find-links>` locations:
@@ -380,8 +381,8 @@ See the :ref:`pip install Examples`.
 SSL Certificate Verification
 ++++++++++++++++++++++++++++
 
-Starting with v1.3, pip provides SSL certificate verification over https, for the purpose
-of providing secure, certified downloads from PyPI.
+Starting with v1.3, pip provides SSL certificate verification over https, to
+prevent man-in-the-middle attacks against PyPI downloads.
 
 .. _`Caching`:
 
@@ -389,7 +390,7 @@
 Caching
 +++++++
 
-Starting with v6.0, pip provides an on by default cache which functions
+Starting with v6.0, pip provides an on-by-default cache which functions
 similarly to that of a web browser. While the cache is on by default and is
 designed do the right thing by default you can disable the cache and always
 access PyPI by utilizing the ``--no-cache-dir`` option.
@@ -425,14 +426,14 @@ Windows
 
 .. _`Wheel cache`:
 
-Wheel cache
-***********
+Wheel Cache
+~~~~~~~~~~~
 
-Pip will read from the subdirectory ``wheels`` within the pip cache dir and use
-any packages found there. This is disabled via the same ``no-cache-dir`` option
-that disables the HTTP cache. The internal structure of that cache is not part
-of the pip API. As of 7.0 pip uses a subdirectory per sdist that wheels were
-built from, and wheels within that subdirectory.
+Pip will read from the subdirectory ``wheels`` within the pip cache directory
+and use any packages found there. This is disabled via the same
+``--no-cache-dir`` option that disables the HTTP cache. The internal structure
+of that cache is not part of the pip API. As of 7.0, pip makes a subdirectory
+for each sdist that wheels are built from and places the resulting wheels
+inside.
 
 Pip attempts to choose the best wheels from those built in preference to
 building a new wheel. Note that this means when a package has both optional
@@ -445,19 +446,123 @@ When no wheels are found for an sdist, pip will attempt to build a wheel
 automatically and insert it into the wheel cache.
 
 
-Hash Verification
-+++++++++++++++++
-
-PyPI provides md5 hashes in the hash fragment of package download urls.
+.. _`hash-checking mode`:
 
-pip supports checking this, as well as any of the
-guaranteed hashlib algorithms (sha1, sha224, sha384, sha256, sha512, md5).
-
-The hash fragment is case sensitive (i.e. sha1 not SHA1).
+Hash-Checking Mode
+++++++++++++++++++
 
-This check is only intended to provide basic download corruption protection.
-It is not intended to provide security against tampering. For that,
-see :ref:`SSL Certificate Verification`
+Since version 8.0, pip can check downloaded package archives against local
+hashes to protect against remote tampering. To verify a package against one or
+more hashes, add them to the end of the line::
+
+    FooProject == 1.2 --hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 \
+                      --hash=sha256:486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65260e9cb8a7
+
+(The ability to use multiple hashes is important when a package has both
+binary and source distributions or when it offers binary distributions for a
+variety of platforms.)
+
+The recommended hash algorithm at the moment is sha256, but stronger ones are
+allowed, including all those supported by ``hashlib``. However, weaker ones
+such as md5, sha1, and sha224 are excluded to avoid giving a false sense of
+security.
+
+Hash verification is an all-or-nothing proposition. Specifying a ``--hash``
+against any requirement not only checks that hash but also activates a global
+*hash-checking mode*, which imposes several other security restrictions:
+
+* Hashes are required for all requirements. This is because a partially-hashed
+  requirements file is of little use and thus likely an error: a malicious
+  actor could slip bad code into the installation via one of the unhashed
+  requirements. Note that hashes embedded in URL-style requirements via the
+  ``#md5=...`` syntax suffice to satisfy this rule (regardless of hash
+  strength, for legacy reasons), though you should use a stronger
+  hash like sha256 whenever possible.
+* Hashes are required for all dependencies. An error results if there is a
+  dependency that is not spelled out and hashed in the requirements file.
+* Requirements that take the form of project names (rather than URLs or local
+  filesystem paths) must be pinned to a specific version using ``==``. This
+  prevents a surprising hash mismatch upon the release of a new version
+  that matches the requirement specifier.
+* ``--egg`` is disallowed, because it delegates installation of dependencies
+  to setuptools, giving up pip's ability to enforce any of the above.
+
+.. _`--require-hashes`:
+
+Hash-checking mode can be forced on with the ``--require-hashes`` command-line
+option::
+
+    $ pip install --require-hashes -r requirements.txt
+    ...
+    Hashes are required in --require-hashes mode (implicitly on when a hash is
+    specified for any package). These requirements were missing hashes,
+    leaving them open to tampering. These are the hashes the downloaded
+    archives actually had. You can add lines like these to your requirements
+    files to prevent tampering.
+        pyelasticsearch==1.0 --hash=sha256:44ddfb1225054d7d6b1d02e9338e7d4809be94edbe9929a2ec0807d38df993fa
+        more-itertools==2.2 --hash=sha256:93e62e05c7ad3da1a233def6731e8285156701e3419a5fe279017c429ec67ce0
+
+This can be useful in deploy scripts, to ensure that the author of the
+requirements file provided hashes. It is also a convenient way to bootstrap
+your list of hashes, since it shows the hashes of the packages it fetched. It
+fetches only the preferred archive for each package, so you may still need to
+add hashes for alternative archives using :ref:`pip hash`: for instance if
+there is both a binary and a source distribution.
+
+The :ref:`wheel cache <Wheel cache>` is disabled in hash-checking mode to
+prevent spurious hash mismatch errors. These would otherwise occur while
+installing sdists that had already been automatically built into cached
+wheels: those wheels would be selected for installation, but their hashes
+would not match the sdist ones from the requirements file. A further
+complication is that locally built wheels are nondeterministic: contemporary
+modification times make their way into the archive, making hashes
+unpredictable across machines and cache flushes. Compilation of C code adds
+further nondeterminism, as many compilers include random-seeded values in
+their output. However, wheels fetched from index servers are the same every
+time. They land in pip's HTTP cache, not its wheel cache, and are used
+normally in hash-checking mode. The only downside of having the wheel cache
+disabled is thus extra build time for sdists, and this can be solved by
+making sure pre-built wheels are available from the index server.
+
+Hash-checking mode also works with :ref:`pip download` and :ref:`pip wheel`. A
+:ref:`comparison of hash-checking mode with other repeatability strategies
+<Repeatability>` is available in the User Guide.
+
+.. warning::
+    Beware of the ``setup_requires`` keyword arg in :file:`setup.py`. The
+    (rare) packages that use it will cause those dependencies to be downloaded
+    by setuptools directly, skipping pip's hash-checking. If you need to use
+    such a package, see :ref:`Controlling setup_requires`.
+
+.. warning::
+    Be careful not to nullify all your security work when you install your
+    actual project by using setuptools directly: for example, by calling
+    ``python setup.py install``, ``python setup.py develop``, or
+    ``easy_install``. Setuptools will happily go out and download, unchecked,
+    anything you missed in your requirements file—and it’s easy to miss things
+    as your project evolves. To be safe, install your project using pip and
+    :ref:`--no-deps <install_--no-deps>`.
+
+    Instead of ``python setup.py develop``, use... ::
+
+        pip install --no-deps -e .
+
+    Instead of ``python setup.py install``, use... ::
+
+        pip install --no-deps .
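+
+If you need a digest from a script rather than from the command line, the
+computation that ``pip hash`` performs can be approximated with the standard
+library alone (a minimal sketch, not a pip API; the file name is
+illustrative)::
+
+    import hashlib
+
+    def archive_digest(path, algorithm='sha256'):
+        """Hash a package archive in 4 KB chunks, the way ``pip hash`` does."""
+        digest = hashlib.new(algorithm)
+        with open(path, 'rb') as archive:
+            for chunk in iter(lambda: archive.read(4096), b''):
+                digest.update(chunk)
+        return digest.hexdigest()
+
+    print('--hash=sha256:%s' % archive_digest('SomePackage-2.2.tar.gz'))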
+
+
+Hashes from PyPI
+~~~~~~~~~~~~~~~~
+
+PyPI provides an MD5 hash in the fragment portion of each package download URL,
+like ``#md5=123...``, which pip checks as a protection against download
+corruption. Other hash algorithms that have guaranteed support from ``hashlib``
+are also supported here: sha1, sha224, sha384, sha256, and sha512. Since this
+hash originates remotely, it is not a useful guard against tampering and thus
+does not satisfy the ``--require-hashes`` demand that every package have a
+local hash.
 
 .. _`editable-installs`:
 
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
index 5a775b8dd3d..90f941cc20d 100644
--- a/docs/user_guide.rst
+++ b/docs/user_guide.rst
@@ -23,6 +23,8 @@ Specifiers`
 
 For more information and examples, see the :ref:`pip install` reference.
 
+.. _PyPI: http://pypi.python.org/pypi
+
 
 .. _`Requirements Files`:
 
@@ -71,7 +73,6 @@ In practice, there are 4 common uses of Requirements files:
     pkg2
     pkg3>=1.0,<=2.0
 
-
 3. Requirements files are used to force pip to install an alternate version of a
    sub-dependency. For example, suppose `ProjectA` in your requirements file
    requires `ProjectB`, but the latest version (v1.3) has a bug, you can force
@@ -591,44 +592,81 @@ From within a real python, where ``SomePackage`` *is* installed globally, and is
 Ensuring Repeatability
 **********************
 
-Four things are required to fully guarantee a repeatable installation using requirements files.
+pip can achieve various levels of repeatability:
+
+Pinned Version Numbers
+----------------------
+
+Pinning the versions of your dependencies in the requirements file
+protects you from bugs or incompatibilities in newly released versions::
+
+    SomePackage == 1.2.3
+    DependencyOfSomePackage == 4.5.6
 
-1. The requirements file was generated by ``pip freeze`` or you're sure it only
-   contains requirements that specify a specific version.
+Using :ref:`pip freeze` to generate the requirements file will ensure that not
+only the top-level dependencies are included but their sub-dependencies as
+well, and so on. Perform the installation using :ref:`--no-deps
+<install_--no-deps>` for an extra dose of insurance against installing
+anything not explicitly listed.
 
-2. The installation is performed using :ref:`--no-deps <install_--no-deps>`.
-   This guarantees that only what is explicitly listed in the requirements file is
-   installed.
+This strategy is easy to implement and works across OSes and architectures.
+However, it trusts PyPI and the certificate authority chain. It
+also relies on indices and find-links locations not allowing
+packages to change without a version increase. (PyPI does protect
+against this.)
 
-3. None of the packages to be installed utilize the setup_requires keyword. See
-   :ref:`Controlling setup_requires`.
+Hash-checking Mode
+------------------
+
+Beyond pinning version numbers, you can add hashes against which to verify
+downloaded packages::
 
-4. The installation is performed against an index or find-links location that is
-   guaranteed to *not* allow archives to be changed and updated without a
-   version increase. While this is safe on PyPI, it may not be safe for other
-   indices. If you are working with an unsafe index, consider the `peep project
-   <https://pypi.python.org/pypi/peep>`_ which offers this feature on top of pip
-   using requirements file comments.
+    FooProject == 1.2 --hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824
 
+This protects against a compromise of PyPI or the HTTPS
+certificate chain. It also guards against a package changing
+without its version number changing (on indexes that allow this).
+This approach is a good fit for automated server deployments.
 
-.. _PyPI: http://pypi.python.org/pypi/
+Hash-checking mode is a labor-saving alternative to running a private index
+server containing approved packages: it removes the need to upload packages,
+maintain ACLs, and keep an audit trail (which a VCS gives you on the
+requirements file for free). It can also substitute for a vendor library,
+providing easier upgrades and less VCS noise. It does not, of course,
+provide the availability benefits of a private index or a vendor library.
+For more, see :ref:`pip install\'s discussion of hash-checking mode <hash-checking mode>`.
 
 .. _`Installation Bundle`:
 
-Create an Installation Bundle with Compiled Dependencies
-********************************************************
+Installation Bundles
+--------------------
 
-You can create a simple bundle that contains all of the dependencies you wish
-to install using::
+Using :ref:`pip wheel`, you can bundle up all of a project's dependencies, with
+any compilation done, into a single archive. This allows installation when
+index servers are unavailable and avoids time-consuming recompilation. Create
+an archive like this::
 
     $ tempdir=$(mktemp -d /tmp/wheelhouse-XXXXX)
    $ pip wheel -r requirements.txt --wheel-dir=$tempdir
     $ cwd=`pwd`
     $ (cd "$tempdir"; tar -cjvf "$cwd/bundled.tar.bz2" *)
 
-Once you have a bundle, you can then install it using::
+You can then install from the archive like this::
 
     $ tempdir=$(mktemp -d /tmp/wheelhouse-XXXXX)
     $ (cd $tempdir; tar -xvf /path/to/bundled.tar.bz2)
     $ pip install --force-reinstall --ignore-installed --upgrade --no-index --no-deps $tempdir/*
+
+Note that compiled packages are typically OS- and architecture-specific, so
+these archives are not necessarily portable across machines.
+
+Hash-checking mode can be used along with this method to ensure that future
+archives are built with identical packages.
+
+.. warning::
+    Finally, beware of the ``setup_requires`` keyword arg in :file:`setup.py`.
+    The (rare) packages that use it will cause those dependencies to be
+    downloaded by setuptools directly, skipping pip's protections. If you need
+    to use such a package, see :ref:`Controlling setup_requires`.
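+
+Putting the pieces together, bootstrapping a hashed requirements file can be
+as simple as the following sketch (the package name, version, and digest are
+illustrative)::
+
+    $ pip download SomePackage==1.2.3 -d ./downloads
+    $ pip hash ./downloads/SomePackage-1.2.3.tar.gz
+    ./downloads/SomePackage-1.2.3.tar.gz:
+    --hash=sha256:93e62e05c7ad3da1a233def6731e8285156701e3419a5fe279017c429ec67ce0
+
+The printed ``--hash`` line can then be appended to the matching line of
+``requirements.txt``.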
diff --git a/pip/basecommand.py b/pip/basecommand.py index 3627e9143fa..ce3ad40eb04 100644 --- a/pip/basecommand.py +++ b/pip/basecommand.py @@ -280,6 +280,9 @@ def populate_requirement_set(requirement_set, args, options, finder, wheel_cache=wheel_cache): found_req_in_file = True requirement_set.add_requirement(req) + # If --require-hashes was a line in a requirements file, tell + # RequirementSet about it: + requirement_set.require_hashes = options.require_hashes if not (args or options.editables or found_req_in_file): opts = {'name': name} diff --git a/pip/cmdoptions.py b/pip/cmdoptions.py index 889862ac4b5..73a805e598f 100644 --- a/pip/cmdoptions.py +++ b/pip/cmdoptions.py @@ -17,6 +17,7 @@ PyPI, FormatControl, fmt_ctl_handle_mutual_exclude, fmt_ctl_no_binary, fmt_ctl_no_use_wheel) from pip.locations import CA_BUNDLE_PATH, USER_CACHE_DIR, src_prefix +from pip.utils.hashes import STRONG_HASHES def make_option_group(group, parser): @@ -523,6 +524,47 @@ def only_binary(): ) +def _merge_hash(option, opt_str, value, parser): + """Given a value spelled "algo:digest", append the digest to a list + pointed to in a dict by the algo name.""" + if not parser.values.hashes: + parser.values.hashes = {} + try: + algo, digest = value.split(':', 1) + except ValueError: + parser.error('Arguments to %s must be a hash name ' + 'followed by a value, like --hash=sha256:abcde...' % + opt_str) + if algo not in STRONG_HASHES: + parser.error('Allowed hash algorithms for %s are %s.' % + (opt_str, ', '.join(STRONG_HASHES))) + parser.values.hashes.setdefault(algo, []).append(digest) + + +hash = partial( + Option, + '--hash', + # Hash values eventually end up in InstallRequirement.hashes due to + # __dict__ copying in process_line(). + dest='hashes', + action='callback', + callback=_merge_hash, + type='string', + help="Verify that the package's archive matches this " + 'hash before installing. Example: --hash=sha256:abcdef...') + + +require_hashes = partial( + Option, + '--require-hashes', + dest='require_hashes', + action='store_true', + default=False, + help='Require a hash to check each requirement against, for ' + 'repeatable installs. 
This option is implied when any package in a '
+         'requirements file has a --hash option.')
+
+
 ##########
 # groups #
 ##########
diff --git a/pip/commands/__init__.py b/pip/commands/__init__.py
index dc3418f1038..6910f517be5 100644
--- a/pip/commands/__init__.py
+++ b/pip/commands/__init__.py
@@ -6,6 +6,7 @@
 from pip.commands.completion import CompletionCommand
 from pip.commands.download import DownloadCommand
 from pip.commands.freeze import FreezeCommand
+from pip.commands.hash import HashCommand
 from pip.commands.help import HelpCommand
 from pip.commands.list import ListCommand
 from pip.commands.search import SearchCommand
@@ -18,6 +19,7 @@
 commands_dict = {
     CompletionCommand.name: CompletionCommand,
     FreezeCommand.name: FreezeCommand,
+    HashCommand.name: HashCommand,
     HelpCommand.name: HelpCommand,
     SearchCommand.name: SearchCommand,
     ShowCommand.name: ShowCommand,
@@ -38,6 +40,7 @@
     ShowCommand,
     SearchCommand,
     WheelCommand,
+    HashCommand,
     HelpCommand,
 ]
diff --git a/pip/commands/download.py b/pip/commands/download.py
index 0dcf1ea3ada..e1af9c3a52e 100644
--- a/pip/commands/download.py
+++ b/pip/commands/download.py
@@ -54,8 +54,9 @@ def __init__(self, *args, **kw):
         cmd_opts.add_option(cmdoptions.no_binary())
         cmd_opts.add_option(cmdoptions.only_binary())
         cmd_opts.add_option(cmdoptions.src())
-        cmd_opts.add_option(cmdoptions.no_clean())
         cmd_opts.add_option(cmdoptions.pre())
+        cmd_opts.add_option(cmdoptions.no_clean())
+        cmd_opts.add_option(cmdoptions.require_hashes())
 
         cmd_opts.add_option(
             '-d', '--dest', '--destination-dir', '--destination-directory',
@@ -104,6 +105,7 @@ def run(self, options, args):
             ignore_dependencies=options.ignore_dependencies,
             session=session,
             isolated=options.isolated_mode,
+            require_hashes=options.require_hashes
         )
         self.populate_requirement_set(
diff --git a/pip/commands/hash.py b/pip/commands/hash.py
new file mode 100644
index 00000000000..39ca37c8d0f
--- /dev/null
+++ b/pip/commands/hash.py
@@ -0,0 +1,58 @@
+from __future__ import absolute_import
+
+import hashlib
+import logging
+import sys
+
+from pip.basecommand import Command
+from pip.status_codes import ERROR
+from pip.utils import read_chunks
+from pip.utils.hashes import FAVORITE_HASH, STRONG_HASHES
+
+
+logger = logging.getLogger(__name__)
+
+
+class HashCommand(Command):
+    """
+    Compute a hash of a local package archive.
+
+    These can be used with --hash in a requirements file to do repeatable
+    installs.
+
+    """
+    name = 'hash'
+    usage = '%prog [options] <file> ...'
+    summary = 'Compute hashes of package archives.'
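+
+    # An illustrative session (the digest is the one from the docs example;
+    # the output format matches run() below):
+    #
+    #   $ pip hash ./pip_downloads/SomePackage-2.2.tar.gz
+    #   ./pip_downloads/SomePackage-2.2.tar.gz:
+    #   --hash=sha256:93e62e05c7ad3da1a233def6731e8285156701e3419a5fe279017c429ec67ce0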
+ + def __init__(self, *args, **kw): + super(HashCommand, self).__init__(*args, **kw) + self.cmd_opts.add_option( + '-a', '--algorithm', + dest='algorithm', + choices=STRONG_HASHES, + action='store', + default=FAVORITE_HASH, + help='The hash algorithm to use: one of %s' % + ', '.join(STRONG_HASHES)) + self.parser.insert_option_group(0, self.cmd_opts) + + def run(self, options, args): + if not args: + self.parser.print_usage(sys.stderr) + return ERROR + + algorithm = options.algorithm + for path in args: + logger.info('%s:\n--hash=%s:%s' % (path, + algorithm, + _hash_of_file(path, algorithm))) + + +def _hash_of_file(path, algorithm): + """Return the hash digest of a file.""" + with open(path, 'rb') as archive: + hash = hashlib.new(algorithm) + for chunk in read_chunks(archive): + hash.update(chunk) + return hash.hexdigest() diff --git a/pip/commands/install.py b/pip/commands/install.py index 46cd9f22ef2..faab4aad37a 100644 --- a/pip/commands/install.py +++ b/pip/commands/install.py @@ -156,8 +156,8 @@ def __init__(self, *args, **kw): cmd_opts.add_option(cmdoptions.no_binary()) cmd_opts.add_option(cmdoptions.only_binary()) cmd_opts.add_option(cmdoptions.pre()) - cmd_opts.add_option(cmdoptions.no_clean()) + cmd_opts.add_option(cmdoptions.require_hashes()) index_opts = cmdoptions.make_option_group( cmdoptions.index_group, @@ -266,6 +266,7 @@ def run(self, options, args): pycompile=options.compile, isolated=options.isolated_mode, wheel_cache=wheel_cache, + require_hashes=options.require_hashes, ) self.populate_requirement_set( diff --git a/pip/commands/wheel.py b/pip/commands/wheel.py index 0af377cc4ad..33dab46fe26 100644 --- a/pip/commands/wheel.py +++ b/pip/commands/wheel.py @@ -92,6 +92,7 @@ def __init__(self, *args, **kw): ) cmd_opts.add_option(cmdoptions.no_clean()) + cmd_opts.add_option(cmdoptions.require_hashes()) index_opts = cmdoptions.make_option_group( cmdoptions.index_group, @@ -159,7 +160,6 @@ def run(self, options, args): options.build_dir = os.path.abspath(options.build_dir) with self._build_session(options) as session: - finder = self._build_package_finder(options, session) build_delete = (not (options.no_clean or options.build_dir)) wheel_cache = WheelCache(options.cache_dir, options.format_control) @@ -174,7 +174,8 @@ def run(self, options, args): isolated=options.isolated_mode, session=session, wheel_cache=wheel_cache, - wheel_download_dir=options.wheel_dir + wheel_download_dir=options.wheel_dir, + require_hashes=options.require_hashes ) self.populate_requirement_set( diff --git a/pip/download.py b/pip/download.py index a083316464f..9dd3a42a4ba 100644 --- a/pip/download.py +++ b/pip/download.py @@ -2,7 +2,6 @@ import cgi import email.utils -import hashlib import getpass import json import logging @@ -29,7 +28,7 @@ from pip.models import PyPI from pip.utils import (splitext, rmtree, format_size, display_path, backup_dir, ask_path_exists, unpack_file, - call_subprocess, ARCHIVE_EXTENSIONS) + ARCHIVE_EXTENSIONS, consume, call_subprocess) from pip.utils.filesystem import check_path_owner from pip.utils.logging import indent_log from pip.utils.ui import DownloadProgressBar, DownloadProgressSpinner @@ -339,7 +338,7 @@ def __init__(self, *args, **kwargs): # We want to _only_ cache responses on securely fetched origins. We do # this because we can't validate the response of an insecurely fetched # origin, and we don't want someone to be able to poison the cache and - # require manual evication from the cache to fix it. + # require manual eviction from the cache to fix it. 
if cache: secure_adapter = CacheControlAdapter( cache=SafeFileCache(cache, use_dir_lock=True), @@ -485,57 +484,22 @@ def is_file_url(link): return link.url.lower().startswith('file:') -def _check_hash(download_hash, link): - if download_hash.digest_size != hashlib.new(link.hash_name).digest_size: - logger.critical( - "Hash digest size of the package %d (%s) doesn't match the " - "expected hash name %s!", - download_hash.digest_size, link, link.hash_name, - ) - raise HashMismatch('Hash name mismatch for package %s' % link) - if download_hash.hexdigest() != link.hash: - logger.critical( - "Hash of the package %s (%s) doesn't match the expected hash %s!", - link, download_hash.hexdigest(), link.hash, - ) - raise HashMismatch( - 'Bad %s hash for package %s' % (link.hash_name, link) - ) +def is_dir_url(link): + """Return whether a file:// Link points to a directory. + ``link`` must not have any other scheme but file://. Call is_file_url() + first. -def _get_hash_from_file(target_file, link): - try: - download_hash = hashlib.new(link.hash_name) - except (ValueError, TypeError): - logger.warning( - "Unsupported hash name %s for package %s", link.hash_name, link, - ) - return None - - with open(target_file, 'rb') as fp: - while True: - chunk = fp.read(4096) - if not chunk: - break - download_hash.update(chunk) - return download_hash + """ + link_path = url_to_path(link.url_without_fragment) + return os.path.isdir(link_path) def _progress_indicator(iterable, *args, **kwargs): return iterable -def _download_url(resp, link, content_file): - download_hash = None - if link.hash and link.hash_name: - try: - download_hash = hashlib.new(link.hash_name) - except ValueError: - logger.warning( - "Unsupported hash name %s for package %s", - link.hash_name, link, - ) - +def _download_url(resp, link, content_file, hashes): try: total_length = int(resp.headers['content-length']) except (ValueError, KeyError, TypeError): @@ -561,7 +525,7 @@ def resp_read(chunk_size): # Special case for urllib3. for chunk in resp.raw.stream( chunk_size, - # We use decode_content=False here because we do + # We use decode_content=False here because we don't # want urllib3 to mess with the raw bytes we get # from the server. 
If we decompress inside of # urllib3 then we cannot verify the checksum @@ -593,6 +557,11 @@ def resp_read(chunk_size): break yield chunk + def written_chunks(chunks): + for chunk in chunks: + content_file.write(chunk) + yield chunk + progress_indicator = _progress_indicator if link.netloc == PyPI.netloc: @@ -602,12 +571,8 @@ def resp_read(chunk_size): if show_progress: # We don't show progress on cached responses if total_length: - logger.info( - "Downloading %s (%s)", url, format_size(total_length), - ) - progress_indicator = DownloadProgressBar( - max=total_length, - ).iter + logger.info("Downloading %s (%s)", url, format_size(total_length)) + progress_indicator = DownloadProgressBar(max=total_length).iter else: logger.info("Downloading %s", url) progress_indicator = DownloadProgressSpinner().iter @@ -618,13 +583,12 @@ def resp_read(chunk_size): logger.debug('Downloading from URL %s', link) - for chunk in progress_indicator(resp_read(4096), 4096): - if download_hash is not None: - download_hash.update(chunk) - content_file.write(chunk) - if link.hash and link.hash_name: - _check_hash(download_hash, link) - return download_hash + downloaded_chunks = written_chunks(progress_indicator(resp_read(4096), + 4096)) + if hashes: + hashes.check_against_chunks(downloaded_chunks) + else: + consume(downloaded_chunks) def _copy_file(filename, location, content_type, link): @@ -652,7 +616,8 @@ def _copy_file(filename, location, content_type, link): logger.info('Saved %s', display_path(download_location)) -def unpack_http_url(link, location, download_dir=None, session=None): +def unpack_http_url(link, location, download_dir=None, + session=None, hashes=None): if session is None: raise TypeError( "unpack_http_url() missing 1 required keyword argument: 'session'" @@ -663,14 +628,19 @@ def unpack_http_url(link, location, download_dir=None, session=None): # If a download dir is specified, is the file already downloaded there? already_downloaded_path = None if download_dir: - already_downloaded_path = _check_download_dir(link, download_dir) + already_downloaded_path = _check_download_dir(link, + download_dir, + hashes) if already_downloaded_path: from_path = already_downloaded_path content_type = mimetypes.guess_type(from_path)[0] else: # let's download to a tmp dir - from_path, content_type = _download_http_url(link, session, temp_dir) + from_path, content_type = _download_http_url(link, + session, + temp_dir, + hashes) # unpack the archive to the build dir location. even when only downloading # archives, they have to be unpacked to parse dependencies @@ -685,15 +655,16 @@ def unpack_http_url(link, location, download_dir=None, session=None): rmtree(temp_dir) -def unpack_file_url(link, location, download_dir=None): +def unpack_file_url(link, location, download_dir=None, hashes=None): """Unpack link into location. - If download_dir is provided and link points to a file, make a copy - of the link file inside download_dir.""" + If download_dir is provided and link points to a file, make a copy + of the link file inside download_dir. 
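+
+    If ``hashes`` is given, the local file is checked against it (see the
+    comment on the check below); a HashMismatch is raised if no digest
+    matches.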
+ """ link_path = url_to_path(link.url_without_fragment) # If it's a url to a local directory - if os.path.isdir(link_path): + if is_dir_url(link): if os.path.isdir(location): rmtree(location) shutil.copytree(link_path, location, symlinks=True) @@ -701,15 +672,20 @@ def unpack_file_url(link, location, download_dir=None): logger.info('Link is a directory, ignoring download_dir') return - # if link has a hash, let's confirm it matches - if link.hash: - link_path_hash = _get_hash_from_file(link_path, link) - _check_hash(link_path_hash, link) + # If --require-hashes is off, `hashes` is either empty, the + # link's embeddded hash, or MissingHashes; it is required to + # match. If --require-hashes is on, we are satisfied by any + # hash in `hashes` matching: a URL-based or an option-based + # one; no internet-sourced hash will be in `hashes`. + if hashes: + hashes.check_against_path(link_path) # If a download dir is specified, is the file already there and valid? already_downloaded_path = None if download_dir: - already_downloaded_path = _check_download_dir(link, download_dir) + already_downloaded_path = _check_download_dir(link, + download_dir, + hashes) if already_downloaded_path: from_path = already_downloaded_path @@ -795,7 +771,7 @@ def request(self, host, handler, request_body, verbose=False): def unpack_url(link, location, download_dir=None, - only_download=False, session=None): + only_download=False, session=None, hashes=None): """Unpack link. If link is a VCS link: if only_download, export into download_dir and ignore location @@ -804,6 +780,11 @@ def unpack_url(link, location, download_dir=None, - unpack into location - if download_dir, copy the file into download_dir - if only_download, mark location for deletion + + :param hashes: A Hashes object, one of whose embedded hashes must match, + or HashMismatch will be raised. If the Hashes is empty, no matches are + required, and unhashable types of requirements (like VCS ones, which + would ordinarily raise HashUnsupported) are allowed. """ # non-editable vcs urls if is_vcs_url(link): @@ -811,7 +792,7 @@ def unpack_url(link, location, download_dir=None, # file urls elif is_file_url(link): - unpack_file_url(link, location, download_dir) + unpack_file_url(link, location, download_dir, hashes=hashes) # http urls else: @@ -823,12 +804,13 @@ def unpack_url(link, location, download_dir=None, location, download_dir, session, + hashes=hashes ) if only_download: write_delete_marker_file(location) -def _download_http_url(link, session, temp_dir): +def _download_http_url(link, session, temp_dir, hashes): """Download link url into temp_dir using provided session""" target_url = link.url.split('#', 1)[0] try: @@ -883,11 +865,11 @@ def _download_http_url(link, session, temp_dir): filename += ext file_path = os.path.join(temp_dir, filename) with open(file_path, 'wb') as content_file: - _download_url(resp, link, content_file) + _download_url(resp, link, content_file, hashes) return file_path, content_type -def _check_download_dir(link, download_dir): +def _check_download_dir(link, download_dir, hashes): """ Check download_dir for previously downloaded file with correct hash If a correct file is found return its path else None """ @@ -895,14 +877,13 @@ def _check_download_dir(link, download_dir): if os.path.exists(download_path): # If already downloaded, does its hash match? 
logger.info('File was already downloaded %s', download_path) - if link.hash: - download_hash = _get_hash_from_file(download_path, link) + if hashes: try: - _check_hash(download_hash, link) + hashes.check_against_path(download_path) except HashMismatch: logger.warning( - 'Previously-downloaded file %s has bad hash, ' - 're-downloading.', + 'Previously-downloaded file %s has bad hash. ' + 'Re-downloading.', download_path ) os.unlink(download_path) diff --git a/pip/exceptions.py b/pip/exceptions.py index a4cb15e1318..e9b639f54bc 100644 --- a/pip/exceptions.py +++ b/pip/exceptions.py @@ -1,6 +1,10 @@ """Exceptions used throughout package""" from __future__ import absolute_import +from itertools import chain, groupby, repeat + +from pip._vendor.six import iteritems + class PipError(Exception): """Base pip exception""" @@ -39,13 +43,194 @@ class PreviousBuildDirError(PipError): """Raised when there's a previous conflicting build directory""" -class HashMismatch(InstallationError): - """Distribution file hash values don't match.""" - - class InvalidWheelFilename(InstallationError): """Invalid wheel filename.""" class UnsupportedWheel(InstallationError): """Unsupported wheel.""" + + +class HashErrors(InstallationError): + """Multiple HashError instances rolled into one for reporting""" + + def __init__(self): + self.errors = [] + + def append(self, error): + self.errors.append(error) + + def __str__(self): + lines = [] + self.errors.sort(key=lambda e: e.order) + for cls, errors_of_cls in groupby(self.errors, lambda e: e.__class__): + lines.append(cls.head) + lines.extend(e.body() for e in errors_of_cls) + if lines: + return '\n'.join(lines) + + def __nonzero__(self): + return bool(self.errors) + + def __bool__(self): + return self.__nonzero__() + + +class HashError(InstallationError): + """ + A failure to verify a package against known-good hashes + + :cvar order: An int sorting hash exception classes by difficulty of + recovery (lower being harder), so the user doesn't bother fretting + about unpinned packages when he has deeper issues, like VCS + dependencies, to deal with. Also keeps error reports in a + deterministic order. + :cvar head: A section heading for display above potentially many + exceptions of this kind + :ivar req: The InstallRequirement that triggered this error. This is + pasted on after the exception is instantiated, because it's not + typically available earlier. + + """ + req = None + head = '' + + def body(self): + """Return a summary of me for display under the heading. + + This default implementation simply prints a description of the + triggering requirement. + + :param req: The InstallRequirement that provoked this error, with + populate_link() having already been called + + """ + return ' %s' % self._requirement_name() + + def __str__(self): + return '%s\n%s' % (self.head, self.body()) + + def _requirement_name(self): + """Return a description of the requirement that triggered me. 
+
+        This default implementation returns a long description of the req,
+        with line numbers.
+
+        """
+        return str(self.req) if self.req else 'unknown package'
+
+
+class VcsHashUnsupported(HashError):
+    """A hash was provided for a version-control-system-based requirement, but
+    we don't have a method for hashing those."""
+
+    order = 0
+    head = ("Can't verify hashes for these requirements because we don't "
+            "have a way to hash version control repositories:")
+
+
+class DirectoryUrlHashUnsupported(HashError):
+    """A hash was provided for a file:// requirement that points to a
+    directory, but we don't have a method for hashing those."""
+
+    order = 1
+    head = ("Can't verify hashes for these file:// requirements because they "
+            "point to directories:")
+
+
+class HashMissing(HashError):
+    """A hash was needed for a requirement but is absent."""
+
+    order = 2
+    head = ('Hashes are required in --require-hashes mode, but they are '
+            'missing from some requirements. Here is a list of those '
+            'requirements along with the hashes their downloaded archives '
+            'actually had. Add lines like these to your requirements files to '
+            'prevent tampering. (If you did not enable --require-hashes '
+            'manually, note that it turns on automatically when any package '
+            'has a hash.)')
+
+    def __init__(self, gotten_hash):
+        """
+        :param gotten_hash: The hash of the (possibly malicious) archive we
+            just downloaded
+        """
+        self.gotten_hash = gotten_hash
+
+    def body(self):
+        from pip.utils.hashes import FAVORITE_HASH  # Dodge circular import.
+
+        package_name = (self.req.req if self.req and
+                        # In case someone feeds something
+                        # downright stupid to
+                        # InstallRequirement's constructor:
+                        getattr(self.req, 'req', None)
+                        else 'unknown package')
+        return '    %s --hash=%s:%s' % (package_name,
+                                        FAVORITE_HASH,
+                                        self.gotten_hash)
+
+
+class HashUnpinned(HashError):
+    """A requirement had a hash specified but was not pinned to a specific
+    version."""
+
+    order = 3
+    head = ('In --require-hashes mode, all requirements must have their '
+            'versions pinned with ==. These do not:')
+
+
+class HashMismatch(HashError):
+    """
+    Distribution file hash values don't match.
+
+    :ivar package_name: The name of the package that triggered the hash
+        mismatch. Feel free to write to this after the exception is raised to
+        improve its error message.
+
+    """
+    order = 4
+    head = ('THESE PACKAGES DO NOT MATCH THE HASHES FROM THE REQUIREMENTS '
+            'FILE. If you have updated the package versions, please update '
+            'the hashes. Otherwise, examine the package contents carefully; '
+            'someone may have tampered with them.')
+
+    def __init__(self, allowed, gots):
+        """
+        :param allowed: A dict of algorithm names pointing to lists of allowed
+            hex digests
+        :param gots: A dict of algorithm names pointing to hashes we
+            actually got from the files under suspicion
+        """
+        self.allowed = allowed
+        self.gots = gots
+
+    def body(self):
+        return '    %s:\n%s' % (self._requirement_name(),
+                                self._hash_comparison())
+
+    def _hash_comparison(self):
+        """
+        Return a comparison of actual and expected hash values.
+
+        Example::
+
+            Expected sha256 abcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcde
+                         or 123451234512345123451234512345123451234512345
+                 Got        bcdefbcdefbcdefbcdefbcdefbcdefbcdefbcdefbcdef
+
+        """
+        def hash_then_or(hash_name):
+            # For now, all the decent hashes have 6-char names, so we can get
+            # away with hard-coding space literals.
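+            # (Illustration: for 'sha256' this yields 'sha256', '    or',
+            # '    or', ... so the first "Expected" line names the algorithm
+            # and any alternative digests line up beneath it.)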
+            return chain([hash_name], repeat('    or'))
+
+        lines = []
+        for hash_name, expecteds in iteritems(self.allowed):
+            prefix = hash_then_or(hash_name)
+            lines.extend(('        Expected %s %s' % (next(prefix), e))
+                         for e in expecteds)
+            lines.append('             Got %s\n' %
+                         self.gots[hash_name].hexdigest())
+            prefix = '    or'
+        return '\n'.join(lines)
diff --git a/pip/req/req_file.py b/pip/req/req_file.py
index 7a1aa53e7bd..defbd7aaf04 100644
--- a/pip/req/req_file.py
+++ b/pip/req/req_file.py
@@ -46,12 +46,14 @@
     cmdoptions.pre,
     cmdoptions.process_dependency_links,
     cmdoptions.trusted_host,
+    cmdoptions.require_hashes,
 ]
 
 # options to be passed to requirements
 SUPPORTED_OPTIONS_REQ = [
     cmdoptions.install_options,
-    cmdoptions.global_options
+    cmdoptions.global_options,
+    cmdoptions.hash,
 ]
 
 # the 'dest' string values
@@ -122,6 +124,7 @@ def process_line(line, filename, line_number, finder=None, comes_from=None,
         affect the finder.
 
     :param constraint: If True, parsing a constraints file.
+    :param options: OptionParser options that we may update
     """
     parser = build_parser()
     defaults = parser.get_default_values()
@@ -186,6 +189,10 @@ def process_line(line, filename, line_number, finder=None, comes_from=None,
             for req in parser:
                 yield req
 
+    # percolate hash-checking option upward
+    elif opts.require_hashes:
+        options.require_hashes = opts.require_hashes
+
     # set finder options
     elif finder:
         if opts.allow_external:
diff --git a/pip/req/req_install.py b/pip/req/req_install.py
index b2378ef9691..0adf632d159 100644
--- a/pip/req/req_install.py
+++ b/pip/req/req_install.py
@@ -32,6 +32,7 @@
     call_subprocess, read_text_file, FakeFile, _make_build_dir, ensure_dir,
     get_installed_version, canonicalize_name
 )
+from pip.utils.hashes import Hashes
 from pip.utils.logging import indent_log
 from pip.req.req_uninstall import UninstallPathSet
 from pip.vcs import vcs
@@ -76,7 +77,7 @@ def __init__(self, req, comes_from, source_dir=None, editable=False,
         self.editable_options = editable_options
         self._wheel_cache = wheel_cache
-        self.link = link
+        self.link = self.original_link = link
         self.as_egg = as_egg
         self.markers = markers
         self._egg_info_path = None
@@ -238,33 +239,40 @@ def __repr__(self):
         return '<%s object: %s editable=%r>' % (
             self.__class__.__name__, str(self), self.editable)
 
-    def populate_link(self, finder, upgrade):
+    def populate_link(self, finder, upgrade, require_hashes):
         """Ensure that if a link can be found for this, that it is found.
 
         Note that self.link may still be None - if Upgrade is False and the
        requirement is already installed.
+
+        If require_hashes is True, don't use the wheel cache, because cached
+        wheels, always built locally, have different hashes than the files
+        downloaded from the index server and thus throw false hash mismatches.
+        Furthermore, cached wheels at present have nondeterministic contents
+        due to file modification times.
         """
         if self.link is None:
             self.link = finder.find_requirement(self, upgrade)
-
-    @property
-    def link(self):
-        return self._link
-
-    @link.setter
-    def link(self, link):
-        # Lookup a cached wheel, if possible.
- if self._wheel_cache is None: - self._link = link - else: - self._link = self._wheel_cache.cached_wheel(link, self.name) - if self._link != link: - logger.debug('Using cached wheel link: %s', self._link) + if self._wheel_cache is not None and not require_hashes: + old_link = self.link + self.link = self._wheel_cache.cached_wheel(self.link, self.name) + if old_link != self.link: + logger.debug('Using cached wheel link: %s', self.link) @property def specifier(self): return self.req.specifier + @property + def is_pinned(self): + """Return whether I am pinned to an exact version. + + For example, some-package==1.2 is pinned; some-package>1.2 is not. + """ + specifiers = self.specifier + return (len(specifiers) == 1 and + next(iter(specifiers)).operator in ('==', '===')) + def from_path(self): if self.req is None: return None @@ -1014,6 +1022,37 @@ def get_dist(self): project_name=dist_name, metadata=metadata) + @property + def has_hash_options(self): + """Return whether any known-good hashes are specified as options. + + These activate --require-hashes mode; hashes specified as part of a + URL do not. + + """ + return bool(self.options.get('hashes', {})) + + def hashes(self, trust_internet=True): + """Return a hash-comparer that considers my option- and URL-based + hashes to be known-good. + + Hashes in URLs--ones embedded in the requirements file, not ones + downloaded from an index server--are almost peers with ones from + flags. They satisfy --require-hashes (whether it was implicitly or + explicitly activated) but do not activate it. md5 and sha224 are not + allowed in flags, which should nudge people toward good algos. We + always OR all hashes together, even ones from URLs. + + :param trust_internet: Whether to trust URL-based (#md5=...) hashes + downloaded from the internet, as by populate_link() + + """ + good_hashes = self.options.get('hashes', {}).copy() + link = self.link if trust_internet else self.original_link + if link and link.hash: + good_hashes.setdefault(link.hash_name, []).append(link.hash) + return Hashes(good_hashes) + def _strip_postfix(req): """ diff --git a/pip/req/req_set.py b/pip/req/req_set.py index 382a432f946..10312dff229 100644 --- a/pip/req/req_set.py +++ b/pip/req/req_set.py @@ -1,8 +1,7 @@ from __future__ import absolute_import from collections import defaultdict -import functools -import itertools +from itertools import chain import logging import os @@ -10,12 +9,16 @@ from pip._vendor import requests from pip.compat import expanduser -from pip.download import (url_to_path, unpack_url) +from pip.download import (is_file_url, is_dir_url, is_vcs_url, url_to_path, + unpack_url) from pip.exceptions import (InstallationError, BestVersionAlreadyInstalled, - DistributionNotFound, PreviousBuildDirError) + DistributionNotFound, PreviousBuildDirError, + HashError, HashErrors, HashUnpinned, + DirectoryUrlHashUnsupported, VcsHashUnsupported) from pip.req.req_install import InstallRequirement from pip.utils import ( display_path, dist_in_usersite, ensure_dir, normalize_path) +from pip.utils.hashes import MissingHashes from pip.utils.logging import indent_log from pip.vcs import vcs @@ -141,7 +144,7 @@ def __init__(self, build_dir, src_dir, download_dir, upgrade=False, ignore_dependencies=False, force_reinstall=False, use_user_site=False, session=None, pycompile=True, isolated=False, wheel_download_dir=None, - wheel_cache=None): + wheel_cache=None, require_hashes=False): """Create a RequirementSet. 
:param wheel_download_dir: Where still-packed .whl files should be @@ -187,6 +190,7 @@ def __init__(self, build_dir, src_dir, download_dir, upgrade=False, wheel_download_dir = normalize_path(wheel_download_dir) self.wheel_download_dir = wheel_download_dir self._wheel_cache = wheel_cache + self.require_hashes = require_hashes # Maps from install_req -> dependencies_of_install_req self._dependencies = defaultdict(list) @@ -316,23 +320,6 @@ def uninstall(self, auto_confirm=False): req.uninstall(auto_confirm=auto_confirm) req.commit_uninstall() - def _walk_req_to_install(self, handler): - """Call handler for all pending reqs. - - :param handler: Handle a single requirement. Should take a requirement - to install. Can optionally return an iterable of additional - InstallRequirements to cover. - """ - # The list() here is to avoid potential mutate-while-iterating bugs. - discovered_reqs = [] - reqs = itertools.chain( - list(self.unnamed_requirements), list(self.requirements.values()), - discovered_reqs) - for req_to_install in reqs: - more_reqs = handler(req_to_install) - if more_reqs: - discovered_reqs.extend(more_reqs) - def prepare_files(self, finder): """ Prepare process. Create temp directories, download and/or unpack files. @@ -341,8 +328,36 @@ def prepare_files(self, finder): if self.wheel_download_dir: ensure_dir(self.wheel_download_dir) - self._walk_req_to_install( - functools.partial(self._prepare_file, finder)) + # If any top-level requirement has a hash specified, enter + # hash-checking mode, which requires hashes from all. + root_reqs = self.unnamed_requirements + self.requirements.values() + require_hashes = (self.require_hashes or + any(req.has_hash_options for req in root_reqs)) + if require_hashes and self.as_egg: + raise InstallationError( + '--egg is not allowed with --require-hashes mode, since it ' + 'delegates dependency resolution to setuptools and could thus ' + 'result in installation of unhashed packages.') + + # Actually prepare the files, and collect any exceptions. Most hash + # exceptions cannot be checked ahead of time, because + # req.populate_link() needs to be called before we can make decisions + # based on link type. + discovered_reqs = [] + hash_errors = HashErrors() + for req in chain(root_reqs, discovered_reqs): + try: + discovered_reqs.extend(self._prepare_file( + finder, + req, + require_hashes=require_hashes, + ignore_dependencies=self.ignore_dependencies)) + except HashError as exc: + exc.req = req + hash_errors.append(exc) + + if hash_errors: + raise hash_errors def _check_skip_installed(self, req_to_install, finder): """Check if req_to_install should be skipped. @@ -396,10 +411,14 @@ def _check_skip_installed(self, req_to_install, finder): else: return None - def _prepare_file(self, finder, req_to_install): - """Prepare a single requirements files. + def _prepare_file(self, + finder, + req_to_install, + require_hashes=False, + ignore_dependencies=False): + """Prepare a single requirements file. - :return: A list of addition InstallRequirements to also install. + :return: A list of additional InstallRequirements to also install. 
""" # Tell user what we are doing for this requirement: # obtain (editable), skipping, processing (local url), collecting @@ -409,6 +428,9 @@ def _prepare_file(self, finder, req_to_install): req_to_install.prepared = True + # ###################### # + # # print log messages # # + # ###################### # if req_to_install.editable: logger.info('Obtaining %s', req_to_install) else: @@ -440,6 +462,11 @@ def _prepare_file(self, finder, req_to_install): # # vcs update or unpack archive # # # ################################ # if req_to_install.editable: + if require_hashes: + raise InstallationError( + 'The editable requirement %s cannot be installed when ' + 'requiring hashes, because there is no single file to ' + 'hash.' % req_to_install) req_to_install.ensure_has_source_dir(self.src_dir) req_to_install.update_editable(not self.is_download) abstract_dist = make_abstract_dist(req_to_install) @@ -447,6 +474,12 @@ def _prepare_file(self, finder, req_to_install): if self.is_download: req_to_install.archive(self.download_dir) elif req_to_install.satisfied_by: + if require_hashes: + logger.debug( + 'Since it is already installed, we are trusting this ' + 'package without checking its hash. To ensure a ' + 'completely repeatable environment, install into an ' + 'empty virtualenv.') abstract_dist = Installed(req_to_install) else: # @@ if filesystem packages are not marked @@ -469,7 +502,8 @@ def _prepare_file(self, finder, req_to_install): "can delete this. Please delete it and try again." % (req_to_install, req_to_install.source_dir) ) - req_to_install.populate_link(finder, self.upgrade) + req_to_install.populate_link( + finder, self.upgrade, require_hashes) # We can't hit this spot and have populate_link return None. # req_to_install.satisfied_by is None here (because we're # guarded) and upgrade has no impact except when satisfied_by @@ -478,6 +512,41 @@ def _prepare_file(self, finder, req_to_install): # If no new versions are found, DistributionNotFound is raised, # otherwise a result is guaranteed. assert req_to_install.link + link = req_to_install.link + + # Now that we have the real link, we can tell what kind of + # requirements we have and raise some more informative errors + # than otherwise. (For example, we can raise VcsHashUnsupported + # for a VCS URL rather than HashMissing.) + if require_hashes: + # We could check these first 2 conditions inside + # unpack_url and save repetition of conditions, but then + # we would report less-useful error messages for + # unhashable requirements, complaining that there's no + # hash provided. + if is_vcs_url(link): + raise VcsHashUnsupported() + elif is_file_url(link) and is_dir_url(link): + raise DirectoryUrlHashUnsupported() + if (not req_to_install.original_link and + not req_to_install.is_pinned): + # Unpinned packages are asking for trouble when a new + # version is uploaded. This isn't a security check, but + # it saves users a surprising hash mismatch in the + # future. + # + # file:/// URLs aren't pinnable, so don't complain + # about them not being pinned. + raise HashUnpinned() + hashes = req_to_install.hashes( + trust_internet=not require_hashes) + if require_hashes and not hashes: + # Known-good hashes are missing for this requirement, so + # shim it with a facade object that will provoke hash + # computation and then raise a HashMissing exception + # showing the user what the hash should be. + hashes = MissingHashes() + try: download_dir = self.download_dir # We always delete unpacked sdists after pip ran. 
@@ -499,7 +568,7 @@ def _prepare_file(self, finder, req_to_install): unpack_url( req_to_install.link, req_to_install.source_dir, download_dir, autodelete_unpacked, - session=self.session) + session=self.session, hashes=hashes) except requests.HTTPError as exc: logger.critical( 'Could not install requirement %s because ' @@ -562,7 +631,7 @@ def add_req(subreq): # 'unnamed' requirements will get added here self.add_requirement(req_to_install, None) - if not self.ignore_dependencies: + if not ignore_dependencies: if (req_to_install.extras): logger.debug( "Installing extra requirements: %r", diff --git a/pip/utils/__init__.py b/pip/utils/__init__.py index 30f5e28b458..d1ffda4f9ac 100644 --- a/pip/utils/__init__.py +++ b/pip/utils/__init__.py @@ -1,5 +1,6 @@ from __future__ import absolute_import +from collections import deque import contextlib import errno import locale @@ -220,6 +221,15 @@ def file_contents(filename): return fp.read().decode('utf-8') +def read_chunks(file, size=4096): + """Yield pieces of data from a file-like object until EOF.""" + while True: + chunk = file.read(size) + if not chunk: + break + yield chunk + + def split_leading_dir(path): path = path.lstrip('/').lstrip('\\') if '/' in path and (('\\' in path and path.find('/') < path.find('\\')) or @@ -831,3 +841,8 @@ def get_installed_version(dist_name): def canonicalize_name(name): """Convert an arbitrary string to a canonical name used for comparison""" return pkg_resources.safe_name(name).lower() + + +def consume(iterator): + """Consume an iterable at C speed.""" + deque(iterator, maxlen=0) diff --git a/pip/utils/appdirs.py b/pip/utils/appdirs.py index 7406cc7a115..163c92201fa 100644 --- a/pip/utils/appdirs.py +++ b/pip/utils/appdirs.py @@ -1,6 +1,6 @@ """ This code was taken from https://github.com/ActiveState/appdirs and modified -to suite our purposes. +to suit our purposes. """ from __future__ import absolute_import diff --git a/pip/utils/hashes.py b/pip/utils/hashes.py new file mode 100644 index 00000000000..960297007ae --- /dev/null +++ b/pip/utils/hashes.py @@ -0,0 +1,92 @@ +from __future__ import absolute_import + +import hashlib + +from pip.exceptions import HashMismatch, HashMissing, InstallationError +from pip.utils import read_chunks +from pip._vendor.six import iteritems, iterkeys, itervalues + + +# The recommended hash algo of the moment. Change this whenever the state of +# the art changes; it won't hurt backward compatibility. +FAVORITE_HASH = 'sha256' + + +# Names of hashlib algorithms allowed by the --hash option and ``pip hash`` +# Currently, those are the ones at least as collision-resistant as sha256. +STRONG_HASHES = ['sha256', 'sha384', 'sha512'] + + +class Hashes(object): + """A wrapper that builds multiple hashes at once and checks them against + known-good values + + """ + def __init__(self, hashes=None): + """ + :param hashes: A dict of algorithm names pointing to lists of allowed + hex digests + """ + self._allowed = {} if hashes is None else hashes + + def check_against_chunks(self, chunks): + """Check good hashes against ones built from iterable of chunks of + data. + + Raise HashMismatch if none match. 
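+
+        A hypothetical use (the digest is sha256 of b'hello', as in the docs
+        examples)::
+
+            digest = ('2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e'
+                      '73043362938b9824')
+            Hashes({'sha256': [digest]}).check_against_chunks(iter([b'hello']))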
+ + """ + gots = {} + for hash_name in iterkeys(self._allowed): + try: + gots[hash_name] = hashlib.new(hash_name) + except (ValueError, TypeError): + raise InstallationError('Unknown hash name: %s' % hash_name) + + for chunk in chunks: + for hash in itervalues(gots): + hash.update(chunk) + + for hash_name, got in iteritems(gots): + if got.hexdigest() in self._allowed[hash_name]: + return + self._raise(gots) + + def _raise(self, gots): + raise HashMismatch(self._allowed, gots) + + def check_against_file(self, file): + """Check good hashes against a file-like object + + Raise HashMismatch if none match. + + """ + return self.check_against_chunks(read_chunks(file)) + + def check_against_path(self, path): + with open(path, 'rb') as file: + return self.check_against_file(file) + + def __nonzero__(self): + """Return whether I know any known-good hashes.""" + return bool(self._allowed) + + def __bool__(self): + return self.__nonzero__() + + +class MissingHashes(Hashes): + """A workalike for Hashes used when we're missing a hash for a requirement + + It computes the actual hash of the requirement and raises a HashMissing + exception showing it to the user. + + """ + def __init__(self): + """Don't offer the ``hashes`` kwarg.""" + # Pass our favorite hash in to generate a "gotten hash". With the + # empty list, it will never match, so an error will always raise. + super(MissingHashes, self).__init__(hashes={FAVORITE_HASH: []}) + + def _raise(self, gots): + raise HashMissing(gots[FAVORITE_HASH].hexdigest()) diff --git a/pip/wheel.py b/pip/wheel.py index 5ccebf042e7..4d39e68c301 100644 --- a/pip/wheel.py +++ b/pip/wheel.py @@ -31,7 +31,8 @@ from pip.locations import distutils_scheme, PIP_DELETE_MARKER_FILENAME from pip import pep425tags from pip.utils import ( - call_subprocess, ensure_dir, captured_stdout, rmtree, canonicalize_name) + call_subprocess, ensure_dir, captured_stdout, rmtree, canonicalize_name, + read_chunks) from pip.utils.logging import indent_log from pip._vendor.distlib.scripts import ScriptMaker from pip._vendor import pkg_resources @@ -149,11 +150,9 @@ def rehash(path, algo='sha256', blocksize=1 << 20): h = hashlib.new(algo) length = 0 with open(path, 'rb') as f: - block = f.read(blocksize) - while block: + for block in read_chunks(f, size=blocksize): length += len(block) h.update(block) - block = f.read(blocksize) digest = 'sha256=' + urlsafe_b64encode( h.digest() ).decode('latin1').rstrip('=') @@ -721,7 +720,7 @@ def _clean_one(self, req): def build(self, autobuilding=False): """Build wheels. - :param unpack: If True, replace the sdist we built from the with the + :param unpack: If True, replace the sdist we built from with the newly built wheel, in preparation for installation. :return: True if all the wheels built correctly. 
""" diff --git a/tests/functional/test_hash.py b/tests/functional/test_hash.py new file mode 100644 index 00000000000..9fc0d6e9108 --- /dev/null +++ b/tests/functional/test_hash.py @@ -0,0 +1,32 @@ +"""Tests for the ``pip hash`` command""" + + +def test_basic(script, tmpdir): + """Run 'pip hash' through its default behavior.""" + expected = ('--hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425' + 'e73043362938b9824') + result = script.pip('hash', _hello_file(tmpdir)) + assert expected in str(result) + + +def test_good_algo_option(script, tmpdir): + """Make sure the -a option works.""" + expected = ('--hash=sha512:9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caad' + 'ae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e' + '5c3adef46f73bcdec043') + result = script.pip('hash', '-a', 'sha512', _hello_file(tmpdir)) + assert expected in str(result) + + +def test_bad_algo_option(script, tmpdir): + """Make sure the -a option raises an error when given a bad operand.""" + result = script.pip('hash', '-a', 'poppycock', _hello_file(tmpdir), + expect_error=True) + assert "invalid choice: 'poppycock'" in str(result) + + +def _hello_file(tmpdir): + """Return a temp file to hash containing "hello".""" + file = tmpdir / 'hashable' + file.write('hello') + return file diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index 73beb74a304..142b8e154c5 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -1,4 +1,3 @@ - import os import textwrap import glob @@ -9,7 +8,8 @@ from pip.utils import appdirs, rmtree from tests.lib import (pyversion, pyversion_tuple, - _create_test_package, _create_svn_repo, path_to_url) + _create_test_package, _create_svn_repo, path_to_url, + requirements_file) from tests.lib.local_repos import local_checkout from tests.lib.path import Path @@ -217,6 +217,43 @@ def test_install_from_local_directory(script, data): assert egg_info_folder in result.files_created, str(result) +def test_hashed_install_success(script, data, tmpdir): + """ + Test that installing various sorts of requirements with correct hashes + works. + + Test file URLs and index packages (which become HTTP URLs behind the + scenes). + + """ + file_url = path_to_url( + (data.packages / 'simple-1.0.tar.gz').abspath) + with requirements_file( + 'simple2==1.0 --hash=sha256:9336af72ca661e6336eb87bc7de3e8844d853e' + '3848c2b9bbd2e8bf01db88c2c7\n' + '{simple} --hash=sha256:393043e672415891885c9a2a0929b1af95fb866d6c' + 'a016b42d2e6ce53619b653'.format(simple=file_url), + tmpdir) as reqs_file: + script.pip_install_local('-r', reqs_file.abspath, expect_error=False) + + +def test_hashed_install_failure(script, data, tmpdir): + """Test that wrong hashes stop installation. + + This makes sure prepare_files() is called in the course of installation + and so has the opportunity to halt if hashes are wrong. Checks on various + kinds of hashes are in test_req.py. 
+
+    """
+    with requirements_file('simple2==1.0 --hash=sha256:9336af72ca661e6336eb87b'
+                           'c7de3e8844d853e3848c2b9bbd2e8bf01db88c2c\n',
+                           tmpdir) as reqs_file:
+        result = script.pip_install_local('-r',
+                                          reqs_file.abspath,
+                                          expect_error=True)
+    assert len(result.files_created) == 0
+
+
 def test_install_from_local_directory_with_symlinks_to_directories(
         script, data):
     """
diff --git a/tests/functional/test_install_reqs.py b/tests/functional/test_install_reqs.py
index f1ba71b1b71..5c2ed0c6493 100644
--- a/tests/functional/test_install_reqs.py
+++ b/tests/functional/test_install_reqs.py
@@ -3,7 +3,7 @@

 import pytest

-from tests.lib import (pyversion, path_to_url,
+from tests.lib import (pyversion, path_to_url, requirements_file,
                        _create_test_package_with_subdirectory)
 from tests.lib.local_repos import local_checkout

@@ -313,3 +313,33 @@ def test_constrained_to_url_install_same_url(script, data):
         'install', '--no-index', '-f', data.find_links,
         '-c', script.scratch_path / 'constraints.txt', to_install)
     assert 'Running setup.py install for singlemodule' in result.stdout
+
+
+@pytest.mark.network
+def test_double_install_spurious_hash_mismatch(script, tmpdir):
+    """Make sure installing the same hashed sdist twice doesn't throw hash
+    mismatch errors.
+
+    Really, this is a test that we disable reads from the wheel cache in
+    hash-checking mode. Locally, implicitly built wheels of sdists obviously
+    have different hashes from the original archives. Comparing against those
+    causes spurious mismatch errors.
+
+    """
+    script.pip('install', 'wheel')  # Otherwise, it won't try to build wheels.
+    with requirements_file('simple==1.0 --hash=sha256:393043e672415891885c9a2a'
+                           '0929b1af95fb866d6ca016b42d2e6ce53619b653',
+                           tmpdir) as reqs_file:
+        # Install a package (and build its wheel):
+        result = script.pip_install_local(
+            '-r', reqs_file.abspath, expect_error=False)
+        assert 'Successfully installed simple-1.0' in str(result)
+
+        # Uninstall it:
+        script.pip('uninstall', '-y', 'simple', expect_error=False)
+
+        # Then install it again. We should not hit a hash mismatch, and the
+        # package should install happily.
+        result = script.pip_install_local(
+            '-r', reqs_file.abspath, expect_error=False)
+        assert 'Successfully installed simple-1.0' in str(result)
diff --git a/tests/lib/__init__.py b/tests/lib/__init__.py
index c96746a769a..31d684b4205 100644
--- a/tests/lib/__init__.py
+++ b/tests/lib/__init__.py
@@ -1,5 +1,6 @@
 from __future__ import absolute_import

+from contextlib import contextmanager
 import os
 import sys
 import re
@@ -569,7 +570,22 @@ def assert_raises_regexp(exception, reg, run, *args, **kwargs):
     try:
         run(*args, **kwargs)
         assert False, "%s should have been thrown" % exception
-    except Exception:
+    except exception:
         e = sys.exc_info()[1]
         p = re.compile(reg)
         assert p.search(str(e)), str(e)
+
+
+@contextmanager
+def requirements_file(contents, tmpdir):
+    """Return a Path to a requirements file of given contents.
+
+    As long as the context manager is open, the requirements file will exist.
+
+    :param tmpdir: A Path to the folder in which to create the file
+
+    """
+    path = tmpdir / 'reqs.txt'
+    path.write(contents)
+    yield path
+    path.remove()
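As a usage note, the ``requirements_file`` helper above is what keeps the hashed-install tests terse. A minimal sketch of a test written with it (the requirement line and assertions are illustrative only, not part of this patch)::

    from tests.lib import requirements_file

    def test_example(script, tmpdir):
        # The file exists only while the context manager is open.
        with requirements_file('simple==1.0\n', tmpdir) as reqs_file:
            script.pip('install', '-r', reqs_file.abspath)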
diff --git a/tests/unit/test_download.py b/tests/unit/test_download.py
index 00ba356b331..dd1a6d11ad8 100644
--- a/tests/unit/test_download.py
+++ b/tests/unit/test_download.py
@@ -16,6 +16,7 @@
     unpack_file_url,
 )
 from pip.index import Link
+from pip.utils.hashes import Hashes


 def test_unpack_http_url_with_urllib_response_without_content_type(data):
@@ -105,6 +106,7 @@ def test_unpack_http_url_bad_downloaded_checksum(mock_unpack_file):
             'location',
             download_dir=download_dir,
             session=session,
+            hashes=Hashes({'sha1': [download_hash.hexdigest()]})
         )

         # despite existence of downloaded file with bad hash, downloaded again
@@ -209,7 +211,9 @@ def test_unpack_file_url_bad_hash(self, tmpdir, data,
         self.prep(tmpdir, data)
         self.dist_url.url = "%s#md5=bogus" % self.dist_url.url
         with pytest.raises(HashMismatch):
-            unpack_file_url(self.dist_url, self.build_dir)
+            unpack_file_url(self.dist_url,
+                            self.build_dir,
+                            hashes=Hashes({'md5': ['bogus']}))

     def test_unpack_file_url_download_bad_hash(self, tmpdir, data,
                                                monkeypatch):
@@ -235,7 +239,8 @@ def test_unpack_file_url_download_bad_hash(self, tmpdir, data,
             dist_path_md5
         )
         unpack_file_url(self.dist_url, self.build_dir,
-                        download_dir=self.download_dir)
+                        download_dir=self.download_dir,
+                        hashes=Hashes({'md5': [dist_path_md5]}))

         # confirm hash is for simple1-1.0
         # the previous bad download has been removed
diff --git a/tests/unit/test_download_hashes.py b/tests/unit/test_download_hashes.py
deleted file mode 100644
index 061e768ebbd..00000000000
--- a/tests/unit/test_download_hashes.py
+++ /dev/null
@@ -1,263 +0,0 @@
-import pytest
-
-from pip.download import _get_hash_from_file, _check_hash
-from pip.exceptions import InstallationError
-from pip.index import Link
-
-
-def test_get_hash_from_file_md5(data):
-    file_path = data.packages.join("gmpy-1.15.tar.gz")
-    file_link = Link(
-        "http://testserver/gmpy-1.15.tar.gz"
-        "#md5=d41d8cd98f00b204e9800998ecf8427e"
-    )
-
-    download_hash = _get_hash_from_file(file_path, file_link)
-
-    assert download_hash.digest_size == 16
-    assert download_hash.hexdigest() == "d41d8cd98f00b204e9800998ecf8427e"
-
-
-def test_get_hash_from_file_sha1(data):
-    file_path = data.packages.join("gmpy-1.15.tar.gz")
-    file_link = Link(
-        "http://testserver/gmpy-1.15.tar.gz"
-        "#sha1=da39a3ee5e6b4b0d3255bfef95601890afd80709"
-    )
-
-    download_hash = _get_hash_from_file(file_path, file_link)
-
-    assert download_hash.digest_size == 20
-    assert download_hash.hexdigest() == (
-        "da39a3ee5e6b4b0d3255bfef95601890afd80709"
-    )
-
-
-def test_get_hash_from_file_sha224(data):
-    file_path = data.packages.join("gmpy-1.15.tar.gz")
-    file_link = Link(
-        "http://testserver/gmpy-1.15.tar.gz"
-        "#sha224=d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f"
-    )
-
-    download_hash = _get_hash_from_file(file_path, file_link)
-
-    assert download_hash.digest_size == 28
-    assert download_hash.hexdigest() == (
-        "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f"
-    )
-
-
-def test_get_hash_from_file_sha384(data):
-    file_path = data.packages.join("gmpy-1.15.tar.gz")
-    file_link = Link(
-        "http://testserver/gmpy-1.15.tar.gz"
-        "#sha384=38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e"
-        "1da274edebfe76f65fbd51ad2f14898b95b"
-    )
-
-    download_hash = _get_hash_from_file(file_path, file_link)
-
-    assert download_hash.digest_size == 48
-    assert download_hash.hexdigest() == (
-        "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274e"
"38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274e" - "debfe76f65fbd51ad2f14898b95b" - ) - - -def test_get_hash_from_file_sha256(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852" - "b855" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - assert download_hash.digest_size == 32 - assert download_hash.hexdigest() == ( - "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" - ) - - -def test_get_hash_from_file_sha512(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha512=cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36" - "ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - assert download_hash.digest_size == 64 - assert download_hash.hexdigest() == ( - "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0" - "d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e" - ) - - -def test_get_hash_from_file_unknown(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#unknown_hash=d41d8cd98f00b204e9800998ecf8427e" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - assert download_hash is None - - -def test_check_hash_md5_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#md5=d41d8cd98f00b204e9800998ecf8427e" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def test_check_hash_md5_invalid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#md5=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hash_sha1_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha1=da39a3ee5e6b4b0d3255bfef95601890afd80709" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def test_check_hash_sha1_invalid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#sha1=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hash_sha224_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha224=d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f'" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def test_check_hash_sha224_invalid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#sha224=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hash_sha384_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - 
"#sha384=38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6" - "e1da274edebfe76f65fbd51ad2f14898b95b" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def test_check_hash_sha384_invalid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#sha384=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hash_sha256_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b785" - "2b855" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def test_check_hash_sha256_invalid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#sha256=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hash_sha512_valid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha512=cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36c" - "e9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - _check_hash(download_hash, file_link) - - -def test_check_hash_sha512_invalid(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link("http://testserver/gmpy-1.15.tar.gz#sha512=deadbeef") - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, file_link) - - -def test_check_hasher_mismsatch(data): - file_path = data.packages.join("gmpy-1.15.tar.gz") - file_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#md5=d41d8cd98f00b204e9800998ecf8427e" - ) - other_link = Link( - "http://testserver/gmpy-1.15.tar.gz" - "#sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b785" - "2b855" - ) - - download_hash = _get_hash_from_file(file_path, file_link) - - with pytest.raises(InstallationError): - _check_hash(download_hash, other_link) diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index fe91ecba3a1..6ddfb5d43a5 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -6,15 +6,18 @@ import pytest from mock import Mock, patch, mock_open +from pip.commands.install import InstallCommand from pip.exceptions import (PreviousBuildDirError, InvalidWheelFilename, UnsupportedWheel) -from pip.download import PipSession +from pip.download import path_to_url, PipSession +from pip.exceptions import HashErrors, InstallationError from pip.index import PackageFinder from pip.req import (InstallRequirement, RequirementSet, Requirements) +from pip.req.req_file import process_line from pip.req.req_install import parse_editable from pip.utils import read_text_file from pip._vendor import pkg_resources -from tests.lib import assert_raises_regexp +from tests.lib import assert_raises_regexp, requirements_file class TestRequirementSet(object): @@ -26,12 +29,13 @@ def setup(self): def teardown(self): shutil.rmtree(self.tempdir, ignore_errors=True) - def basic_reqset(self): + def basic_reqset(self, **kwargs): return RequirementSet( 
             build_dir=os.path.join(self.tempdir, 'build'),
             src_dir=os.path.join(self.tempdir, 'src'),
             download_dir=None,
             session=PipSession(),
+            **kwargs
         )

     def test_no_reuse_existing_build_dir(self, data):
@@ -69,6 +73,205 @@ def test_environment_marker_extras(self, data):
         else:
             assert not reqset.has_requirement('simple')

+    @pytest.mark.network
+    def test_missing_hash_checking(self, data):
+        """Make sure prepare_files() raises an error when a requirement has no
+        hash in implicit hash-checking mode.
+        """
+        reqset = self.basic_reqset()
+        # No flags here. This tests that detection of later flags nonetheless
+        # requires earlier packages to have hashes:
+        reqset.add_requirement(
+            list(process_line('blessings==1.0', 'file', 1))[0])
+        # This flag activates --require-hashes mode:
+        reqset.add_requirement(
+            list(process_line('tracefront==0.1 --hash=sha256:somehash',
+                              'file',
+                              2))[0])
+        # This hash should be accepted because it came from the reqs file, not
+        # from the internet:
+        reqset.add_requirement(
+            list(process_line('https://pypi.python.org/packages/source/m/more-'
+                              'itertools/more-itertools-1.0.tar.gz#md5=b21850c'
+                              '3cfa7efbb70fd662ab5413bdd', 'file', 3))[0])
+        finder = PackageFinder([],
+                               ['https://pypi.python.org/simple'],
+                               session=PipSession())
+        assert_raises_regexp(
+            HashErrors,
+            r'Hashes are required in --require-hashes mode, but they are '
+            r'missing .*\n'
+            r'    blessings==1.0 --hash=sha256:[0-9a-f]+\n'
+            r'THESE PACKAGES DO NOT MATCH THE HASHES.*\n'
+            r'    tracefront==0.1 .*:\n'
+            r'        Expected sha256 somehash\n'
+            r'             Got        [0-9a-f]+$',
+            reqset.prepare_files,
+            finder)
+
+    def test_missing_hash_with_require_hashes(self, data):
+        """Setting --require-hashes explicitly should raise errors if hashes
+        are missing.
+        """
+        reqset = self.basic_reqset(require_hashes=True)
+        reqset.add_requirement(
+            list(process_line('simple==1.0', 'file', 1))[0])
+        finder = PackageFinder([data.find_links], [], session=PipSession())
+        assert_raises_regexp(
+            HashErrors,
+            r'Hashes are required in --require-hashes mode, but they are '
+            r'missing .*\n'
+            r'    simple==1.0 --hash=sha256:393043e672415891885c9a2a0929b1af95'
+            r'fb866d6ca016b42d2e6ce53619b653$',
+            reqset.prepare_files,
+            finder)
+
+    def test_missing_hash_with_require_hashes_in_reqs_file(self, data, tmpdir):
+        """--require-hashes in a requirements file should make its way to the
+        RequirementSet.
+        """
+        req_set = self.basic_reqset(require_hashes=False)
+        session = PipSession()
+        finder = PackageFinder([data.find_links], [], session=session)
+        command = InstallCommand()
+        with requirements_file('--require-hashes', tmpdir) as reqs_file:
+            options, args = command.parse_args(['-r', reqs_file])
+            command.populate_requirement_set(
+                req_set, args, options, finder, session, command.name,
+                wheel_cache=None)
+        assert req_set.require_hashes
+
+    def test_unsupported_hashes(self, data):
+        """VCS and dir links should raise errors when --require-hashes is
+        on.
+
+        In addition, complaints about the type of requirement (VCS or dir)
+        should trump the presence or absence of a hash.
+
+        """
+        reqset = self.basic_reqset(require_hashes=True)
+        reqset.add_requirement(
+            list(process_line(
+                'git+git://github.com/pypa/pip-test-package --hash=sha256:123',
+                'file',
+                1))[0])
+        dir_path = data.packages.join('FSPkg')
+        reqset.add_requirement(
+            list(process_line(
+                'file://%s' % (dir_path,),
+                'file',
+                2))[0])
+        finder = PackageFinder([data.find_links], [], session=PipSession())
+        assert_raises_regexp(
+            HashErrors,
+            r"Can't verify hashes for these requirements because we don't "
+            r"have a way to hash version control repositories:\n"
+            r"    git\+git://github\.com/pypa/pip-test-package \(from -r file "
+            r"\(line 1\)\)\n"
+            r"Can't verify hashes for these file:// requirements because they "
+            r"point to directories:\n"
+            r"    file:///.*/data/packages/FSPkg \(from -r file \(line 2\)\)",
+            reqset.prepare_files,
+            finder)
+
+    def test_unpinned_hash_checking(self, data):
+        """Make sure prepare_files() raises an error when a requirement is not
+        version-pinned in hash-checking mode.
+        """
+        reqset = self.basic_reqset()
+        # Test that there must be exactly 1 specifier:
+        reqset.add_requirement(
+            list(process_line('simple --hash=sha256:a90427ae31f5d1d0d7ec06ee97'
+                              'd9fcf2d0fc9a786985250c1c83fd68df5911dd',
+                              'file',
+                              1))[0])
+        # Test that the operator must be ==:
+        reqset.add_requirement(list(process_line(
+            'simple2>1.0 --hash=sha256:3ad45e1e9aa48b4462af0'
+            '123f6a7e44a9115db1ef945d4d92c123dfe21815a06',
+            'file',
+            2))[0])
+        finder = PackageFinder([data.find_links], [], session=PipSession())
+        assert_raises_regexp(
+            HashErrors,
+            # Make sure all failing requirements are listed:
+            r'versions pinned with ==. These do not:\n'
+            r'    simple .* \(from -r file \(line 1\)\)\n'
+            r'    simple2>1.0 .* \(from -r file \(line 2\)\)',
+            reqset.prepare_files,
+            finder)
+
+    def test_hash_mismatch(self, data):
+        """A hash mismatch should raise an error."""
+        file_url = path_to_url(
+            (data.packages / 'simple-1.0.tar.gz').abspath)
+        reqset = self.basic_reqset(require_hashes=True)
+        reqset.add_requirement(
+            list(process_line('%s --hash=sha256:badbad' % file_url,
+                              'file',
+                              1))[0])
+        finder = PackageFinder([data.find_links], [], session=PipSession())
+        assert_raises_regexp(
+            HashErrors,
+            r'THESE PACKAGES DO NOT MATCH THE HASHES.*\n'
+            r'    file:///.*/data/packages/simple-1\.0\.tar\.gz .*:\n'
+            r'        Expected sha256 badbad\n'
+            r'             Got        393043e672415891885c9a2a0929b1af95fb866d'
+            r'6ca016b42d2e6ce53619b653$',
+            reqset.prepare_files,
+            finder)
+
+    def test_unhashed_deps_on_require_hashes(self, data):
+        """Make sure unhashed, unpinned, or otherwise unrepeatable
+        dependencies get complained about when --require-hashes is on."""
+        reqset = self.basic_reqset()
+        finder = PackageFinder([data.find_links], [], session=PipSession())
+        reqset.add_requirement(next(process_line(
+            'TopoRequires2==0.0.1 '  # requires TopoRequires
+            '--hash=sha256:eaf9a01242c9f2f42cf2bd82a6a848cd'
+            'e3591d14f7896bdbefcf48543720c970',
+            'file', 1)))
+        assert_raises_regexp(
+            HashErrors,
+            r'In --require-hashes mode, all requirements must have their '
+            r'versions pinned.*\n'
+            r'    TopoRequires from .*$',
+            reqset.prepare_files,
+            finder)
+
+    def test_hashed_deps_on_require_hashes(self, data):
+        """Make sure hashed dependencies get installed when --require-hashes
+        is on.
+
+        (We actually just check that no "not all dependencies are hashed!"
+        error gets raised while preparing; there is no reason to expect
+        installation to then fail, as the code paths are the same as ever.)
+
+        """
+        reqset = self.basic_reqset()
+        reqset.add_requirement(next(process_line(
+            'TopoRequires2==0.0.1 '  # requires TopoRequires
+            '--hash=sha256:eaf9a01242c9f2f42cf2bd82a6a848cd'
+            'e3591d14f7896bdbefcf48543720c970',
+            'file', 1)))
+        reqset.add_requirement(next(process_line(
+            'TopoRequires==0.0.1 '
+            '--hash=sha256:d6dd1e22e60df512fdcf3640ced3039b3b02a56ab2cee81ebcb'
+            '3d0a6d4e8bfa6',
+            'file', 2)))
+
+    def test_no_egg_on_require_hashes(self, data):
+        """Make sure --egg is illegal with --require-hashes.
+
+        --egg would cause dependencies to always be installed, since it cedes
+        control directly to setuptools.
+
+        """
+        reqset = self.basic_reqset(require_hashes=True, as_egg=True)
+        finder = PackageFinder([data.find_links], [], session=PipSession())
+        with pytest.raises(InstallationError):
+            reqset.prepare_files(finder)
+

 @pytest.mark.parametrize(('file_contents', 'expected'), [
     (b'\xf6\x80', b'\xc3\xb6\xe2\x82\xac'),  # cp1252
diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py
index d714baacd16..6429c8644d1 100644
--- a/tests/unit/test_req_file.py
+++ b/tests/unit/test_req_file.py
@@ -14,6 +14,7 @@
 from pip.req.req_file import (parse_requirements, process_line, join_lines,
                               ignore_comments, break_args_options, skip_regex,
                               preprocess)
+from tests.lib import requirements_file


 @pytest.fixture
@@ -251,6 +252,28 @@ def test_options_on_a_requirement_line(self):
             'global_options': ['yo3', 'yo4'],
             'install_options': ['yo1', 'yo2']}

+    def test_hash_options(self):
+        """Test the --hash option: mostly its value storage.
+
+        Make sure it reads and preserves multiple hashes.
+
+        """
+        line = ('SomeProject --hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b1'
+                '61e5c1fa7425e73043362938b9824 '
+                '--hash=sha384:59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c'
+                '3553bcdb9c666fa90125a3c79f90397bdf5f6a13de828684f '
+                '--hash=sha256:486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8'
+                'e5a6c65260e9cb8a7')
+        filename = 'filename'
+        req = list(process_line(line, filename, 1))[0]
+        assert req.options == {'hashes': {
+            'sha256': ['2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e730433'
+                       '62938b9824',
+                       '486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65'
+                       '260e9cb8a7'],
+            'sha384': ['59e1748777448c69de6b800d7a33bbfb9ff1b463e44354c3553bcd'
+                       'b9c666fa90125a3c79f90397bdf5f6a13de828684f']}}
+
     def test_set_isolated(self, options):
         line = 'SomeProject'
         filename = 'filename'
@@ -552,12 +575,11 @@ def test_install_requirements_with_options(self, tmpdir, finder, session,
             --install-option "{install_option}"
         '''.format(global_option=global_option,
                    install_option=install_option)
-        req_path = tmpdir.join('requirements.txt')
-        with open(req_path, 'w') as fh:
-            fh.write(content)
-
-        req = next(parse_requirements(
-            req_path, finder=finder, options=options, session=session))
+        with requirements_file(content, tmpdir) as reqs_file:
+            req = next(parse_requirements(reqs_file.abspath,
+                                          finder=finder,
+                                          options=options,
+                                          session=session))
         req.source_dir = os.curdir
         with patch.object(subprocess, 'Popen') as popen:
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index bb025a3f013..179055fbe14 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -12,9 +12,12 @@
 import pytest
 from mock import Mock, patch

+from pip.exceptions import HashMismatch, HashMissing, InstallationError
 from pip.utils import (egg_link_path, Inf, get_installed_distributions,
                        untar_file, unzip_file, rmtree, normalize_path)
+from pip.utils.hashes import Hashes, MissingHashes
 from pip.operations.freeze import freeze_excludes
+from pip._vendor.six import BytesIO


 class Tests_EgglinkPath:
@@ -406,3 +409,47 @@ def test_resolve_symlinks(self, tmpdir):
         ) == os.path.join(tmpdir, 'file_link')
     finally:
         os.chdir(orig_working_dir)
+
+
+class TestHashes(object):
+    """Tests for pip.utils.hashes"""
+
+    def test_success(self, tmpdir):
+        """Make sure no error is raised when at least one hash matches.
+
+        Test check_against_path because it calls everything else.
+
+        """
+        file = tmpdir / 'to_hash'
+        file.write('hello')
+        hashes = Hashes({
+            'sha256': ['2cf24dba5fb0a30e26e83b2ac5b9e29e'
+                       '1b161e5c1fa7425e73043362938b9824'],
+            'sha224': ['wrongwrong'],
+            'md5': ['5d41402abc4b2a76b9719d911017c592']})
+        hashes.check_against_path(file)
+
+    def test_failure(self):
+        """Hashes should raise HashMismatch when no hashes match."""
+        hashes = Hashes({'sha256': ['wrongwrong']})
+        with pytest.raises(HashMismatch):
+            hashes.check_against_file(BytesIO(b'hello'))
+
+    def test_missing_hashes(self):
+        """MissingHashes should raise HashMissing when any check is done."""
+        with pytest.raises(HashMissing):
+            MissingHashes().check_against_file(BytesIO(b'hello'))
+
+    def test_unknown_hash(self):
+        """Hashes should raise InstallationError when it encounters an unknown
+        hash."""
+        hashes = Hashes({'badbad': ['dummy']})
+        with pytest.raises(InstallationError):
+            hashes.check_against_file(BytesIO(b'hello'))
+
+    def test_non_zero(self):
+        """Test that truthiness tests tell whether any known-good hashes
+        exist."""
+        assert Hashes({'sha256': 'dummy'})
+        assert not Hashes()
+        assert not Hashes({})
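Taken together, the ``TestHashes`` cases above pin down both failure modes of the new hashes module. A compact sketch of the same behavior outside the test suite (nothing below is part of this patch; it reuses the ``b'hello'`` fixture)::

    from pip._vendor.six import BytesIO
    from pip.exceptions import HashMismatch, HashMissing
    from pip.utils.hashes import Hashes, MissingHashes

    # No allowed digest matches b'hello', so HashMismatch is raised; the
    # exception carries both the allowed and the computed digests.
    try:
        Hashes({'sha256': ['badbad']}).check_against_file(BytesIO(b'hello'))
    except HashMismatch:
        pass

    # MissingHashes can never match (its allowed list is empty), so it
    # raises HashMissing carrying the sha256 it computed, which pip shows
    # to the user to paste into a requirements file.
    try:
        MissingHashes().check_against_file(BytesIO(b'hello'))
    except HashMissing:
        pass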