diff --git a/.github/actions/build_wheel_and_sdist/action.yml b/.github/actions/build_wheel_and_sdist/action.yml
new file mode 100644
index 00000000..8c2b3b8e
--- /dev/null
+++ b/.github/actions/build_wheel_and_sdist/action.yml
@@ -0,0 +1,47 @@
+name:
+  Build a wheel and source distribution.
+
+description:
+  Run pyproject-build on the repo.
+
+inputs:
+  pyshp_repo_directory:
+    description: Path to where the PyShp repo was checked out to.
+    required: false
+    default: '.'
+
+runs:
+  using: "composite"
+  steps:
+  # The PyShp repo is required to already be checked out into .,
+  # e.g. by the calling workflow using:
+  # steps:
+  # - uses: actions/checkout@v4
+  #
+  # and then calling this Action with:
+  # - name: Build wheel and sdist
+  #   uses: ./.github/actions/build_wheel_and_sdist
+
+  # The Python to build with is required to already be set up,
+  # with "python" and "pip" on the system Path
+  # (so that this custom Action can be used both with reproducible
+  # Pythons from Python docker images, and with the more frequently
+  # deprecated Pythons from - uses: actions/setup-python@v5)
+
+  - name: Install build (PyPA's pyproject-build)
+    shell: bash
+    run: |
+      python -m pip install --upgrade pip
+      python -m pip install build
+
+  - name: Show versions for logs.
+    shell: bash
+    run: |
+      python --version
+      python -m build --version
+
+  - name: Build the default distributions (wheel & sdist, in dist/).
+    shell: bash
+    working-directory: ${{ inputs.pyshp_repo_directory }}
+    run: python -m build
+
+  - name: Upload built distributions
+    uses: actions/upload-artifact@v4
+    with:
+      name: PyShp_wheel_and_sdist
+      path: dist
\ No newline at end of file
diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml
index c6ca65a4..e1dc3ca9 100644
--- a/.github/actions/test/action.yml
+++ b/.github/actions/test/action.yml
@@ -17,9 +17,6 @@ inputs:
     description: Path to where the PyShp repo was checked out to (to keep separate from Shapefiles & artefacts repo).
     required: false
     default: '.'
-  python-version:
-    description: Set to "2.7" to use caddy instead of python -m SimpleHTTPServer
-    required: true
@@ -27,12 +24,18 @@ runs:
   using: "composite"
   steps:
   # The PyShp repo is required to already be checked out into pyshp_repo_directory,
+  # with the wheel to be tested in ./dist within it,
   # e.g. by the calling workflow using:
   # steps:
   # - uses: actions/checkout@v4
   #   with:
   #     path: ./Pyshp
+  #
+  # - name: Build distribution artefacts (wheel and sdist)
+  #   uses: ./Pyshp/.github/actions/build_wheel_and_sdist
+  #
   # and then calling this Action with:
+  #
   # - name: Run tests
   #   uses: ./Pyshp/.github/actions/test
   #   with:
@@ -40,7 +43,12 @@ runs:
   #     replace_remote_urls_with_localhost: 'yes'
   #     pyshp_repo_directory: ./Pyshp

-  # The Python to be tested with is required to already be setup, with "python" and "pip" on the system Path
+  # The Python to be tested with is required to already be set up,
+  # with "python" and "pip" on the system Path
+  # (so that this custom Action can be used both with reproducible
+  # Pythons from Python docker images, and with the more frequently
+  # deprecated Pythons from - uses: actions/setup-python@v5)
+

   - name: Checkout shapefiles and zip file artefacts repo
     if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' }}
@@ -50,7 +58,7 @@
       path: ./PyShp_test_shapefile

   - name: Serve shapefiles and zip file artefacts on localhost
-    if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version != '2.7'}}
+    if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' }}
    shell: bash
    working-directory: ./PyShp_test_shapefile
    run: |
@@ -58,50 +66,39 @@ runs:
      echo "HTTP_SERVER_PID=$!"
>> $GITHUB_ENV
      sleep 4 # give server time to start

-  - name: Download and unzip Caddy binary
-    if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version == '2.7'}}
-    working-directory: .
-    shell: bash
-    run: |
-      curl -L https://github.com/caddyserver/caddy/releases/download/v2.10.0/caddy_2.10.0_linux_amd64.tar.gz --output caddy.tar.gz
-      tar -xzf caddy.tar.gz
+  - name: Download wheel and sdist (built in previous jobs)
+    uses: actions/download-artifact@v4
+    with:
+      name: PyShp_wheel_and_sdist
+      path: dist

-  - name: Serve shapefiles and zip file artefacts on localhost using Caddy
-    if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version == '2.7'}}
+  - name: Install PyShp from the wheel (downloaded in prev step)
    shell: bash
-    working-directory: .
+    working-directory: dist/
    run: |
-      ./caddy file-server --root ./PyShp_test_shapefile --listen :8000 &
-      echo "HTTP_SERVER_PID=$!" >> $GITHUB_ENV
-      sleep 2 # give server time to start
+      WHEEL_NAME=$(ls pyshp-*py3-none-any.whl)
+      python -m pip install "$WHEEL_NAME[test]"

  - name: Doctests
    shell: bash
    working-directory: ${{ inputs.pyshp_repo_directory }}
    env:
      REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }}
-    run: python shapefile.py ${{ inputs.extra_args }}
+    run: python test_shapefile.py ${{ inputs.extra_args }}

-  - name: Install test dependencies.
+  - name: Show Python and Pytest versions for logs.
    shell: bash
-    working-directory: ${{ inputs.pyshp_repo_directory }}
    run: |
-      python -m pip install --upgrade pip
-      pip install -r requirements.test.txt
+      python --version
+      python -m pytest --version

-  - name: Pytest
+  - name: Run Pytest
    shell: bash
    working-directory: ${{ inputs.pyshp_repo_directory }}
    env:
      REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }}
-    run: |
-      pytest -rA --tb=short ${{ inputs.extra_args }}
+    run: pytest -rA --tb=short ${{ inputs.extra_args }}

# - name: Test http server

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index c66adc89..0ddd8b71 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -13,16 +13,13 @@ on:
    types: [published]

jobs:
-  test:
-    # In general, tests should be run after building a distribution, to test that distribution.
-    # However as long as PyShp is a pure Python library, with pure Python deps (or no deps)
-    # then this would only test the packaging process, not so much the code as there are
-    # no binaries.
-    uses: ./.github/workflows/run_tests_hooks_and_tools.yml
+  format_type_check_lint_build_and_test:
+    # Builds and uploads wheel and sdist
+    uses: ./.github/workflows/run_checks_build_and_test.yml

  deploy:
-    # Prevent deployment of releases that fail any hooks (e.g. linting) or that fail any tests.
+    # Prevent deployment of releases that fail any tests or checks (e.g. linting).
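+    # (Via needs:, this deploy job waits for that called workflow and runs only if all of its jobs succeed.)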
+    needs: format_type_check_lint_build_and_test

    runs-on: ubuntu-latest
    steps:
@@ -33,13 +30,13 @@ jobs:
        python-version: '3.x'

+    - name: Download wheel and sdist (built in previous job)
+      uses: actions/download-artifact@v4
+      with:
+        name: PyShp_wheel_and_sdist
+        path: dist
+
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install build
-    - name: Build package
-      run: python -m build

    - name: Publish package
      if: github.repository == 'GeospatialPython/pyshp'
@@ -47,3 +44,4 @@
      with:
        user: __token__
        password: ${{ secrets.PYPI_INTEGRATION }}
+        packages-dir: dist/
diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_checks_build_and_test.yml
similarity index 52%
rename from .github/workflows/run_tests_hooks_and_tools.yml
rename to .github/workflows/run_checks_build_and_test.yml
index 468b2e2b..0d471263 100644
--- a/.github/workflows/run_tests_hooks_and_tools.yml
+++ b/.github/workflows/run_checks_build_and_test.yml
@@ -5,7 +5,6 @@ name: Run pre-commit hooks and tests
on:
  push:
  pull_request:
-    branches: [ master, ]
  workflow_call:
  workflow_dispatch:

@@ -17,55 +16,16 @@ jobs:
    - uses: actions/setup-python@v5
    - uses: pre-commit/action@v3.0.1

-  pylint:
+  build_wheel_and_sdist:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
    - uses: actions/setup-python@v5
-    - name: install Pylint and plugin
-      run: |
-        python -m pip install --upgrade pip
-        pip install pytest pylint pylint-per-file-ignores
-    - name: run Pylint for errors and warnings only, on test_shapefile.py
-      run: |
-        pylint --disable=R,C test_shapefile.py
-
-  test_on_EOL_Pythons:
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: [
-          "2.7",
-          "3.5",
-          "3.6",
-          "3.7",
-          "3.8",
-        ]
-
-    runs-on: ubuntu-latest
-    container:
-      image: python:${{ matrix.python-version }}
-
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        path: ./Pyshp
-
-    - name: Non-network tests
-      uses: ./Pyshp/.github/actions/test
-      with:
-        pyshp_repo_directory: ./Pyshp
-        python-version: ${{ matrix.python-version }}
-
-    - name: Network tests
-      uses: ./Pyshp/.github/actions/test
-      with:
-        extra_args: '-m network'
-        replace_remote_urls_with_localhost: 'yes'
-        pyshp_repo_directory: ./Pyshp
-        python-version: ${{ matrix.python-version }}
+    - name: Build wheel from the project repo
+      uses: ./.github/actions/build_wheel_and_sdist

  test_on_supported_Pythons:
+    needs: build_wheel_and_sdist
    strategy:
      fail-fast: false
      matrix:
@@ -98,12 +58,11 @@
      uses: ./Pyshp/.github/actions/test
      with:
        pyshp_repo_directory: ./Pyshp
-        python-version: ${{ matrix.python-version }}

    - name: Network tests
      uses: ./Pyshp/.github/actions/test
      with:
        extra_args: '-m network'
        replace_remote_urls_with_localhost: 'yes'
+        # Checkout to ./Pyshp, as the test job also needs to check out the artefact repo
        pyshp_repo_directory: ./Pyshp
-        python-version: ${{ matrix.python-version }}
diff --git a/.github/workflows/speed_test.yml b/.github/workflows/speed_test.yml
index 3dfd5046..46e7e47a 100644
--- a/.github/workflows/speed_test.yml
+++ b/.github/workflows/speed_test.yml
@@ -8,7 +8,17 @@ on:

jobs:
+
+  build_wheel_and_sdist:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - uses: actions/setup-python@v5
+    - name: Build wheel from the project repo
+      uses: ./.github/actions/build_wheel_and_sdist
+
  run_speed_tests:
+    needs: build_wheel_and_sdist
    strategy:
      fail-fast: false
      matrix:
@@ -28,17 +38,23 @@
      with:
        python-version: ${{ matrix.python-version }}

+    - name: Download wheel and sdist (built in previous job)
+      uses:
actions/download-artifact@v4
+      with:
+        name: PyShp_wheel_and_sdist
+        path: dist
+
+    - name: Install PyShp + test deps from the wheel (downloaded in prev step)
+      shell: bash
+      working-directory: dist/
+      run: |
+        WHEEL_NAME=$(ls pyshp-*py3-none-any.whl)
+        python -m pip install "$WHEEL_NAME[test]"

    - uses: actions/checkout@v4
      with:
        path: ./Pyshp

-    - name: Install PyShp + test deps
-      shell: bash
-      working-directory: ./Pyshp
-      run: |
-        python -m pip install -r requirements.test.txt
-        python -m pip install -e .

    - name: Checkout shapefiles and zip file artefacts repo
@@ -50,7 +66,7 @@
    - name: Run Speed tests.
      env:
        PYSHP_TEST_REPO: ./PyShp_test_shapefile
-      run: python Pyshp/run_benchmarks.py
+      run: python ./Pyshp/run_benchmarks.py
diff --git a/.gitignore b/.gitignore
index d1734202..41085890 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,5 +22,8 @@ dist/
.vscode
.dmypy.json
.python-version
-.venv
-venv
+.venv/
+venv/
+.mypy_cache/
+.pytest_cache/
+.ruff_cache/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ffe59bf6..e12617ac 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,15 +1,24 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.6.4
+  rev: v0.12.7
  hooks:
+  # Run the linter
+  - id: ruff-check
+    args: [ --fix ]
+  # Run the formatter
  - id: ruff-format
- repo: https://github.com/pycqa/isort
-  rev: 5.13.2
+  rev: 6.0.1
  hooks:
  - id: isort
    name: isort (python)
+    args: ["--profile", "black"]
- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v2.3.0
+  rev: v5.0.0
  hooks:
  - id: check-yaml
  - id: trailing-whitespace
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v1.17.1
+  hooks:
+  - id: mypy
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 19d54f29..00000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,2 +0,0 @@
-include *.md *.txt *.TXT
-recursive-include shapefiles *.dbf *.sbn *.sbx *.shp *.shx
diff --git a/README.md b/README.md
index ad8ffca7..4e83f581 100644
--- a/README.md
+++ b/README.md
@@ -8,8 +8,8 @@ The Python Shapefile Library (PyShp) reads and writes ESRI Shapefiles in pure Py
- **Author**: [Joel Lawhead](https://github.com/GeospatialPython)
- **Maintainers**: [Karim Bahgat](https://github.com/karimbahgat)
-- **Version**: 2.3.1
-- **Date**: 28 July, 2022
+- **Version**: 3.0.0-alpha
+- **Date**: 3 August, 2025
- **License**: [MIT](https://github.com/GeospatialPython/pyshp/blob/master/LICENSE.TXT)

## Contents
@@ -74,8 +74,6 @@
Both the Esri and XBase file-formats are very simple in design and memory
efficient which is part of the reason the shapefile format remains popular
despite the numerous ways to store and exchange GIS data available today.

-Pyshp is compatible with Python 2.7-3.x.
-
This document provides examples for using PyShp to read and write
shapefiles. However many more examples are continually added to the blog
[http://GeospatialPython.com](http://GeospatialPython.com),
and by searching for PyShp on [https://gis.stackexchange.com](https://gis.stackexchange.com).
@@ -95,6 +93,33 @@ part of your geospatial project.

# Version Changes

+## 3.0.0-alpha
+
+### Breaking Changes:
+- Python 2 and Python 3.8 support dropped.
+- Field info tuple is now a namedtuple (Field) instead of a list.
+- Field type codes are now FieldType 'enum' members.
+- bbox, mbox and zbox attributes are now namedtuples.
+- Writer does not mutate Shapes.
+- New custom subclasses for each shape type: Null, Multipatch, Point, Polyline,
+  Multipoint, and Polygon, plus the M and Z variants of the latter four (Reader and
+  Writer are still compatible with their base class, Shape, as before).
+- Shape subclasses can be created from, and serialized to, byte streams,
+  as per the shapefile spec.
+
+### Improvements:
+- Sped up writing shapefiles by up to another ~27% (on top of the recent ~39% improvement in 2.4.1).
+
+### Code quality:
+- Statically typed and checked with Mypy.
+- Checked with Ruff.
+- f-strings.
+- Removed Python 2 specific functions.
+- Doctests run against wheels.
+- Wheels tested before publishing.
+- pyproject.toml and src layout.
+- Slow tests marked.

## 2.4.1

### Improvements:
@@ -412,7 +437,7 @@ and the bounding box area the shapefile covers:

    >>> len(sf)
    663
    >>> sf.bbox
-    [-122.515048, 37.652916, -122.327622, 37.863433]
+    (-122.515048, 37.652916, -122.327622, 37.863433)

Finally, if you would prefer to work with the entire shapefile in a different
format, you can convert all of it to a GeoJSON dictionary, although you may lose
@@ -449,7 +474,8 @@ index which is 7.

    >>> s = sf.shape(7)
    >>> s
-    Shape #7: POLYGON
+    Polygon #7
+
    >>> # Read the bbox of the 8th shape to verify
    >>> # Round coordinates to 3 decimal places
@@ -458,17 +484,19 @@
Each shape record (except Points) contains the following attributes. Records of
shapeType Point do not have a bounding box 'bbox'.
-
+# TODO!! Fix attributes
    >>> for name in dir(shapes[3]):
    ...     if not name.startswith('_'):
    ...         name
    'bbox'
+    'from_byte_stream'
    'oid'
    'parts'
    'points'
    'shapeType'
    'shapeTypeName'
+    'write_to_byte_stream'

* `oid`: The shape's index position in the original shapefile.
@@ -557,45 +585,34 @@ in the shp geometry file and the dbf attribute file.

The field names of a shapefile are available as soon as you read a shapefile.
You can call the "fields" attribute of the shapefile as a Python list. Each
-field is a Python list with the following information:
+field is a Python namedtuple (Field) with the following information:

- * Field name: the name describing the data at this column index.
- * Field type: the type of data at this column index. Types can be:
+ * name: the name describing the data at this column index (a string).
+ * field_type: a FieldType enum member determining the type of data at this column index. Member names can be:
   * "C": Characters, text.
   * "N": Numbers, with or without decimals.
   * "F": Floats (same as "N").
   * "L": Logical, for boolean True/False values.
   * "D": Dates.
   * "M": Memo, has no meaning within a GIS and is part of the xbase spec instead.
- * Field length: the length of the data found at this column index. Older GIS software may truncate this length to 8 or 11 characters for "Character" fields.
- * Decimal length: the number of decimal places found in "Number" fields.
+ * size: Field length: the length of the data found at this column index. Older GIS software may truncate this length to 8 or 11 characters for "Character" fields.
+ * decimal: Decimal length. The number of decimal places found in "Number" fields.
+
+A new Field can be created directly from a FieldType enum member etc., or as follows:
+
+    >>> shapefile.Field.from_unchecked("Population", "N", 10, 0)
+    Field(name="Population", field_type=FieldType.N, size=10, decimal=0)
+
+Using this method, the conversion from string to enum member is done automatically.

To see the fields for the Reader object above (sf) call the "fields" attribute:

-    >>> fields = sf.fields
-
-    >>> assert fields == [("DeletionFlag", "C", 1, 0), ["AREA", "N", 18, 5],
-    ...
["BKG_KEY", "C", 12, 0], ["POP1990", "N", 9, 0], ["POP90_SQMI", "N", 10, 1], - ... ["HOUSEHOLDS", "N", 9, 0], - ... ["MALES", "N", 9, 0], ["FEMALES", "N", 9, 0], ["WHITE", "N", 9, 0], - ... ["BLACK", "N", 8, 0], ["AMERI_ES", "N", 7, 0], ["ASIAN_PI", "N", 8, 0], - ... ["OTHER", "N", 8, 0], ["HISPANIC", "N", 8, 0], ["AGE_UNDER5", "N", 8, 0], - ... ["AGE_5_17", "N", 8, 0], ["AGE_18_29", "N", 8, 0], ["AGE_30_49", "N", 8, 0], - ... ["AGE_50_64", "N", 8, 0], ["AGE_65_UP", "N", 8, 0], - ... ["NEVERMARRY", "N", 8, 0], ["MARRIED", "N", 9, 0], ["SEPARATED", "N", 7, 0], - ... ["WIDOWED", "N", 8, 0], ["DIVORCED", "N", 8, 0], ["HSEHLD_1_M", "N", 8, 0], - ... ["HSEHLD_1_F", "N", 8, 0], ["MARHH_CHD", "N", 8, 0], - ... ["MARHH_NO_C", "N", 8, 0], ["MHH_CHILD", "N", 7, 0], - ... ["FHH_CHILD", "N", 7, 0], ["HSE_UNITS", "N", 9, 0], ["VACANT", "N", 7, 0], - ... ["OWNER_OCC", "N", 8, 0], ["RENTER_OCC", "N", 8, 0], - ... ["MEDIAN_VAL", "N", 7, 0], ["MEDIANRENT", "N", 4, 0], - ... ["UNITS_1DET", "N", 8, 0], ["UNITS_1ATT", "N", 7, 0], ["UNITS2", "N", 7, 0], - ... ["UNITS3_9", "N", 8, 0], ["UNITS10_49", "N", 8, 0], - ... ["UNITS50_UP", "N", 8, 0], ["MOBILEHOME", "N", 7, 0]] + >>> sf.fields + [Field(name="DeletionFlag", field_type=FieldType.C, size=1, decimal=0), Field(name="AREA", field_type=FieldType.N, size=18, decimal=5), Field(name="BKG_KEY", field_type=FieldType.C, size=12, decimal=0), Field(name="POP1990", field_type=FieldType.N, size=9, decimal=0), Field(name="POP90_SQMI", field_type=FieldType.N, size=10, decimal=1), Field(name="HOUSEHOLDS", field_type=FieldType.N, size=9, decimal=0), Field(name="MALES", field_type=FieldType.N, size=9, decimal=0), Field(name="FEMALES", field_type=FieldType.N, size=9, decimal=0), Field(name="WHITE", field_type=FieldType.N, size=9, decimal=0), Field(name="BLACK", field_type=FieldType.N, size=8, decimal=0), Field(name="AMERI_ES", field_type=FieldType.N, size=7, decimal=0), Field(name="ASIAN_PI", field_type=FieldType.N, size=8, decimal=0), Field(name="OTHER", field_type=FieldType.N, size=8, decimal=0), Field(name="HISPANIC", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_UNDER5", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_5_17", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_18_29", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_30_49", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_50_64", field_type=FieldType.N, size=8, decimal=0), Field(name="AGE_65_UP", field_type=FieldType.N, size=8, decimal=0), Field(name="NEVERMARRY", field_type=FieldType.N, size=8, decimal=0), Field(name="MARRIED", field_type=FieldType.N, size=9, decimal=0), Field(name="SEPARATED", field_type=FieldType.N, size=7, decimal=0), Field(name="WIDOWED", field_type=FieldType.N, size=8, decimal=0), Field(name="DIVORCED", field_type=FieldType.N, size=8, decimal=0), Field(name="HSEHLD_1_M", field_type=FieldType.N, size=8, decimal=0), Field(name="HSEHLD_1_F", field_type=FieldType.N, size=8, decimal=0), Field(name="MARHH_CHD", field_type=FieldType.N, size=8, decimal=0), Field(name="MARHH_NO_C", field_type=FieldType.N, size=8, decimal=0), Field(name="MHH_CHILD", field_type=FieldType.N, size=7, decimal=0), Field(name="FHH_CHILD", field_type=FieldType.N, size=7, decimal=0), Field(name="HSE_UNITS", field_type=FieldType.N, size=9, decimal=0), Field(name="VACANT", field_type=FieldType.N, size=7, decimal=0), Field(name="OWNER_OCC", field_type=FieldType.N, size=8, decimal=0), Field(name="RENTER_OCC", field_type=FieldType.N, size=8, decimal=0), 
Field(name="MEDIAN_VAL", field_type=FieldType.N, size=7, decimal=0), Field(name="MEDIANRENT", field_type=FieldType.N, size=4, decimal=0), Field(name="UNITS_1DET", field_type=FieldType.N, size=8, decimal=0), Field(name="UNITS_1ATT", field_type=FieldType.N, size=7, decimal=0), Field(name="UNITS2", field_type=FieldType.N, size=7, decimal=0), Field(name="UNITS3_9", field_type=FieldType.N, size=8, decimal=0), Field(name="UNITS10_49", field_type=FieldType.N, size=8, decimal=0), Field(name="UNITS50_UP", field_type=FieldType.N, size=8, decimal=0), Field(name="MOBILEHOME", field_type=FieldType.N, size=7, decimal=0)] The first field of a dbf file is always a 1-byte field called "DeletionFlag", which indicates records that have been deleted but not removed. However, @@ -923,8 +940,8 @@ You can also add attributes using keyword arguments where the keys are field nam >>> w = shapefile.Writer('shapefiles/test/dtype') - >>> w.field('FIRST_FLD','C','40') - >>> w.field('SECOND_FLD','C','40') + >>> w.field('FIRST_FLD','C', 40) + >>> w.field('SECOND_FLD','C', 40) >>> w.null() >>> w.null() >>> w.record('First', 'Line') @@ -1379,7 +1396,7 @@ Shapefiles containing M-values can be examined in several ways: >>> r = shapefile.Reader('shapefiles/test/linem') >>> r.mbox # the lower and upper bound of M-values in the shapefile - [0.0, 3.0] + (0.0, 3.0) >>> r.shape(0).m # flat list of M-values [0.0, None, 3.0, None, 0.0, None, None] @@ -1412,7 +1429,7 @@ To examine a Z-type shapefile you can do: >>> r = shapefile.Reader('shapefiles/test/linez') >>> r.zbox # the lower and upper bound of Z-values in the shapefile - [0.0, 22.0] + (0.0, 22.0) >>> r.shape(0).z # flat list of Z-values [18.0, 20.0, 22.0, 0.0, 0.0, 0.0, 0.0, 15.0, 13.0, 14.0] @@ -1452,7 +1469,7 @@ ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiP The testing framework is pytest, and the tests are located in test_shapefile.py. This includes an extensive set of unit tests of the various pyshp features, and tests against various input data. -In the same folder as README.md and shapefile.py, from the command line run +In the same folder as README.md and shapefile.py, from the command line run: ```shell python -m pytest @@ -1461,12 +1478,21 @@ python -m pytest Additionally, all the code and examples located in this file, README.md, is tested and verified with the builtin doctest framework. A special routine for invoking the doctest is run when calling directly on shapefile.py. -In the same folder as README.md and shapefile.py, from the command line run +In the same folder as README.md and shapefile.py, from the command line run: ```shell python shapefile.py ``` +This tests the code inside shapefile.py itself. To test an installed PyShp wheel against +the doctests, the same special routine can be invoked (in an env with the wheel and pytest +installed) from the test file: + + +```shell +python test_shapefile.py +``` + Linux/Mac and similar platforms may need to run `$ dos2unix README.md` in order to correct line endings in README.md, if Git has not automatically changed them. @@ -1503,8 +1529,6 @@ REPLACE_REMOTE_URLS_WITH_LOCALHOST=yes && python shapefile.py The network tests alone can also be run (without also running all the tests that don't make network requests) using: `pytest -m network` (or the doctests using: `python shapefile.py -m network`). -(*) The steps to host the files using Caddy for PYthon 2 are in ./actions/test/action.yml. 
For reasons as
-yet unknown, shapefile.py's Reader class in Python 2 Pytest, can't connect to a Python 2 SimpleHTTPServer.

# Contributors

diff --git a/changelog.txt b/changelog.txt
index 79a367f8..bf7eb922 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -1,3 +1,33 @@
+VERSION 3.0.0-alpha
+
+2025-08-03
+	Breaking Changes:
+	* Python 2 and Python 3.8 support dropped.
+	* Field info tuple is now a namedtuple (Field) instead of a list.
+	* Field type codes are now FieldType 'enum' members.
+	* bbox, mbox and zbox attributes are now namedtuples.
+	* Writer does not mutate Shapes.
+	* New custom subclasses for each shape type: Null, Multipatch, Point, Polyline,
+	  Multipoint, and Polygon, plus the M and Z variants of the latter four (Reader and
+	  Writer are still compatible with their base class, Shape, as before).
+	* Shape subclasses can be created from, and serialized to, byte streams,
+	  as per the shapefile spec.
+
+	Code quality:
+	* Statically typed and checked with Mypy.
+	* Checked with Ruff.
+	* Type hints.
+	* f-strings.
+	* Removed Python 2 specific functions.
+	* Doctests run against wheels.
+	* Wheels tested before publishing.
+	* pyproject.toml and src layout.
+	* Slow tests marked.
+
+	Improvements:
+	* Sped up writing shapefiles by up to another ~27% (on top of the recent ~39% improvement in 2.4.1).
+
+
VERSION 2.4.1

2025-07-30
diff --git a/pyproject.toml b/pyproject.toml
index df8e737f..00a755fd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,56 @@
[build-system]
-requires = ["setuptools"]
-build-backend = "setuptools.build_meta"
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "pyshp"
+authors = [
+    {name = "Joel Lawhead", email = "jlawhead@geospatialpython.com"},
+]
+maintainers = [
+    {name = "Karim Bahgat", email = "karim.bahgat.norway@gmail.com"}
+]
+readme = "README.md"
+keywords = ["gis", "geospatial", "geographic", "shapefile", "shapefiles"]
+description = "Pure Python read/write support for ESRI Shapefile format"
+license = "MIT"
+license-files = ["LICENSE.TXT"]
+dynamic = ["version"]
+requires-python = ">=3.9"
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3 :: Only",
+    "Topic :: Scientific/Engineering :: GIS",
+    "Topic :: Software Development :: Libraries",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+dependencies = [
+]
+
+[project.optional-dependencies]
+dev = ["pyshp[test]", "pre-commit", "ruff"]
+test = ["pytest"]
+
+[project.urls]
+Repository = "https://github.com/GeospatialPython/pyshp"
+
+[tool.hatch.build.targets.sdist]
+only-include = ["src", "shapefiles", "test_shapefile.py"]
+
+[tool.hatch.build.targets.wheel]
+only-include = ["src"]
+sources = {"src" = ""} # move from "src" directory for wheel
+
+[tool.hatch.version]
+path = "src/shapefile.py"
+
+[tool.pytest.ini_options]
+markers = [
+    "network: marks tests requiring network access",
+    "slow: marks other tests that cause bottlenecks",
+]

[tool.ruff]
# Exclude a variety of commonly ignored directories.
@@ -39,7 +88,7 @@
line-length = 88
indent-width = 4

# Assume Python 3.9
-target-version = "py37"
+target-version = "py39"

[tool.ruff.lint]
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
@@ -67,23 +116,3 @@ skip-magic-trailing-comma = false line-ending = "auto" - -[tool.pylint.MASTER] -load-plugins=[ - "pylint_per_file_ignores", -] - -[tool.pylint.'MESSAGES CONTROL'] -# Silence warning: shapefile.py:2076:20: W0212: Access to a protected -# member _from_geojson of a client class (protected-access) -# -# Silence warnings: test_shapefile.py:{783,786,799,803,06,1195}:19: -# W0212: Access to a protected member _offsets of a -# client class (protected-access) -# -# Toml multi-line string used instead of array due to: -# https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 -per-file-ignores = """ - shapefile.py:W0212 - test_shapefile.py:W0212 -""" \ No newline at end of file diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index dbc031ba..00000000 --- a/pytest.ini +++ /dev/null @@ -1,3 +0,0 @@ -[pytest] -markers = - network: marks tests requiring network access diff --git a/requirements.test.txt b/requirements.test.txt deleted file mode 100644 index 11141738..00000000 --- a/requirements.test.txt +++ /dev/null @@ -1,2 +0,0 @@ -pytest >= 3.7 -setuptools diff --git a/run_benchmarks.py b/run_benchmarks.py index edc2119a..ac31e1cb 100644 --- a/run_benchmarks.py +++ b/run_benchmarks.py @@ -41,7 +41,7 @@ def benchmark( time_taken = timeit.timeit(func, number=run_count) print("\b" * len(placeholder), end="") time_suffix = " s" - print(f"{time_taken:{col_widths[1]-len(time_suffix)}.3g}{time_suffix}", end="") + print(f"{time_taken:{col_widths[1] - len(time_suffix)}.3g}{time_suffix}", end="") print() return time_taken diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 906abd3a..00000000 --- a/setup.cfg +++ /dev/null @@ -1,30 +0,0 @@ -[metadata] -name = pyshp -version = attr: shapefile.__version__ -description = Pure Python read/write support for ESRI Shapefile format -long_description = file: README.md -long_description_content_type = text/markdown -author = Joel Lawhead -author_email = jlawhead@geospatialpython.com -maintainer = Karim Bahgat -maintainer_email = karim.bahgat.norway@gmail.com -url = https://github.com/GeospatialPython/pyshp -download_url = https://pypi.org/project/pyshp/ -license = MIT -license_files = LICENSE.TXT -keywords = gis, geospatial, geographic, shapefile, shapefiles -classifiers = - Development Status :: 5 - Production/Stable - Programming Language :: Python - Programming Language :: Python :: 2.7 - Programming Language :: Python :: 3 - Topic :: Scientific/Engineering :: GIS - Topic :: Software Development :: Libraries - Topic :: Software Development :: Libraries :: Python Modules - -[options] -py_modules = shapefile -python_requires = >=2.7 - -[bdist_wheel] -universal=1 diff --git a/setup.py b/setup.py deleted file mode 100644 index 60684932..00000000 --- a/setup.py +++ /dev/null @@ -1,3 +0,0 @@ -from setuptools import setup - -setup() diff --git a/shapefile.py b/shapefile.py deleted file mode 100644 index 62405b82..00000000 --- a/shapefile.py +++ /dev/null @@ -1,2947 +0,0 @@ -""" -shapefile.py -Provides read and write support for ESRI Shapefiles. 
-authors: jlawheadgeospatialpython.com -maintainer: karim.bahgat.norwaygmail.com -Compatible with Python versions 2.7-3.x -""" - -__version__ = "2.4.1" - -import array -import io -import logging -import os -import sys -import tempfile -import time -import zipfile -from datetime import date -from struct import Struct, calcsize, error, pack, unpack - -# Create named logger -logger = logging.getLogger(__name__) - -# Module settings -VERBOSE = True - -# Test config (for the Doctest runner and test_shapefile.py) -REPLACE_REMOTE_URLS_WITH_LOCALHOST = ( - os.getenv("REPLACE_REMOTE_URLS_WITH_LOCALHOST", "").lower() == "yes" -) - -# Constants for shape types -NULL = 0 -POINT = 1 -POLYLINE = 3 -POLYGON = 5 -MULTIPOINT = 8 -POINTZ = 11 -POLYLINEZ = 13 -POLYGONZ = 15 -MULTIPOINTZ = 18 -POINTM = 21 -POLYLINEM = 23 -POLYGONM = 25 -MULTIPOINTM = 28 -MULTIPATCH = 31 - -SHAPETYPE_LOOKUP = { - 0: "NULL", - 1: "POINT", - 3: "POLYLINE", - 5: "POLYGON", - 8: "MULTIPOINT", - 11: "POINTZ", - 13: "POLYLINEZ", - 15: "POLYGONZ", - 18: "MULTIPOINTZ", - 21: "POINTM", - 23: "POLYLINEM", - 25: "POLYGONM", - 28: "MULTIPOINTM", - 31: "MULTIPATCH", -} - -TRIANGLE_STRIP = 0 -TRIANGLE_FAN = 1 -OUTER_RING = 2 -INNER_RING = 3 -FIRST_RING = 4 -RING = 5 - -PARTTYPE_LOOKUP = { - 0: "TRIANGLE_STRIP", - 1: "TRIANGLE_FAN", - 2: "OUTER_RING", - 3: "INNER_RING", - 4: "FIRST_RING", - 5: "RING", -} - - -# Python 2-3 handling - -PYTHON3 = sys.version_info[0] == 3 - -if PYTHON3: - xrange = range - izip = zip - - from urllib.error import HTTPError - from urllib.parse import urlparse, urlunparse - from urllib.request import Request, urlopen - -else: - from itertools import izip - - from urllib2 import HTTPError, Request, urlopen - from urlparse import urlparse, urlunparse - - -# Helpers - -MISSING = [None, ""] -NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. - -if PYTHON3: - - def b(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, str): - # For python 3 encode str to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return b"" - else: - # Force string representation. - return str(v).encode(encoding, encodingErrors) - - def u(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, bytes): - # For python 3 decode bytes to str. - return v.decode(encoding, encodingErrors) - elif isinstance(v, str): - # Already str. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, str) - -else: - - def b(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, unicode): - # For python 2 encode unicode to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return unicode(v).encode(encoding, encodingErrors) - - def u(v, encoding="utf-8", encodingErrors="strict"): - if isinstance(v, bytes): - # For python 2 decode bytes to unicode. - return v.decode(encoding, encodingErrors) - elif isinstance(v, unicode): - # Already unicode. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. 
- return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, basestring) - - -if sys.version_info[0:2] >= (3, 6): - - def pathlike_obj(path): - if isinstance(path, os.PathLike): - return os.fsdecode(path) - else: - return path -else: - - def pathlike_obj(path): - if is_string(path): - return path - elif hasattr(path, "__fspath__"): - return path.__fspath__() - else: - try: - return str(path) - except: - return path - - -# Begin - - -class _Array(array.array): - """Converts python tuples to lists of the appropriate type. - Used to unpack different shapefile header parts.""" - - def __repr__(self): - return str(self.tolist()) - - -def signed_area(coords, fast=False): - """Return the signed area enclosed by a ring using the linear time - algorithm. A value >= 0 indicates a counter-clockwise oriented ring. - A faster version is possible by setting 'fast' to True, which returns - 2x the area, e.g. if you're only interested in the sign of the area. - """ - xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values - xs.append(xs[1]) - ys.append(ys[1]) - area2 = sum(xs[i] * (ys[i + 1] - ys[i - 1]) for i in range(1, len(coords))) - if fast: - return area2 - else: - return area2 / 2.0 - - -def is_cw(coords): - """Returns True if a polygon ring has clockwise orientation, determined - by a negatively signed area. - """ - area2 = signed_area(coords, fast=True) - return area2 < 0 - - -def rewind(coords): - """Returns the input coords in reversed order.""" - return list(reversed(coords)) - - -def ring_bbox(coords): - """Calculates and returns the bounding box of a ring.""" - xs, ys = zip(*coords) - bbox = min(xs), min(ys), max(xs), max(ys) - return bbox - - -def bbox_overlap(bbox1, bbox2): - """Tests whether two bounding boxes overlap, returning a boolean""" - xmin1, ymin1, xmax1, ymax1 = bbox1 - xmin2, ymin2, xmax2, ymax2 = bbox2 - overlap = xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2 - return overlap - - -def bbox_contains(bbox1, bbox2): - """Tests whether bbox1 fully contains bbox2, returning a boolean""" - xmin1, ymin1, xmax1, ymax1 = bbox1 - xmin2, ymin2, xmax2, ymax2 = bbox2 - contains = xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2 - return contains - - -def ring_contains_point(coords, p): - """Fast point-in-polygon crossings algorithm, MacMartin optimization. - - Adapted from code by Eric Haynes - http://www.realtimerendering.com/resources/GraphicsGems//gemsiv/ptpoly_haines/ptinpoly.c - - Original description: - Shoot a test ray along +X axis. The strategy, from MacMartin, is to - compare vertex Y values to the testing point's Y and quickly discard - edges which are entirely to one side of the test ray. - """ - tx, ty = p - - # get initial test bit for above/below X axis - vtx0 = coords[0] - yflag0 = vtx0[1] >= ty - - inside_flag = False - for vtx1 in coords[1:]: - yflag1 = vtx1[1] >= ty - # check if endpoints straddle (are on opposite sides) of X axis - # (i.e. the Y's differ); if so, +X ray could intersect this edge. - if yflag0 != yflag1: - xflag0 = vtx0[0] >= tx - # check if endpoints are on same side of the Y axis (i.e. X's - # are the same); if so, it's easy to test if edge hits or misses. - if xflag0 == (vtx1[0] >= tx): - # if edge's X values both right of the point, must hit - if xflag0: - inside_flag = not inside_flag - else: - # compute intersection of pgon segment with +X ray, note - # if >= point's X; if so, the ray hits it. 
- if ( - vtx1[0] - (vtx1[1] - ty) * (vtx0[0] - vtx1[0]) / (vtx0[1] - vtx1[1]) - ) >= tx: - inside_flag = not inside_flag - - # move to next pair of vertices, retaining info as possible - yflag0 = yflag1 - vtx0 = vtx1 - - return inside_flag - - -def ring_sample(coords, ccw=False): - """Return a sample point guaranteed to be within a ring, by efficiently - finding the first centroid of a coordinate triplet whose orientation - matches the orientation of the ring and passes the point-in-ring test. - The orientation of the ring is assumed to be clockwise, unless ccw - (counter-clockwise) is set to True. - """ - triplet = [] - - def itercoords(): - # iterate full closed ring - for p in coords: - yield p - # finally, yield the second coordinate to the end to allow checking the last triplet - yield coords[1] - - for p in itercoords(): - # add point to triplet (but not if duplicate) - if p not in triplet: - triplet.append(p) - - # new triplet, try to get sample - if len(triplet) == 3: - # check that triplet does not form a straight line (not a triangle) - is_straight_line = (triplet[0][1] - triplet[1][1]) * ( - triplet[0][0] - triplet[2][0] - ) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) - if not is_straight_line: - # get triplet orientation - closed_triplet = triplet + [triplet[0]] - triplet_ccw = not is_cw(closed_triplet) - # check that triplet has the same orientation as the ring (means triangle is inside the ring) - if ccw == triplet_ccw: - # get triplet centroid - xs, ys = zip(*triplet) - xmean, ymean = sum(xs) / 3.0, sum(ys) / 3.0 - # check that triplet centroid is truly inside the ring - if ring_contains_point(coords, (xmean, ymean)): - return xmean, ymean - - # failed to get sample point from this triplet - # remove oldest triplet coord to allow iterating to next triplet - triplet.pop(0) - - else: - raise Exception("Unexpected error: Unable to find a ring sample point.") - - -def ring_contains_ring(coords1, coords2): - """Returns True if all vertexes in coords2 are fully inside coords1.""" - return all((ring_contains_point(coords1, p2) for p2 in coords2)) - - -def organize_polygon_rings(rings, return_errors=None): - """Organize a list of coordinate rings into one or more polygons with holes. - Returns a list of polygons, where each polygon is composed of a single exterior - ring, and one or more interior holes. If a return_errors dict is provided (optional), - any errors encountered will be added to it. - - Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). - Rings are determined as exteriors if they run in clockwise direction, or interior - holes if they run in counter-clockwise direction. This method is used to construct - GeoJSON (multi)polygons from the shapefile polygon shape type, which does not - explicitly store the structure of the polygons beyond exterior/interior ring orientation. 
- """ - # first iterate rings and classify as exterior or hole - exteriors = [] - holes = [] - for ring in rings: - # shapefile format defines a polygon as a sequence of rings - # where exterior rings are clockwise, and holes counterclockwise - if is_cw(ring): - # ring is exterior - exteriors.append(ring) - else: - # ring is a hole - holes.append(ring) - - # if only one exterior, then all holes belong to that exterior - if len(exteriors) == 1: - # exit early - poly = [exteriors[0]] + holes - polys = [poly] - return polys - - # multiple exteriors, ie multi-polygon, have to group holes with correct exterior - # shapefile format does not specify which holes belong to which exteriors - # so have to do efficient multi-stage checking of hole-to-exterior containment - elif len(exteriors) > 1: - # exit early if no holes - if not holes: - polys = [] - for ext in exteriors: - poly = [ext] - polys.append(poly) - return polys - - # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = dict([(hole_i, []) for hole_i in xrange(len(holes))]) - exterior_bboxes = [ring_bbox(ring) for ring in exteriors] - for hole_i in hole_exteriors.keys(): - hole_bbox = ring_bbox(holes[hole_i]) - for ext_i, ext_bbox in enumerate(exterior_bboxes): - if bbox_contains(ext_bbox, hole_bbox): - hole_exteriors[hole_i].append(ext_i) - - # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test - for hole_i, exterior_candidates in hole_exteriors.items(): - if len(exterior_candidates) > 1: - # get hole sample point - ccw = not is_cw(holes[hole_i]) - hole_sample = ring_sample(holes[hole_i], ccw=ccw) - # collect new exterior candidates - new_exterior_candidates = [] - for ext_i in exterior_candidates: - # check that hole sample point is inside exterior - hole_in_exterior = ring_contains_point( - exteriors[ext_i], hole_sample - ) - if hole_in_exterior: - new_exterior_candidates.append(ext_i) - - # set new exterior candidates - hole_exteriors[hole_i] = new_exterior_candidates - - # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole - for hole_i, exterior_candidates in hole_exteriors.items(): - if len(exterior_candidates) > 1: - # exterior candidate with the smallest area is the hole's most immediate parent - ext_i = sorted( - exterior_candidates, - key=lambda x: abs(signed_area(exteriors[x], fast=True)), - )[0] - hole_exteriors[hole_i] = [ext_i] - - # separate out holes that are orphaned (not contained by any exterior) - orphan_holes = [] - for hole_i, exterior_candidates in list(hole_exteriors.items()): - if not exterior_candidates: - orphan_holes.append(hole_i) - del hole_exteriors[hole_i] - continue - - # each hole should now only belong to one exterior, group into exterior-holes polygons - polys = [] - for ext_i, ext in enumerate(exteriors): - poly = [ext] - # find relevant holes - poly_holes = [] - for hole_i, exterior_candidates in list(hole_exteriors.items()): - # hole is relevant if previously matched with this exterior - if exterior_candidates[0] == ext_i: - poly_holes.append(holes[hole_i]) - poly += poly_holes - polys.append(poly) - - # add orphan holes as exteriors - for hole_i in orphan_holes: - ext = holes[hole_i] - # add as single exterior without any holes - poly = [ext] - polys.append(poly) - - if orphan_holes and return_errors is not None: - return_errors["polygon_orphaned_holes"] = len(orphan_holes) - - return polys - - # no exteriors, be nice and assume due to 
incorrect winding order - else: - if return_errors is not None: - return_errors["polygon_only_holes"] = len(holes) - exteriors = holes - # add as single exterior without any holes - polys = [[ext] for ext in exteriors] - return polys - - -class Shape(object): - def __init__( - self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None - ): - """Stores the geometry of the different shape types - specified in the Shapefile spec. Shape types are - usually point, polyline, or polygons. Every shape type - except the "Null" type contains points at some level for - example vertices in a polygon. If a shape type has - multiple shapes containing points within a single - geometry record then those shapes are called parts. Parts - are designated by their starting index in geometry record's - list of shapes. For MultiPatch geometry, partTypes designates - the patch type of each of the parts. - """ - self.shapeType = shapeType - self.points = points or [] - self.parts = parts or [] - if partTypes: - self.partTypes = partTypes - - # and a dict to silently record any errors encountered - self._errors = {} - - # add oid - if oid is not None: - self.__oid = oid - else: - self.__oid = -1 - - @property - def __geo_interface__(self): - if self.shapeType in [POINT, POINTM, POINTZ]: - # point - if len(self.points) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "Point", "coordinates": tuple()} - else: - return {"type": "Point", "coordinates": tuple(self.points[0])} - elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: - if len(self.points) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "MultiPoint", "coordinates": []} - else: - # multipoint - return { - "type": "MultiPoint", - "coordinates": [tuple(p) for p in self.points], - } - elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: - if len(self.parts) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "LineString", "coordinates": []} - elif len(self.parts) == 1: - # linestring - return { - "type": "LineString", - "coordinates": [tuple(p) for p in self.points], - } - else: - # multilinestring - ps = None - coordinates = [] - for part in self.parts: - if ps is None: - ps = part - continue - else: - coordinates.append([tuple(p) for p in self.points[ps:part]]) - ps = part - else: - coordinates.append([tuple(p) for p in self.points[part:]]) - return {"type": "MultiLineString", "coordinates": coordinates} - elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: - if len(self.parts) == 0: - # the shape has no coordinate information, i.e. 
is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {"type": "Polygon", "coordinates": []} - else: - # get all polygon rings - rings = [] - for i in xrange(len(self.parts)): - # get indexes of start and end points of the ring - start = self.parts[i] - try: - end = self.parts[i + 1] - except IndexError: - end = len(self.points) - - # extract the points that make up the ring - ring = [tuple(p) for p in self.points[start:end]] - rings.append(ring) - - # organize rings into list of polygons, where each polygon is defined as list of rings. - # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). - polys = organize_polygon_rings(rings, self._errors) - - # if VERBOSE is True, issue detailed warning about any shape errors - # encountered during the Shapefile to GeoJSON conversion - if VERBOSE and self._errors: - header = "Possible issue encountered when converting Shape #{} to GeoJSON: ".format( - self.oid - ) - orphans = self._errors.get("polygon_orphaned_holes", None) - if orphans: - msg = ( - header - + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ -but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ -orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ -encoded as GeoJSON exterior rings instead of holes." - ) - logger.warning(msg) - only_holes = self._errors.get("polygon_only_holes", None) - if only_holes: - msg = ( - header - + "Shapefile format requires that polygons contain at least one exterior ring, \ -but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ -still included but were encoded as GeoJSON exterior rings instead of holes." - ) - logger.warning(msg) - - # return as geojson - if len(polys) == 1: - return {"type": "Polygon", "coordinates": polys[0]} - else: - return {"type": "MultiPolygon", "coordinates": polys} - - else: - raise Exception( - 'Shape type "%s" cannot be represented as GeoJSON.' - % SHAPETYPE_LOOKUP[self.shapeType] - ) - - @staticmethod - def _from_geojson(geoj): - # create empty shape - shape = Shape() - # set shapeType - geojType = geoj["type"] if geoj else "Null" - if geojType == "Null": - shapeType = NULL - elif geojType == "Point": - shapeType = POINT - elif geojType == "LineString": - shapeType = POLYLINE - elif geojType == "Polygon": - shapeType = POLYGON - elif geojType == "MultiPoint": - shapeType = MULTIPOINT - elif geojType == "MultiLineString": - shapeType = POLYLINE - elif geojType == "MultiPolygon": - shapeType = POLYGON - else: - raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) - shape.shapeType = shapeType - - # set points and parts - if geojType == "Point": - shape.points = [geoj["coordinates"]] - shape.parts = [0] - elif geojType in ("MultiPoint", "LineString"): - shape.points = geoj["coordinates"] - shape.parts = [0] - elif geojType in ("Polygon"): - points = [] - parts = [] - index = 0 - for i, ext_or_hole in enumerate(geoj["coordinates"]): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. 
- if i == 0 and not is_cw(ext_or_hole): - # flip exterior direction - ext_or_hole = rewind(ext_or_hole) - elif i > 0 and is_cw(ext_or_hole): - # flip hole direction - ext_or_hole = rewind(ext_or_hole) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - shape.points = points - shape.parts = parts - elif geojType in ("MultiLineString"): - points = [] - parts = [] - index = 0 - for linestring in geoj["coordinates"]: - points.extend(linestring) - parts.append(index) - index += len(linestring) - shape.points = points - shape.parts = parts - elif geojType in ("MultiPolygon"): - points = [] - parts = [] - index = 0 - for polygon in geoj["coordinates"]: - for i, ext_or_hole in enumerate(polygon): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. - if i == 0 and not is_cw(ext_or_hole): - # flip exterior direction - ext_or_hole = rewind(ext_or_hole) - elif i > 0 and is_cw(ext_or_hole): - # flip hole direction - ext_or_hole = rewind(ext_or_hole) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - shape.points = points - shape.parts = parts - return shape - - @property - def oid(self): - """The index position of the shape in the original shapefile""" - return self.__oid - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def __repr__(self): - return "Shape #{}: {}".format(self.__oid, self.shapeTypeName) - - -class _Record(list): - """ - A class to hold a record. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. - In addition to the list interface, the values of the record - can also be retrieved using the field's name. For example if the dbf contains - a field ID at position 0, the ID can be retrieved with the position, the field name - as a key, or the field name as an attribute. - - >>> # Create a Record with one field, normally the record is created by the Reader class - >>> r = _Record({'ID': 0}, [0]) - >>> print(r[0]) - >>> print(r['ID']) - >>> print(r.ID) - """ - - def __init__(self, field_positions, values, oid=None): - """ - A Record should be created by the Reader class - - :param field_positions: A dict mapping field names to field positions - :param values: A sequence of values - :param oid: The object id, an int (optional) - """ - self.__field_positions = field_positions - if oid is not None: - self.__oid = oid - else: - self.__oid = -1 - list.__init__(self, values) - - def __getattr__(self, item): - """ - __getattr__ is called if an attribute is used that does - not exist in the normal sense. 
For example r=Record(...), r.ID - calls r.__getattr__('ID'), but r.index(5) calls list.index(r, 5) - :param item: The field name, used as attribute - :return: Value of the field - :raises: AttributeError, if item is not a field of the shapefile - and IndexError, if the field exists but the field's - corresponding value in the Record does not exist - """ - try: - if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() - raise AttributeError("_Record does not implement __setstate__") - index = self.__field_positions[item] - return list.__getitem__(self, index) - except KeyError: - raise AttributeError("{} is not a field name".format(item)) - except IndexError: - raise IndexError( - "{} found as a field but not enough values available.".format(item) - ) - - def __setattr__(self, key, value): - """ - Sets a value of a field attribute - :param key: The field name - :param value: the value of that field - :return: None - :raises: AttributeError, if key is not a field of the shapefile - """ - if key.startswith("_"): # Prevent infinite loop when setting mangled attribute - return list.__setattr__(self, key, value) - try: - index = self.__field_positions[key] - return list.__setitem__(self, index, value) - except KeyError: - raise AttributeError("{} is not a field name".format(key)) - - def __getitem__(self, item): - """ - Extends the normal list item access with - access using a fieldname - - For example r['ID'], r[0] - :param item: Either the position of the value or the name of a field - :return: the value of the field - """ - try: - return list.__getitem__(self, item) - except TypeError: - try: - index = self.__field_positions[item] - except KeyError: - index = None - if index is not None: - return list.__getitem__(self, index) - else: - raise IndexError('"{}" is not a field name and not an int'.format(item)) - - def __setitem__(self, key, value): - """ - Extends the normal list item access with - access using a fieldname - - For example r['ID']=2, r[0]=2 - :param key: Either the position of the value or the name of a field - :param value: the new value of the field - """ - try: - return list.__setitem__(self, key, value) - except TypeError: - index = self.__field_positions.get(key) - if index is not None: - return list.__setitem__(self, index, value) - else: - raise IndexError("{} is not a field name and not an int".format(key)) - - @property - def oid(self): - """The index position of the record in the original shapefile""" - return self.__oid - - def as_dict(self, date_strings=False): - """ - Returns this Record as a dictionary using the field names as keys - :return: dict - """ - dct = dict((f, self[i]) for f, i in self.__field_positions.items()) - if date_strings: - for k, v in dct.items(): - if isinstance(v, date): - dct[k] = "{:04d}{:02d}{:02d}".format(v.year, v.month, v.day) - return dct - - def __repr__(self): - return "Record #{}: {}".format(self.__oid, list(self)) - - def __dir__(self): - """ - Helps to show the field names in an interactive environment like IPython. 
- See: http://ipython.readthedocs.io/en/stable/config/integrating.html - - :return: List of method names and fields - """ - default = list( - dir(type(self)) - ) # default list methods and attributes of this class - fnames = list( - self.__field_positions.keys() - ) # plus field names (random order if Python version < 3.6) - return default + fnames - - def __eq__(self, other): - if isinstance(other, self.__class__): - if self.__field_positions != other.__field_positions: - return False - return list.__eq__(self, other) - - -class ShapeRecord(object): - """A ShapeRecord object containing a shape along with its attributes. - Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" - - def __init__(self, shape=None, record=None): - self.shape = shape - self.record = record - - @property - def __geo_interface__(self): - return { - "type": "Feature", - "properties": self.record.as_dict(date_strings=True), - "geometry": None - if self.shape.shapeType == NULL - else self.shape.__geo_interface__, - } - - -class Shapes(list): - """A class to hold a list of Shape objects. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. - In addition to the list interface, this also provides the GeoJSON __geo_interface__ - to return a GeometryCollection dictionary.""" - - def __repr__(self): - return "Shapes: {}".format(list(self)) - - @property - def __geo_interface__(self): - # Note: currently this will fail if any of the shapes are null-geometries - # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords - collection = { - "type": "GeometryCollection", - "geometries": [shape.__geo_interface__ for shape in self], - } - return collection - - -class ShapeRecords(list): - """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. - In addition to the list interface, this also provides the GeoJSON __geo_interface__ - to return a FeatureCollection dictionary.""" - - def __repr__(self): - return "ShapeRecords: {}".format(list(self)) - - @property - def __geo_interface__(self): - collection = { - "type": "FeatureCollection", - "features": [shaperec.__geo_interface__ for shaperec in self], - } - return collection - - -class ShapefileException(Exception): - """An exception to handle shapefile specific problems.""" - - pass - - -class Reader(object): - """Reads the three files of a shapefile as a unit or - separately. If one of the three files (.shp, .shx, - .dbf) is missing no exception is thrown until you try - to call a method that depends on that particular file. - The .shx index file is used if available for efficiency - but is not required to read the geometry from the .shp - file. The "shapefile" argument in the constructor is the - name of the file you want to open, and can be the path - to a shapefile on a local filesystem, inside a zipfile, - or a url. - - You can instantiate a Reader without specifying a shapefile - and then specify one later with the load() method. - - Only the shapefile headers are read upon loading. Content - within each file is only accessed when required and as - efficiently as possible. Shapefiles are usually not large - but they can be. 
- """ - - def __init__(self, *args, **kwargs): - self.shp = None - self.shx = None - self.dbf = None - self._files_to_close = [] - self.shapeName = "Not specified" - self._offsets = [] - self.shpLength = None - self.numRecords = None - self.numShapes = None - self.fields = [] - self.__dbfHdrLength = 0 - self.__fieldLookup = {} - self.encoding = kwargs.pop("encoding", "utf-8") - self.encodingErrors = kwargs.pop("encodingErrors", "strict") - # See if a shapefile name was passed as the first argument - if len(args) > 0: - path = pathlike_obj(args[0]) - if is_string(path): - if ".zip" in path: - # Shapefile is inside a zipfile - if path.count(".zip") > 1: - # Multiple nested zipfiles - raise ShapefileException( - "Reading from multiple nested zipfiles is not supported: %s" - % path - ) - # Split into zipfile and shapefile paths - if path.endswith(".zip"): - zpath = path - shapefile = None - else: - zpath = path[: path.find(".zip") + 4] - shapefile = path[path.find(".zip") + 4 + 1 :] - # Create a zip file handle - if zpath.startswith("http"): - # Zipfile is from a url - # Download to a temporary url and treat as normal zipfile - req = Request( - zpath, - headers={ - "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" - }, - ) - resp = urlopen(req) - # write zipfile data to a read+write tempfile and use as source, gets deleted when garbage collected - zipfileobj = tempfile.NamedTemporaryFile( - mode="w+b", suffix=".zip", delete=True - ) - zipfileobj.write(resp.read()) - zipfileobj.seek(0) - else: - # Zipfile is from a file - zipfileobj = open(zpath, mode="rb") - # Open the zipfile archive - with zipfile.ZipFile(zipfileobj, "r") as archive: - if not shapefile: - # Only the zipfile path is given - # Inspect zipfile contents to find the full shapefile path - shapefiles = [ - name - for name in archive.namelist() - if (name.endswith(".SHP") or name.endswith(".shp")) - ] - # The zipfile must contain exactly one shapefile - if len(shapefiles) == 0: - raise ShapefileException( - "Zipfile does not contain any shapefiles" - ) - elif len(shapefiles) == 1: - shapefile = shapefiles[0] - else: - raise ShapefileException( - "Zipfile contains more than one shapefile: %s. Please specify the full \ - path to the shapefile you would like to open." - % shapefiles - ) - # Try to extract file-like objects from zipfile - shapefile = os.path.splitext(shapefile)[ - 0 - ] # root shapefile name - for ext in ["SHP", "SHX", "DBF", "shp", "shx", "dbf"]: - try: - member = archive.open(shapefile + "." 
+ ext) - # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile( - mode="w+b", delete=True - ) - fileobj.write(member.read()) - fileobj.seek(0) - setattr(self, ext.lower(), fileobj) - self._files_to_close.append(fileobj) - except: - pass - # Close and delete the temporary zipfile - try: - zipfileobj.close() - except: - pass - # Try to load shapefile - if self.shp or self.dbf: - # Load and exit early - self.load() - return - else: - raise ShapefileException( - "No shp or dbf file found in zipfile: %s" % path - ) - - elif path.startswith("http"): - # Shapefile is from a url - # Download each file to temporary path and treat as normal shapefile path - urlinfo = urlparse(path) - urlpath = urlinfo[2] - urlpath, _ = os.path.splitext(urlpath) - shapefile = os.path.basename(urlpath) - for ext in ["shp", "shx", "dbf"]: - try: - _urlinfo = list(urlinfo) - _urlinfo[2] = urlpath + "." + ext - _path = urlunparse(_urlinfo) - req = Request( - _path, - headers={ - "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" - }, - ) - resp = urlopen(req) - # write url data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile( - mode="w+b", delete=True - ) - fileobj.write(resp.read()) - fileobj.seek(0) - setattr(self, ext, fileobj) - self._files_to_close.append(fileobj) - except HTTPError: - pass - if self.shp or self.dbf: - # Load and exit early - self.load() - return - else: - raise ShapefileException( - "No shp or dbf file found at url: %s" % path - ) - - else: - # Local file path to a shapefile - # Load and exit early - self.load(path) - return - - # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) - if "shp" in kwargs: - if hasattr(kwargs["shp"], "read"): - self.shp = kwargs["shp"] - # Copy if required - try: - self.shp.seek(0) - except (NameError, io.UnsupportedOperation): - self.shp = io.BytesIO(self.shp.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shp"]) - self.load_shp(baseName) - - if "shx" in kwargs: - if hasattr(kwargs["shx"], "read"): - self.shx = kwargs["shx"] - # Copy if required - try: - self.shx.seek(0) - except (NameError, io.UnsupportedOperation): - self.shx = io.BytesIO(self.shx.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shx"]) - self.load_shx(baseName) - - if "dbf" in kwargs: - if hasattr(kwargs["dbf"], "read"): - self.dbf = kwargs["dbf"] - # Copy if required - try: - self.dbf.seek(0) - except (NameError, io.UnsupportedOperation): - self.dbf = io.BytesIO(self.dbf.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["dbf"]) - self.load_dbf(baseName) - - # Load the files - if self.shp or self.dbf: - self.load() - - def __str__(self): - """ - Use some general info on the shapefile as __str__ - """ - info = ["shapefile Reader"] - if self.shp: - info.append( - " {} shapes (type '{}')".format( - len(self), SHAPETYPE_LOOKUP[self.shapeType] - ) - ) - if self.dbf: - info.append( - " {} records ({} fields)".format(len(self), len(self.fields)) - ) - return "\n".join(info) - - def __enter__(self): - """ - Enter phase of context manager. - """ - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """ - Exit phase of context manager, close opened files. 
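
The constructor branches above mean a single entry point covers local paths, zip archives (optionally with an internal path when the archive holds more than one shapefile) and URLs, which are downloaded to self-deleting temporary files. Illustrative usage (all paths hypothetical):

    >>> import shapefile
    >>> with shapefile.Reader("shapefiles/blockgroups") as sf:      # local path, extension optional
    ...     n = len(sf)
    >>> with shapefile.Reader("shapefiles/blockgroups.zip") as sf:  # sole shapefile inside a zip
    ...     n = len(sf)
    >>> # "archive.zip/inner/file.shp" selects one of several shapefiles in a zip,
    >>> # and "https://..." paths are fetched over HTTP first
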
- """ - self.close() - - def __len__(self): - """Returns the number of shapes/records in the shapefile.""" - if self.dbf: - # Preferably use dbf record count - if self.numRecords is None: - self.__dbfHeader() - - return self.numRecords - - elif self.shp: - # Otherwise use shape count - if self.shx: - if self.numShapes is None: - self.__shxHeader() - - return self.numShapes - - else: - # Index file not available, iterate all shapes to get total count - if self.numShapes is None: - # Determine length of shp file - shp = self.shp - checkpoint = shp.tell() - shp.seek(0, 2) - shpLength = shp.tell() - shp.seek(100) - # Do a fast shape iteration until end of file. - unpack = Struct(">2i").unpack - offsets = [] - pos = shp.tell() - while pos < shpLength: - offsets.append(pos) - # Unpack the shape header only - (recNum, recLength) = unpack(shp.read(8)) - # Jump to next shape position - pos += 8 + (2 * recLength) - shp.seek(pos) - # Set numShapes and offset indices - self.numShapes = len(offsets) - self._offsets = offsets - # Return to previous file position - shp.seek(checkpoint) - - return self.numShapes - - else: - # No file loaded yet, treat as 'empty' shapefile - return 0 - - def __iter__(self): - """Iterates through the shapes/records in the shapefile.""" - for shaperec in self.iterShapeRecords(): - yield shaperec - - @property - def __geo_interface__(self): - shaperecords = self.shapeRecords() - fcollection = shaperecords.__geo_interface__ - fcollection["bbox"] = list(self.bbox) - return fcollection - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def load(self, shapefile=None): - """Opens a shapefile from a filename or file-like - object. Normally this method would be called by the - constructor with the file name as an argument.""" - if shapefile: - (shapeName, ext) = os.path.splitext(shapefile) - self.shapeName = shapeName - self.load_shp(shapeName) - self.load_shx(shapeName) - self.load_dbf(shapeName) - if not (self.shp or self.dbf): - raise ShapefileException( - "Unable to open %s.dbf or %s.shp." 
% (shapeName, shapeName) - ) - if self.shp: - self.__shpHeader() - if self.dbf: - self.__dbfHeader() - if self.shx: - self.__shxHeader() - - def load_shp(self, shapefile_name): - """ - Attempts to load file with .shp extension as both lower and upper case - """ - shp_ext = "shp" - try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext), "rb") - self._files_to_close.append(self.shp) - except IOError: - try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext.upper()), "rb") - self._files_to_close.append(self.shp) - except IOError: - pass - - def load_shx(self, shapefile_name): - """ - Attempts to load file with .shx extension as both lower and upper case - """ - shx_ext = "shx" - try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext), "rb") - self._files_to_close.append(self.shx) - except IOError: - try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext.upper()), "rb") - self._files_to_close.append(self.shx) - except IOError: - pass - - def load_dbf(self, shapefile_name): - """ - Attempts to load file with .dbf extension as both lower and upper case - """ - dbf_ext = "dbf" - try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext), "rb") - self._files_to_close.append(self.dbf) - except IOError: - try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext.upper()), "rb") - self._files_to_close.append(self.dbf) - except IOError: - pass - - def __del__(self): - self.close() - - def close(self): - # Close any files that the reader opened (but not those given by user) - for attribute in self._files_to_close: - if hasattr(attribute, "close"): - try: - attribute.close() - except IOError: - pass - self._files_to_close = [] - - def __getFileObj(self, f): - """Checks to see if the requested shapefile file object is - available. If not a ShapefileException is raised.""" - if not f: - raise ShapefileException( - "Shapefile Reader requires a shapefile or file-like object." - ) - if self.shp and self.shpLength is None: - self.load() - if self.dbf and len(self.fields) == 0: - self.load() - return f - - def __restrictIndex(self, i): - """Provides list-like handling of a record index with a clearer - error message if the index is out of bounds.""" - if self.numRecords: - rmax = self.numRecords - 1 - if abs(i) > rmax: - raise IndexError( - "Shape or Record index: %s out of range. Max index: %s" % (i, rmax) - ) - if i < 0: - i = range(self.numRecords)[i] - return i - - def __shpHeader(self): - """Reads the header information from a .shp file.""" - if not self.shp: - raise ShapefileException( - "Shapefile Reader requires a shapefile or file-like object. 
(no shp file found)"
-            )
-        shp = self.shp
-        # File length (16-bit word * 2 = bytes)
-        shp.seek(24)
-        self.shpLength = unpack(">i", shp.read(4))[0] * 2
-        # Shape type
-        shp.seek(32)
-        self.shapeType = unpack("<i", shp.read(4))[0]
-        # The shapefile's bounding box (lower left, upper right)
-        self.bbox = _Array("d", unpack("<4d", shp.read(32)))
-        # Elevation
-        self.zbox = _Array("d", unpack("<2d", shp.read(16)))
-        # Measure
-        self.mbox = []
-        for m in _Array("d", unpack("<2d", shp.read(16))):
-            # Measure values less than -10e38 are nodata values according to the spec
-            if m > NODATA:
-                self.mbox.append(m)
-            else:
-                self.mbox.append(None)
-
-    def __shape(self, oid=None, bbox=None):
-        """Returns the header info and geometry for a single shape."""
-        f = self.__getFileObj(self.shp)
-        record = Shape(oid=oid)
-        nParts = nPoints = zmin = zmax = mmin = mmax = None
-        (recNum, recLength) = unpack(">2i", f.read(8))
-        # Determine the start of the next record
-        next = f.tell() + (2 * recLength)
-        shapeType = unpack("<i", f.read(4))[0]
-        record.shapeType = shapeType
-        # For Null shapes create an empty points list for consistency
-        if shapeType == 0:
-            record.points = []
-        # All shape types capable of having a bounding box
-        elif shapeType in (3, 5, 8, 13, 15, 18, 23, 25, 28, 31):
-            record.bbox = _Array("d", unpack("<4d", f.read(32)))
-            # if bbox specified and no overlap, skip this shape
-            if bbox is not None and not bbox_overlap(bbox, record.bbox):
-                # because we stop parsing this shape, skip to beginning of
-                # next shape before we return
-                f.seek(next)
-                return None
-        # Shape types with parts
-        if shapeType in (3, 5, 13, 15, 23, 25, 31):
-            nParts = unpack("<i", f.read(4))[0]
-        # Shape types with points
-        if shapeType in (3, 5, 8, 13, 15, 18, 23, 25, 28, 31):
-            nPoints = unpack("<i", f.read(4))[0]
-        # Read parts
-        if nParts:
-            record.parts = _Array("i", unpack("<%si" % nParts, f.read(nParts * 4)))
-        # Read part types for Multipatch - 31
-        if shapeType == 31:
-            record.partTypes = _Array("i", unpack("<%si" % nParts, f.read(nParts * 4)))
-        # Read points - produces a list of [x,y] values
-        if nPoints:
-            flat = unpack("<%sd" % (2 * nPoints), f.read(16 * nPoints))
-            record.points = list(izip(*(iter(flat),) * 2))
-        # Read z extremes and values
-        if shapeType in (13, 15, 18, 31):
-            (zmin, zmax) = unpack("<2d", f.read(16))
-            record.z = _Array("d", unpack("<%sd" % nPoints, f.read(nPoints * 8)))
-        # Read m extremes and values
-        if shapeType in (13, 15, 18, 23, 25, 28, 31):
-            if next - f.tell() >= 16:
-                (mmin, mmax) = unpack("<2d", f.read(16))
-            # Measure values less than -10e38 are nodata values according to the spec
-            if next - f.tell() >= nPoints * 8:
-                record.m = []
-                for m in _Array("d", unpack("<%sd" % nPoints, f.read(nPoints * 8))):
-                    if m > NODATA:
-                        record.m.append(m)
-                    else:
-                        record.m.append(None)
-            else:
-                record.m = [None for _ in range(nPoints)]
-        # Read a single point
-        if shapeType in (1, 11, 21):
-            record.points = [_Array("d", unpack("<2d", f.read(16)))]
-            if bbox is not None:
-                # create bounding box for Point by duplicating coordinates
-                point_bbox = list(record.points[0] + record.points[0])
-                # skip shape if no overlap with bounding box
-                if not bbox_overlap(bbox, point_bbox):
-                    f.seek(next)
-                    return None
-        # Read a single Z value
-        if shapeType == 11:
-            record.z = list(unpack("<d", f.read(8)))
-        # Read a single M value
-        if shapeType in (21, 11):
-            if next - f.tell() >= 8:
-                (m,) = unpack("<d", f.read(8))
-            else:
-                m = NODATA
-            # Measure values less than -10e38 are nodata values according to the spec
-            if m > NODATA:
-                record.m = [m]
-            else:
-                record.m = [None]
-        # Seek to the end of this record as defined by the record header because
-        # the shapefile spec doesn't require the actual content to meet the header
-        # definition. Probably allowed for lazy feature deletion.
-        f.seek(next)
-        return record
-
-    def __shxHeader(self):
-        """Reads the header information from a .shx file."""
-        shx = self.shx
-        if not shx:
-            raise ShapefileException(
-                "Shapefile Reader requires a shapefile or file-like object. (no shx file found)"
-            )
-        # File length (16-bit word * 2 = bytes) - header length
-        shx.seek(24)
-        shxRecordLength = (unpack(">i", shx.read(4))[0] * 2) - 100
-        self.numShapes = shxRecordLength // 8
-
-    def __shxOffsets(self):
-        """Reads the shape offset positions from a .shx file"""
-        shx = self.shx
-        if not shx:
-            raise ShapefileException(
-                "Shapefile Reader requires a shapefile or file-like object. (no shx file found)"
-            )
-        # Jump to the first record.
-        shx.seek(100)
-        # Each index record consists of two nrs, we only want the first one
-        shxRecords = _Array("i", shx.read(2 * self.numShapes * 4))
-        if sys.byteorder != "big":
-            shxRecords.byteswap()
-        self._offsets = [2 * el for el in shxRecords[::2]]
-
-    def __shapeIndex(self, i=None):
-        """Returns the offset in a .shp file for a shape based on information
-        in the .shx index file."""
-        shx = self.shx
-        # Return None if no shx or no index requested
-        if not shx or i is None:
-            return None
-        # At this point, we know the shx file exists
-        if not self._offsets:
-            self.__shxOffsets()
-        return self._offsets[i]
-
-    def shape(self, i=0, bbox=None):
-        """Returns a shape object for a shape in the geometry
-        record file.
-        If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax),
-        returns None if the shape is not within that region.
-        """
-        shp = self.__getFileObj(self.shp)
-        i = self.__restrictIndex(i)
-        offset = self.__shapeIndex(i)
-        if not offset:
-            # Shx index not available.
- # Determine length of shp file - shp.seek(0, 2) - shpLength = shp.tell() - shp.seek(100) - # Do a fast shape iteration until the requested index or end of file. - unpack = Struct(">2i").unpack - _i = 0 - offset = shp.tell() - while offset < shpLength: - if _i == i: - # Reached the requested index, exit loop with the offset value - break - # Unpack the shape header only - (recNum, recLength) = unpack(shp.read(8)) - # Jump to next shape position - offset += 8 + (2 * recLength) - shp.seek(offset) - _i += 1 - # If the index was not found, it likely means the .shp file is incomplete - if _i != i: - raise ShapefileException( - "Shape index {} is out of bounds; the .shp file only contains {} shapes".format( - i, _i - ) - ) - - # Seek to the offset and read the shape - shp.seek(offset) - return self.__shape(oid=i, bbox=bbox) - - def shapes(self, bbox=None): - """Returns all shapes in a shapefile. - To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. - """ - shapes = Shapes() - shapes.extend(self.iterShapes(bbox=bbox)) - return shapes - - def iterShapes(self, bbox=None): - """Returns a generator of shapes in a shapefile. Useful - for handling large shapefiles. - To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. - """ - shp = self.__getFileObj(self.shp) - # Found shapefiles which report incorrect - # shp file length in the header. Can't trust - # that so we seek to the end of the file - # and figure it out. - shp.seek(0, 2) - shpLength = shp.tell() - shp.seek(100) - - if self.numShapes: - # Iterate exactly the number of shapes from shx header - for i in xrange(self.numShapes): - # MAYBE: check if more left of file or exit early? - shape = self.__shape(oid=i, bbox=bbox) - if shape: - yield shape - else: - # No shx file, unknown nr of shapes - # Instead iterate until reach end of file - # Collect the offset indices during iteration - i = 0 - offsets = [] - pos = shp.tell() - while pos < shpLength: - offsets.append(pos) - shape = self.__shape(oid=i, bbox=bbox) - pos = shp.tell() - if shape: - yield shape - i += 1 - # Entire shp file consumed - # Update the number of shapes and list of offsets - assert i == len(offsets) - self.numShapes = i - self._offsets = offsets - - def __dbfHeader(self): - """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" - if not self.dbf: - raise ShapefileException( - "Shapefile Reader requires a shapefile or file-like object. (no dbf file found)" - ) - dbf = self.dbf - # read relevant header parts - dbf.seek(0) - self.numRecords, self.__dbfHdrLength, self.__recordLength = unpack( - " self.numRecords: - raise IndexError( - "abs(stop): %s exceeds number of records: %s." - % (abs(stop), self.numRecords) - ) - elif stop < 0: - stop = range(self.numRecords)[stop] - recSize = self.__recordLength - f.seek(self.__dbfHdrLength + (start * recSize)) - fieldTuples, recLookup, recStruct = self.__recordFields(fields) - for i in xrange(start, stop): - r = self.__record( - oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct - ) - if r: - yield r - - def shapeRecord(self, i=0, fields=None, bbox=None): - """Returns a combination geometry and attribute record for the - supplied record index. - To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. 
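
The bbox and fields arguments threaded through the reading methods above allow both spatial and column filtering without parsing the whole file: shapes outside the window are skipped mid-record, and only the requested dbf columns are unpacked. A sketch (dataset path and field name hypothetical):

    >>> import shapefile
    >>> sf = shapefile.Reader("shapefiles/blockgroups")            # hypothetical dataset
    >>> in_window = list(sf.iterShapes(bbox=(-122.5, 37.7, -122.3, 37.9)))
    >>> keys_only = list(sf.iterRecords(fields=["BKG_KEY"]))       # hypothetical field
    >>> maybe = sf.shape(0, bbox=(0.0, 0.0, 1.0, 1.0))             # None when the shape misses the window
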
- If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), - returns None if the shape is not within that region. - """ - i = self.__restrictIndex(i) - shape = self.shape(i, bbox=bbox) - if shape: - record = self.record(i, fields=fields) - return ShapeRecord(shape=shape, record=record) - - def shapeRecords(self, fields=None, bbox=None): - """Returns a list of combination geometry/attribute records for - all records in a shapefile. - To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. - To only read entries within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. - """ - return ShapeRecords(self.iterShapeRecords(fields=fields, bbox=bbox)) - - def iterShapeRecords(self, fields=None, bbox=None): - """Returns a generator of combination geometry/attribute records for - all records in a shapefile. - To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. - To only read entries within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. - """ - if bbox is None: - # iterate through all shapes and records - for shape, record in izip( - self.iterShapes(), self.iterRecords(fields=fields) - ): - yield ShapeRecord(shape=shape, record=record) - else: - # only iterate where shape.bbox overlaps with the given bbox - # TODO: internal __record method should be faster but would have to - # make sure to seek to correct file location... - - # fieldTuples,recLookup,recStruct = self.__recordFields(fields) - for shape in self.iterShapes(bbox=bbox): - if shape: - # record = self.__record(oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct) - record = self.record(i=shape.oid, fields=fields) - yield ShapeRecord(shape=shape, record=record) - - -class Writer(object): - """Provides write support for ESRI Shapefiles.""" - - def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): - self.target = target - self.autoBalance = autoBalance - self.fields = [] - self.shapeType = shapeType - self.shp = self.shx = self.dbf = None - self._files_to_close = [] - if target: - target = pathlike_obj(target) - if not is_string(target): - raise Exception( - "The target filepath {} must be of type str/unicode or path-like, not {}.".format( - repr(target), type(target) - ) - ) - self.shp = self.__getFileObj(os.path.splitext(target)[0] + ".shp") - self.shx = self.__getFileObj(os.path.splitext(target)[0] + ".shx") - self.dbf = self.__getFileObj(os.path.splitext(target)[0] + ".dbf") - elif kwargs.get("shp") or kwargs.get("shx") or kwargs.get("dbf"): - shp, shx, dbf = kwargs.get("shp"), kwargs.get("shx"), kwargs.get("dbf") - if shp: - self.shp = self.__getFileObj(shp) - if shx: - self.shx = self.__getFileObj(shx) - if dbf: - self.dbf = self.__getFileObj(dbf) - else: - raise Exception( - "Either the target filepath, or any of shp, shx, or dbf must be set to create a shapefile." - ) - # Initiate with empty headers, to be finalized upon closing - if self.shp: - self.shp.write(b"9" * 100) - if self.shx: - self.shx.write(b"9" * 100) - # Geometry record offsets and lengths for writing shx file. - self.recNum = 0 - self.shpNum = 0 - self._bbox = None - self._zbox = None - self._mbox = None - # Use deletion flags in dbf? Default is false (0). Note: Currently has no effect, records should NOT contain deletion flags. 
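
As the constructor above shows, a Writer can aim at a single target basename (the .shp, .shx and .dbf siblings are derived, and missing directories are created) or at individual file-like objects, which makes fully in-memory writing possible. A sketch, with a hypothetical target path:

    >>> import io
    >>> import shapefile
    >>> with shapefile.Writer("tmp/testfile") as w:       # creates tmp/testfile.shp/.shx/.dbf
    ...     w.field("NAME", "C")
    ...     w.null()
    ...     w.record("first")
    >>> shp, shx, dbf = io.BytesIO(), io.BytesIO(), io.BytesIO()
    >>> with shapefile.Writer(shp=shp, shx=shx, dbf=dbf) as w:  # or write straight into buffers
    ...     w.field("NAME", "C")
    ...     w.null()
    ...     w.record("in-memory")
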
- self.deletionFlag = 0 - # Encoding - self.encoding = kwargs.pop("encoding", "utf-8") - self.encodingErrors = kwargs.pop("encodingErrors", "strict") - - def __len__(self): - """Returns the current number of features written to the shapefile. - If shapes and records are unbalanced, the length is considered the highest - of the two.""" - return max(self.recNum, self.shpNum) - - def __enter__(self): - """ - Enter phase of context manager. - """ - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """ - Exit phase of context manager, finish writing and close the files. - """ - self.close() - - def __del__(self): - self.close() - - def close(self): - """ - Write final shp, shx, and dbf headers, close opened files. - """ - # Check if any of the files have already been closed - shp_open = self.shp and not (hasattr(self.shp, "closed") and self.shp.closed) - shx_open = self.shx and not (hasattr(self.shx, "closed") and self.shx.closed) - dbf_open = self.dbf and not (hasattr(self.dbf, "closed") and self.dbf.closed) - - # Balance if already not balanced - if self.shp and shp_open and self.dbf and dbf_open: - if self.autoBalance: - self.balance() - if self.recNum != self.shpNum: - raise ShapefileException( - "When saving both the dbf and shp file, " - "the number of records (%s) must correspond " - "with the number of shapes (%s)" % (self.recNum, self.shpNum) - ) - # Fill in the blank headers - if self.shp and shp_open: - self.__shapefileHeader(self.shp, headerType="shp") - if self.shx and shx_open: - self.__shapefileHeader(self.shx, headerType="shx") - - # Update the dbf header with final length etc - if self.dbf and dbf_open: - self.__dbfHeader() - - # Flush files - for attribute in (self.shp, self.shx, self.dbf): - if hasattr(attribute, "flush") and not ( - hasattr(attribute, "closed") and attribute.closed - ): - try: - attribute.flush() - except IOError: - pass - - # Close any files that the writer opened (but not those given by user) - for attribute in self._files_to_close: - if hasattr(attribute, "close"): - try: - attribute.close() - except IOError: - pass - self._files_to_close = [] - - def __getFileObj(self, f): - """Safety handler to verify file-like objects""" - if not f: - raise ShapefileException("No file-like object available.") - elif hasattr(f, "write"): - return f - else: - pth = os.path.split(f)[0] - if pth and not os.path.exists(pth): - os.makedirs(pth) - fp = open(f, "wb+") - self._files_to_close.append(fp) - return fp - - def __shpFileLength(self): - """Calculates the file length of the shp file.""" - # Remember starting position - start = self.shp.tell() - # Calculate size of all shapes - self.shp.seek(0, 2) - size = self.shp.tell() - # Calculate size as 16-bit words - size //= 2 - # Return to start - self.shp.seek(start) - return size - - def __bbox(self, s): - x = [] - y = [] - if len(s.points) > 0: - px, py = list(zip(*s.points))[:2] - x.extend(px) - y.extend(py) - else: - # this should not happen. - # any shape that is not null should have at least one point, and only those should be sent here. - # could also mean that earlier code failed to add points to a non-null shape. - raise Exception( - "Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." 
- % s.shapeType - ) - bbox = [min(x), min(y), max(x), max(y)] - # update global - if self._bbox: - # compare with existing - self._bbox = [ - min(bbox[0], self._bbox[0]), - min(bbox[1], self._bbox[1]), - max(bbox[2], self._bbox[2]), - max(bbox[3], self._bbox[3]), - ] - else: - # first time bbox is being set - self._bbox = bbox - return bbox - - def __zbox(self, s): - z = [] - for p in s.points: - try: - z.append(p[2]) - except IndexError: - # point did not have z value - # setting it to 0 is probably ok, since it means all are on the same elevation - z.append(0) - zbox = [min(z), max(z)] - # update global - if self._zbox: - # compare with existing - self._zbox = [min(zbox[0], self._zbox[0]), max(zbox[1], self._zbox[1])] - else: - # first time zbox is being set - self._zbox = zbox - return zbox - - def __mbox(self, s): - mpos = 3 if s.shapeType in (11, 13, 15, 18, 31) else 2 - m = [] - for p in s.points: - try: - if p[mpos] is not None: - # mbox should only be calculated on valid m values - m.append(p[mpos]) - except IndexError: - # point did not have m value so is missing - # mbox should only be calculated on valid m values - pass - if not m: - # only if none of the shapes had m values, should mbox be set to missing m values - m.append(NODATA) - mbox = [min(m), max(m)] - # update global - if self._mbox: - # compare with existing - self._mbox = [min(mbox[0], self._mbox[0]), max(mbox[1], self._mbox[1])] - else: - # first time mbox is being set - self._mbox = mbox - return mbox - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def bbox(self): - """Returns the current bounding box for the shapefile which is - the lower-left and upper-right corners. It does not contain the - elevation or measure extremes.""" - return self._bbox - - def zbox(self): - """Returns the current z extremes for the shapefile.""" - return self._zbox - - def mbox(self): - """Returns the current m extremes for the shapefile.""" - return self._mbox - - def __shapefileHeader(self, fileObj, headerType="shp"): - """Writes the specified header type to the specified file-like object. - Several of the shapefile formats are so similar that a single generic - method to read or write them is warranted.""" - f = self.__getFileObj(fileObj) - f.seek(0) - # File code, Unused bytes - f.write(pack(">6i", 9994, 0, 0, 0, 0, 0)) - # File length (Bytes / 2 = 16-bit words) - if headerType == "shp": - f.write(pack(">i", self.__shpFileLength())) - elif headerType == "shx": - f.write(pack(">i", ((100 + (self.shpNum * 8)) // 2))) - # Version, Shape type - if self.shapeType is None: - self.shapeType = NULL - f.write(pack("<2i", 1000, self.shapeType)) - # The shapefile's bounding box (lower left, upper right) - if self.shapeType != 0: - try: - bbox = self.bbox() - if bbox is None: - # The bbox is initialized with None, so this would mean the shapefile contains no valid geometries. - # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. - # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. - # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. - bbox = [0, 0, 0, 0] - f.write(pack("<4d", *bbox)) - except error: - raise ShapefileException( - "Failed to write shapefile bounding box. Floats required." 
- ) - else: - f.write(pack("<4d", 0, 0, 0, 0)) - # Elevation - if self.shapeType in (11, 13, 15, 18): - # Z values are present in Z type - zbox = self.zbox() - if zbox is None: - # means we have empty shapefile/only null geoms (see commentary on bbox above) - zbox = [0, 0] - else: - # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s - zbox = [0, 0] - # Measure - if self.shapeType in (11, 13, 15, 18, 21, 23, 25, 28, 31): - # M values are present in M or Z type - mbox = self.mbox() - if mbox is None: - # means we have empty shapefile/only null geoms (see commentary on bbox above) - mbox = [0, 0] - else: - # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s - mbox = [0, 0] - # Try writing - try: - f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) - except error: - raise ShapefileException( - "Failed to write shapefile elevation and measure values. Floats required." - ) - - def __dbfHeader(self): - """Writes the dbf header and field descriptors.""" - f = self.__getFileObj(self.dbf) - f.seek(0) - version = 3 - year, month, day = time.localtime()[:3] - year -= 1900 - # Get all fields, ignoring DeletionFlag if specified - fields = [field for field in self.fields if field[0] != "DeletionFlag"] - # Ensure has at least one field - if not fields: - raise ShapefileException( - "Shapefile dbf file must contain at least one field." - ) - numRecs = self.recNum - numFields = len(fields) - headerLength = numFields * 32 + 33 - if headerLength >= 65535: - raise ShapefileException( - "Shapefile dbf header length exceeds maximum length." - ) - recordLength = sum([int(field[2]) for field in fields]) + 1 - header = pack( - "2i", self.shpNum, 0)) - start = f.tell() - # Shape Type - if self.shapeType is None and s.shapeType != NULL: - self.shapeType = s.shapeType - if s.shapeType != NULL and s.shapeType != self.shapeType: - raise Exception( - "The shape's type (%s) must match the type of the shapefile (%s)." - % (s.shapeType, self.shapeType) - ) - f.write(pack(" 2 else 0 for p in s.points] - f.write(pack("<%sd" % len(zs), *zs)) - except error: - raise ShapefileException( - "Failed to write elevation values for record %s. Expected floats." - % self.shpNum - ) - # Write m extremes and values - # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA - # Note: missing m values are autoset to NODATA. - if s.shapeType in (13, 15, 18, 23, 25, 28, 31): - try: - f.write(pack("<2d", *self.__mbox(s))) - except error: - raise ShapefileException( - "Failed to write measure extremes for record %s. Expected floats" - % self.shpNum - ) - try: - if hasattr(s, "m"): - # if m values are stored in attribute - # fmt: off - f.write( - pack( - "<%sd" % len(s.m), - *[m if m is not None else NODATA for m in s.m] - ) - ) - # fmt: on - else: - # if m values are stored as 3rd/4th dimension - # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) - mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2 - [ - f.write( - pack( - " mpos and p[mpos] is not None - else NODATA, - ) - ) - for p in s.points - ] - except error: - raise ShapefileException( - "Failed to write measure values for record %s. Expected floats" - % self.shpNum - ) - # Write a single point - if s.shapeType in (1, 11, 21): - try: - f.write(pack("<2d", s.points[0][0], s.points[0][1])) - except error: - raise ShapefileException( - "Failed to write point for record %s. Expected floats." 
- % self.shpNum - ) - # Write a single Z value - # Note: missing z values are autoset to 0, but not sure if this is ideal. - if s.shapeType == 11: - # update the global z box - self.__zbox(s) - # then write value - if hasattr(s, "z"): - # if z values are stored in attribute - try: - if not s.z: - s.z = (0,) - f.write(pack("i", length)) - f.seek(finish) - return offset, length - - def __shxRecord(self, offset, length): - """Writes the shx records.""" - f = self.__getFileObj(self.shx) - try: - f.write(pack(">i", offset // 2)) - except error: - raise ShapefileException( - "The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones." - ) - f.write(pack(">i", length)) - - def record(self, *recordList, **recordDict): - """Creates a dbf attribute record. You can submit either a sequence of - field values or keyword arguments of field names and values. Before - adding records you must add fields for the record values using the - field() method. If the record values exceed the number of fields the - extra ones won't be added. In the case of using keyword arguments to specify - field/value pairs only fields matching the already registered fields - will be added.""" - # Balance if already not balanced - if self.autoBalance and self.recNum > self.shpNum: - self.balance() - - fieldCount = sum((1 for field in self.fields if field[0] != "DeletionFlag")) - if recordList: - record = list(recordList) - while len(record) < fieldCount: - record.append("") - elif recordDict: - record = [] - for field in self.fields: - if field[0] == "DeletionFlag": - continue # ignore deletionflag field in case it was specified - if field[0] in recordDict: - val = recordDict[field[0]] - if val is None: - record.append("") - else: - record.append(val) - else: - record.append("") # need empty value for missing dict entries - else: - # Blank fields for empty record - record = ["" for _ in range(fieldCount)] - self.__dbfRecord(record) - - def __dbfRecord(self, record): - """Writes the dbf records.""" - f = self.__getFileObj(self.dbf) - if self.recNum == 0: - # first records, so all fields should be set - # allowing us to write the dbf header - # cannot change the fields after this point - self.__dbfHeader() - # first byte of the record is deletion flag, always disabled - f.write(b" ") - # begin - self.recNum += 1 - fields = ( - field for field in self.fields if field[0] != "DeletionFlag" - ) # ignore deletionflag field in case it was specified - for (fieldName, fieldType, size, deci), value in zip(fields, record): - # write - fieldType = fieldType.upper() - size = int(size) - if fieldType in ("N", "F"): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - if value in MISSING: - value = b"*" * size # QGIS NULL - elif not deci: - # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. - value = int(float(value)) - value = format(value, "d")[:size].rjust( - size - ) # caps the size if exceeds the field size - else: - value = float(value) - value = format(value, ".%sf" % deci)[:size].rjust( - size - ) # caps the size if exceeds the field size - elif fieldType == "D": - # date: 8 bytes - date stored as a string in the format YYYYMMDD. 
- if isinstance(value, date): - value = "{:04d}{:02d}{:02d}".format( - value.year, value.month, value.day - ) - elif isinstance(value, list) and len(value) == 3: - value = "{:04d}{:02d}{:02d}".format(*value) - elif value in MISSING: - value = b"0" * 8 # QGIS NULL for date type - elif is_string(value) and len(value) == 8: - pass # value is already a date string - else: - raise ShapefileException( - "Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value." - ) - elif fieldType == "L": - # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. - if value in MISSING: - value = b" " # missing is set to space - elif value in [True, 1]: - value = b"T" - elif value in [False, 0]: - value = b"F" - else: - value = b" " # unknown is set to space - else: - # anything else is forced to string, truncated to the length of the field - value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) - if not isinstance(value, bytes): - # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) - value = b( - value, "ascii", self.encodingErrors - ) # should be default ascii encoding - if len(value) != size: - raise ShapefileException( - "Shapefile Writer unable to pack incorrect sized value" - " (size %d) into field '%s' (size %d)." - % (len(value), fieldName, size) - ) - f.write(value) - - def balance(self): - """Adds corresponding empty attributes or null geometry records depending - on which type of record was created to make sure all three files - are in synch.""" - while self.recNum > self.shpNum: - self.null() - while self.recNum < self.shpNum: - self.record() - - def null(self): - """Creates a null shape.""" - self.shape(Shape(NULL)) - - def point(self, x, y): - """Creates a POINT shape.""" - shapeType = POINT - pointShape = Shape(shapeType) - pointShape.points.append([x, y]) - self.shape(pointShape) - - def pointm(self, x, y, m=None): - """Creates a POINTM shape. - If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTM - pointShape = Shape(shapeType) - pointShape.points.append([x, y, m]) - self.shape(pointShape) - - def pointz(self, x, y, z=0, m=None): - """Creates a POINTZ shape. - If the z (elevation) value is not set, it defaults to 0. - If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTZ - pointShape = Shape(shapeType) - pointShape.points.append([x, y, z, m]) - self.shape(pointShape) - - def multipoint(self, points): - """Creates a MULTIPOINT shape. - Points is a list of xy values.""" - shapeType = MULTIPOINT - points = [ - points - ] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def multipointm(self, points): - """Creates a MULTIPOINTM shape. - Points is a list of xym values. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTM - points = [ - points - ] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def multipointz(self, points): - """Creates a MULTIPOINTZ shape. - Points is a list of xyzm values. - If the z (elevation) value is not included, it defaults to 0. 
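
The dbf packing rules above ('N'/'F' numbers right-justified, 'D' dates as YYYYMMDD, 'L' as T/F, everything else as padded text), combined with the point constructors, make a complete write straightforward. A sketch (path and attribute values hypothetical):

    >>> import datetime
    >>> import shapefile
    >>> with shapefile.Writer("tmp/cities", shapeType=shapefile.POINT) as w:
    ...     w.field("NAME", "C", size=40)
    ...     w.field("POP", "N", size=10)
    ...     w.field("FOUNDED", "D")        # size is forced to 8
    ...     w.field("CAPITAL", "L")        # size is forced to 1
    ...     w.point(-122.4, 37.8)
    ...     w.record("San Francisco", 873965, datetime.date(1850, 4, 15), False)
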
- If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTZ - points = [ - points - ] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def line(self, lines): - """Creates a POLYLINE shape. - Lines is a collection of lines, each made up of a list of xy values.""" - shapeType = POLYLINE - self._shapeparts(parts=lines, shapeType=shapeType) - - def linem(self, lines): - """Creates a POLYLINEM shape. - Lines is a collection of lines, each made up of a list of xym values. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEM - self._shapeparts(parts=lines, shapeType=shapeType) - - def linez(self, lines): - """Creates a POLYLINEZ shape. - Lines is a collection of lines, each made up of a list of xyzm values. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEZ - self._shapeparts(parts=lines, shapeType=shapeType) - - def poly(self, polys): - """Creates a POLYGON shape. - Polys is a collection of polygons, each made up of a list of xy values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction.""" - shapeType = POLYGON - self._shapeparts(parts=polys, shapeType=shapeType) - - def polym(self, polys): - """Creates a POLYGONM shape. - Polys is a collection of polygons, each made up of a list of xym values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONM - self._shapeparts(parts=polys, shapeType=shapeType) - - def polyz(self, polys): - """Creates a POLYGONZ shape. - Polys is a collection of polygons, each made up of a list of xyzm values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONZ - self._shapeparts(parts=polys, shapeType=shapeType) - - def multipatch(self, parts, partTypes): - """Creates a MULTIPATCH shape. - Parts is a collection of 3D surface patches, each made up of a list of xyzm values. - PartTypes is a list of types that define each of the surface patches. - The types can be any of the following module constants: TRIANGLE_STRIP, - TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. - If the z (elevation) value is not included, it defaults to 0. 
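
As the poly() docstring above notes, outer rings must wind clockwise and holes counter-clockwise; _shapeparts() (below) also closes any ring whose first and last points differ. For example, with a hypothetical target path:

    >>> import shapefile
    >>> with shapefile.Writer("tmp/parcels", shapeType=shapefile.POLYGON) as w:
    ...     w.field("ID", "N")
    ...     outer = [[0, 0], [0, 10], [10, 10], [10, 0], [0, 0]]   # clockwise: an outer ring
    ...     hole = [[2, 2], [4, 2], [4, 4], [2, 4], [2, 2]]        # counter-clockwise: a hole
    ...     w.poly([outer, hole])
    ...     w.record(1)
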
- If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPATCH - polyShape = Shape(shapeType) - polyShape.parts = [] - polyShape.points = [] - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - for point in part: - # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) - polyShape.partTypes = partTypes - # write the shape - self.shape(polyShape) - - def _shapeparts(self, parts, shapeType): - """Internal method for adding a shape that has multiple collections of points (parts): - lines, polygons, and multipoint shapes. - """ - polyShape = Shape(shapeType) - polyShape.parts = [] - polyShape.points = [] - # Make sure polygon rings (parts) are closed - if shapeType in (5, 15, 25, 31): - for part in parts: - if part[0] != part[-1]: - part.append(part[0]) - # Add points and part indexes - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - for point in part: - # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) - # write the shape - self.shape(polyShape) - - def field(self, name, fieldType="C", size="50", decimal=0): - """Adds a dbf field descriptor to the shapefile.""" - if fieldType == "D": - size = "8" - decimal = 0 - elif fieldType == "L": - size = "1" - decimal = 0 - if len(self.fields) >= 2046: - raise ShapefileException( - "Shapefile Writer reached maximum number of fields: 2046." - ) - self.fields.append((name, fieldType, size, decimal)) - - -# Begin Testing -def _get_doctests(): - import doctest - - doctest.NORMALIZE_WHITESPACE = 1 - - # run tests - with open("README.md", "rb") as fobj: - tests = doctest.DocTestParser().get_doctest( - string=fobj.read().decode("utf8").replace("\r\n", "\n"), - globs={}, - name="README", - filename="README.md", - lineno=0, - ) - - return tests - - -def _filter_network_doctests(examples, include_network=False, include_non_network=True): - globals_from_network_doctests = set() - - if not (include_network or include_non_network): - return - - examples_it = iter(examples) - - yield next(examples_it) - - for example in examples_it: - # Track variables in doctest shell sessions defined from commands - # that poll remote URLs, to skip subsequent commands until all - # such dependent variables are reassigned. - - if 'sf = shapefile.Reader("https://' in example.source: - globals_from_network_doctests.add("sf") - if include_network: - yield example - continue - - lhs = example.source.partition("=")[0] - - for target in lhs.split(","): - target = target.strip() - if target in globals_from_network_doctests: - globals_from_network_doctests.remove(target) - - # Non-network tests dependent on the network tests. 
-        if globals_from_network_doctests:
-            if include_network:
-                yield example
-            continue
-
-        if not include_non_network:
-            continue
-
-        yield example
-
-
-def _replace_remote_url(
-    old_url,
-    # Default port of Python http.server and Python 2's SimpleHttpServer
-    port=8000,
-    scheme="http",
-    netloc="localhost",
-    path=None,
-    params="",
-    query="",
-    fragment="",
-):
-    old_parsed = urlparse(old_url)
-
-    # Strip subpaths, so an artefacts
-    # repo or file tree can be simpler and flat
-    if path is None:
-        path = old_parsed.path.rpartition("/")[2]
-
-    if port not in (None, ""):
-        netloc = "%s:%s" % (netloc, port)
-
-    new_parsed = old_parsed._replace(
-        scheme=scheme,
-        netloc=netloc,
-        path=path,
-        params=params,
-        query=query,
-        fragment=fragment,
-    )
-
-    new_url = urlunparse(new_parsed) if PYTHON3 else urlunparse(list(new_parsed))
-    return new_url
-
-
-def _test(args=sys.argv[1:], verbosity=0):
-    if verbosity == 0:
-        print("Getting doctests...")
-
-    import doctest
-    import re
-
-    doctest.NORMALIZE_WHITESPACE = 1
-
-    tests = _get_doctests()
-
-    if len(args) >= 2 and args[0] == "-m":
-        if verbosity == 0:
-            print("Filtering doctests...")
-        tests.examples = list(
-            _filter_network_doctests(
-                tests.examples,
-                include_network=args[1] == "network",
-                include_non_network=args[1] == "not network",
-            )
-        )
-
-    if REPLACE_REMOTE_URLS_WITH_LOCALHOST:
-        if verbosity == 0:
-            print("Replacing remote urls with http://localhost in doctests...")
-
-        for example in tests.examples:
-            match_url_str_literal = re.search(r'"(https://.*)"', example.source)
-            if not match_url_str_literal:
-                continue
-            old_url = match_url_str_literal.group(1)
-            new_url = _replace_remote_url(old_url)
-            example.source = example.source.replace(old_url, new_url)
-
-    class Py23DocChecker(doctest.OutputChecker):
-        def check_output(self, want, got, optionflags):
-            if sys.version_info[0] == 2:
-                got = re.sub("u'(.*?)'", "'\\1'", got)
-                got = re.sub('u"(.*?)"', '"\\1"', got)
-            res = doctest.OutputChecker.check_output(self, want, got, optionflags)
-            return res
-
-        def summarize(self):
-            doctest.OutputChecker.summarize(True)
-
-    runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity)
-
-    if verbosity == 0:
-        print("Running %s doctests..." % len(tests.examples))
-    failure_count, test_count = runner.run(tests)
-
-    # print results
-    if verbosity:
-        runner.summarize(True)
-    else:
-        if failure_count == 0:
-            print("All tests passed successfully")
-        elif failure_count > 0:
-            runner.summarize(verbosity)
-
-    return failure_count
-
-
-if __name__ == "__main__":
-    """
-    Doctests are contained in the file 'README.md', and are tested using the built-in
-    testing libraries.
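
The two helpers above drive the offline test mode wired up in the workflow changes: _filter_network_doctests() partitions the README examples by whether they (or variables derived from them) touch remote URLs, and _replace_remote_url() retargets a remote artefact at the localhost server, keeping only the last path segment. For instance, with a generic illustrative URL:

    >>> _replace_remote_url("https://example.com/data/archives/files.zip")
    'http://localhost:8000/files.zip'
    >>> tests = _get_doctests()
    >>> offline = list(_filter_network_doctests(tests.examples, include_network=False))
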
- """ - failure_count = _test() - sys.exit(failure_count) diff --git a/shapefiles/test/MyPolyZ.dbf b/shapefiles/test/MyPolyZ.dbf deleted file mode 100644 index 54a50dbb..00000000 Binary files a/shapefiles/test/MyPolyZ.dbf and /dev/null differ diff --git a/shapefiles/test/MyPolyZ.shp b/shapefiles/test/MyPolyZ.shp deleted file mode 100644 index 1651f327..00000000 Binary files a/shapefiles/test/MyPolyZ.shp and /dev/null differ diff --git a/shapefiles/test/MyPolyZ.shx b/shapefiles/test/MyPolyZ.shx deleted file mode 100644 index 62de3220..00000000 Binary files a/shapefiles/test/MyPolyZ.shx and /dev/null differ diff --git a/shapefiles/test/NullTest.dbf b/shapefiles/test/NullTest.dbf deleted file mode 100644 index 84f8aa63..00000000 Binary files a/shapefiles/test/NullTest.dbf and /dev/null differ diff --git a/shapefiles/test/NullTest.shp b/shapefiles/test/NullTest.shp deleted file mode 100644 index 216fedaa..00000000 Binary files a/shapefiles/test/NullTest.shp and /dev/null differ diff --git a/shapefiles/test/NullTest.shx b/shapefiles/test/NullTest.shx deleted file mode 100644 index b00b9a80..00000000 Binary files a/shapefiles/test/NullTest.shx and /dev/null differ diff --git a/shapefiles/test/balancing.dbf b/shapefiles/test/balancing.dbf deleted file mode 100644 index c77d63b3..00000000 Binary files a/shapefiles/test/balancing.dbf and /dev/null differ diff --git a/shapefiles/test/balancing.shp b/shapefiles/test/balancing.shp deleted file mode 100644 index 66af077e..00000000 Binary files a/shapefiles/test/balancing.shp and /dev/null differ diff --git a/shapefiles/test/balancing.shx b/shapefiles/test/balancing.shx deleted file mode 100644 index 2712a6ed..00000000 Binary files a/shapefiles/test/balancing.shx and /dev/null differ diff --git a/shapefiles/test/contextwriter.dbf b/shapefiles/test/contextwriter.dbf deleted file mode 100644 index e030c2a3..00000000 Binary files a/shapefiles/test/contextwriter.dbf and /dev/null differ diff --git a/shapefiles/test/contextwriter.shp b/shapefiles/test/contextwriter.shp deleted file mode 100644 index 45d2404b..00000000 Binary files a/shapefiles/test/contextwriter.shp and /dev/null differ diff --git a/shapefiles/test/contextwriter.shx b/shapefiles/test/contextwriter.shx deleted file mode 100644 index 45d2404b..00000000 Binary files a/shapefiles/test/contextwriter.shx and /dev/null differ diff --git a/shapefiles/test/corrupt_too_long.dbf b/shapefiles/test/corrupt_too_long.dbf deleted file mode 100644 index 57230c5d..00000000 Binary files a/shapefiles/test/corrupt_too_long.dbf and /dev/null differ diff --git a/shapefiles/test/corrupt_too_long.shp b/shapefiles/test/corrupt_too_long.shp deleted file mode 100644 index e9ef9cf1..00000000 Binary files a/shapefiles/test/corrupt_too_long.shp and /dev/null differ diff --git a/shapefiles/test/corrupt_too_long.shx b/shapefiles/test/corrupt_too_long.shx deleted file mode 100644 index 79f28708..00000000 Binary files a/shapefiles/test/corrupt_too_long.shx and /dev/null differ diff --git a/shapefiles/test/dtype.dbf b/shapefiles/test/dtype.dbf deleted file mode 100644 index 1ddda01f..00000000 Binary files a/shapefiles/test/dtype.dbf and /dev/null differ diff --git a/shapefiles/test/dtype.shp b/shapefiles/test/dtype.shp deleted file mode 100644 index 0b4ce4f7..00000000 Binary files a/shapefiles/test/dtype.shp and /dev/null differ diff --git a/shapefiles/test/dtype.shx b/shapefiles/test/dtype.shx deleted file mode 100644 index 3842d109..00000000 Binary files a/shapefiles/test/dtype.shx and /dev/null differ diff --git 
a/shapefiles/test/line.dbf b/shapefiles/test/line.dbf deleted file mode 100644 index 24f529e8..00000000 Binary files a/shapefiles/test/line.dbf and /dev/null differ diff --git a/shapefiles/test/line.shp b/shapefiles/test/line.shp deleted file mode 100644 index 1d232636..00000000 Binary files a/shapefiles/test/line.shp and /dev/null differ diff --git a/shapefiles/test/line.shx b/shapefiles/test/line.shx deleted file mode 100644 index 0b01cd75..00000000 Binary files a/shapefiles/test/line.shx and /dev/null differ diff --git a/shapefiles/test/linem.dbf b/shapefiles/test/linem.dbf deleted file mode 100644 index 28c6681e..00000000 Binary files a/shapefiles/test/linem.dbf and /dev/null differ diff --git a/shapefiles/test/linem.shp b/shapefiles/test/linem.shp deleted file mode 100644 index 6b02941d..00000000 Binary files a/shapefiles/test/linem.shp and /dev/null differ diff --git a/shapefiles/test/linem.shx b/shapefiles/test/linem.shx deleted file mode 100644 index 488d94b3..00000000 Binary files a/shapefiles/test/linem.shx and /dev/null differ diff --git a/shapefiles/test/linez.dbf b/shapefiles/test/linez.dbf deleted file mode 100644 index 6709eb02..00000000 Binary files a/shapefiles/test/linez.dbf and /dev/null differ diff --git a/shapefiles/test/linez.shp b/shapefiles/test/linez.shp deleted file mode 100644 index b0da6415..00000000 Binary files a/shapefiles/test/linez.shp and /dev/null differ diff --git a/shapefiles/test/linez.shx b/shapefiles/test/linez.shx deleted file mode 100644 index 4c59fef7..00000000 Binary files a/shapefiles/test/linez.shx and /dev/null differ diff --git a/shapefiles/test/multipatch.dbf b/shapefiles/test/multipatch.dbf deleted file mode 100644 index bc9fe23f..00000000 Binary files a/shapefiles/test/multipatch.dbf and /dev/null differ diff --git a/shapefiles/test/multipatch.shp b/shapefiles/test/multipatch.shp deleted file mode 100644 index f3d10408..00000000 Binary files a/shapefiles/test/multipatch.shp and /dev/null differ diff --git a/shapefiles/test/multipatch.shx b/shapefiles/test/multipatch.shx deleted file mode 100644 index 05984a7c..00000000 Binary files a/shapefiles/test/multipatch.shx and /dev/null differ diff --git a/shapefiles/test/multipoint.dbf b/shapefiles/test/multipoint.dbf deleted file mode 100644 index 4d7d4f17..00000000 Binary files a/shapefiles/test/multipoint.dbf and /dev/null differ diff --git a/shapefiles/test/multipoint.shp b/shapefiles/test/multipoint.shp deleted file mode 100644 index 13f6e46a..00000000 Binary files a/shapefiles/test/multipoint.shp and /dev/null differ diff --git a/shapefiles/test/multipoint.shx b/shapefiles/test/multipoint.shx deleted file mode 100644 index 5ebace7b..00000000 Binary files a/shapefiles/test/multipoint.shx and /dev/null differ diff --git a/shapefiles/test/onlydbf.dbf b/shapefiles/test/onlydbf.dbf deleted file mode 100644 index e030c2a3..00000000 Binary files a/shapefiles/test/onlydbf.dbf and /dev/null differ diff --git a/shapefiles/test/point.dbf b/shapefiles/test/point.dbf deleted file mode 100644 index e29d0859..00000000 Binary files a/shapefiles/test/point.dbf and /dev/null differ diff --git a/shapefiles/test/point.shp b/shapefiles/test/point.shp deleted file mode 100644 index 84295426..00000000 Binary files a/shapefiles/test/point.shp and /dev/null differ diff --git a/shapefiles/test/point.shx b/shapefiles/test/point.shx deleted file mode 100644 index fda6cae2..00000000 Binary files a/shapefiles/test/point.shx and /dev/null differ diff --git a/shapefiles/test/polygon.dbf b/shapefiles/test/polygon.dbf 
deleted file mode 100644 index b116dc46..00000000 Binary files a/shapefiles/test/polygon.dbf and /dev/null differ diff --git a/shapefiles/test/polygon.shp b/shapefiles/test/polygon.shp deleted file mode 100644 index c7654ebc..00000000 Binary files a/shapefiles/test/polygon.shp and /dev/null differ diff --git a/shapefiles/test/polygon.shx b/shapefiles/test/polygon.shx deleted file mode 100644 index df6e0155..00000000 Binary files a/shapefiles/test/polygon.shx and /dev/null differ diff --git a/shapefiles/test/shapetype.dbf b/shapefiles/test/shapetype.dbf deleted file mode 100644 index e030c2a3..00000000 Binary files a/shapefiles/test/shapetype.dbf and /dev/null differ diff --git a/shapefiles/test/shapetype.shp b/shapefiles/test/shapetype.shp deleted file mode 100644 index 29de63a1..00000000 Binary files a/shapefiles/test/shapetype.shp and /dev/null differ diff --git a/shapefiles/test/shapetype.shx b/shapefiles/test/shapetype.shx deleted file mode 100644 index 29de63a1..00000000 Binary files a/shapefiles/test/shapetype.shx and /dev/null differ diff --git a/shapefiles/test/testfile.dbf b/shapefiles/test/testfile.dbf deleted file mode 100644 index e030c2a3..00000000 Binary files a/shapefiles/test/testfile.dbf and /dev/null differ diff --git a/shapefiles/test/testfile.shp b/shapefiles/test/testfile.shp deleted file mode 100644 index 45d2404b..00000000 Binary files a/shapefiles/test/testfile.shp and /dev/null differ diff --git a/shapefiles/test/testfile.shx b/shapefiles/test/testfile.shx deleted file mode 100644 index 45d2404b..00000000 Binary files a/shapefiles/test/testfile.shx and /dev/null differ diff --git a/src/py.typed b/src/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/src/shapefile.py b/src/shapefile.py new file mode 100644 index 00000000..316db0cd --- /dev/null +++ b/src/shapefile.py @@ -0,0 +1,4056 @@ +""" +shapefile.py +Provides read and write support for ESRI Shapefiles. 
+authors: jlawheadgeospatialpython.com +maintainer: karim.bahgat.norwaygmail.com +Compatible with Python versions >=3.9 +""" + +from __future__ import annotations + +__version__ = "3.0.0-alpha" + +import array +import doctest +import io +import logging +import os +import sys +import tempfile +import time +import zipfile +from datetime import date +from struct import Struct, calcsize, error, pack, unpack +from typing import ( + IO, + Any, + Container, + Final, + Generic, + Iterable, + Iterator, + Literal, + NamedTuple, + NoReturn, + Optional, + Protocol, + Reversible, + Sequence, + TypedDict, + TypeVar, + Union, + cast, + overload, +) +from urllib.error import HTTPError +from urllib.parse import urlparse, urlunparse +from urllib.request import Request, urlopen + +# Create named logger +logger = logging.getLogger(__name__) + +doctest.NORMALIZE_WHITESPACE = 1 + +# Module settings +VERBOSE = True + +# Test config (for the Doctest runner and test_shapefile.py) +REPLACE_REMOTE_URLS_WITH_LOCALHOST = ( + os.getenv("REPLACE_REMOTE_URLS_WITH_LOCALHOST", "").lower() == "yes" +) + +# Constants for shape types +NULL = 0 +POINT = 1 +POLYLINE = 3 +POLYGON = 5 +MULTIPOINT = 8 +POINTZ = 11 +POLYLINEZ = 13 +POLYGONZ = 15 +MULTIPOINTZ = 18 +POINTM = 21 +POLYLINEM = 23 +POLYGONM = 25 +MULTIPOINTM = 28 +MULTIPATCH = 31 + +SHAPETYPE_LOOKUP = { + NULL: "NULL", + POINT: "POINT", + POLYLINE: "POLYLINE", + POLYGON: "POLYGON", + MULTIPOINT: "MULTIPOINT", + POINTZ: "POINTZ", + POLYLINEZ: "POLYLINEZ", + POLYGONZ: "POLYGONZ", + MULTIPOINTZ: "MULTIPOINTZ", + POINTM: "POINTM", + POLYLINEM: "POLYLINEM", + POLYGONM: "POLYGONM", + MULTIPOINTM: "MULTIPOINTM", + MULTIPATCH: "MULTIPATCH", +} + +SHAPETYPENUM_LOOKUP = {name: code for code, name in SHAPETYPE_LOOKUP.items()} + +TRIANGLE_STRIP = 0 +TRIANGLE_FAN = 1 +OUTER_RING = 2 +INNER_RING = 3 +FIRST_RING = 4 +RING = 5 + +PARTTYPE_LOOKUP = { + 0: "TRIANGLE_STRIP", + 1: "TRIANGLE_FAN", + 2: "OUTER_RING", + 3: "INNER_RING", + 4: "FIRST_RING", + 5: "RING", +} + +## Custom type variables + +T = TypeVar("T") +Point2D = tuple[float, float] +Point3D = tuple[float, float, float] +PointMT = tuple[float, float, Optional[float]] +PointZT = tuple[float, float, float, Optional[float]] + +Coord = Union[Point2D, Point3D] +Coords = list[Coord] + +PointT = Union[Point2D, PointMT, PointZT] +PointsT = list[PointT] + +BBox = tuple[float, float, float, float] +MBox = tuple[float, float] +ZBox = tuple[float, float] + + +class WriteableBinStream(Protocol): + def write(self, b: bytes): ... + + +class ReadableBinStream(Protocol): + def read(self, size: int = -1): ... + + +class WriteSeekableBinStream(Protocol): + def write(self, b: bytes): ... + def seek(self, offset: int, whence: int = 0): ... + def tell(self): ... + + +class ReadSeekableBinStream(Protocol): + def seek(self, offset: int, whence: int = 0): ... + def tell(self): ... + def read(self, size: int = -1): ... + + +class ReadWriteSeekableBinStream(Protocol): + def write(self, b: bytes): ... + def seek(self, offset: int, whence: int = 0): ... + def tell(self): ... + def read(self, size: int = -1): ... + + +# File name, file object or anything with a read() method that returns bytes. 
+
+
+# File name, file object or anything with a read() method that returns bytes.
+BinaryFileT = Union[str, IO[bytes]]
+BinaryFileStreamT = Union[IO[bytes], io.BytesIO, WriteSeekableBinStream]
+
+FieldTypeT = Literal["C", "D", "F", "L", "M", "N"]
+
+
+# https://en.wikipedia.org/wiki/.dbf#Database_records
+class FieldType:
+    """A bare bones 'enum', as the enum library noticeably slows performance."""
+
+    C: Final = "C"  # "Character"  # (str)
+    D: Final = "D"  # "Date"
+    F: Final = "F"  # "Floating point"
+    L: Final = "L"  # "Logical"  # (bool)
+    M: Final = "M"  # "Memo"  # Legacy. (10 digit str, starting block in a .dbt file)
+    N: Final = "N"  # "Numeric"  # (int)
+    __members__: set[FieldTypeT] = {
+        "C",
+        "D",
+        "F",
+        "L",
+        "M",
+        "N",
+    }
+
+
+FIELD_TYPE_ALIASES: dict[Union[str, bytes], FieldTypeT] = {}
+for c in FieldType.__members__:
+    FIELD_TYPE_ALIASES[c.upper()] = c
+    FIELD_TYPE_ALIASES[c.lower()] = c
+    FIELD_TYPE_ALIASES[c.encode("ascii").lower()] = c
+    FIELD_TYPE_ALIASES[c.encode("ascii").upper()] = c
+
+
+# The attribute is named field_type rather than type, to avoid shadowing the builtin
+class Field(NamedTuple):
+    name: str
+    field_type: FieldTypeT
+    size: int
+    decimal: int
+
+    @classmethod
+    def from_unchecked(
+        cls,
+        name: str,
+        field_type: Union[str, bytes, FieldTypeT] = "C",
+        size: int = 50,
+        decimal: int = 0,
+    ) -> Field:
+        try:
+            type_ = FIELD_TYPE_ALIASES[field_type]
+        except KeyError:
+            raise ShapefileException(
+                f"field_type must be in {FieldType.__members__}. Got: {field_type=}. "
+            )
+
+        if type_ is FieldType.D:
+            size = 8
+            decimal = 0
+        elif type_ is FieldType.L:
+            size = 1
+            decimal = 0
+
+        # A doctest in README.md previously passed in a string ('40') for size,
+        # so explicitly convert name to str, and size and decimal to ints.
+        return cls(
+            name=str(name), field_type=type_, size=int(size), decimal=int(decimal)
+        )
+
+    def __repr__(self) -> str:
+        return f'Field(name="{self.name}", field_type=FieldType.{self.field_type}, size={self.size}, decimal={self.decimal})'
+
+
+RecordValueNotDate = Union[bool, int, float, str]
+
+# A possible value in a shapefile dbf record, i.e. L, N, M, F, C, or D types
+RecordValue = Union[RecordValueNotDate, date]
+
+
+class HasGeoInterface(Protocol):
+    @property
+    def __geo_interface__(self) -> Any: ...
+
+
+class GeoJSONPoint(TypedDict):
+    type: Literal["Point"]
+    # We fix to a tuple (to statically check the length is 2, 3 or 4) but
+    # RFC7946 only requires: "A position is an array of numbers. There MUST
+    # be two or more elements."
+    # RFC7946 also requires long/lat (easting/northing) axis order, which we
+    # do not enforce, and despite the SHOULD NOT, we may use a 4th element
+    # for Shapefile M Measures.
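+    # (e.g. (1.0, 2.0) for x, y; up to (1.0, 2.0, 3.0, 4.0) for x, y, z, m)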
+ coordinates: Union[PointT, tuple[()]] + + +class GeoJSONMultiPoint(TypedDict): + type: Literal["MultiPoint"] + coordinates: PointsT + + +class GeoJSONLineString(TypedDict): + type: Literal["LineString"] + # "Two or more positions" not enforced by type checker + # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.4 + coordinates: PointsT + + +class GeoJSONMultiLineString(TypedDict): + type: Literal["MultiLineString"] + coordinates: list[PointsT] + + +class GeoJSONPolygon(TypedDict): + type: Literal["Polygon"] + # Other requirements for Polygon not enforced by type checker + # https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.6 + coordinates: list[PointsT] + + +class GeoJSONMultiPolygon(TypedDict): + type: Literal["MultiPolygon"] + coordinates: list[list[PointsT]] + + +GeoJSONHomogeneousGeometryObject = Union[ + GeoJSONPoint, + GeoJSONMultiPoint, + GeoJSONLineString, + GeoJSONMultiLineString, + GeoJSONPolygon, + GeoJSONMultiPolygon, +] + +GEOJSON_TO_SHAPETYPE: dict[str, int] = { + "Null": NULL, + "Point": POINT, + "LineString": POLYLINE, + "Polygon": POLYGON, + "MultiPoint": MULTIPOINT, + "MultiLineString": POLYLINE, + "MultiPolygon": POLYGON, +} + + +class GeoJSONGeometryCollection(TypedDict): + type: Literal["GeometryCollection"] + geometries: list[GeoJSONHomogeneousGeometryObject] + + +# RFC7946 3.1 +GeoJSONObject = Union[GeoJSONHomogeneousGeometryObject, GeoJSONGeometryCollection] + + +class GeoJSONFeature(TypedDict): + type: Literal["Feature"] + properties: Optional[ + dict[str, Any] + ] # RFC7946 3.2 "(any JSON object or a JSON null value)" + geometry: Optional[GeoJSONObject] + + +class GeoJSONFeatureCollection(TypedDict): + type: Literal["FeatureCollection"] + features: list[GeoJSONFeature] + + +class GeoJSONFeatureCollectionWithBBox(GeoJSONFeatureCollection): + # bbox is technically optional under the spec + bbox: list[float] + + +# Helpers + +MISSING = (None, "") # Don't make a set, as user input may not be Hashable +NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. + +unpack_2_int32_be = Struct(">2i").unpack + + +@overload +def fsdecode_if_pathlike(path: os.PathLike) -> str: ... +@overload +def fsdecode_if_pathlike(path: T) -> T: ... +def fsdecode_if_pathlike(path): + if isinstance(path, os.PathLike): + return os.fsdecode(path) # str + + return path + + +# Begin + + +class _Array(array.array, Generic[T]): + """Converts python tuples to lists of the appropriate type. + Used to unpack different shapefile header parts.""" + + def __repr__(self): + return str(self.tolist()) + + +def signed_area( + coords: PointsT, + fast: bool = False, +) -> float: + """Return the signed area enclosed by a ring using the linear time + algorithm. A value >= 0 indicates a counter-clockwise oriented ring. + A faster version is possible by setting 'fast' to True, which returns + 2x the area, e.g. if you're only interested in the sign of the area. + """ + xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values + xs.append(xs[1]) + ys.append(ys[1]) + area2 = sum(xs[i] * (ys[i + 1] - ys[i - 1]) for i in range(1, len(coords))) + if fast: + return area2 + + return area2 / 2.0 + + +def is_cw(coords: PointsT) -> bool: + """Returns True if a polygon ring has clockwise orientation, determined + by a negatively signed area. 
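+
+    For example, a closed clockwise unit square ring (illustrative):
+
+    >>> is_cw([(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)])
+    True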
+ """ + area2 = signed_area(coords, fast=True) + return area2 < 0 + + +def rewind(coords: Reversible[PointT]) -> PointsT: + """Returns the input coords in reversed order.""" + return list(reversed(coords)) + + +def ring_bbox(coords: PointsT) -> BBox: + """Calculates and returns the bounding box of a ring.""" + xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values + # bbox = BBox(xmin=min(xs), ymin=min(ys), xmax=max(xs), ymax=max(ys)) + # return bbox + return min(xs), min(ys), max(xs), max(ys) + + +def bbox_overlap(bbox1: BBox, bbox2: BBox) -> bool: + """Tests whether two bounding boxes overlap.""" + xmin1, ymin1, xmax1, ymax1 = bbox1 + xmin2, ymin2, xmax2, ymax2 = bbox2 + overlap = xmin1 <= xmax2 and xmin2 <= xmax1 and ymin1 <= ymax2 and ymin2 <= ymax1 + return overlap + + +def bbox_contains(bbox1: BBox, bbox2: BBox) -> bool: + """Tests whether bbox1 fully contains bbox2.""" + xmin1, ymin1, xmax1, ymax1 = bbox1 + xmin2, ymin2, xmax2, ymax2 = bbox2 + contains = xmin1 < xmin2 and xmax2 < xmax1 and ymin1 < ymin2 and ymax2 < ymax1 + return contains + + +def ring_contains_point(coords: PointsT, p: Point2D) -> bool: + """Fast point-in-polygon crossings algorithm, MacMartin optimization. + + Adapted from code by Eric Haynes + http://www.realtimerendering.com/resources/GraphicsGems//gemsiv/ptpoly_haines/ptinpoly.c + + Original description: + Shoot a test ray along +X axis. The strategy, from MacMartin, is to + compare vertex Y values to the testing point's Y and quickly discard + edges which are entirely to one side of the test ray. + """ + tx, ty = p + + # get initial test bit for above/below X axis + vtx0 = coords[0] + yflag0 = vtx0[1] >= ty + + inside_flag = False + for vtx1 in coords[1:]: + yflag1 = vtx1[1] >= ty + # check if endpoints straddle (are on opposite sides) of X axis + # (i.e. the Y's differ); if so, +X ray could intersect this edge. + if yflag0 != yflag1: + xflag0 = vtx0[0] >= tx + # check if endpoints are on same side of the Y axis (i.e. X's + # are the same); if so, it's easy to test if edge hits or misses. + if xflag0 == (vtx1[0] >= tx): + # if edge's X values both right of the point, must hit + if xflag0: + inside_flag = not inside_flag + else: + # compute intersection of pgon segment with +X ray, note + # if >= point's X; if so, the ray hits it. + if ( + vtx1[0] - (vtx1[1] - ty) * (vtx0[0] - vtx1[0]) / (vtx0[1] - vtx1[1]) + ) >= tx: + inside_flag = not inside_flag + + # move to next pair of vertices, retaining info as possible + yflag0 = yflag1 + vtx0 = vtx1 + + return inside_flag + + +class RingSamplingError(Exception): + pass + + +def ring_sample(coords: PointsT, ccw: bool = False) -> Point2D: + """Return a sample point guaranteed to be within a ring, by efficiently + finding the first centroid of a coordinate triplet whose orientation + matches the orientation of the ring and passes the point-in-ring test. + The orientation of the ring is assumed to be clockwise, unless ccw + (counter-clockwise) is set to True. 
+ """ + triplet = [] + + def itercoords(): + # iterate full closed ring + yield from coords + # finally, yield the second coordinate to the end to allow checking the last triplet + yield coords[1] + + for p in itercoords(): + # add point to triplet (but not if duplicate) + if p not in triplet: + triplet.append(p) + + # new triplet, try to get sample + if len(triplet) == 3: + # check that triplet does not form a straight line (not a triangle) + is_straight_line = (triplet[0][1] - triplet[1][1]) * ( + triplet[0][0] - triplet[2][0] + ) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) + if not is_straight_line: + # get triplet orientation + closed_triplet = triplet + [triplet[0]] + triplet_ccw = not is_cw(closed_triplet) + # check that triplet has the same orientation as the ring (means triangle is inside the ring) + if ccw == triplet_ccw: + # get triplet centroid + xs, ys = zip(*triplet) + xmean, ymean = sum(xs) / 3.0, sum(ys) / 3.0 + # check that triplet centroid is truly inside the ring + if ring_contains_point(coords, (xmean, ymean)): + return xmean, ymean + + # failed to get sample point from this triplet + # remove oldest triplet coord to allow iterating to next triplet + triplet.pop(0) + + raise RingSamplingError( + f"Unexpected error: Unable to find a ring sample point in: {coords}." + "Ensure the ring's coordinates are oriented clockwise, " + "and ensure the area enclosed is non-zero. " + ) + + +def ring_contains_ring(coords1: PointsT, coords2: list[PointT]) -> bool: + """Returns True if all vertexes in coords2 are fully inside coords1.""" + # Ignore Z and M values in coords2 + return all(ring_contains_point(coords1, p2[:2]) for p2 in coords2) + + +def organize_polygon_rings( + rings: Iterable[PointsT], return_errors: Optional[dict[str, int]] = None +) -> list[list[PointsT]]: + """Organize a list of coordinate rings into one or more polygons with holes. + Returns a list of polygons, where each polygon is composed of a single exterior + ring, and one or more interior holes. If a return_errors dict is provided (optional), + any errors encountered will be added to it. + + Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). + Rings are determined as exteriors if they run in clockwise direction, or interior + holes if they run in counter-clockwise direction. This method is used to construct + GeoJSON (multi)polygons from the shapefile polygon shape type, which does not + explicitly store the structure of the polygons beyond exterior/interior ring orientation. 
+ """ + # first iterate rings and classify as exterior or hole + exteriors = [] + holes = [] + for ring in rings: + # shapefile format defines a polygon as a sequence of rings + # where exterior rings are clockwise, and holes counterclockwise + if is_cw(ring): + # ring is exterior + exteriors.append(ring) + else: + # ring is a hole + holes.append(ring) + + # if only one exterior, then all holes belong to that exterior + if len(exteriors) == 1: + # exit early + poly = [exteriors[0]] + holes + polys = [poly] + return polys + + # multiple exteriors, ie multi-polygon, have to group holes with correct exterior + # shapefile format does not specify which holes belong to which exteriors + # so have to do efficient multi-stage checking of hole-to-exterior containment + if len(exteriors) > 1: + # exit early if no holes + if not holes: + polys = [] + for ext in exteriors: + poly = [ext] + polys.append(poly) + return polys + + # first determine each hole's candidate exteriors based on simple bbox contains test + hole_exteriors: dict[int, list[int]] = { + hole_i: [] for hole_i in range(len(holes)) + } + exterior_bboxes = [ring_bbox(ring) for ring in exteriors] + for hole_i in hole_exteriors.keys(): + hole_bbox = ring_bbox(holes[hole_i]) + for ext_i, ext_bbox in enumerate(exterior_bboxes): + if bbox_contains(ext_bbox, hole_bbox): + hole_exteriors[hole_i].append(ext_i) + + # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test + for hole_i, exterior_candidates in hole_exteriors.items(): + if len(exterior_candidates) > 1: + # get hole sample point + ccw = not is_cw(holes[hole_i]) + hole_sample = ring_sample(holes[hole_i], ccw=ccw) + # collect new exterior candidates + new_exterior_candidates = [] + for ext_i in exterior_candidates: + # check that hole sample point is inside exterior + hole_in_exterior = ring_contains_point( + exteriors[ext_i], hole_sample + ) + if hole_in_exterior: + new_exterior_candidates.append(ext_i) + + # set new exterior candidates + hole_exteriors[hole_i] = new_exterior_candidates + + # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole + for hole_i, exterior_candidates in hole_exteriors.items(): + if len(exterior_candidates) > 1: + # exterior candidate with the smallest area is the hole's most immediate parent + ext_i = sorted( + exterior_candidates, + key=lambda x: abs(signed_area(exteriors[x], fast=True)), + )[0] + hole_exteriors[hole_i] = [ext_i] + + # separate out holes that are orphaned (not contained by any exterior) + orphan_holes = [] + for hole_i, exterior_candidates in list(hole_exteriors.items()): + if not exterior_candidates: + orphan_holes.append(hole_i) + del hole_exteriors[hole_i] + continue + + # each hole should now only belong to one exterior, group into exterior-holes polygons + polys = [] + for ext_i, ext in enumerate(exteriors): + poly = [ext] + # find relevant holes + poly_holes = [] + for hole_i, exterior_candidates in list(hole_exteriors.items()): + # hole is relevant if previously matched with this exterior + if exterior_candidates[0] == ext_i: + poly_holes.append(holes[hole_i]) + poly += poly_holes + polys.append(poly) + + # add orphan holes as exteriors + for hole_i in orphan_holes: + ext = holes[hole_i] + # add as single exterior without any holes + poly = [ext] + polys.append(poly) + + if orphan_holes and return_errors is not None: + return_errors["polygon_orphaned_holes"] = len(orphan_holes) + + return polys + + # no exteriors, be nice and 
assume due to incorrect winding order
+    if return_errors is not None:
+        return_errors["polygon_only_holes"] = len(holes)
+    exteriors = holes
+    # add as single exterior without any holes
+    polys = [[ext] for ext in exteriors]
+    return polys
+
+
+class GeoJSON_Error(Exception):
+    pass
+
+
+class _NoShapeTypeSentinel:
+    """For use as a default value for Shape.__init__ to
+    preserve old behaviour for anyone who explicitly
+    called Shape(shapeType=None).
+    """
+
+
+def _m_from_point(point: Union[PointMT, PointZT], mpos: int) -> Optional[float]:
+    if len(point) > mpos and point[mpos] is not None:
+        return cast(float, point[mpos])
+    return None
+
+
+def _ms_from_points(
+    points: Union[list[PointMT], list[PointZT]], mpos: int
+) -> Iterator[Optional[float]]:
+    return (_m_from_point(p, mpos) for p in points)
+
+
+def _z_from_point(point: PointZT) -> float:
+    if len(point) >= 3 and point[2] is not None:
+        return point[2]
+    return 0.0
+
+
+def _zs_from_points(points: Iterable[PointZT]) -> Iterator[float]:
+    return (_z_from_point(p) for p in points)
+
+
+class CanHaveBboxNoLinesKwargs(TypedDict, total=False):
+    oid: Optional[int]
+    points: Optional[PointsT]
+    parts: Optional[Sequence[int]]  # index of start point of each part
+    partTypes: Optional[Sequence[int]]
+    bbox: Optional[BBox]
+    m: Optional[Sequence[Optional[float]]]
+    z: Optional[Sequence[float]]
+    mbox: Optional[MBox]
+    zbox: Optional[ZBox]
+
+
+class Shape:
+    def __init__(
+        self,
+        shapeType: Union[int, _NoShapeTypeSentinel] = _NoShapeTypeSentinel(),
+        points: Optional[PointsT] = None,
+        parts: Optional[Sequence[int]] = None,  # index of start point of each part
+        lines: Optional[list[PointsT]] = None,
+        partTypes: Optional[Sequence[int]] = None,
+        oid: Optional[int] = None,
+        *,
+        m: Optional[Sequence[Optional[float]]] = None,
+        z: Optional[Sequence[float]] = None,
+        bbox: Optional[BBox] = None,
+        mbox: Optional[MBox] = None,
+        zbox: Optional[ZBox] = None,
+    ):
+        """Stores the geometry of the different shape types
+        specified in the Shapefile spec. Shape types are
+        usually point, polyline, or polygons. Every shape type
+        except the "Null" type contains points at some level, for
+        example vertices in a polygon. If a shape type has
+        multiple shapes containing points within a single
+        geometry record then those shapes are called parts. Parts
+        are designated by their starting index in the geometry record's
+        list of shapes. For MultiPatch geometry, partTypes designates
+        the patch type of each of the parts.
+        """
+
+        # Preserve previous behaviour for anyone who set self.shapeType = None
+        if not isinstance(shapeType, _NoShapeTypeSentinel):
+            self.shapeType = shapeType
+        else:
+            class_name = self.__class__.__name__
+            self.shapeType = SHAPETYPENUM_LOOKUP.get(class_name.upper(), NULL)
+
+        if partTypes is not None:
+            self.partTypes = partTypes
+
+        default_points: PointsT = []
+        default_parts: list[int] = []
+
+        # Make sure polygon rings (parts) are closed
+        if lines is not None:
+            if self.shapeType in Polygon_shapeTypes:
+                lines = list(lines)
+                self._ensure_polygon_rings_closed(lines)
+
+            default_points, default_parts = self._points_and_parts_indexes_from_lines(
+                lines
+            )
+        elif points and self.shapeType in _CanHaveBBox_shapeTypes:
+            # TODO: Raise issue.
+            # This ensures Polylines, Polygons and Multipatches with no part information are a single
+            # Polyline, Polygon or Multipatch respectively.
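+            # (e.g. Shape(POLYGON, points=[(0, 0), (0, 5), (5, 5), (0, 0)])
+            # gets default parts == [0], i.e. it is treated as a single ring.)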
+ # + # However this also allows MultiPoints shapes to have a single part index 0 as + # documented in README.md,also when set from points + # (even though this is just an artefact of initialising them as a length-1 nested + # list of points via _points_and_parts_indexes_from_lines). + # + # Alternatively single points could be given parts = [0] too, as they do if formed + # _from_geojson. + default_parts = [0] + + self.points: PointsT = points or default_points + + self.parts: Sequence[int] = parts or default_parts + + # and a dict to silently record any errors encountered in GeoJSON + self._errors: dict[str, int] = {} + + # add oid + self.__oid: int = -1 if oid is None else oid + + if bbox is not None: + self.bbox: BBox = bbox + elif len(self.points) >= 2: + self.bbox = self._bbox_from_points() + + ms_found = True + if m: + self.m: Sequence[Optional[float]] = m + elif self.shapeType in _HasM_shapeTypes: + mpos = 3 if self.shapeType in _HasZ_shapeTypes | PointZ_shapeTypes else 2 + points_m_z = cast(Union[list[PointMT], list[PointZT]], self.points) + self.m = list(_ms_from_points(points_m_z, mpos)) + elif self.shapeType in PointM_shapeTypes: + mpos = 3 if self.shapeType == POINTZ else 2 + point_m_z = cast(Union[PointMT, PointZT], self.points[0]) + self.m = (_m_from_point(point_m_z, mpos),) + else: + ms_found = False + + zs_found = True + if z: + self.z: Sequence[float] = z + elif self.shapeType in _HasZ_shapeTypes: + points_z = cast(list[PointZT], self.points) + self.z = list(_zs_from_points(points_z)) + elif self.shapeType == POINTZ: + point_z = cast(PointZT, self.points[0]) + self.z = (_z_from_point(point_z),) + else: + zs_found = False + + if mbox is not None: + self.mbox: MBox = mbox + elif ms_found: + self.mbox = self._mbox_from_ms() + + if zbox is not None: + self.zbox: ZBox = zbox + elif zs_found: + self.zbox = self._zbox_from_zs() + + @staticmethod + def _ensure_polygon_rings_closed( + parts: list[PointsT], # Mutated + ) -> None: + for part in parts: + if part[0] != part[-1]: + part.append(part[0]) + + @staticmethod + def _points_and_parts_indexes_from_lines( + parts: list[PointsT], + ) -> tuple[PointsT, list[int]]: + # Intended for Union[Polyline, Polygon, MultiPoint, MultiPatch] + """From a list of parts (each part a list of points) return + a flattened list of points, and a list of indexes into that + flattened list corresponding to the start of each part. + + Internal method for both multipoints (formed entirely by a single part), + and shapes that have multiple collections of points (each one + a part): (poly)lines, polygons, and multipatchs. + """ + part_indexes: list[int] = [] + points: PointsT = [] + + for part in parts: + # set part index position + part_indexes.append(len(points)) + points.extend(part) + + return points, part_indexes + + def _bbox_from_points(self) -> BBox: + xs: list[float] = [] + ys: list[float] = [] + + for point in self.points: + xs.append(point[0]) + ys.append(point[1]) + + return min(xs), min(ys), max(xs), max(ys) + + def _mbox_from_ms(self) -> MBox: + ms: list[float] = [m for m in self.m if m is not None] + + if not ms: + # only if none of the shapes had m values, should mbox be set to missing m values + ms.append(NODATA) + + return min(ms), max(ms) + + def _zbox_from_zs(self) -> ZBox: + return min(self.z), max(self.z) + + @property + def __geo_interface__(self) -> GeoJSONHomogeneousGeometryObject: + if self.shapeType in {POINT, POINTM, POINTZ}: + # point + if len(self.points) == 0: + # the shape has no coordinate information, i.e. 
is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "Point", "coordinates": ()} + + return {"type": "Point", "coordinates": self.points[0]} + + if self.shapeType in {MULTIPOINT, MULTIPOINTM, MULTIPOINTZ}: + if len(self.points) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "MultiPoint", "coordinates": []} + + # multipoint + return { + "type": "MultiPoint", + "coordinates": self.points, + } + + if self.shapeType in {POLYLINE, POLYLINEM, POLYLINEZ}: + if len(self.parts) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "LineString", "coordinates": []} + + if len(self.parts) == 1: + # linestring + return { + "type": "LineString", + "coordinates": self.points, + } + + # multilinestring + ps = None + coordinates = [] + for part in self.parts: + if ps is None: + ps = part + continue + + coordinates.append(list(self.points[ps:part])) + ps = part + + # assert len(self.parts) > 1 + # from previous if len(self.parts) checks so part is defined + coordinates.append(list(self.points[part:])) + return {"type": "MultiLineString", "coordinates": coordinates} + + if self.shapeType in {POLYGON, POLYGONM, POLYGONZ}: + if len(self.parts) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "Polygon", "coordinates": []} + + # get all polygon rings + rings = [] + for i, start in enumerate(self.parts): + # get indexes of start and end points of the ring + try: + end = self.parts[i + 1] + except IndexError: + end = len(self.points) + + # extract the points that make up the ring + ring = list(self.points[start:end]) + rings.append(ring) + + # organize rings into list of polygons, where each polygon is defined as list of rings. + # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). + polys = organize_polygon_rings(rings, self._errors) + + # if VERBOSE is True, issue detailed warning about any shape errors + # encountered during the Shapefile to GeoJSON conversion + if VERBOSE and self._errors: + header = f"Possible issue encountered when converting Shape #{self.oid} to GeoJSON: " + orphans = self._errors.get("polygon_orphaned_holes", None) + if orphans: + msg = ( + header + + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ +but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ +orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ +encoded as GeoJSON exterior rings instead of holes." 
+                    )
+                    logger.warning(msg)
+                only_holes = self._errors.get("polygon_only_holes", None)
+                if only_holes:
+                    msg = (
+                        header
+                        + "Shapefile format requires that polygons contain at least one exterior ring, \
+but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \
+still included but were encoded as GeoJSON exterior rings instead of holes."
+                    )
+                    logger.warning(msg)
+
+            # return as geojson
+            if len(polys) == 1:
+                return {"type": "Polygon", "coordinates": polys[0]}
+
+            return {"type": "MultiPolygon", "coordinates": polys}
+
+        raise GeoJSON_Error(
+            f'Shape type "{SHAPETYPE_LOOKUP[self.shapeType]}" cannot be represented as GeoJSON.'
+        )
+
+    @staticmethod
+    def _from_geojson(geoj) -> Shape:
+        # set shapeType
+        geojType = geoj["type"] if geoj else "Null"
+        if geojType in GEOJSON_TO_SHAPETYPE:
+            shapeType = GEOJSON_TO_SHAPETYPE[geojType]
+        else:
+            raise GeoJSON_Error(f"Cannot create Shape from GeoJSON type '{geojType}'")
+
+        # set points and parts (both stay empty for "Null" geometries)
+        points: PointsT = []
+        parts: list[int] = []
+        if geojType == "Point":
+            points = [geoj["coordinates"]]
+            parts = [0]
+        elif geojType in ("MultiPoint", "LineString"):
+            points = geoj["coordinates"]
+            parts = [0]
+        elif geojType == "Polygon":
+            index = 0
+            for i, ext_or_hole in enumerate(geoj["coordinates"]):
+                # although the latest GeoJSON spec states that exterior rings should have
+                # counter-clockwise orientation, we explicitly check orientation since older
+                # GeoJSONs might not enforce this.
+                if i == 0 and not is_cw(ext_or_hole):
+                    # flip exterior direction
+                    ext_or_hole = rewind(ext_or_hole)
+                elif i > 0 and is_cw(ext_or_hole):
+                    # flip hole direction
+                    ext_or_hole = rewind(ext_or_hole)
+                points.extend(ext_or_hole)
+                parts.append(index)
+                index += len(ext_or_hole)
+        elif geojType == "MultiLineString":
+            index = 0
+            for linestring in geoj["coordinates"]:
+                points.extend(linestring)
+                parts.append(index)
+                index += len(linestring)
+        elif geojType == "MultiPolygon":
+            index = 0
+            for polygon in geoj["coordinates"]:
+                for i, ext_or_hole in enumerate(polygon):
+                    # although the latest GeoJSON spec states that exterior rings should have
+                    # counter-clockwise orientation, we explicitly check orientation since older
+                    # GeoJSONs might not enforce this.
+                    if i == 0 and not is_cw(ext_or_hole):
+                        # flip exterior direction
+                        ext_or_hole = rewind(ext_or_hole)
+                    elif i > 0 and is_cw(ext_or_hole):
+                        # flip hole direction
+                        ext_or_hole = rewind(ext_or_hole)
+                    points.extend(ext_or_hole)
+                    parts.append(index)
+                    index += len(ext_or_hole)
+        return Shape(shapeType=shapeType, points=points, parts=parts)
+
+    @property
+    def oid(self) -> int:
+        """The index position of the shape in the original shapefile"""
+        return self.__oid
+
+    @property
+    def shapeTypeName(self) -> str:
+        return SHAPETYPE_LOOKUP[self.shapeType]
+
+    def __repr__(self):
+        class_name = self.__class__.__name__
+        if class_name == "Shape":
+            return f"Shape #{self.__oid}: {self.shapeTypeName}"
+        return f"{class_name} #{self.__oid}"
+
+
+# Need unused arguments to keep the same call signature for
+# different implementations of from_byte_stream and write_to_byte_stream
+class NullShape(Shape):
+    # Shape.__init__ already falls back to shapeType = NULL by default;
+    # it is passed explicitly below only for clarity.
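+    #
+    # Usage sketch: NullShape() has shapeType NULL and points == [], and
+    # its write_to_byte_stream writes zero body bytes.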
+    def __init__(
+        self,
+        oid: Optional[int] = None,
+    ):
+        Shape.__init__(self, shapeType=NULL, oid=oid)
+
+    @staticmethod
+    def from_byte_stream(
+        shapeType: int,
+        b_io: ReadSeekableBinStream,
+        next_shape: int,
+        oid: Optional[int] = None,
+        bbox: Optional[BBox] = None,
+    ) -> NullShape:
+        # Shape.__init__ sets self.points = points or []
+        return NullShape(oid=oid)
+
+    @staticmethod
+    def write_to_byte_stream(
+        b_io: WriteableBinStream,
+        s: Shape,
+        i: int,
+    ) -> int:
+        return 0
+
+
+_CanHaveBBox_shapeTypes = frozenset(
+    [
+        POLYLINE,
+        POLYLINEM,
+        POLYLINEZ,
+        MULTIPOINT,
+        MULTIPOINTM,
+        MULTIPOINTZ,
+        POLYGON,
+        POLYGONM,
+        POLYGONZ,
+        MULTIPATCH,
+    ]
+)
+
+
+class _CanHaveBBox(Shape):
+    """As well as setting bounding boxes, we also utilize the
+    fact that this mixin applies to all the shapes that are
+    not a single point.
+    """
+
+    @staticmethod
+    def _read_bbox_from_byte_stream(b_io: ReadableBinStream) -> BBox:
+        return unpack("<4d", b_io.read(32))
+
+    @staticmethod
+    def _write_bbox_to_byte_stream(
+        b_io: WriteableBinStream, i: int, bbox: Optional[BBox]
+    ) -> int:
+        if not bbox or len(bbox) != 4:
+            raise ShapefileException(f"Four numbers required for bbox. Got: {bbox}")
+        try:
+            return b_io.write(pack("<4d", *bbox))
+        except error:
+            raise ShapefileException(
+                f"Failed to write bounding box for record {i}. Expected floats."
+            )
+
+    @staticmethod
+    def _read_npoints_from_byte_stream(b_io: ReadableBinStream) -> int:
+        return unpack("<i", b_io.read(4))[0]
+
+    @staticmethod
+    def _write_npoints_to_byte_stream(
+        b_io: WriteableBinStream, s: _CanHaveBBox
+    ) -> int:
+        return b_io.write(pack("<i", len(s.points)))
+
+    @staticmethod
+    def _read_points_from_byte_stream(
+        b_io: ReadableBinStream, nPoints: int
+    ) -> list[Point2D]:
+        flat = unpack(f"<{2 * nPoints}d", b_io.read(16 * nPoints))
+        return list(zip(*(iter(flat),) * 2))
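+
+    # Layout sketch: the xy block for nPoints == 2 is packed as "<4d", i.e.
+    # x1, y1, x2, y2 as little-endian 8-byte doubles.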
+
+    @staticmethod
+    def _write_points_to_byte_stream(
+        b_io: WriteableBinStream, s: _CanHaveBBox, i: int
+    ) -> int:
+        x_ys: list[float] = []
+        for point in s.points:
+            x_ys.extend(point[:2])
+        try:
+            return b_io.write(pack(f"<{len(x_ys)}d", *x_ys))
+        except error:
+            raise ShapefileException(
+                f"Failed to write points for record {i}. Expected floats."
+            )
+
+    @classmethod
+    def from_byte_stream(
+        cls,
+        shapeType: int,
+        b_io: ReadSeekableBinStream,
+        next_shape: int,
+        oid: Optional[int] = None,
+        bbox: Optional[BBox] = None,
+    ) -> Optional[Shape]:
+        ShapeClass = cast(type[_CanHaveBBox], SHAPE_CLASS_FROM_SHAPETYPE[shapeType])
+
+        kwargs: CanHaveBboxNoLinesKwargs = {"oid": oid}  # "shapeType": shapeType}
+        kwargs["bbox"] = shape_bbox = cls._read_bbox_from_byte_stream(b_io)
+
+        # if bbox specified and no overlap, skip this shape
+        if bbox is not None and not bbox_overlap(bbox, shape_bbox):
+            # because we stop parsing this shape, caller must skip to beginning of
+            # next shape after we return (as done in f.seek(next_shape))
+            return None
+
+        nParts: Optional[int] = (
+            _CanHaveParts._read_nparts_from_byte_stream(b_io)
+            if shapeType in _CanHaveParts_shapeTypes
+            else None
+        )
+        nPoints: int = cls._read_npoints_from_byte_stream(b_io)
+        # Previously, we also set __zmin = __zmax = __mmin = __mmax = None
+
+        if nParts:
+            kwargs["parts"] = _CanHaveParts._read_parts_from_byte_stream(b_io, nParts)
+            if shapeType == MULTIPATCH:
+                kwargs["partTypes"] = MultiPatch._read_part_types_from_byte_stream(
+                    b_io, nParts
+                )
+
+        # else:
+        #     parts = None
+        #     partTypes = None
+
+        if nPoints:
+            kwargs["points"] = cast(
+                PointsT, cls._read_points_from_byte_stream(b_io, nPoints)
+            )
+
+            if shapeType in _HasZ_shapeTypes:
+                kwargs["zbox"], kwargs["z"] = _HasZ._read_zs_from_byte_stream(
+                    b_io, nPoints
+                )
+
+            if shapeType in _HasM_shapeTypes:
+                kwargs["mbox"], kwargs["m"] = _HasM._read_ms_from_byte_stream(
+                    b_io, nPoints, next_shape
+                )
+
+        # else:
+        #     points = None
+        #     zbox, zs = None, None
+        #     mbox, ms = None, None
+
+        return ShapeClass(**kwargs)
+        # return ShapeClass(
+        #     shapeType=shapeType,
+        #     # Mypy 1.17.1 doesn't figure out that an Optional[list[Point2D]] is an Optional[list[PointT]]
+        #     points=cast(Optional[PointsT], points),
+        #     parts=parts,
+        #     partTypes=partTypes,
+        #     oid=oid,
+        #     m=ms,
+        #     z=zs,
+        #     bbox=shape_bbox,
+        #     mbox=mbox,
+        #     zbox=zbox,
+        # )
+
+    @staticmethod
+    def write_to_byte_stream(
+        b_io: WriteableBinStream,
+        s: Shape,
+        i: int,
+    ) -> int:
+        # We use static methods here and below,
+        # to support s only being an instance of the
+        # Shape base class (with shapeType set),
+        # i.e. not necessarily one of our newer shape specific
+        # sub classes.
+
+        n = 0
+
+        if s.shapeType in _CanHaveBBox_shapeTypes:
+            n += _CanHaveBBox._write_bbox_to_byte_stream(b_io, i, s.bbox)
+
+        if s.shapeType in _CanHaveParts_shapeTypes:
+            n += _CanHaveParts._write_nparts_to_byte_stream(
+                b_io, cast(_CanHaveParts, s)
+            )
+        # Shape types with multiple points per record
+        if s.shapeType in _CanHaveBBox_shapeTypes:
+            n += _CanHaveBBox._write_npoints_to_byte_stream(b_io, cast(_CanHaveBBox, s))
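+        # (Overall write order per the shapefile spec: bbox, counts,
+        # part indexes, part types, xy points, then z and m blocks.)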
+        # Write part indexes. Includes MultiPatch
+        if s.shapeType in _CanHaveParts_shapeTypes:
+            n += _CanHaveParts._write_part_indices_to_byte_stream(
+                b_io, cast(_CanHaveParts, s)
+            )
+
+        if s.shapeType in MultiPatch_shapeTypes:
+            n += MultiPatch._write_part_types_to_byte_stream(b_io, cast(MultiPatch, s))
+        # Write points for multiple-point records
+        if s.shapeType in _CanHaveBBox_shapeTypes:
+            n += _CanHaveBBox._write_points_to_byte_stream(
+                b_io, cast(_CanHaveBBox, s), i
+            )
+        if s.shapeType in _HasZ_shapeTypes:
+            n += _HasZ._write_zs_to_byte_stream(b_io, cast(_HasZ, s), i, s.zbox)
+
+        if s.shapeType in _HasM_shapeTypes:
+            n += _HasM._write_ms_to_byte_stream(b_io, cast(_HasM, s), i, s.mbox)
+
+        return n
+
+
+_CanHaveParts_shapeTypes = frozenset(
+    [
+        POLYLINE,
+        POLYLINEM,
+        POLYLINEZ,
+        POLYGON,
+        POLYGONM,
+        POLYGONZ,
+        MULTIPATCH,
+    ]
+)
+
+
+class _CanHaveParts(_CanHaveBBox):
+    # The parts attribute is initialised by
+    # the base class Shape's __init__, to parts or [].
+    # "Can Have Parts" should be read as "Can Have non-empty parts".
+
+    @staticmethod
+    def _read_nparts_from_byte_stream(b_io: ReadableBinStream) -> int:
+        return unpack("<i", b_io.read(4))[0]
+
+    @staticmethod
+    def _write_nparts_to_byte_stream(
+        b_io: WriteableBinStream, s: _CanHaveParts
+    ) -> int:
+        return b_io.write(pack("<i", len(s.parts)))
+
+    @staticmethod
+    def _read_parts_from_byte_stream(
+        b_io: ReadableBinStream, nParts: int
+    ) -> _Array[int]:
+        return _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4)))
+
+    @staticmethod
+    def _write_part_indices_to_byte_stream(
+        b_io: WriteableBinStream, s: _CanHaveParts
+    ) -> int:
+        return b_io.write(pack(f"<{len(s.parts)}i", *s.parts))
+
+
+Point_shapeTypes = frozenset([POINT, POINTM, POINTZ])
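+
+
+# Construction sketch: Point(1.0, 2.0) stores points == [(1.0, 2.0)], and
+# Shape.__init__ resolves its shapeType to POINT from the class name.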
+
+
+class Point(Shape):
+    # We also use the fact that the single Point types are the only
+    # shapes that cannot have their own bounding box (a user supplied
+    # bbox is still used to filter out points).
+    def __init__(
+        self,
+        x: float,
+        y: float,
+        oid: Optional[int] = None,
+    ):
+        Shape.__init__(self, points=[(x, y)], oid=oid)
+
+    @staticmethod
+    def _x_y_from_byte_stream(b_io: ReadableBinStream):
+        # Unpack _Array too
+        x, y = unpack("<2d", b_io.read(16))
+        # Convert to tuple
+        return x, y
+
+    @staticmethod
+    def _write_x_y_to_byte_stream(
+        b_io: WriteableBinStream, x: float, y: float, i: int
+    ) -> int:
+        try:
+            return b_io.write(pack("<2d", x, y))
+        except error:
+            raise ShapefileException(
+                f"Failed to write point for record {i}. Expected floats."
+            )
+
+    @classmethod
+    def from_byte_stream(
+        cls,
+        shapeType: int,
+        b_io: ReadSeekableBinStream,
+        next_shape: int,
+        oid: Optional[int] = None,
+        bbox: Optional[BBox] = None,
+    ) -> Optional[Shape]:
+        x, y = cls._x_y_from_byte_stream(b_io)
+
+        if bbox is not None:
+            # create bounding box for Point by duplicating coordinates
+            # skip shape if no overlap with bounding box
+            if not bbox_overlap(bbox, (x, y, x, y)):
+                return None
+
+        if shapeType == POINT:
+            return Point(x=x, y=y, oid=oid)
+
+        if shapeType == POINTZ:
+            z = PointZ._read_single_point_zs_from_byte_stream(b_io)[0]
+
+        m = PointM._read_single_point_ms_from_byte_stream(b_io, next_shape)[0]
+
+        if shapeType == POINTZ:
+            return PointZ(x=x, y=y, z=z, m=m, oid=oid)
+
+        return PointM(x=x, y=y, m=m, oid=oid)
+        # return Shape(shapeType=shapeType, points=[(x, y)], z=zs, m=ms, oid=oid)
+
+    @staticmethod
+    def write_to_byte_stream(b_io: WriteableBinStream, s: Shape, i: int) -> int:
+        # Serialize a single point
+        x, y = s.points[0][0], s.points[0][1]
+        n = Point._write_x_y_to_byte_stream(b_io, x, y, i)
+
+        # Write a single Z value
+        if s.shapeType in PointZ_shapeTypes:
+            n += PointZ._write_single_point_z_to_byte_stream(b_io, s, i)
+
+        # Write a single M value
+        if s.shapeType in PointM_shapeTypes:
+            n += PointM._write_single_point_m_to_byte_stream(b_io, s, i)
+
+        return n
+
+
+Polyline_shapeTypes = frozenset([POLYLINE, POLYLINEM, POLYLINEZ])
+
+
+class Polyline(_CanHaveParts):
+    def __init__(
+        self,
+        *args: PointsT,
+        lines: Optional[list[PointsT]] = None,
+        points: Optional[PointsT] = None,
+        parts: Optional[list[int]] = None,
+        bbox: Optional[BBox] = None,
+        oid: Optional[int] = None,
+    ):
+        if args:
+            if lines:
+                raise ShapefileException(
+                    "Specify Either: a) positional args, or: b) the keyword arg lines. "
+                    f"Not both. Got both: {args} and {lines=}. "
+                    "If this was intentional, after the other positional args, "
+                    "the arg passed to lines can be unpacked (arg1, arg2, *more_args, *lines, oid=oid,...)"
+                )
+            lines = list(args)
+        Shape.__init__(
+            self,
+            lines=lines,
+            points=points,
+            parts=parts,
+            bbox=bbox,
+            oid=oid,
+        )
+
+
+Polygon_shapeTypes = frozenset([POLYGON, POLYGONM, POLYGONZ])
+
+
+class Polygon(_CanHaveParts):
+    def __init__(
+        self,
+        *args: PointsT,
+        lines: Optional[list[PointsT]] = None,
+        parts: Optional[list[int]] = None,
+        points: Optional[PointsT] = None,
+        bbox: Optional[BBox] = None,
+        oid: Optional[int] = None,
+    ):
+        lines = list(args) if args else lines
+        Shape.__init__(
+            self,
+            lines=lines,
+            points=points,
+            parts=parts,
+            bbox=bbox,
+            oid=oid,
+        )
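+
+
+# Construction sketch: Polygon(lines=[[(0, 0), (0, 5), (5, 5), (5, 0)]])
+# closes the ring automatically (points[0] == points[-1]) and gets
+# parts == [0].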
" + " (arg1, arg2, *more_args, *points, oid=oid,...)" + ) + points = list(args) + Shape.__init__( + self, + points=points, + bbox=bbox, + oid=oid, + ) + + +# Not a PointM or a PointZ +_HasM_shapeTypes = frozenset( + [ + POLYLINEM, + POLYLINEZ, + POLYGONM, + POLYGONZ, + MULTIPOINTM, + MULTIPOINTZ, + MULTIPATCH, + ] +) + + +class _HasM(_CanHaveBBox): + m: Sequence[Optional[float]] + + @staticmethod + def _read_ms_from_byte_stream( + b_io: ReadSeekableBinStream, nPoints: int, next_shape: int + ) -> tuple[MBox, list[Optional[float]]]: + if next_shape - b_io.tell() >= 16: + mbox = unpack("<2d", b_io.read(16)) + # Measure values less than -10e38 are nodata values according to the spec + if next_shape - b_io.tell() >= nPoints * 8: + ms = [] + for m in unpack(f"<{nPoints}d", b_io.read(nPoints * 8)): + if m > NODATA: + ms.append(m) + else: + ms.append(None) + else: + ms = [None for _ in range(nPoints)] + return mbox, ms + + @staticmethod + def _write_ms_to_byte_stream( + b_io: WriteableBinStream, s: Shape, i: int, mbox: Optional[MBox] + ) -> int: + if not mbox or len(mbox) != 2: + raise ShapefileException(f"Two numbers required for mbox. Got: {mbox}") + # Write m extremes and values + # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA + # Note: missing m values are autoset to NODATA. + try: + num_bytes_written = b_io.write(pack("<2d", *mbox)) + except error: + raise ShapefileException( + f"Failed to write measure extremes for record {i}. Expected floats" + ) + try: + ms = cast(_HasM, s).m + + ms_to_encode = [m if m is not None else NODATA for m in ms] + + num_bytes_written += b_io.write(pack(f"<{len(ms)}d", *ms_to_encode)) + except error: + raise ShapefileException( + f"Failed to write measure values for record {i}. Expected floats" + ) + + return num_bytes_written + + +# Not a PointZ +_HasZ_shapeTypes = frozenset( + [ + POLYLINEZ, + POLYGONZ, + MULTIPOINTZ, + MULTIPATCH, + ] +) + + +class _HasZ(_CanHaveBBox): + z: Sequence[float] + + @staticmethod + def _read_zs_from_byte_stream( + b_io: ReadableBinStream, nPoints: int + ) -> tuple[ZBox, Sequence[float]]: + zbox = unpack("<2d", b_io.read(16)) + return zbox, _Array[float]("d", unpack(f"<{nPoints}d", b_io.read(nPoints * 8))) + + @staticmethod + def _write_zs_to_byte_stream( + b_io: WriteableBinStream, s: Shape, i: int, zbox: Optional[ZBox] + ) -> int: + if not zbox or len(zbox) != 2: + raise ShapefileException(f"Two numbers required for zbox. Got: {zbox}") + + # Write z extremes and values + # Note: missing z values are autoset to 0, but not sure if this is ideal. + try: + num_bytes_written = b_io.write(pack("<2d", *zbox)) + except error: + raise ShapefileException( + f"Failed to write elevation extremes for record {i}. Expected floats." + ) + try: + zs = cast(_HasZ, s).z + num_bytes_written += b_io.write(pack(f"<{len(zs)}d", *zs)) + except error: + raise ShapefileException( + f"Failed to write elevation values for record {i}. Expected floats." 
+
+    @staticmethod
+    def _write_zs_to_byte_stream(
+        b_io: WriteableBinStream, s: Shape, i: int, zbox: Optional[ZBox]
+    ) -> int:
+        if not zbox or len(zbox) != 2:
+            raise ShapefileException(f"Two numbers required for zbox. Got: {zbox}")
+
+        # Write z extremes and values
+        # Note: missing z values are autoset to 0, but not sure if this is ideal.
+        try:
+            num_bytes_written = b_io.write(pack("<2d", *zbox))
+        except error:
+            raise ShapefileException(
+                f"Failed to write elevation extremes for record {i}. Expected floats."
+            )
+        try:
+            zs = cast(_HasZ, s).z
+            num_bytes_written += b_io.write(pack(f"<{len(zs)}d", *zs))
+        except error:
+            raise ShapefileException(
+                f"Failed to write elevation values for record {i}. Expected floats."
+            )
+
+        return num_bytes_written
+
+
+MultiPatch_shapeTypes = frozenset([MULTIPATCH])
+
+
+class MultiPatch(_HasM, _HasZ, _CanHaveParts):
+    def __init__(
+        self,
+        *args: PointsT,
+        lines: Optional[list[PointsT]] = None,
+        partTypes: Optional[list[int]] = None,
+        z: Optional[list[float]] = None,
+        m: Optional[list[Optional[float]]] = None,
+        points: Optional[PointsT] = None,
+        parts: Optional[list[int]] = None,
+        bbox: Optional[BBox] = None,
+        mbox: Optional[MBox] = None,
+        zbox: Optional[ZBox] = None,
+        oid: Optional[int] = None,
+    ):
+        if args:
+            if lines:
+                raise ShapefileException(
+                    "Specify Either: a) positional args, or: b) the keyword arg lines. "
+                    f"Not both. Got both: {args} and {lines=}. "
+                    "If this was intentional, after the other positional args, "
+                    "the arg passed to lines can be unpacked (arg1, arg2, *more_args, *lines, oid=oid,...)"
+                )
+            lines = list(args)
+        Shape.__init__(
+            self,
+            lines=lines,
+            points=points,
+            parts=parts,
+            partTypes=partTypes,
+            z=z,
+            m=m,
+            bbox=bbox,
+            zbox=zbox,
+            mbox=mbox,
+            oid=oid,
+        )
+
+    @staticmethod
+    def _read_part_types_from_byte_stream(
+        b_io: ReadableBinStream, nParts: int
+    ) -> Sequence[int]:
+        return _Array[int]("i", unpack(f"<{nParts}i", b_io.read(nParts * 4)))
+
+    @staticmethod
+    def _write_part_types_to_byte_stream(b_io: WriteableBinStream, s: Shape) -> int:
+        return b_io.write(pack(f"<{len(s.partTypes)}i", *s.partTypes))
+
+
+PointM_shapeTypes = frozenset([POINTM, POINTZ])
+
+
+class PointM(Point):
+    def __init__(
+        self,
+        x: float,
+        y: float,
+        # same default as in Writer.__shpRecord (if s.shapeType in (11, 21):)
+        # PyShp encodes None m values as NODATA
+        m: Optional[float] = None,
+        oid: Optional[int] = None,
+    ):
+        Shape.__init__(self, points=[(x, y)], m=(m,), oid=oid)
+
+    @staticmethod
+    def _read_single_point_ms_from_byte_stream(
+        b_io: ReadSeekableBinStream, next_shape: int
+    ) -> tuple[Optional[float]]:
+        if next_shape - b_io.tell() >= 8:
+            (m,) = unpack("<d", b_io.read(8))
+            # Measure values less than -10e38 are nodata values according to the spec
+            if m > NODATA:
+                return (m,)
+        return (None,)
+
+    @staticmethod
+    def _write_single_point_m_to_byte_stream(
+        b_io: WriteableBinStream, s: Shape, i: int
+    ) -> int:
+        try:
+            s = cast(_HasM, s)
+            m = s.m[0] if s.m else None
+        except error:
+            raise ShapefileException(
+                f"Failed to write measure value for record {i}. Expected floats."
+            )
+
+        # Note: missing m values are autoset to NODATA.
+        m_to_encode = m if m is not None else NODATA
+
+        return b_io.write(pack("<1d", m_to_encode))
+
+
+PolylineM_shapeTypes = frozenset([POLYLINEM, POLYLINEZ])
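+
+
+# Sketch: PointM(1.0, 2.0) stores m == (None,); on write the None is encoded
+# as the NODATA sentinel (-10e38), and it reads back as None again.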
" + "If this was intentional, after the other positional args, " + "the arg passed to lines can be unpacked (arg1, arg2, *more_args, *lines, oid=oid,...)" + ) + lines = list(args) + Shape.__init__( + self, + lines=lines, + points=points, + parts=parts, + m=m, + bbox=bbox, + mbox=mbox, + oid=oid, + ) + + +PolygonM_shapeTypes = frozenset([POLYGONM, POLYGONZ]) + + +class PolygonM(Polygon, _HasM): + def __init__( + self, + *args: PointsT, + lines: Optional[list[PointsT]] = None, + parts: Optional[list[int]] = None, + m: Optional[list[Optional[float]]] = None, + points: Optional[PointsT] = None, + bbox: Optional[BBox] = None, + mbox: Optional[MBox] = None, + oid: Optional[int] = None, + ): + if args: + if lines: + raise ShapefileException( + "Specify Either: a) positional args, or: b) the keyword arg lines. " + f"Not both. Got both: {args} and {lines=}. " + "If this was intentional, after the other positional args, " + "the arg passed to lines can be unpacked (arg1, arg2, *more_args, *lines, oid=oid,...)" + ) + lines = list(args) + Shape.__init__( + self, + lines=lines, + points=points, + parts=parts, + m=m, + bbox=bbox, + mbox=mbox, + oid=oid, + ) + + +MultiPointM_shapeTypes = frozenset([MULTIPOINTM, MULTIPOINTZ]) + + +class MultiPointM(MultiPoint, _HasM): + def __init__( + self, + *args: PointT, + points: Optional[PointsT] = None, + m: Optional[Sequence[Optional[float]]] = None, + bbox: Optional[BBox] = None, + mbox: Optional[MBox] = None, + oid: Optional[int] = None, + ): + if args: + if points: + raise ShapefileException( + "Specify Either: a) positional args, or: b) the keyword arg points. " + f"Not both. Got both: {args} and {points=}. " + "If this was intentional, after the other positional args, " + "the arg passed to points can be unpacked, e.g. " + " (arg1, arg2, *more_args, *points, oid=oid,...)" + ) + points = list(args) + Shape.__init__( + self, + points=points, + m=m, + bbox=bbox, + mbox=mbox, + oid=oid, + ) + + +PointZ_shapeTypes = frozenset([POINTZ]) + + +class PointZ(PointM): + def __init__( + self, + x: float, + y: float, + z: float = 0.0, + m: Optional[float] = None, + oid: Optional[int] = None, + ): + Shape.__init__(self, points=[(x, y)], z=(z,), m=(m,), oid=oid) + + # same default as in Writer.__shpRecord (if s.shapeType == 11:) + z: Sequence[float] = (0.0,) + + @staticmethod + def _read_single_point_zs_from_byte_stream(b_io: ReadableBinStream) -> tuple[float]: + return unpack(" int: + # Note: missing z values are autoset to 0, but not sure if this is ideal. + z: float = 0.0 + # then write value + + try: + if s.z: + z = s.z[0] + except error: + raise ShapefileException( + f"Failed to write elevation value for record {i}. Expected floats." 
+
+    @staticmethod
+    def _write_single_point_z_to_byte_stream(
+        b_io: WriteableBinStream, s: Shape, i: int
+    ) -> int:
+        # Note: missing z values are autoset to 0, but not sure if this is ideal.
+        z: float = 0.0
+        # then write value
+
+        try:
+            if s.z:
+                z = s.z[0]
+        except error:
+            raise ShapefileException(
+                f"Failed to write elevation value for record {i}. Expected floats."
+            )
+
+        return b_io.write(pack("<d", z))
+
+
+class _Record(list):
+    """
+    A class to hold a record. Subclasses list to ensure compatibility with
+    former work and to reuse all the optimizations of the builtin list.
+    In addition to the list interface, the values of the record
+    can also be retrieved using the field's name. For example if the dbf contains
+    a field ID at position 0, the ID can be retrieved with the position, the field name
+    as a key, or the field name as an attribute.
+
+    >>> # Create a Record with one field, normally the record is created by the Reader class
+    >>> r = _Record({'ID': 0}, [0])
+    >>> print(r[0])
+    0
+    >>> print(r['ID'])
+    0
+    >>> print(r.ID)
+    0
+    """
+
+    def __init__(
+        self,
+        field_positions: dict[str, int],
+        values: Iterable[RecordValue],
+        oid: Optional[int] = None,
+    ):
+        """
+        A Record should be created by the Reader class
+
+        :param field_positions: A dict mapping field names to field positions
+        :param values: A sequence of values
+        :param oid: The object id, an int (optional)
+        """
+        self.__field_positions = field_positions
+        if oid is not None:
+            self.__oid = oid
+        else:
+            self.__oid = -1
+        list.__init__(self, values)
+
+    def __getattr__(self, item: str) -> RecordValue:
+        """
+        __getattr__ is called if an attribute is used that does
+        not exist in the normal sense. For example r=Record(...), r.ID
+        calls r.__getattr__('ID'), but r.index(5) calls list.index(r, 5)
+        :param item: The field name, used as attribute
+        :return: Value of the field
+        :raises: AttributeError, if item is not a field of the shapefile
+                 and IndexError, if the field exists but the field's
+                 corresponding value in the Record does not exist
+        """
+        try:
+            if item == "__setstate__":  # Prevent infinite loop from copy.deepcopy()
+                raise AttributeError("_Record does not implement __setstate__")
+            index = self.__field_positions[item]
+            return list.__getitem__(self, index)
+        except KeyError:
+            raise AttributeError(f"{item} is not a field name")
+        except IndexError:
+            raise IndexError(
+                f"{item} found as a field but not enough values available."
+            )
+
+    def __setattr__(self, key: str, value: RecordValue):
+        """
+        Sets a value of a field attribute
+        :param key: The field name
+        :param value: the value of that field
+        :return: None
+        :raises: AttributeError, if key is not a field of the shapefile
+        """
+        if key.startswith("_"):  # Prevent infinite loop when setting mangled attribute
+            return list.__setattr__(self, key, value)
+        try:
+            index = self.__field_positions[key]
+            return list.__setitem__(self, index, value)
+        except KeyError:
+            raise AttributeError(f"{key} is not a field name")
+
+    def __getitem__(self, item):
+        """
+        Extends the normal list item access with
+        access using a fieldname
+
+        For example r['ID'], r[0]
+        :param item: Either the position of the value or the name of a field
+        :return: the value of the field
+        """
+        try:
+            return list.__getitem__(self, item)
+        except TypeError:
+            try:
+                index = self.__field_positions[item]
+            except KeyError:
+                index = None
+            if index is not None:
+                return list.__getitem__(self, index)
+
+        raise IndexError(f'"{item}" is not a field name and not an int')
+
+    def __setitem__(self, key, value):
+        """
+        Extends the normal list item access with
+        access using a fieldname
+
+        For example r['ID']=2, r[0]=2
+        :param key: Either the position of the value or the name of a field
+        :param value: the new value of the field
+        """
+        try:
+            return list.__setitem__(self, key, value)
+        except TypeError:
+            index = self.__field_positions.get(key)
+            if index is not None:
+                return list.__setitem__(self, index, value)
+
+            raise IndexError(f"{key} is not a field name and not an int")
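+
+    # Sketch: records created manually (rather than by a Reader) default to
+    # oid == -1, e.g. _Record({'ID': 0}, [5]).oid == -1.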
+
+    @property
+    def oid(self) -> int:
+        """The index position of the record in the original shapefile"""
+        return self.__oid
+
+    def as_dict(self, date_strings: bool = False) -> dict[str, RecordValue]:
+        """
+        Returns this Record as a dictionary using the field names as keys
+        :return: dict
+        """
+        dct = {f: self[i] for f, i in self.__field_positions.items()}
+        if date_strings:
+            for k, v in dct.items():
+                if isinstance(v, date):
+                    dct[k] = f"{v.year:04d}{v.month:02d}{v.day:02d}"
+        return dct
+
+    def __repr__(self):
+        return f"Record #{self.__oid}: {list(self)}"
+
+    def __dir__(self) -> list[str]:
+        """
+        Helps to show the field names in an interactive environment like IPython.
+        See: http://ipython.readthedocs.io/en/stable/config/integrating.html
+
+        :return: List of method names and fields
+        """
+        default = list(
+            dir(type(self))
+        )  # default list methods and attributes of this class
+        fnames = list(
+            self.__field_positions.keys()
+        )  # plus field names (random order if Python version < 3.6)
+        return default + fnames
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            if self.__field_positions != other.__field_positions:
+                return False
+            return list.__eq__(self, other)
+        return NotImplemented
+
+
+class ShapeRecord:
+    """A ShapeRecord object containing a shape along with its attributes.
+    Provides the GeoJSON __geo_interface__ to return a Feature dictionary."""
+
+    def __init__(self, shape: Optional[Shape] = None, record: Optional[_Record] = None):
+        self.shape = shape
+        self.record = record
+
+    @property
+    def __geo_interface__(self) -> GeoJSONFeature:
+        return {
+            "type": "Feature",
+            "properties": None
+            if self.record is None
+            else self.record.as_dict(date_strings=True),
+            "geometry": None
+            if self.shape is None or self.shape.shapeType == NULL
+            else self.shape.__geo_interface__,
+        }
+
+
+class Shapes(list[Optional[Shape]]):
+    """A class to hold a list of Shape objects. Subclasses list to ensure compatibility with
+    former work and to reuse all the optimizations of the builtin list.
+    In addition to the list interface, this also provides the GeoJSON __geo_interface__
+    to return a GeometryCollection dictionary."""
+
+    def __repr__(self):
+        return f"Shapes: {list(self)}"
+
+    @property
+    def __geo_interface__(self) -> GeoJSONGeometryCollection:
+        # Note: currently this will fail if any of the shapes are null-geometries
+        # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords
+        collection = GeoJSONGeometryCollection(
+            type="GeometryCollection",
+            geometries=[shape.__geo_interface__ for shape in self if shape is not None],
+        )
+        return collection
+
+
+class ShapeRecords(list[ShapeRecord]):
+    """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with
+    former work and to reuse all the optimizations of the builtin list.
+    In addition to the list interface, this also provides the GeoJSON __geo_interface__
+    to return a FeatureCollection dictionary."""
+
+    def __repr__(self):
+        return f"ShapeRecords: {list(self)}"
+
+    @property
+    def __geo_interface__(self) -> GeoJSONFeatureCollection:
+        return GeoJSONFeatureCollection(
+            type="FeatureCollection",
+            features=[shaperec.__geo_interface__ for shaperec in self],
+        )
+
+
+class ShapefileException(Exception):
+    """An exception to handle shapefile specific problems."""
+
+
+class _NoShpSentinel:
+    """For use as a default value for shp to preserve the
+    behaviour (from when all keyword args were gathered
+    in the **kwargs dict) in case someone explicitly
+    called Reader(shp=None) to load self.shx.
+    """
+
+
+class Reader:
+    """Reads the three files of a shapefile as a unit or
+    separately. If one of the three files (.shp, .shx,
+    .dbf) is missing, no exception is thrown until you try
+    to call a method that depends on that particular file.
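+
+    For example (an illustrative sketch; "example.shp" is a placeholder):
+
+        sf = Reader(shp=open("example.shp", "rb"))
+        shapes = sf.shapes()   # OK: geometry only needs the .shp stream
+        # sf.records() would raise ShapefileException (no .dbf provided)
+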
+ The .shx index file is used if available for efficiency + but is not required to read the geometry from the .shp + file. The "shapefile" argument in the constructor is the + name of the file you want to open, and can be the path + to a shapefile on a local filesystem, inside a zipfile, + or a url. + + You can instantiate a Reader without specifying a shapefile + and then specify one later with the load() method. + + Only the shapefile headers are read upon loading. Content + within each file is only accessed when required and as + efficiently as possible. Shapefiles are usually not large + but they can be. + """ + + CONSTITUENT_FILE_EXTS = ["shp", "shx", "dbf"] + assert all(ext.islower() for ext in CONSTITUENT_FILE_EXTS) + + def _assert_ext_is_supported(self, ext: str): + assert ext in self.CONSTITUENT_FILE_EXTS + + def __init__( + self, + shapefile_path: Union[str, os.PathLike] = "", + /, + *, + encoding: str = "utf-8", + encodingErrors: str = "strict", + shp: Union[_NoShpSentinel, Optional[BinaryFileT]] = _NoShpSentinel(), + shx: Optional[BinaryFileT] = None, + dbf: Optional[BinaryFileT] = None, + # Keep kwargs even though unused, to preserve PyShp 2.4 API + **kwargs, + ): + self.shp = None + self.shx = None + self.dbf = None + self._files_to_close: list[BinaryFileStreamT] = [] + self.shapeName = "Not specified" + self._offsets: list[int] = [] + self.shpLength: Optional[int] = None + self.numRecords: Optional[int] = None + self.numShapes: Optional[int] = None + self.fields: list[Field] = [] + self.__dbfHdrLength = 0 + self.__fieldLookup: dict[str, int] = {} + self.encoding = encoding + self.encodingErrors = encodingErrors + # See if a shapefile name was passed as the first argument + if shapefile_path: + path = fsdecode_if_pathlike(shapefile_path) + if isinstance(path, str): + if ".zip" in path: + # Shapefile is inside a zipfile + if path.count(".zip") > 1: + # Multiple nested zipfiles + raise ShapefileException( + f"Reading from multiple nested zipfiles is not supported: {path}" + ) + # Split into zipfile and shapefile paths + if path.endswith(".zip"): + zpath = path + shapefile = None + else: + zpath = path[: path.find(".zip") + 4] + shapefile = path[path.find(".zip") + 4 + 1 :] + + zipfileobj: Union[tempfile._TemporaryFileWrapper, io.BufferedReader] + # Create a zip file handle + if zpath.startswith("http"): + # Zipfile is from a url + # Download to a temporary url and treat as normal zipfile + req = Request( + zpath, + headers={ + "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" + }, + ) + resp = urlopen(req) + # write zipfile data to a read+write tempfile and use as source, gets deleted when garbage collected + zipfileobj = tempfile.NamedTemporaryFile( + mode="w+b", suffix=".zip", delete=True + ) + zipfileobj.write(resp.read()) + zipfileobj.seek(0) + else: + # Zipfile is from a file + zipfileobj = open(zpath, mode="rb") + # Open the zipfile archive + with zipfile.ZipFile(zipfileobj, "r") as archive: + if not shapefile: + # Only the zipfile path is given + # Inspect zipfile contents to find the full shapefile path + shapefiles = [ + name + for name in archive.namelist() + if (name.endswith(".SHP") or name.endswith(".shp")) + ] + # The zipfile must contain exactly one shapefile + if len(shapefiles) == 0: + raise ShapefileException( + "Zipfile does not contain any shapefiles" + ) + if len(shapefiles) == 1: + shapefile = shapefiles[0] + else: + raise ShapefileException( + f"Zipfile contains more 
than one shapefile: {shapefiles}. " + "Please specify the full path to the shapefile you would like to open." + ) + # Try to extract file-like objects from zipfile + shapefile = os.path.splitext(shapefile)[ + 0 + ] # root shapefile name + for lower_ext in self.CONSTITUENT_FILE_EXTS: + for cased_ext in [lower_ext, lower_ext.upper()]: + try: + member = archive.open(f"{shapefile}.{cased_ext}") + # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() + fileobj = tempfile.NamedTemporaryFile( + mode="w+b", delete=True + ) + fileobj.write(member.read()) + fileobj.seek(0) + setattr(self, lower_ext, fileobj) + self._files_to_close.append(fileobj) + except (OSError, AttributeError, KeyError): + pass + # Close and delete the temporary zipfile + try: + zipfileobj.close() + except: # noqa: E722 + pass + # Try to load shapefile + if self.shp or self.dbf: + # Load and exit early + self.load() + return + + raise ShapefileException( + f"No shp or dbf file found in zipfile: {path}" + ) + + if path.startswith("http"): + # Shapefile is from a url + # Download each file to temporary path and treat as normal shapefile path + urlinfo = urlparse(path) + urlpath = urlinfo[2] + urlpath, _ = os.path.splitext(urlpath) + shapefile = os.path.basename(urlpath) + for ext in ["shp", "shx", "dbf"]: + try: + _urlinfo = list(urlinfo) + _urlinfo[2] = urlpath + "." + ext + _path = urlunparse(_urlinfo) + req = Request( + _path, + headers={ + "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" + }, + ) + resp = urlopen(req) + # write url data to a read+write tempfile and use as source, gets deleted on close() + fileobj = tempfile.NamedTemporaryFile( + mode="w+b", delete=True + ) + fileobj.write(resp.read()) + fileobj.seek(0) + setattr(self, ext, fileobj) + self._files_to_close.append(fileobj) + except HTTPError: + pass + if self.shp or self.dbf: + # Load and exit early + self.load() + return + + raise ShapefileException(f"No shp or dbf file found at url: {path}") + + # Local file path to a shapefile + # Load and exit early + self.load(path) + return + + if not isinstance(shp, _NoShpSentinel): + self.shp = self.__seek_0_on_file_obj_wrap_or_open_from_name("shp", shp) + self.shx = self.__seek_0_on_file_obj_wrap_or_open_from_name("shx", shx) + + self.dbf = self.__seek_0_on_file_obj_wrap_or_open_from_name("dbf", dbf) + + # Load the files + if self.shp or self.dbf: + self._try_to_set_constituent_file_headers() + + def __seek_0_on_file_obj_wrap_or_open_from_name( + self, + ext: str, + file_: Optional[BinaryFileT], + ) -> Union[None, IO[bytes]]: + # assert ext in {'shp', 'dbf', 'shx'} + self._assert_ext_is_supported(ext) + + if file_ is None: + return None + + if isinstance(file_, str): + baseName, __ = os.path.splitext(file_) + return self._load_constituent_file(baseName, ext) + + if hasattr(file_, "read"): + # Copy if required + try: + file_.seek(0) + return file_ + except (NameError, io.UnsupportedOperation): + return io.BytesIO(file_.read()) + + raise ShapefileException( + f"Could not load shapefile constituent file from: {file_}" + ) + + def __str__(self): + """ + Use some general info on the shapefile as __str__ + """ + info = ["shapefile Reader"] + if self.shp: + info.append( + f" {len(self)} shapes (type '{SHAPETYPE_LOOKUP[self.shapeType]}')" + ) + if self.dbf: + info.append(f" {len(self)} records ({len(self.fields)} fields)") + return "\n".join(info) + + def __enter__(self): + """ + Enter phase of context 
manager. + """ + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """ + Exit phase of context manager, close opened files. + """ + self.close() + + def __len__(self): + """Returns the number of shapes/records in the shapefile.""" + if self.dbf: + # Preferably use dbf record count + if self.numRecords is None: + self.__dbfHeader() + + return self.numRecords + + if self.shp: + # Otherwise use shape count + if self.shx: + if self.numShapes is None: + self.__shxHeader() + + return self.numShapes + + # Index file not available, iterate all shapes to get total count + if self.numShapes is None: + # Determine length of shp file + shp = self.shp + checkpoint = shp.tell() + shp.seek(0, 2) + shpLength = shp.tell() + shp.seek(100) + # Do a fast shape iteration until end of file. + offsets = [] + pos = shp.tell() + while pos < shpLength: + offsets.append(pos) + # Unpack the shape header only + (__recNum, recLength) = unpack_2_int32_be(shp.read(8)) + # Jump to next shape position + pos += 8 + (2 * recLength) + shp.seek(pos) + # Set numShapes and offset indices + self.numShapes = len(offsets) + self._offsets = offsets + # Return to previous file position + shp.seek(checkpoint) + + return self.numShapes + + # No file loaded yet, treat as 'empty' shapefile + return 0 + + def __iter__(self): + """Iterates through the shapes/records in the shapefile.""" + yield from self.iterShapeRecords() + + @property + def __geo_interface__(self) -> GeoJSONFeatureCollectionWithBBox: + shaperecords = self.shapeRecords() + fcollection = GeoJSONFeatureCollectionWithBBox( + bbox=list(self.bbox), + **shaperecords.__geo_interface__, + ) + return fcollection + + @property + def shapeTypeName(self) -> str: + return SHAPETYPE_LOOKUP[self.shapeType] + + def load(self, shapefile=None): + """Opens a shapefile from a filename or file-like + object. Normally this method would be called by the + constructor with the file name as an argument.""" + if shapefile: + (shapeName, __ext) = os.path.splitext(shapefile) + self.shapeName = shapeName + self.load_shp(shapeName) + self.load_shx(shapeName) + self.load_dbf(shapeName) + if not (self.shp or self.dbf): + raise ShapefileException( + f"Unable to open {shapeName}.dbf or {shapeName}.shp." + ) + self._try_to_set_constituent_file_headers() + + def _try_to_set_constituent_file_headers(self): + if self.shp: + self.__shpHeader() + if self.dbf: + self.__dbfHeader() + if self.shx: + self.__shxHeader() + + def _try_get_open_constituent_file( + self, + shapefile_name: str, + ext: str, + ) -> Union[IO[bytes], None]: + """ + Attempts to open a .shp, .dbf or .shx file, + with both lower case and upper case file extensions, + and return it. If it was not possible to open the file, None is returned. + """ + # typing.LiteralString is only available from Python 3.11 onwards. + # https://docs.python.org/3/library/typing.html#typing.LiteralString + # assert ext in {'shp', 'dbf', 'shx'} + self._assert_ext_is_supported(ext) + + try: + return open(f"{shapefile_name}.{ext}", "rb") + except OSError: + try: + return open(f"{shapefile_name}.{ext.upper()}", "rb") + except OSError: + return None + + def _load_constituent_file( + self, + shapefile_name: str, + ext: str, + ) -> Union[IO[bytes], None]: + """ + Attempts to open a .shp, .dbf or .shx file, with the extension + as both lower and upper case, and if successful, append it to + self._files_to_close.
+ """ + shp_dbf_or_dhx_file = self._try_get_open_constituent_file(shapefile_name, ext) + if shp_dbf_or_dhx_file is not None: + self._files_to_close.append(shp_dbf_or_dhx_file) + return shp_dbf_or_dhx_file + + def load_shp(self, shapefile_name): + """ + Attempts to load file with .shp extension as both lower and upper case + """ + self.shp = self._load_constituent_file(shapefile_name, "shp") + + def load_shx(self, shapefile_name): + """ + Attempts to load file with .shx extension as both lower and upper case + """ + self.shx = self._load_constituent_file(shapefile_name, "shx") + + def load_dbf(self, shapefile_name): + """ + Attempts to load file with .dbf extension as both lower and upper case + """ + self.dbf = self._load_constituent_file(shapefile_name, "dbf") + + def __del__(self): + self.close() + + def close(self): + # Close any files that the reader opened (but not those given by user) + for attribute in self._files_to_close: + if hasattr(attribute, "close"): + try: + attribute.close() + except OSError: + pass + self._files_to_close = [] + + def __getFileObj(self, f: Optional[T]) -> T: + """Checks to see if the requested shapefile file object is + available. If not a ShapefileException is raised.""" + if not f: + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object." + ) + if self.shp and self.shpLength is None: + self.load() + if self.dbf and len(self.fields) == 0: + self.load() + return f + + def __restrictIndex(self, i: int) -> int: + """Provides list-like handling of a record index with a clearer + error message if the index is out of bounds.""" + if self.numRecords: + rmax = self.numRecords - 1 + if abs(i) > rmax: + raise IndexError( + f"Shape or Record index: {i} out of range. Max index: {rmax}" + ) + if i < 0: + i = range(self.numRecords)[i] + return i + + def __shpHeader(self) -> None: + """Reads the header information from a .shp file.""" + if not self.shp: + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no shp file found" + ) + + shp = self.shp + # File length (16-bit word * 2 = bytes) + shp.seek(24) + self.shpLength = unpack(">i", shp.read(4))[0] * 2 + # Shape type + shp.seek(32) + self.shapeType = unpack("= NODATA else None + for m_bound in unpack("<2d", shp.read(16)) + ] + # self.mbox = MBox(mmin=m_bounds[0], mmax=m_bounds[1]) + self.mbox: tuple[Optional[float], Optional[float]] = (m_bounds[0], m_bounds[1]) + + def __shape( + self, oid: Optional[int] = None, bbox: Optional[BBox] = None + ) -> Optional[Shape]: + """Returns the header info and geometry for a single shape.""" + + f = self.__getFileObj(self.shp) + + # shape = Shape(oid=oid) + (__recNum, recLength) = unpack_2_int32_be(f.read(8)) + # Determine the start of the next record + + # Convert from num of 16 bit words, to 8 bit bytes + recLength_bytes = 2 * recLength + + # next_shape = f.tell() + recLength_bytes + + # Read entire record into memory to avoid having to call + # seek on the file afterwards + b_io: ReadSeekableBinStream = io.BytesIO(f.read(recLength_bytes)) + b_io.seek(0) + + shapeType = unpack("i", shx.read(4))[0] * 2) - 100 + self.numShapes = shxRecordLength // 8 + + def __shxOffsets(self): + """Reads the shape offset positions from a .shx file""" + shx = self.shx + if not shx: + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no shx file found" + ) + if self.numShapes is None: + raise ShapefileException( + "numShapes must not be None. " + " Was there a problem with .__shxHeader() ?" 
+ f"Got: {self.numShapes=}" + ) + # Jump to the first record. + shx.seek(100) + # Each index record consists of two nrs, we only want the first one + shxRecords = _Array[int]("i", shx.read(2 * self.numShapes * 4)) + if sys.byteorder != "big": + shxRecords.byteswap() + self._offsets = [2 * el for el in shxRecords[::2]] + + def __shapeIndex(self, i: Optional[int] = None) -> Optional[int]: + """Returns the offset in a .shp file for a shape based on information + in the .shx index file.""" + shx = self.shx + # Return None if no shx or no index requested + if not shx or i is None: + return None + # At this point, we know the shx file exists + if not self._offsets: + self.__shxOffsets() + return self._offsets[i] + + def shape(self, i: int = 0, bbox: Optional[BBox] = None) -> Optional[Shape]: + """Returns a shape object for a shape in the geometry + record file. + If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), + returns None if the shape is not within that region. + """ + shp = self.__getFileObj(self.shp) + i = self.__restrictIndex(i) + offset = self.__shapeIndex(i) + if not offset: + # Shx index not available. + # Determine length of shp file + shp.seek(0, 2) + shpLength = shp.tell() + shp.seek(100) + # Do a fast shape iteration until the requested index or end of file. + _i = 0 + offset = shp.tell() + while offset < shpLength: + if _i == i: + # Reached the requested index, exit loop with the offset value + break + # Unpack the shape header only + (__recNum, recLength) = unpack_2_int32_be(shp.read(8)) + # Jump to next shape position + offset += 8 + (2 * recLength) + shp.seek(offset) + _i += 1 + # If the index was not found, it likely means the .shp file is incomplete + if _i != i: + raise ShapefileException( + f"Shape index {i} is out of bounds; the .shp file only contains {_i} shapes" + ) + + # Seek to the offset and read the shape + shp.seek(offset) + return self.__shape(oid=i, bbox=bbox) + + def shapes(self, bbox: Optional[BBox] = None) -> Shapes: + """Returns all shapes in a shapefile. + To only read shapes within a given spatial region, specify the 'bbox' + arg as a list or tuple of xmin,ymin,xmax,ymax. + """ + shapes = Shapes() + shapes.extend(self.iterShapes(bbox=bbox)) + return shapes + + def iterShapes(self, bbox: Optional[BBox] = None) -> Iterator[Optional[Shape]]: + """Returns a generator of shapes in a shapefile. Useful + for handling large shapefiles. + To only read shapes within a given spatial region, specify the 'bbox' + arg as a list or tuple of xmin,ymin,xmax,ymax. + """ + shp = self.__getFileObj(self.shp) + # Found shapefiles which report incorrect + # shp file length in the header. Can't trust + # that so we seek to the end of the file + # and figure it out. + shp.seek(0, 2) + shpLength = shp.tell() + shp.seek(100) + + if self.numShapes: + # Iterate exactly the number of shapes from shx header + for i in range(self.numShapes): + # MAYBE: check if more left of file or exit early? 
+ shape = self.__shape(oid=i, bbox=bbox) + if shape: + yield shape + else: + # No shx file, unknown nr of shapes + # Instead iterate until reach end of file + # Collect the offset indices during iteration + i = 0 + offsets = [] + pos = shp.tell() + while pos < shpLength: + offsets.append(pos) + shape = self.__shape(oid=i, bbox=bbox) + pos = shp.tell() + if shape: + yield shape + i += 1 + # Entire shp file consumed + # Update the number of shapes and list of offsets + assert i == len(offsets) + self.numShapes = i + self._offsets = offsets + + def __dbfHeader(self) -> None: + """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" + + if not self.dbf: + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no dbf file found)" + ) + dbf = self.dbf + # read relevant header parts + dbf.seek(0) + self.numRecords, self.__dbfHdrLength, self.__recordLength = unpack( + " tuple[str, int]: + """Calculates the format and size of a .dbf record. Optional 'fields' arg + specifies which fieldnames to unpack and which to ignore. Note that this + always includes the DeletionFlag at index 0, regardless of the 'fields' arg. + """ + if self.numRecords is None: + self.__dbfHeader() + structcodes = [f"{fieldinfo.size}s" for fieldinfo in self.fields] + if fields is not None: + # only unpack specified fields, ignore others using padbytes (x) + structcodes = [ + code + if fieldinfo.name in fields + or fieldinfo.name == "DeletionFlag" # always unpack delflag + else f"{fieldinfo.size}x" + for fieldinfo, code in zip(self.fields, structcodes) + ] + fmt = "".join(structcodes) + fmtSize = calcsize(fmt) + # total size of fields should add up to recordlength from the header + while fmtSize < self.__recordLength: + # if not, pad byte until reaches recordlength + fmt += "x" + fmtSize += 1 + return (fmt, fmtSize) + + def __recordFields( + self, fields: Optional[Iterable[str]] = None + ) -> tuple[list[Field], dict[str, int], Struct]: + """Returns the necessary info required to unpack a record's fields, + restricted to a subset of fieldnames 'fields' if specified. + Returns a list of field info tuples, a name-index lookup dict, + and a Struct instance for unpacking these fields. Note that DeletionFlag + is not a valid field. + """ + if fields is not None: + # restrict info to the specified fields + # first ignore repeated field names (order doesn't matter) + unique_fields = list(set(fields)) + # get the struct + fmt, __fmtSize = self.__recordFmt(fields=unique_fields) + recStruct = Struct(fmt) + # make sure the given fieldnames exist + for name in unique_fields: + if name not in self.__fieldLookup or name == "DeletionFlag": + raise ValueError(f'"{name}" is not a valid field name') + # fetch relevant field info tuples + fieldTuples = [] + for fieldinfo in self.fields[1:]: + name = fieldinfo[0] + if name in unique_fields: + fieldTuples.append(fieldinfo) + # store the field positions + recLookup = {f[0]: i for i, f in enumerate(fieldTuples)} + else: + # use all the dbf fields + fieldTuples = self.fields[1:] # sans deletion flag + recStruct = self.__fullRecStruct + recLookup = self.__fullRecLookup + return fieldTuples, recLookup, recStruct + + def __record( + self, + fieldTuples: list[Field], + recLookup: dict[str, int], + recStruct: Struct, + oid: Optional[int] = None, + ) -> Optional[_Record]: + """Reads and returns a dbf record row as a list of values. 
Requires specifying + a list of field info Field namedtuples 'fieldTuples', a record name-index dict 'recLookup', + and a Struct instance 'recStruct' for unpacking these fields. + """ + f = self.__getFileObj(self.dbf) + + # The only format chars in the fmt from self.__recordFmt, in recStruct from __recordFields, + # are s and x (ascii encoded str and pad byte) so everything in recordContents is bytes + # https://docs.python.org/3/library/struct.html#format-characters + recordContents = recStruct.unpack(f.read(recStruct.size)) + + # deletion flag field is always unpacked as first value (see __recordFmt) + if recordContents[0] != b" ": + # deleted record + return None + + # drop deletion flag from values + recordContents = recordContents[1:] + + # check that values match fields + if len(fieldTuples) != len(recordContents): + raise ShapefileException( + f"Number of record values ({len(recordContents)}) is different from the requested " + f"number of fields ({len(fieldTuples)})" + ) + + # parse each value + record = [] + for (__name, typ, __size, decimal), value in zip(fieldTuples, recordContents): + if typ is FieldType.N or typ is FieldType.F: + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + value = value.split(b"\0")[0] + value = value.replace(b"*", b"") # QGIS NULL is all '*' chars + if value == b"": + value = None + elif decimal: + try: + value = float(value) + except ValueError: + # not parseable as float, set to None + value = None + else: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + value = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. + try: + value = int(float(value)) + except ValueError: + # not parseable as int, set to None + value = None + elif typ is FieldType.D: + # date: 8 bytes - date stored as a string in the format YYYYMMDD. + if ( + not value.replace(b"\x00", b"") + .replace(b" ", b"") + .replace(b"0", b"") + ): + # dbf date field has no official null value + # but can check for all hex null-chars, all spaces, or all 0s (QGIS null) + value = None + else: + try: + # return as python date object + y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8]) + value = date(y, m, d) + except (TypeError, ValueError): + # if invalid date, just return as unicode string so user can decide + value = str(value.strip()) + elif typ is FieldType.L: + # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. + if value == b" ": + value = None # space means missing or not yet set + else: + if value in b"YyTt1": + value = True + elif value in b"NnFf0": + value = False + else: + value = None # unknown value is set to missing + else: + value = value.decode(self.encoding, self.encodingErrors) + value = value.strip().rstrip( + "\x00" + ) # remove null-padding at end of strings + record.append(value) + + return _Record(recLookup, record, oid) + + def record( + self, i: int = 0, fields: Optional[list[str]] = None + ) -> Optional[_Record]: + """Returns a specific dbf record based on the supplied index. + To only read some of the fields, specify the 'fields' arg as a + list of one or more fieldnames.
+ """ + f = self.__getFileObj(self.dbf) + if self.numRecords is None: + self.__dbfHeader() + i = self.__restrictIndex(i) + recSize = self.__recordLength + f.seek(0) + f.seek(self.__dbfHdrLength + (i * recSize)) + fieldTuples, recLookup, recStruct = self.__recordFields(fields) + return self.__record( + oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct + ) + + def records(self, fields: Optional[list[str]] = None) -> list[_Record]: + """Returns all records in a dbf file. + To only read some of the fields, specify the 'fields' arg as a + list of one or more fieldnames. + """ + if self.numRecords is None: + self.__dbfHeader() + records = [] + f = self.__getFileObj(self.dbf) + f.seek(self.__dbfHdrLength) + fieldTuples, recLookup, recStruct = self.__recordFields(fields) + # self.__dbfHeader() sets self.numRecords, so it's fine to cast it to int + # (to tell mypy it's not None). + for i in range(cast(int, self.numRecords)): + r = self.__record( + oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct + ) + if r: + records.append(r) + return records + + def iterRecords( + self, + fields: Optional[list[str]] = None, + start: int = 0, + stop: Optional[int] = None, + ) -> Iterator[Optional[_Record]]: + """Returns a generator of records in a dbf file. + Useful for large shapefiles or dbf files. + To only read some of the fields, specify the 'fields' arg as a + list of one or more fieldnames. + By default yields all records. Otherwise, specify start + (default: 0) or stop (default: number_of_records) + to only yield record numbers i, where + start <= i < stop, (or + start <= i < number_of_records + stop + if stop < 0). + """ + if self.numRecords is None: + self.__dbfHeader() + if not isinstance(self.numRecords, int): + raise ShapefileException( + "Error when reading number of Records in dbf file header" + ) + f = self.__getFileObj(self.dbf) + start = self.__restrictIndex(start) + if stop is None: + stop = self.numRecords + elif abs(stop) > self.numRecords: + raise IndexError( + f"abs(stop): {abs(stop)} exceeds number of records: {self.numRecords}." + ) + elif stop < 0: + stop = range(self.numRecords)[stop] + recSize = self.__recordLength + f.seek(self.__dbfHdrLength + (start * recSize)) + fieldTuples, recLookup, recStruct = self.__recordFields(fields) + for i in range(start, stop): + r = self.__record( + oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct + ) + if r: + yield r + + def shapeRecord( + self, + i: int = 0, + fields: Optional[list[str]] = None, + bbox: Optional[BBox] = None, + ) -> Optional[ShapeRecord]: + """Returns a combination geometry and attribute record for the + supplied record index. + To only read some of the fields, specify the 'fields' arg as a + list of one or more fieldnames. + If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), + returns None if the shape is not within that region. + """ + i = self.__restrictIndex(i) + shape = self.shape(i, bbox=bbox) + if shape: + record = self.record(i, fields=fields) + return ShapeRecord(shape=shape, record=record) + return None + + def shapeRecords( + self, + fields: Optional[list[str]] = None, + bbox: Optional[BBox] = None, + ) -> ShapeRecords: + """Returns a list of combination geometry/attribute records for + all records in a shapefile. + To only read some of the fields, specify the 'fields' arg as a + list of one or more fieldnames. + To only read entries within a given spatial region, specify the 'bbox' + arg as a list or tuple of xmin,ymin,xmax,ymax. 
+ """ + return ShapeRecords(self.iterShapeRecords(fields=fields, bbox=bbox)) + + def iterShapeRecords( + self, + fields: Optional[list[str]] = None, + bbox: Optional[BBox] = None, + ) -> Iterator[ShapeRecord]: + """Returns a generator of combination geometry/attribute records for + all records in a shapefile. + To only read some of the fields, specify the 'fields' arg as a + list of one or more fieldnames. + To only read entries within a given spatial region, specify the 'bbox' + arg as a list or tuple of xmin,ymin,xmax,ymax. + """ + if bbox is None: + # iterate through all shapes and records + for shape, record in zip( + self.iterShapes(), self.iterRecords(fields=fields) + ): + yield ShapeRecord(shape=shape, record=record) + else: + # only iterate where shape.bbox overlaps with the given bbox + # TODO: internal __record method should be faster but would have to + # make sure to seek to correct file location... + + # fieldTuples,recLookup,recStruct = self.__recordFields(fields) + for shape in self.iterShapes(bbox=bbox): + if shape: + # record = self.__record(oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct) + record = self.record(i=shape.oid, fields=fields) + yield ShapeRecord(shape=shape, record=record) + + +class Writer: + """Provides write support for ESRI Shapefiles.""" + + W = TypeVar("W", bound=WriteSeekableBinStream) + + def __init__( + self, + target: Union[str, os.PathLike, None] = None, + shapeType: Optional[int] = None, + autoBalance: bool = False, + *, + encoding: str = "utf-8", + encodingErrors: str = "strict", + shp: Optional[WriteSeekableBinStream] = None, + shx: Optional[WriteSeekableBinStream] = None, + dbf: Optional[WriteSeekableBinStream] = None, + # Keep kwargs even though unused, to preserve PyShp 2.4 API + **kwargs, + ): + self.target = target + self.autoBalance = autoBalance + self.fields: list[Field] = [] + self.shapeType = shapeType + self.shp: Optional[WriteSeekableBinStream] = None + self.shx: Optional[WriteSeekableBinStream] = None + self.dbf: Optional[WriteSeekableBinStream] = None + self._files_to_close: list[BinaryFileStreamT] = [] + if target: + target = fsdecode_if_pathlike(target) + if not isinstance(target, str): + raise TypeError( + f"The target filepath {target!r} must be of type str/unicode or path-like, not {type(target)}." + ) + self.shp = self.__getFileObj(os.path.splitext(target)[0] + ".shp") + self.shx = self.__getFileObj(os.path.splitext(target)[0] + ".shx") + self.dbf = self.__getFileObj(os.path.splitext(target)[0] + ".dbf") + elif shp or shx or dbf: + if shp: + self.shp = self.__getFileObj(shp) + if shx: + self.shx = self.__getFileObj(shx) + if dbf: + self.dbf = self.__getFileObj(dbf) + else: + raise TypeError( + "Either the target filepath, or any of shp, shx, or dbf must be set to create a shapefile." + ) + # Initiate with empty headers, to be finalized upon closing + if self.shp: + self.shp.write(b"9" * 100) + if self.shx: + self.shx.write(b"9" * 100) + # Geometry record offsets and lengths for writing shx file. + self.recNum = 0 + self.shpNum = 0 + self._bbox: Optional[BBox] = None + self._zbox: Optional[ZBox] = None + self._mbox: Optional[MBox] = None + # Use deletion flags in dbf? Default is false (0). Note: Currently has no effect, records should NOT contain deletion flags. + self.deletionFlag = 0 + # Encoding + self.encoding = encoding + self.encodingErrors = encodingErrors + + def __len__(self): + """Returns the current number of features written to the shapefile. 
+ If shapes and records are unbalanced, the length is considered the highest + of the two.""" + return max(self.recNum, self.shpNum) + + def __enter__(self): + """ + Enter phase of context manager. + """ + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """ + Exit phase of context manager, finish writing and close the files. + """ + self.close() + + def __del__(self): + self.close() + + def close(self): + """ + Write final shp, shx, and dbf headers, close opened files. + """ + # Check if any of the files have already been closed + shp_open = self.shp and not (hasattr(self.shp, "closed") and self.shp.closed) + shx_open = self.shx and not (hasattr(self.shx, "closed") and self.shx.closed) + dbf_open = self.dbf and not (hasattr(self.dbf, "closed") and self.dbf.closed) + + # Balance if already not balanced + if self.shp and shp_open and self.dbf and dbf_open: + if self.autoBalance: + self.balance() + if self.recNum != self.shpNum: + raise ShapefileException( + "When saving both the dbf and shp file, " + f"the number of records ({self.recNum}) must correspond " + f"with the number of shapes ({self.shpNum})" + ) + # Fill in the blank headers + if self.shp and shp_open: + self.__shapefileHeader(self.shp, headerType="shp") + if self.shx and shx_open: + self.__shapefileHeader(self.shx, headerType="shx") + + # Update the dbf header with final length etc + if self.dbf and dbf_open: + self.__dbfHeader() + + # Flush files + for attribute in (self.shp, self.shx, self.dbf): + if attribute is None: + continue + if hasattr(attribute, "flush") and not getattr(attribute, "closed", False): + try: + attribute.flush() + except OSError: + pass + + # Close any files that the writer opened (but not those given by user) + for attribute in self._files_to_close: + if hasattr(attribute, "close"): + try: + attribute.close() + except OSError: + pass + self._files_to_close = [] + + @overload + def __getFileObj(self, f: str) -> WriteSeekableBinStream: ... + @overload + def __getFileObj(self, f: None) -> NoReturn: ... + @overload + def __getFileObj(self, f: WriteSeekableBinStream) -> WriteSeekableBinStream: ... 
+ def __getFileObj(self, f): + """Safety handler to verify file-like objects""" + if not f: + raise ShapefileException("No file-like object available.") + if isinstance(f, str): + pth = os.path.split(f)[0] + if pth and not os.path.exists(pth): + os.makedirs(pth) + fp = open(f, "wb+") + self._files_to_close.append(fp) + return fp + + if hasattr(f, "write"): + return f + raise ShapefileException(f"Unsupported file-like object: {f}") + + def __shpFileLength(self) -> int: + """Calculates the file length of the shp file.""" + shp = self.__getFileObj(self.shp) + + # Remember starting position + + start = shp.tell() + # Calculate size of all shapes + shp.seek(0, 2) + size = shp.tell() + # Calculate size as 16-bit words + size //= 2 + # Return to start + shp.seek(start) + return size + + def _update_file_bbox(self, s: Shape): + if s.shapeType == NULL: + shape_bbox = None + elif s.shapeType in _CanHaveBBox_shapeTypes: + shape_bbox = s.bbox + else: + x, y = s.points[0][:2] + shape_bbox = (x, y, x, y) + + if shape_bbox is None: + return + + if self._bbox: + # compare with existing + self._bbox = ( + min(shape_bbox[0], self._bbox[0]), + min(shape_bbox[1], self._bbox[1]), + max(shape_bbox[2], self._bbox[2]), + max(shape_bbox[3], self._bbox[3]), + ) + else: + # first time bbox is being set + self._bbox = shape_bbox + + def _update_file_zbox(self, s: Union[_HasZ, PointZ]): + if self._zbox: + # compare with existing + self._zbox = (min(s.zbox[0], self._zbox[0]), max(s.zbox[1], self._zbox[1])) + else: + # first time zbox is being set + self._zbox = s.zbox + + def _update_file_mbox(self, s: Union[_HasM, PointM]): + mbox = s.mbox + if self._mbox: + # compare with existing + self._mbox = (min(mbox[0], self._mbox[0]), max(mbox[1], self._mbox[1])) + else: + # first time mbox is being set + self._mbox = mbox + + @property + def shapeTypeName(self) -> str: + return SHAPETYPE_LOOKUP[self.shapeType or 0] + + def bbox(self) -> Optional[BBox]: + """Returns the current bounding box for the shapefile which is + the lower-left and upper-right corners. It does not contain the + elevation or measure extremes.""" + return self._bbox + + def zbox(self) -> Optional[ZBox]: + """Returns the current z extremes for the shapefile.""" + return self._zbox + + def mbox(self) -> Optional[MBox]: + """Returns the current m extremes for the shapefile.""" + return self._mbox + + def __shapefileHeader( + self, + fileObj: Optional[WriteSeekableBinStream], + headerType: Literal["shp", "dbf", "shx"] = "shp", + ) -> None: + """Writes the specified header type to the specified file-like object. + Several of the shapefile formats are so similar that a single generic + method to read or write them is warranted.""" + + f = self.__getFileObj(fileObj) + f.seek(0) + # File code, Unused bytes + f.write(pack(">6i", 9994, 0, 0, 0, 0, 0)) + # File length (Bytes / 2 = 16-bit words) + if headerType == "shp": + f.write(pack(">i", self.__shpFileLength())) + elif headerType == "shx": + f.write(pack(">i", ((100 + (self.shpNum * 8)) // 2))) + # Version, Shape type + if self.shapeType is None: + self.shapeType = NULL + f.write(pack("<2i", 1000, self.shapeType)) + # The shapefile's bounding box (lower left, upper right) + if self.shapeType != 0: + try: + bbox = self.bbox() + if bbox is None: + # The bbox is initialized with None, so this would mean the shapefile contains no valid geometries. + # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. 
+ # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. + # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. + # bbox = BBox(0, 0, 0, 0) + bbox = (0, 0, 0, 0) + f.write(pack("<4d", *bbox)) + except error: + raise ShapefileException( + "Failed to write shapefile bounding box. Floats required." + ) + else: + f.write(pack("<4d", 0, 0, 0, 0)) + # Elevation + if self.shapeType in PointZ_shapeTypes | _HasZ_shapeTypes: + # Z values are present in Z type + zbox = self.zbox() + if zbox is None: + # means we have empty shapefile/only null geoms (see commentary on bbox above) + # zbox = ZBox(0, 0) + zbox = (0, 0) + else: + # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s + # zbox = ZBox(0, 0) + zbox = (0, 0) + # Measure + if self.shapeType in PointM_shapeTypes | _HasM_shapeTypes: + # M values are present in M or Z type + mbox = self.mbox() + if mbox is None: + # means we have empty shapefile/only null geoms (see commentary on bbox above) + # mbox = MBox(0, 0) + mbox = (0, 0) + else: + # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s + # mbox = MBox(0, 0) + mbox = (0, 0) + # Try writing + try: + f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) + except error: + raise ShapefileException( + "Failed to write shapefile elevation and measure values. Floats required." + ) + + def __dbfHeader(self) -> None: + """Writes the dbf header and field descriptors.""" + f = self.__getFileObj(self.dbf) + f.seek(0) + version = 3 + year, month, day = time.localtime()[:3] + year -= 1900 + # Get all fields, ignoring DeletionFlag if specified + fields = [field for field in self.fields if field[0] != "DeletionFlag"] + # Ensure has at least one field + if not fields: + raise ShapefileException( + "Shapefile dbf file must contain at least one field." + ) + numRecs = self.recNum + numFields = len(fields) + headerLength = numFields * 32 + 33 + if headerLength >= 65535: + raise ShapefileException( + "Shapefile dbf header length exceeds maximum length." + ) + recordLength = sum(field.size for field in fields) + 1 + header = pack( + " None: + # Balance if already not balanced + if self.autoBalance and self.recNum < self.shpNum: + self.balance() + # Check is shape or import from geojson + if not isinstance(s, Shape): + if hasattr(s, "__geo_interface__"): + s = s.__geo_interface__ # type: ignore [assignment] + if isinstance(s, dict): + s = Shape._from_geojson(s) + else: + raise TypeError( + "Can only write Shape objects, GeoJSON dictionaries, " + "or objects with the __geo_interface__, " + f"not: {s}" + ) + # Write to file + offset, length = self.__shpRecord(s) + if self.shx: + self.__shxRecord(offset, length) + + def __shpRecord(self, s: Shape) -> tuple[int, int]: + f: WriteSeekableBinStream = self.__getFileObj(self.shp) + offset = f.tell() + self.shpNum += 1 + + # Shape Type + if self.shapeType is None and s.shapeType != NULL: + self.shapeType = s.shapeType + if s.shapeType not in (NULL, self.shapeType): + raise ShapefileException( + f"The shape's type ({s.shapeType}) must match " + f"the type of the shapefile ({self.shapeType})." 
+ ) + + # For both single point and multiple-points non-null shapes, + # update bbox, mbox and zbox of the whole shapefile + if s.shapeType != NULL: + self._update_file_bbox(s) + + if s.shapeType in PointM_shapeTypes | _HasM_shapeTypes: + self._update_file_mbox(cast(Union[_HasM, PointM], s)) + + if s.shapeType in PointZ_shapeTypes | _HasZ_shapeTypes: + self._update_file_zbox(cast(Union[_HasZ, PointZ], s)) + + # Create an in-memory binary buffer to avoid + # unnecessary seeks to files on disk + # (other ops are already buffered until .seek + # or .flush is called if not using RawIOBase). + # https://docs.python.org/3/library/io.html#id2 + # https://docs.python.org/3/library/io.html#io.BufferedWriter + b_io: ReadWriteSeekableBinStream = io.BytesIO() + + # Record number, Content length place holder + b_io.write(pack(">2i", self.shpNum, -1)) + + # Track number of content bytes written. Excluding self.shpNum and length t.b.c. + n = 0 + + n += b_io.write(pack("i", length)) + + # Flush to file. + b_io.seek(0) + f.write(b_io.read()) + return offset, length + + def __shxRecord(self, offset: int, length: int) -> None: + """Writes the shx records.""" + + f = self.__getFileObj(self.shx) + try: + f.write(pack(">i", offset // 2)) + except error: + raise ShapefileException( + "The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones." + ) + f.write(pack(">i", length)) + + def record( + self, + *recordList: RecordValue, + **recordDict: RecordValue, + ) -> None: + """Creates a dbf attribute record. You can submit either a sequence of + field values or keyword arguments of field names and values. Before + adding records you must add fields for the record values using the + field() method. If the record values exceed the number of fields the + extra ones won't be added. 
In the case of using keyword arguments to specify + field/value pairs only fields matching the already registered fields + will be added.""" + # Balance if already not balanced + if self.autoBalance and self.recNum > self.shpNum: + self.balance() + record: list[RecordValue] + fieldCount = sum(1 for field in self.fields if field[0] != "DeletionFlag") + if recordList: + record = list(recordList) + while len(record) < fieldCount: + record.append("") + elif recordDict: + record = [] + for field in self.fields: + if field[0] == "DeletionFlag": + continue # ignore deletionflag field in case it was specified + if field[0] in recordDict: + val = recordDict[field[0]] + if val is None: + record.append("") + else: + record.append(val) + else: + record.append("") # need empty value for missing dict entries + else: + # Blank fields for empty record + record = ["" for _ in range(fieldCount)] + self.__dbfRecord(record) + + def __dbfRecord(self, record: list[RecordValue]) -> None: + """Writes the dbf records.""" + f = self.__getFileObj(self.dbf) + if self.recNum == 0: + # first records, so all fields should be set + # allowing us to write the dbf header + # cannot change the fields after this point + self.__dbfHeader() + # first byte of the record is deletion flag, always disabled + f.write(b" ") + # begin + self.recNum += 1 + fields = ( + field for field in self.fields if field[0] != "DeletionFlag" + ) # ignore deletionflag field in case it was specified + for (fieldName, fieldType, size, deci), value in zip(fields, record): + # write + # fieldName, fieldType, size and deci were already checked + # when their Field instance was created and added to self.fields + str_val: Optional[str] = None + + if fieldType in ("N", "F"): + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + if value in MISSING: + str_val = "*" * size # QGIS NULL + elif not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + num_val = int(cast(int, value)) + except ValueError: + # forcing directly to int failed, so was probably a float. + num_val = int(float(cast(float, value))) + str_val = format(num_val, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + else: + f_val = float(cast(float, value)) + str_val = format(f_val, f".{deci}f")[:size].rjust( + size + ) # caps the size if exceeds the field size + elif fieldType == "D": + # date: 8 bytes - date stored as a string in the format YYYYMMDD. + if isinstance(value, date): + str_val = f"{value.year:04d}{value.month:02d}{value.day:02d}" + elif isinstance(value, list) and len(value) == 3: + str_val = f"{value[0]:04d}{value[1]:02d}{value[2]:02d}" + elif value in MISSING: + str_val = "0" * 8 # QGIS NULL for date type + elif isinstance(value, str) and len(value) == 8: + pass # value is already a date string + else: + raise ShapefileException( + "Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value." + ) + elif fieldType == "L": + # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
+ if value in MISSING: + str_val = " " # missing is set to space + elif value in [True, 1]: + str_val = "T" + elif value in [False, 0]: + str_val = "F" + else: + str_val = " " # unknown is set to space + + if str_val is None: + # Types C and M, and anything else, value is forced to string, + # encoded by the codec specified to the Writer (utf-8 by default), + # then the resulting bytes are padded and truncated to the length + # of the field + encoded = ( + str(value) + .encode(self.encoding, self.encodingErrors)[:size] + .ljust(size) + ) + else: + # str_val was given a not-None string value + # under the checks for fieldTypes "N", "F", "D", or "L" above + # Numeric, logical, and date numeric types are ascii already, but + # for Shapefile or dbf spec reasons + # "should be default ascii encoding" + encoded = str_val.encode("ascii", self.encodingErrors) + + if len(encoded) != size: + raise ShapefileException( + f"Shapefile Writer unable to pack incorrect sized {value=}" + f" (encoded as {len(encoded)}B) into field '{fieldName}' ({size}B)." + ) + f.write(encoded) + + def balance(self) -> None: + """Adds corresponding empty attributes or null geometry records depending + on which type of record was created to make sure all three files + are in synch.""" + while self.recNum > self.shpNum: + self.null() + while self.recNum < self.shpNum: + self.record() + + def null(self) -> None: + """Creates a null shape.""" + self.shape(NullShape()) + + def point(self, x: float, y: float) -> None: + """Creates a POINT shape.""" + pointShape = Point(x, y) + self.shape(pointShape) + + def pointm(self, x: float, y: float, m: Optional[float] = None) -> None: + """Creates a POINTM shape. + If the m (measure) value is not set, it defaults to NoData.""" + pointShape = PointM(x, y, m) + self.shape(pointShape) + + def pointz( + self, x: float, y: float, z: float = 0.0, m: Optional[float] = None + ) -> None: + """Creates a POINTZ shape. + If the z (elevation) value is not set, it defaults to 0. + If the m (measure) value is not set, it defaults to NoData.""" + pointShape = PointZ(x, y, z, m) + self.shape(pointShape) + + def multipoint(self, points: PointsT) -> None: + """Creates a MULTIPOINT shape. + Points is a list of xy values.""" + # nest the points inside a list to be compatible with the generic shapeparts method + shape = MultiPoint(points=points) + self.shape(shape) + + def multipointm(self, points: PointsT) -> None: + """Creates a MULTIPOINTM shape. + Points is a list of xym values. + If the m (measure) value is not included, it defaults to None (NoData).""" + # nest the points inside a list to be compatible with the generic shapeparts method + shape = MultiPointM(points=points) + self.shape(shape) + + def multipointz(self, points: PointsT) -> None: + """Creates a MULTIPOINTZ shape. + Points is a list of xyzm values. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + # nest the points inside a list to be compatible with the generic shapeparts method + shape = MultiPointZ(points=points) + self.shape(shape) + + def line(self, lines: list[PointsT]) -> None: + """Creates a POLYLINE shape. + Lines is a collection of lines, each made up of a list of xy values.""" + shape = Polyline(lines=lines) + self.shape(shape) + + def linem(self, lines: list[PointsT]) -> None: + """Creates a POLYLINEM shape. + Lines is a collection of lines, each made up of a list of xym values. 
+ If the m (measure) value is not included, it defaults to None (NoData).""" + shape = PolylineM(lines=lines) + self.shape(shape) + + def linez(self, lines: list[PointsT]) -> None: + """Creates a POLYLINEZ shape. + Lines is a collection of lines, each made up of a list of xyzm values. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shape = PolylineZ(lines=lines) + self.shape(shape) + + def poly(self, polys: list[PointsT]) -> None: + """Creates a POLYGON shape. + Polys is a collection of polygons, each made up of a list of xy values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction.""" + shape = Polygon(lines=polys) + self.shape(shape) + + def polym(self, polys: list[PointsT]) -> None: + """Creates a POLYGONM shape. + Polys is a collection of polygons, each made up of a list of xym values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction. + If the m (measure) value is not included, it defaults to None (NoData).""" + shape = PolygonM(lines=polys) + self.shape(shape) + + def polyz(self, polys: list[PointsT]) -> None: + """Creates a POLYGONZ shape. + Polys is a collection of polygons, each made up of a list of xyzm values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shape = PolygonZ(lines=polys) + self.shape(shape) + + def multipatch(self, parts: list[PointsT], partTypes: list[int]) -> None: + """Creates a MULTIPATCH shape. + Parts is a collection of 3D surface patches, each made up of a list of xyzm values. + PartTypes is a list of types that define each of the surface patches. + The types can be any of the following module constants: TRIANGLE_STRIP, + TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shape = MultiPatch(lines=parts, partTypes=partTypes) + self.shape(shape) + + def field( + # Types of args should match *Field + self, + name: str, + field_type: FieldTypeT = "C", + size: int = 50, + decimal: int = 0, + ) -> None: + """Adds a dbf field descriptor to the shapefile.""" + if len(self.fields) >= 2046: + raise ShapefileException( + "Shapefile Writer reached maximum number of fields: 2046." 
+ ) + field_ = Field.from_unchecked(name, field_type, size, decimal) + self.fields.append(field_) + + +# Begin Testing +def _get_doctests() -> doctest.DocTest: + # run tests + with open("README.md", "rb") as fobj: + tests = doctest.DocTestParser().get_doctest( + string=fobj.read().decode("utf8").replace("\r\n", "\n"), + globs={}, + name="README", + filename="README.md", + lineno=0, + ) + + return tests + + +def _filter_network_doctests( + examples: Iterable[doctest.Example], + include_network: bool = False, + include_non_network: bool = True, +) -> Iterator[doctest.Example]: + globals_from_network_doctests = set() + + if not (include_network or include_non_network): + return + + examples_it = iter(examples) + + yield next(examples_it) + + for example in examples_it: + # Track variables in doctest shell sessions defined from commands + # that poll remote URLs, to skip subsequent commands until all + # such dependent variables are reassigned. + + if 'sf = shapefile.Reader("https://' in example.source: + globals_from_network_doctests.add("sf") + if include_network: + yield example + continue + + lhs = example.source.partition("=")[0] + + for target in lhs.split(","): + target = target.strip() + if target in globals_from_network_doctests: + globals_from_network_doctests.remove(target) + + # Non-network tests dependent on the network tests. + if globals_from_network_doctests: + if include_network: + yield example + continue + + if not include_non_network: + continue + + yield example + + +def _replace_remote_url( + old_url: str, + # Default port of Python http.server and Python 2's SimpleHTTPServer + port: int = 8000, + scheme: str = "http", + netloc: str = "localhost", + path: Optional[str] = None, + params: str = "", + query: str = "", + fragment: str = "", +) -> str: + old_parsed = urlparse(old_url) + + # Strip subpaths, so an artefacts + # repo or file tree can be simpler and flat + if path is None: + path = old_parsed.path.rpartition("/")[2] + + if port not in (None, ""): + netloc = f"{netloc}:{port}" + + new_parsed = old_parsed._replace( + scheme=scheme, + netloc=netloc, + path=path, + params=params, + query=query, + fragment=fragment, + ) + + new_url = urlunparse(new_parsed) + return new_url + + +def _test(args: list[str] = sys.argv[1:], verbosity: bool = False) -> int: + if verbosity == 0: + print("Getting doctests...") + + import re + + tests = _get_doctests() + + if len(args) >= 2 and args[0] == "-m": + if verbosity == 0: + print("Filtering doctests...") + tests.examples = list( + _filter_network_doctests( + tests.examples, + include_network=args[1] == "network", + include_non_network=args[1] == "not network", + ) + ) + + if REPLACE_REMOTE_URLS_WITH_LOCALHOST: + if verbosity == 0: + print("Replacing remote urls with http://localhost in doctests...") + + for example in tests.examples: + match_url_str_literal = re.search(r'"(https://.*)"', example.source) + if not match_url_str_literal: + continue + old_url = match_url_str_literal.group(1) + new_url = _replace_remote_url(old_url) + example.source = example.source.replace(old_url, new_url) + + runner = doctest.DocTestRunner(verbose=verbosity, optionflags=doctest.FAIL_FAST) + + if verbosity == 0: + print(f"Running {len(tests.examples)} doctests...") + failure_count, __test_count = runner.run(tests) + + # print results + if verbosity: + runner.summarize(True) + else: + if failure_count == 0: + print("All tests passed successfully") + elif failure_count > 0: + runner.summarize(verbosity) + + return failure_count + + +def main(): + """ +
Doctests are contained in the file 'README.md', and are tested using the built-in + testing libraries. + """ + failure_count = _test() + sys.exit(failure_count) + + +if __name__ == "__main__": + main() diff --git a/test_shapefile.py b/test_shapefile.py index bf145aeb..2a10d3ee 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -5,12 +5,7 @@ import datetime import json import os.path - -try: - from pathlib import Path -except ImportError: - # pathlib2 is a dependency of pytest >= 3.7 - from pathlib2 import Path +from pathlib import Path # third party imports import pytest @@ -1851,3 +1846,9 @@ def test_write_empty_shapefile(tmpdir, shape_type): assert len(r.records()) == 0 # test shapes are empty assert len(r.shapes()) == 0 + + +# This allows a PyShp wheel installed in the env to be tested +# against the doctests. +if __name__ == "__main__": + shapefile.main()
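+# Usage sketch: "-m network" / "-m 'not network'" are parsed by _test() in shapefile.py, mirroring pytest's marker syntax, so e.g. "python test_shapefile.py -m 'not network'" runs the README doctests against the installed PyShp package while skipping the doctests that poll remote URLs.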