Skip to content

Fickle API connection to S2 catalog that errors with RuntimeError: not recognized as a supported file format.  #192

@FlorisCalkoen

Description

@FlorisCalkoen

Since yesterday (2023-02-22) around 21:00 CET, I have had a very unstable connection when loading data from the Sentinel-2 L2A (surface reflectance) collection in the catalog.
A query like the one below often fails with `RuntimeError: Error opening '<url>': ... not recognized as a supported file format.` (see the details below). I usually run these queries on a Dask distributed client.

import planetary_computer
import pystac_client
import stackstac
import rasterio
import rioxarray
import xarray as xr


catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,
)
bbox = [151.2945334307, -33.7448472377, 151.3229999588, -33.6917695125]

roi = {
    "type": "Polygon",
    "coordinates": [
        [
            [151.2945334307, -33.7448472377],
            [151.3229999588, -33.7448472377],
            [151.3229999588, -33.6917695125],
            [151.2945334307, -33.6917695125],
            [151.2945334307, -33.7448472377],
        ]
    ],
}

search = catalog.search(
    collections=["sentinel-2-l2a"],
    intersects=roi,
    datetime="2022-12-31/2023-02-01",
    query={"eo:cloud_cover": {"lt": 50}},
)
items = search.item_collection()

da = stackstac.stack(
    items,
    assets=["B02", "B03", "B04", "B08", "B11", "SCL"],
    bounds_latlon=bbox,
    resampling=rasterio.enums.Resampling.bilinear,
).compute()
2023-02-23 09:07:52,233 - distributed.worker - WARNING - Compute Failed
Key:       ('asset_table_to_reader_and_window-fetch_raster_window-33cfbe764bf9c7870b6cd517607e768e', 4, 1, 0, 0)
Function:  execute_task
args:      ((subgraph_callable-36480e7c-187a-45a2-8d8c-8768940161f4, (subgraph_callable-0424adc3-32ef-449d-90af-2fcd67fbfe0b, array([[('https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/56/H/LH/2023/01/25/S2B_MSIL2A_20230125T235229_N0400_R130_T56HLH_20230127T052848.SAFE/GRANULE/L2A_T56HLH_A030758_20230125T235228/IMG_DATA/R10m/T56HLH_20230125T235229_B03_10m.tif?st=2023-02-22T07%3A25%3A33Z&se=2023-02-24T07%3A25%3A33Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-02-23T06%3A42%3A59Z&ske=2023-03-02T06%3A42%3A59Z&sks=b&skv=2021-06-08&sig=A4QAd0NTlYyf0NKVTb3Ple4ASgU43Etg4IXELFQtJvY%3D', [ 300000., 6190240.,  409800., 6300040.])]],
      dtype=[('url', 'O'), ('bounds', '<f8', (4,))]), RasterSpec(epsg=32756, bounds=(341920.0, 6264820.0, 344670.0, 6270760.0), resolutions_xy=(10.0, 10.0)), <Resampling.bilinear: 1>, dtype('float64'), nan, True, None, (<class 'tuple'>, [RasterioIOError('HTTP response code: 404')]), <class 'stac
kwargs:    {}
Exception: 'RuntimeError(\'Error opening \\\'https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/56/H/LH/2023/01/25/S2B_MSIL2A_20230125T235229_N0400_R130_T56HLH_20230127T052848.SAFE/GRANULE/L2A_T56HLH_A030758_20230125T235228/IMG_DATA/R10m/T56HLH_20230125T235229_B03_10m.tif?st=2023-02-22T07%3A25%3A33Z&se=2023-02-24T07%3A25%3A33Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-02-23T06%3A42%3A59Z&ske=2023-03-02T06%3A42%3A59Z&sks=b&skv=2021-06-08&sig=A4QAd0NTlYyf0NKVTb3Ple4ASgU43Etg4IXELFQtJvY%3D\\\': RasterioIOError("\\\'/vsicurl/https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/56/H/LH/2023/01/25/S2B_MSIL2A_20230125T235229_N0400_R130_T56HLH_20230127T052848.SAFE/GRANULE/L2A_T56HLH_A030758_20230125T235228/IMG_DATA/R10m/T56HLH_20230125T235229_B03_10m.tif?st=2023-02-22T07%3A25%3A33Z&se=2023-02-24T07%3A25%3A33Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-02-23T06%3A42%3A59Z&ske=2023-03-02T06%3A42%3A59Z&sks=b&skv=2021-06-08&sig=A4QAd0NTlYyf0NKVTb3Ple4ASgU43Etg4IXELFQtJvY%3D\\\' not recognized as a supported file format.")\')'

---------------------------------------------------------------------------
RasterioIOError                           Traceback (most recent call last)
File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/stackstac/rio_reader.py:326, in _open()
    325 try:
--> 326     ds = SelfCleaningDatasetReader(
    327         self.url, sharing=False
    328     )
    329 except Exception as e:

File rasterio/_base.pyx:309, in rasterio._base.DatasetBase.__init__()

RasterioIOError: '/vsicurl/https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/56/H/LH/2023/01/25/S2B_MSIL2A_20230125T235229_N0400_R130_T56HLH_20230127T052848.SAFE/GRANULE/L2A_T56HLH_A030758_20230125T235228/IMG_DATA/R10m/T56HLH_20230125T235229_B03_10m.tif?st=2023-02-22T07%3A25%3A33Z&se=2023-02-24T07%3A25%3A33Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-02-23T06%3A42%3A59Z&ske=2023-03-02T06%3A42%3A59Z&sks=b&skv=2021-06-08&sig=A4QAd0NTlYyf0NKVTb3Ple4ASgU43Etg4IXELFQtJvY%3D' not recognized as a supported file format.

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
Cell In[65], line 33
     20 search = catalog.search(
     21     collections=["sentinel-2-l2a"],
     22     intersects=roi,
     23     datetime="2022-12-31/2023-02-01",
     24     query={"eo:cloud_cover": {"lt": 50}},
     25 )
     26 items = search.item_collection()
     28 da = stackstac.stack(
     29     items,
     30     assets=["B02", "B03", "B04", "B08", "B11", "SCL"],
     31     bounds_latlon=bbox,
     32     resampling=rasterio.enums.Resampling.bilinear,
---> 33 ).compute()

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/xarray/core/dataarray.py:1089, in DataArray.compute(self, **kwargs)
   1070 """Manually trigger loading of this array's data from disk or a
   1071 remote source into memory and return a new array. The original is
   1072 left unaltered.
   (...)
   1086 dask.compute
   1087 """
   1088 new = self.copy(deep=False)
-> 1089 return new.load(**kwargs)

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/xarray/core/dataarray.py:1063, in DataArray.load(self, **kwargs)
   1045 def load(self: T_DataArray, **kwargs) -> T_DataArray:
   1046     """Manually trigger loading of this array's data from disk or a
   1047     remote source into memory and return this array.
   1048 
   (...)
   1061     dask.compute
   1062     """
-> 1063     ds = self._to_temp_dataset().load(**kwargs)
   1064     new = self._from_temp_dataset(ds)
   1065     self._variable = new._variable

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/xarray/core/dataset.py:746, in Dataset.load(self, **kwargs)
    743 import dask.array as da
    745 # evaluate all the dask arrays simultaneously
--> 746 evaluated_data = da.compute(*lazy_data.values(), **kwargs)
    748 for k, data in zip(lazy_data, evaluated_data):
    749     self.variables[k].data = data

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/dask/base.py:599, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    596     keys.append(x.__dask_keys__())
    597     postcomputes.append(x.__dask_postcompute__())
--> 599 results = schedule(dsk, keys, **kwargs)
    600 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/distributed/client.py:3137, in Client.get(self, dsk, keys, workers, allow_other_workers, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
   3135         should_rejoin = False
   3136 try:
-> 3137     results = self.gather(packed, asynchronous=asynchronous, direct=direct)
   3138 finally:
   3139     for f in futures.values():

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/distributed/client.py:2306, in Client.gather(self, futures, errors, direct, asynchronous)
   2304 else:
   2305     local_worker = None
-> 2306 return self.sync(
   2307     self._gather,
   2308     futures,
   2309     errors=errors,
   2310     direct=direct,
   2311     local_worker=local_worker,
   2312     asynchronous=asynchronous,
   2313 )

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/distributed/utils.py:338, in SyncMethodMixin.sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
    336     return future
    337 else:
--> 338     return sync(
    339         self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
    340     )

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/distributed/utils.py:405, in sync(loop, func, callback_timeout, *args, **kwargs)
    403 if error:
    404     typ, exc, tb = error
--> 405     raise exc.with_traceback(tb)
    406 else:
    407     return result

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/distributed/utils.py:378, in sync.<locals>.f()
    376         future = asyncio.wait_for(future, callback_timeout)
    377     future = asyncio.ensure_future(future)
--> 378     result = yield future
    379 except Exception:
    380     error = sys.exc_info()

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/tornado/gen.py:769, in Runner.run(self)
    766 exc_info = None
    768 try:
--> 769     value = future.result()
    770 except Exception:
    771     exc_info = sys.exc_info()

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/distributed/client.py:2169, in Client._gather(self, futures, errors, direct, local_worker)
   2167         exc = CancelledError(key)
   2168     else:
-> 2169         raise exception.with_traceback(traceback)
   2170     raise exc
   2171 if errors == "skip":

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/dask/optimization.py:990, in __call__()
    988 if not len(args) == len(self.inkeys):
    989     raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 990 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/dask/core.py:149, in get()
    147 for key in toposort(dsk):
    148     task = dsk[key]
--> 149     result = _execute_task(task, cache)
    150     cache[key] = result
    151 result = _execute_task(out, cache)

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/dask/core.py:119, in _execute_task()
    115     func, args = arg[0], arg[1:]
    116     # Note: Don't assign the subtask results to a variable. numpy detects
    117     # temporaries by their reference count and can execute certain
    118     # operations in-place.
--> 119     return func(*(_execute_task(a, cache) for a in args))
    120 elif not ishashable(arg):
    121     return arg

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/stackstac/to_dask.py:185, in fetch_raster_window()
    178 # Only read if the window we're fetching actually overlaps with the asset
    179 if windows.intersect(current_window, asset_window):
    180     # NOTE: when there are multiple assets, we _could_ parallelize these reads with our own threadpool.
    181     # However, that would probably increase memory usage, since the internal, thread-local GDAL datasets
    182     # would end up copied to even more threads.
    183 
    184     # TODO when the Reader won't be rescaling, support passing `output` to avoid the copy?
--> 185     data = reader.read(current_window)
    187     if all_empty:
    188         # Turn `output` from a broadcast-trick array to a real array, so it's writeable
    189         if (
    190             np.isnan(data)
    191             if np.isnan(fill_value)
    192             else np.equal(data, fill_value)
    193         ).all():
    194             # Unless the data we just read is all empty anyway

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/stackstac/rio_reader.py:385, in read()
    384 def read(self, window: Window, **kwargs) -> np.ndarray:
--> 385     reader = self.dataset
    386     try:
    387         result = reader.read(
    388             window=window,
    389             masked=True,
   (...)
    392             **kwargs,
    393         )

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/stackstac/rio_reader.py:381, in dataset()
    379 with self._dataset_lock:
    380     if self._dataset is None:
--> 381         self._dataset = self._open()
    382     return self._dataset

File ~/mambaforge/envs/coastal/lib/python3.10/site-packages/stackstac/rio_reader.py:337, in _open()
    332             warnings.warn(msg)
    333             return NodataReader(
    334                 dtype=self.dtype, fill_value=self.fill_value
    335             )
--> 337         raise RuntimeError(msg) from e
    338 if ds.count != 1:
    339     ds.close()

RuntimeError: Error opening 'https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/56/H/LH/2023/01/25/S2B_MSIL2A_20230125T235229_N0400_R130_T56HLH_20230127T052848.SAFE/GRANULE/L2A_T56HLH_A030758_20230125T235228/IMG_DATA/R10m/T56HLH_20230125T235229_B03_10m.tif?st=2023-02-22T07%3A25%3A33Z&se=2023-02-24T07%3A25%3A33Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-02-23T06%3A42%3A59Z&ske=2023-03-02T06%3A42%3A59Z&sks=b&skv=2021-06-08&sig=A4QAd0NTlYyf0NKVTb3Ple4ASgU43Etg4IXELFQtJvY%3D': RasterioIOError("'/vsicurl/https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/56/H/LH/2023/01/25/S2B_MSIL2A_20230125T235229_N0400_R130_T56HLH_20230127T052848.SAFE/GRANULE/L2A_T56HLH_A030758_20230125T235228/IMG_DATA/R10m/T56HLH_20230125T235229_B03_10m.tif?st=2023-02-22T07%3A25%3A33Z&se=2023-02-24T07%3A25%3A33Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-02-23T06%3A42%3A59Z&ske=2023-03-02T06%3A42%3A59Z&sks=b&skv=2021-06-08&sig=A4QAd0NTlYyf0NKVTb3Ple4ASgU43Etg4IXELFQtJvY%3D' not recognized as a supported file format.")

2023-02-23 09:07:52,385 - distributed.worker - WARNING - Compute Failed
Key:       ('asset_table_to_reader_and_window-fetch_raster_window-33cfbe764bf9c7870b6cd517607e768e', 2, 4, 0, 0)
Function:  execute_task
args:      ((subgraph_callable-36480e7c-187a-45a2-8d8c-8768940161f4, (subgraph_callable-0424adc3-32ef-449d-90af-2fcd67fbfe0b, array([[('https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/56/H/LH/2023/01/15/S2B_MSIL2A_20230115T235229_N0400_R130_T56HLH_20230116T091738.SAFE/GRANULE/L2A_T56HLH_A030615_20230115T235227/IMG_DATA/R20m/T56HLH_20230115T235229_B11_20m.tif?st=2023-02-22T07%3A25%3A33Z&se=2023-02-24T07%3A25%3A33Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-02-23T06%3A42%3A59Z&ske=2023-03-02T06%3A42%3A59Z&sks=b&skv=2021-06-08&sig=A4QAd0NTlYyf0NKVTb3Ple4ASgU43Etg4IXELFQtJvY%3D', [ 300000., 6190240.,  409800., 6300040.])]],
      dtype=[('url', 'O'), ('bounds', '<f8', (4,))]), RasterSpec(epsg=32756, bounds=(341920.0, 6264820.0, 344670.0, 6270760.0), resolutions_xy=(10.0, 10.0)), <Resampling.bilinear: 1>, dtype('float64'), nan, True, None, (<class 'tuple'>, [RasterioIOError('HTTP response code: 404')]), <class 'stac
kwargs:    {}
Exception: 'RuntimeError("Error reading Window(col_off=0, row_off=0, width=275, height=594) from \'https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/56/H/LH/2023/01/15/S2B_MSIL2A_20230115T235229_N0400_R130_T56HLH_20230116T091738.SAFE/GRANULE/L2A_T56HLH_A030615_20230115T235227/IMG_DATA/R20m/T56HLH_20230115T235229_B11_20m.tif?st=2023-02-22T07%3A25%3A33Z&se=2023-02-24T07%3A25%3A33Z&sp=rl&sv=2021-06-08&sr=c&skoid=c85c15d6-d1ae-42d4-af60-e2ca0f81359b&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2023-02-23T06%3A42%3A59Z&ske=2023-03-02T06%3A42%3A59Z&sks=b&skv=2021-06-08&sig=A4QAd0NTlYyf0NKVTb3Ple4ASgU43Etg4IXELFQtJvY%3D\': RasterioIOError(\'Read or write failed. IReadBlock failed at X offset 0, Y offset 0: IReadBlock failed at X offset 4, Y offset 2: TIFFReadEncodedTile() failed.\')")'


</details>

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions