-
Notifications
You must be signed in to change notification settings - Fork 115
Description
Hi,
I am trying to update a remote collection using pystac, pystac_client and requests.
The collection is read and updated using the STAC FastAPI.
- First I use pystac_client to grab the collection
- Then I update the collection with pystac (I just add or replace a single item)
- After that, I update the modified collection using requests
The problem is that the "items" link of the collection is duplicated every time the collection is updated.
After N updates, I got N "items" links in the collection links!
I don't know if it's a bug, a limitation, or a wrong usage of pystac with pystac_client.
I have added below a minimal example to reproduce the thing.
Code snippet to reproduce the error
import datetime
import pystac
from pystac_client import Client, exceptions
import requests
from urllib.parse import urljoin
def post_or_put(url: str, data: dict):
    """Send ``data`` to ``url`` with POST, retrying as a PUT on a 409 reply.

    Args:
        url: Endpoint that receives the POST request.
        data: JSON-serialisable payload. Its ``"type"`` entry is read when the
            POST is answered with 409, and its ``"id"`` entry is read as well
            unless the type is ``"Collection"``.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` when the POST fails with
            a status other than 409, or when the follow-up PUT fails with a
            status other than 404.
    """
    response = requests.post(url, json=data)
    if response.status_code != 409:
        response.raise_for_status()
        return

    # 409 means the object already exists, so update it instead.
    # A collection is PUT to the same URL; anything else is PUT to "<url>/<id>".
    if data["type"] == "Collection":
        target_url = url
    else:
        target_url = url + f"/{data['id']}"
    response = requests.put(target_url, json=data)

    # An unchanged object may come back as a 404, which is deliberately ignored.
    if response.status_code != 404:
        response.raise_for_status()
# Build the STAC item that is added to the collection on every run.
new_item = pystac.Item(
    id="my_item",
    bbox=[0.28, 43.20, 1.03, 43.76],
    geometry={
        "type": "Polygon",
        "coordinates": [
            [
                [0.28, 43.74],
                [1.01, 43.76],
                [1.03, 43.21],
                [0.30, 43.20],
                [0.28, 43.74],
            ]
        ],
    },
    datetime=datetime.datetime(2022, 1, 1),
    properties={
        "platform": "something",
        "instruments": ["something"],
        "datetime": "2022-01-01T00:00:00Z",
    },
)
new_item.validate()

collection_id = "my_collection"
stacapi_url = "http://some-stac-fastapi.org"
api = Client.open(stacapi_url)

# An APIError raised by the lookup is treated as "no such collection yet".
try:
    existing_collection = api.get_collection(collection_id)
except exceptions.APIError:
    existing_collection = None

if existing_collection:
    print("Collection already exist")
    collection = existing_collection
else:
    print("Collection does not exist")
    # A fresh collection takes its extent from the single new item.
    extent = pystac.Extent(
        pystac.SpatialExtent(bboxes=[new_item.bbox]),
        pystac.TemporalExtent(intervals=[(new_item.datetime, new_item.datetime)]),
    )
    collection = pystac.Collection(
        id=collection_id,
        description="some description",
        extent=extent,
        title="my collection",
        providers=[pystac.Provider("Some provider")],
    )

collection.add_item(new_item)
collection.normalize_hrefs(stacapi_url)
collection.make_all_asset_hrefs_relative()
collection.validate()

# Create or update the collection itself.
collections_url = urljoin(stacapi_url, "/collections")
post_or_put(collections_url, collection.to_dict())

# Send the new item once for every link of the collection whose rel is "item".
items_url = urljoin(stacapi_url, f"collections/{collection_id}/items")
for link in collection.links:
    if link.rel != "item":
        continue
    post_or_put(items_url, new_item.to_dict())

# First run
Output:
Collection does not exist
Resulting collection:
{
"id": "my_collection",
"type": "Collection",
"links": [
{
"rel": "items",
"type": "application/geo+json",
"href": "http://some-stac-fastapi.org/collections/my_collection/items"
},
{
"rel": "parent",
"type": "application/json",
"href": "http://some-stac-fastapi.org/"
},
{
"rel": "root",
"type": "application/json",
"href": "http://some-stac-fastapi.org/"
},
{
"rel": "self",
"type": "application/json",
"href": "http://some-stac-fastapi.org/collections/my_collection"
}
],
"title": "my collection",
"extent": {
"spatial": {
"bbox": [
[
0.28,
43.2,
1.03,
43.76
]
]
},
"temporal": {
"interval": [
[
"2022-01-01T00:00:00Z",
"2022-01-01T00:00:00Z"
]
]
}
},
"license": "proprietary",
"providers": [
{
"name": "Some provider"
}
],
"description": "some description",
"stac_version": "1.0.0",
"stac_extensions": []
}
Nothing really exciting here. The collection is created.
Second run
Output:
Collection already exist
Resulting collection:
{
"id": "my_collection",
"type": "Collection",
"links": [
{
"rel": "items",
"type": "application/geo+json",
"href": "http://some-stac-fastapi.org/collections/my_collection/items"
},
{
"rel": "parent",
"type": "application/json",
"href": "http://some-stac-fastapi.org/"
},
{
"rel": "root",
"type": "application/json",
"href": "http://some-stac-fastapi.org/"
},
{
"rel": "self",
"type": "application/json",
"href": "http://some-stac-fastapi.org/collections/my_collection"
},
{
"rel": "items",
"href": "http://some-stac-fastapi.org/collections/my_collection/items",
"type": "application/geo+json"
}
],
"title": "my collection",
"extent": {
"spatial": {
"bbox": [
[
0.28,
43.2,
1.03,
43.76
]
]
},
"temporal": {
"interval": [
[
"2022-01-01T00:00:00Z",
"2022-01-01T00:00:00Z"
]
]
}
},
"license": "proprietary",
"providers": [
{
"name": "Some provider"
}
],
"description": "some description",
"stac_version": "1.0.0",
"stac_extensions": []
}
Here, you can notice that the "items" link is duplicated!
Third run
Collection already exist
Resulting collection:
{
"id": "my_collection",
"type": "Collection",
"links": [
{
"rel": "items",
"type": "application/geo+json",
"href": "http://some-stac-fastapi.org/collections/my_collection/items"
},
{
"rel": "parent",
"type": "application/json",
"href": "http://some-stac-fastapi.org/"
},
{
"rel": "root",
"type": "application/json",
"href": "http://some-stac-fastapi.org/"
},
{
"rel": "self",
"type": "application/json",
"href": "http://some-stac-fastapi.org/collections/my_collection"
},
{
"rel": "items",
"href": "http://some-stac-fastapi.org/collections/my_collection/items",
"type": "application/geo+json"
},
{
"rel": "items",
"href": "http://some-stac-fastapi.org/collections/my_collection/items",
"type": "application/geo+json"
}
],
"title": "my collection",
"extent": {
"spatial": {
"bbox": [
[
0.28,
43.2,
1.03,
43.76
]
]
},
"temporal": {
"interval": [
[
"2022-01-01T00:00:00Z",
"2022-01-01T00:00:00Z"
]
]
}
},
"license": "proprietary",
"providers": [
{
"name": "Some provider"
}
],
"description": "some description",
"stac_version": "1.0.0",
"stac_extensions": []
}
Here the "items" link has been duplicated once more. There are now 3 "items" entries.
Is this behavior expected?
If yes, how should I avoid the duplicated links?
Thanks
Rémi