From 4ca3ede15e878a7ca85489ce5ea7addb8e6e0fe0 Mon Sep 17 00:00:00 2001 From: Kendall Smith Date: Sat, 26 Mar 2022 10:01:18 -0700 Subject: [PATCH 1/2] notebook ready for review --- tutorials/radiant-mlhub-publish-dataset.ipynb | 1076 +++++++++++++++-- 1 file changed, 958 insertions(+), 118 deletions(-) diff --git a/tutorials/radiant-mlhub-publish-dataset.ipynb b/tutorials/radiant-mlhub-publish-dataset.ipynb index c3661dd1..abc32831 100644 --- a/tutorials/radiant-mlhub-publish-dataset.ipynb +++ b/tutorials/radiant-mlhub-publish-dataset.ipynb @@ -15,9 +15,9 @@ "id": "4c593de1-8451-4bfd-b004-a0fc6de1aa27", "metadata": {}, "source": [ - "In this tutorial, we will walk through the process of creating STAC Collections for the labels and source imagery in an example machine learning (ML) training dataset. We will then describe the process for submitting this dataset to [Radiant MLHub](https://mlhub.earth/) for publication.\n", + "In this tutorial, we will walk through the process of creating a self-contained STAC Catalog and its child Collections for the labels and source imagery in an example machine learning (ML) training dataset. We will then describe the process for getting the dataset ready for submission to [Radiant MLHub](https://mlhub.earth/) for manual review and publication.\n", "\n", - "For this example, we will use the sample training dataset from the [SpaceNet 7: Multi-Temporal Urban Development Challenge](https://spacenet.ai/sn7-challenge/)." + "For this example, we will use the sample training dataset from the [SpaceNet 6: Multi-Sensor All-Weather Mapping](https://spacenet.ai/sn6-challenge/)." 
] }, { @@ -46,21 +46,29 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: rio-stac==0.3.2 in /srv/conda/envs/notebook/lib/python3.8/site-packages (0.3.2)\n", - "Requirement already satisfied: pystac<2.0.0,>=1.0.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.0)\n", + "Collecting rio-stac==0.3.2\n", + " Downloading rio-stac-0.3.2.tar.gz (8.1 kB)\n", "Requirement already satisfied: rasterio in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.10)\n", + "Requirement already satisfied: pystac<2.0.0,>=1.0.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.0)\n", "Requirement already satisfied: python-dateutil>=2.7.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from pystac<2.0.0,>=1.0.0->rio-stac==0.3.2) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from python-dateutil>=2.7.0->pystac<2.0.0,>=1.0.0->rio-stac==0.3.2) (1.16.0)\n", - "Requirement already satisfied: snuggs>=1.4.1 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.4.7)\n", - "Requirement already satisfied: cligj>=0.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (0.7.2)\n", "Requirement already satisfied: setuptools in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (59.8.0)\n", - "Requirement already satisfied: attrs in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (21.4.0)\n", + "Requirement already satisfied: numpy in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.22.3)\n", + "Requirement already satisfied: cligj>=0.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (0.7.2)\n", "Requirement already satisfied: affine in 
/srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (2.3.0)\n", - "Requirement already satisfied: click-plugins in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.1.1)\n", + "Requirement already satisfied: attrs in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (21.4.0)\n", + "Requirement already satisfied: click>=4.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (7.1.2)\n", "Requirement already satisfied: certifi in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (2021.10.8)\n", - "Requirement already satisfied: numpy in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.20.0)\n", - "Requirement already satisfied: click>=4.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (8.0.3)\n", - "Requirement already satisfied: pyparsing>=2.1.6 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from snuggs>=1.4.1->rasterio->rio-stac==0.3.2) (3.0.7)\n" + "Requirement already satisfied: snuggs>=1.4.1 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.4.7)\n", + "Requirement already satisfied: click-plugins in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.1.1)\n", + "Requirement already satisfied: pyparsing>=2.1.6 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from snuggs>=1.4.1->rasterio->rio-stac==0.3.2) (3.0.7)\n", + "Building wheels for collected packages: rio-stac\n", + " Building wheel for rio-stac (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for rio-stac: filename=rio_stac-0.3.2-py3-none-any.whl size=8279 sha256=13e01dd9e6dcd02c086ddfc260de260387dea212829ab082de08519d764b8999\n", + " Stored in directory: /home/jovyan/.cache/pip/wheels/42/12/1a/677dda98b5bb48936e8636e4e71ddc6ed65ee7f3a849ca2c77\n", + "Successfully built rio-stac\n", + "Installing collected packages: rio-stac\n", + "Successfully installed rio-stac-0.3.2\n" ] } ], @@ -70,25 +78,39 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 2, "id": "52420a33-ae4e-4b16-b893-f42dd67909fb", "metadata": {}, "outputs": [], "source": [ "import enum\n", "import os\n", + "import tarfile\n", + "import shutil\n", "import pathlib\n", "import re\n", "import shutil\n", "import tarfile\n", "import tempfile\n", "import urllib.parse\n", + "from dateutil.parser import parse\n", + "import datetime as dt\n", + "from typing import List, Dict, Tuple\n", "\n", "import pystac\n", "import rasterio\n", "from pystac.utils import str_to_datetime\n", "from pystac.extensions.eo import Band, EOExtension\n", - "from rio_stac.stac import create_stac_item" + "from pystac.extensions.label import LabelExtension\n", + "from rio_stac.stac import create_stac_item\n", + "import geopandas as gpd\n", + "from pystac import Catalog, Collection, Item, MediaType, \\\n", + " Asset, Link, Extent, SpatialExtent, TemporalExtent, CatalogType\n", + "from pystac.extensions.scientific import ScientificExtension\n", + "from shapely.geometry import GeometryCollection, Polygon, mapping, shape\n", + "\n", + "from pprint import PrettyPrinter\n", + "pp = PrettyPrinter(indent=2)" ] }, { @@ -110,6 +132,33 @@ { "cell_type": "code", "execution_count": 3, + "id": "022cbe6f-1d8e-4a61-8615-0736926f4a27", + "metadata": {}, + "outputs": [], + "source": [ + "# Get the TMP directory for this system\n", + "tmp_dir = pathlib.Path(tempfile.gettempdir())\n", + "\n", + "tar_url = 
\"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/tarballs/SN6_buildings_AOI_11_Rotterdam_train_sample.tar.gz\"\n", + "tar_root = \"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/\"\n", + "# tar_path = tmp_dir / \"sample_data.tar.gz\"\n", + "# data_dir = tmp_dir / \"sample_data\"\n", + "tar_path = tmp_dir / \"SN6_buildings_AOI_11_Rotterdam_train_sample.tar.gz\"\n", + "untar_path = tmp_dir / \"SN6_buildings_AOI_11_Rotterdam_train_sample\"\n", + "data_dir = tmp_dir / \"spacenet_6_rotterdam\"" + ] + }, + { + "cell_type": "markdown", + "id": "3e66fd96-889e-4bc5-8154-2e25dcc7f022", + "metadata": {}, + "source": [ + "If the archive `SN6_buildings_AOI_11_Rotterdam_train_sample.tar.gz` does not already exists in our temporary directory, then we will download it using `curl` command." + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "a6be708a-b187-48c9-8d5f-27a896ed10a0", "metadata": {}, "outputs": [ @@ -117,30 +166,57 @@ "name": "stdout", "output_type": "stream", "text": [ - "File /tmp/sample_data.tar.gz already exists, skipping download\n", - "Data already extracted from archive; skipping extract.\n" + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 123M 100 123M 0 0 17.2M 0 0:00:07 0:00:07 --:--:-- 21.9M\n" ] } ], "source": [ - "# Get the TMP directory for this system\n", - "tmp_dir = pathlib.Path(tempfile.gettempdir())\n", - "\n", - "tar_url = \"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/tarballs/SN6_buildings_AOI_11_Rotterdam_train_sample.tar.gz\"\n", - "tar_path = tmp_dir / \"sample_data.tar.gz\"\n", - "data_dir = tmp_dir / \"sample_data\"\n", - "\n", "if tar_path.exists():\n", " print(f\"File {tar_path} already exists, skipping download\")\n", "else:\n", - " !curl {tar_url} -o {tar_path}\n", - " \n", - "if data_dir.exists():\n", + " !curl {tar_url} -o {tar_path}" + ] + }, + { + "cell_type": "markdown", + "id": 
"22078ea3-197b-4bda-b7bb-6ce8f6710205", + "metadata": {}, + "source": [ + "Then to make the directory names more meaningful, we will rename the directory to `spacenet_6_rotterdam`, which later matches the name of the catalog." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d76015d1-794c-412c-9352-6787be3a35f4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracted data to /tmp/SN6_buildings_AOI_11_Rotterdam_train_sample\n", + "Renamed folder to /tmp/spacenet_6_rotterdam\n" + ] + } + ], + "source": [ + "if untar_path.exists():\n", " print(f\"Data already extracted from archive; skipping extract.\")\n", "else:\n", - " os.makedirs(data_dir, exist_ok=True)\n", - " !tar -xzf {tar_path} -C {tmp_dir} --transform s/SN6_buildings_AOI_11_Rotterdam_train_sample/{data_dir.name}/\n", - " print(f\"Extracted data to {data_dir}\")" + " os.makedirs(untar_path)\n", + " !tar -zxf {tar_path} -C {tmp_dir}\n", + " \n", + " if os.path.exists(untar_path):\n", + " print(f\"Extracted data to {untar_path}\")\n", + " \n", + " os.makedirs(data_dir, exist_ok=True)\n", + " !mv {untar_path}/* {data_dir}\n", + " print(f\"Renamed folder to {data_dir}\")\n", + "\n", + " !rm -rf {untar_path}" ] }, { @@ -153,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "273826a9-e03a-4124-8b9f-8ceeb61fde51", "metadata": {}, "outputs": [ @@ -161,11 +237,42 @@ "name": "stdout", "output_type": "stream", "text": [ - "/tmp/sample_data\n", - "/tmp/sample_data/AOI_11_Rotterdam\n", - "/tmp/sample_data/AOI_11_Rotterdam/SummaryData\n", - "\tSN6_TrainSample_AOI_11_Rotterdam_Buildings.csv\n", - "/tmp/sample_data/AOI_11_Rotterdam/geojson_buildings\n", + "/tmp/spacenet_6_rotterdam\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PS-RGB\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif\n", + 
"\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_69.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823145306_20190823145612_tile_7218.tif\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PAN\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_69.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823145306_20190823145612_tile_7218.tif\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/SAR-Intensity\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_69.tif\n", + 
"\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823145306_20190823145612_tile_7218.tif\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/geojson_buildings\n", "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\n", "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_69.geojson\n", "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804133131_20190804133356_tile_783.geojson\n", @@ -176,18 +283,7 @@ "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823091132_20190823091448_tile_7924.geojson\n", "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823123151_20190823123459_tile_2317.geojson\n", "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823145306_20190823145612_tile_7218.geojson\n", - "/tmp/sample_data/AOI_11_Rotterdam/RGBNIR\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822091156_20190822091502_tile_108.tif\n", - 
"\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/sample_data/AOI_11_Rotterdam/PS-RGBNIR\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PS-RGBNIR\n", "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif\n", "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_69.tif\n", "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804133131_20190804133356_tile_783.tif\n", @@ -198,39 +294,19 @@ "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/sample_data/AOI_11_Rotterdam/SAR-Intensity\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823145306_20190823145612_tile_7218.tif\n", - 
"/tmp/sample_data/AOI_11_Rotterdam/PAN\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/sample_data/AOI_11_Rotterdam/PS-RGB\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823145306_20190823145612_tile_7218.tif\n" + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/SummaryData\n", + "\tSN6_TrainSample_AOI_11_Rotterdam_Buildings.csv\n", + "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/RGBNIR\n", + 
"\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_69.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804133131_20190804133356_tile_783.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822075219_20190822075510_tile_8137.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822082538_20190822082826_tile_4164.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822091156_20190822091502_tile_108.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823082625_20190823082938_tile_442.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", + "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823145306_20190823145612_tile_7218.tif\n" ] } ], @@ -255,22 +331,23 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 19, "id": "a5e16d25-ddf3-45ae-bd49-91888ab5a89c", "metadata": {}, "outputs": [], "source": [ "aoi_name = \"AOI_11_Rotterdam\"\n", "aoi_dir = data_dir / aoi_name\n", + "os.chdir(data_dir)\n", "\n", "labels_pattern = re.compile(\n", " r\"^(?PSN6_Train_AOI_11_Rotterdam)\"\n", " \"_Buildings_\"\n", - " \"(?P20190804120223)\"\n", + " \"(?P\\d{14})\"\n", " \"_\"\n", - " \"(?P20190804120456)\"\n", + " \"(?P\\d{14})\"\n", " \"_tile_\"\n", - " \"(?P55)\"\n", + " \"(?P\\d+)\"\n", " \"\\.geojson$\"\n", ")\n", "\n", @@ -283,10 +360,8 @@ " PAN = \"PAN\"\n", " PS_RGB = \"PS-RGB\"\n", "\n", - "def get_source_info(label_path):\n", - " \"\"\"Gets a list of paths (as pathlib.Path instances) to source data associated with\n", - " the given label file path.\n", - " \"\"\"\n", + "def strip_meta_matches(label_path: str) -> Tuple[any]:\n", + " \"\"\"Uses Regex pattern above to strip out relevant metadata about the file\"\"\"\n", " label_path = os.fspath(label_path)\n", " label_path = pathlib.Path(label_path)\n", " match = 
labels_pattern.match(label_path.name)\n", @@ -298,11 +373,19 @@ " end_datetime = match.group(\"end_datetime\")\n", " tile = match.group(\"tile\")\n", " \n", + " return prefix, start_datetime, end_datetime, tile\n", + " \n", + "def get_source_info(label_path: str) -> List[Dict[str, any]]:\n", + " \"\"\"Gets a list of paths (as pathlib.Path instances) to source data associated with\n", + " the given label file path.\n", + " \"\"\"\n", + " \n", + " prefix, start_datetime, end_datetime, tile = strip_meta_matches(label_path)\n", + "\n", " return [\n", " {\n", - " # We use the path on S3 instead of the local path here\n", - " \"href\": f\"https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/\" \\\n", - " f\"{aoi_name}/{source_type.value}/\" \\\n", + " # We will use relative paths here when archiving the entire catalog with the dataset\n", + " \"href\": f\"{aoi_name}/{source_type.value}/\" \\\n", " f\"{prefix}_{source_type.value}_{start_datetime}_{end_datetime}_tile_{tile}.tif\",\n", " \"type\": source_type.value,\n", " \"start_datetime\": start_datetime,\n", @@ -311,7 +394,19 @@ " for source_type in SourceType\n", " ]\n", " \n", - " " + "def get_label_info(label_path: str) -> List[Dict[str, any]]:\n", + " \"\"\"Gets the single path and metadata attributes from the given label path\n", + " \"\"\"\n", + " \n", + " prefix, start_datetime, end_datetime, tile = strip_meta_matches(label_path)\n", + " \n", + " return {\n", + " \"href\": f\"{aoi_name}/geojson_buildings/\" \\\n", + " f\"{prefix}_Buildings_{start_datetime}_{end_datetime}_tile_{tile}.geojson\",\n", + " \"type\": \"Buildings\",\n", + " \"start_datetime\": start_datetime,\n", + " \"end_datetime\": end_datetime\n", + " }" ] }, { @@ -319,49 +414,74 @@ "id": "c453e704-34cf-4d7d-8de1-044c3dbf83ea", "metadata": {}, "source": [ - "For example..." + "For example, we can see what information our regex pattern above can learn about the geojson label filename..." 
] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 20, "id": "dcb37528-fc62-4eeb-904d-704ec85b9695", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/RGBNIR/SN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", + "[{'href': 'AOI_11_Rotterdam/RGBNIR/SN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", " 'type': 'RGBNIR',\n", " 'start_datetime': '20190804120223',\n", " 'end_datetime': '20190804120456'},\n", - " {'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/PS-RGBNIR/SN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", + " {'href': 'AOI_11_Rotterdam/PS-RGBNIR/SN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", " 'type': 'PS-RGBNIR',\n", " 'start_datetime': '20190804120223',\n", " 'end_datetime': '20190804120456'},\n", - " {'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif',\n", + " {'href': 'AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif',\n", " 'type': 'SAR-Intensity',\n", " 'start_datetime': '20190804120223',\n", " 'end_datetime': '20190804120456'},\n", - " {'href': 'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/PAN/SN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif',\n", + " {'href': 'AOI_11_Rotterdam/PAN/SN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif',\n", " 'type': 'PAN',\n", " 'start_datetime': '20190804120223',\n", " 'end_datetime': '20190804120456'},\n", - " {'href': 
'https://s3.amazonaws.com/spacenet-dataset/spacenet/SN6_buildings/train/AOI_11_Rotterdam/PS-RGB/SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif',\n", + " {'href': 'AOI_11_Rotterdam/PS-RGB/SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif',\n", " 'type': 'PS-RGB',\n", " 'start_datetime': '20190804120223',\n", " 'end_datetime': '20190804120456'}]" ] }, - "execution_count": 22, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "label = \"SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\"\n", - "sources = get_source_info(label)\n", - "sources" + "source_info = get_source_info(label)\n", + "source_info" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "07aa9f59-7a7c-41d4-a5ce-49c7e2342d50", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'href': 'AOI_11_Rotterdam/geojson_buildings/SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson',\n", + " 'type': 'Buildings',\n", + " 'start_datetime': '20190804120223',\n", + " 'end_datetime': '20190804120456'}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "label_info = get_label_info(label)\n", + "label_info" ] }, { @@ -369,7 +489,7 @@ "id": "c7dda111-e0b3-42ff-bdbc-f9ad49f50e15", "metadata": {}, "source": [ - "### Catalog Source Imagery" + "### Create Catalog Source Items" ] }, { @@ -377,24 +497,45 @@ "id": "5a0c4fb6-7005-4189-afe8-75781461f7c9", "metadata": {}, "source": [ - "Since each of the sources for a given label cover the same spatial and temporal extents, we can combine them into a single STAC Item, with each source represented as a distinct Asset. 
We will create the helper functions " + "Since each of the sources for a given label cover the same spatial and temporal extents, we can combine them into a single STAC Item, with each source represented as a distinct Asset. We will create the helper functions that allow us to easily create a STAC Item from just the label filename based on the source imagery in our dataset directory." ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 22, + "id": "eff8b798-25ca-4a9f-abf8-c8e59f962f2f", + "metadata": {}, + "outputs": [], + "source": [ + "def get_item_id(source_href: str, source_type: str, item_type: str) -> str:\n", + " \"\"\"Helper function to return the appropriate Item ID\"\"\"\n", + " return source_href.split('/')[-1].replace(f'_{source_type}','').replace('.tif',f'_{item_type}').replace('.geojson',f'_{item_type}')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, "id": "6782d5c1-100b-4f56-960a-844fcbdb0d09", "metadata": {}, "outputs": [], "source": [ - "def create_source_item(label_path):\n", + "def create_source_item(label_path: str) -> Item:\n", + " \"\"\"Helper function that leverages rio-stac to create a STAC item\n", + " from a source image Asset, and adds the rest of the images as Assets\n", + " \"\"\"\n", " sources = get_source_info(label_path)\n", " \n", + " # we need the first source object\n", " first_source = sources[0]\n", " \n", + " # rio-stac by default provides the filepath, so we override the item id\n", + " item_id = get_item_id(first_source[\"href\"], first_source[\"type\"], \"source\")\n", + " \n", " # Bootstrap the source item using rio-stac based on the first asset\n", " with rasterio.open(sources[0][\"href\"]) as src:\n", + " \n", " item = create_stac_item(\n", + " id=item_id,\n", " source=src,\n", " asset_name=first_source[\"type\"],\n", " asset_roles=[\"data\"],\n", @@ -405,7 +546,7 @@ " \n", " # rio-stac does not add the Asset \"type\" or \"title\" fields, so we add them manually\n", " # (all assets are 
Cloud-Optimized GeoTIFFs)\n", - " item.assets[first_source[\"type\"]].type = pystac.MediaType.COG\n", + " item.assets[first_source[\"type\"]].type = MediaType.COG\n", " item.assets[first_source[\"type\"]].title = first_source[\"type\"]\n", " \n", " # Since the spatiotemporal metadata is the same for all assets, we do not need to read \n", @@ -414,7 +555,7 @@ " asset = pystac.Asset.from_dict({\n", " \"href\": source[\"href\"],\n", " \"roles\": [\"data\"],\n", - " \"type\": str(pystac.MediaType.COG),\n", + " \"type\": str(MediaType.COG),\n", " \"title\": source[\"type\"]\n", " })\n", " item.add_asset(source[\"type\"], asset)\n", @@ -422,45 +563,744 @@ " return item" ] }, + { + "cell_type": "markdown", + "id": "956f9cd3-8e3c-4850-94a6-1d9459ce7c40", + "metadata": {}, + "source": [ + "We can examine the output of our helper function `create_source_item` above to see that it has populated the required attributes for a generic source item. However, per the [STAC Item Specification](https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md), it is recommended to add more properties to the Item and its Assets, such as the [EOExtension](https://github.com/stac-extensions/eo) for electro-optical bands, e.g. RGB. For now we will stick with the core required properties for a source item." 
+ ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "e769016b-cf1c-4dca-ab42-2e3588f8e668", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{ 'assets': { 'PAN': { 'href': 'AOI_11_Rotterdam/PAN/SN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif',\n", + " 'roles': ['data'],\n", + " 'title': 'PAN',\n", + " 'type': 'image/tiff; application=geotiff; '\n", + " 'profile=cloud-optimized'},\n", + " 'PS-RGB': { 'href': 'AOI_11_Rotterdam/PS-RGB/SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif',\n", + " 'roles': ['data'],\n", + " 'title': 'PS-RGB',\n", + " 'type': 'image/tiff; application=geotiff; '\n", + " 'profile=cloud-optimized'},\n", + " 'PS-RGBNIR': { 'href': 'AOI_11_Rotterdam/PS-RGBNIR/SN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", + " 'roles': ['data'],\n", + " 'title': 'PS-RGBNIR',\n", + " 'type': 'image/tiff; application=geotiff; '\n", + " 'profile=cloud-optimized'},\n", + " 'RGBNIR': { 'href': 'AOI_11_Rotterdam/RGBNIR/SN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", + " 'roles': ['data'],\n", + " 'title': 'RGBNIR'},\n", + " 'SAR-Intensity': { 'href': 'AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif',\n", + " 'roles': ['data'],\n", + " 'title': 'SAR-Intensity',\n", + " 'type': 'image/tiff; application=geotiff; '\n", + " 'profile=cloud-optimized'}},\n", + " 'bbox': [ 4.350277623237341,\n", + " 51.90744725678807,\n", + " 4.356939496573024,\n", + " 51.9115675425874],\n", + " 'geometry': { 'coordinates': [ [ [4.350277623237341, 51.9115675425874],\n", + " [4.350277623237341, 51.90744725678807],\n", + " [4.356939496573024, 51.90744725678807],\n", + " [4.356939496573024, 51.9115675425874],\n", + " [4.350277623237341, 51.9115675425874]]],\n", + " 'type': 'Polygon'},\n", + " 
'id': 'SN6_Train_AOI_11_Rotterdam_20190804120223_20190804120456_tile_55_source',\n", + " 'links': [],\n", + " 'properties': { 'datetime': '2019-08-04T12:02:23Z',\n", + " 'proj:bbox': [ 592886.1399464327,\n", + " 5751614.151231687,\n", + " 593336.1616884505,\n", + " 5752064.1729737045],\n", + " 'proj:epsg': 32631,\n", + " 'proj:geometry': { 'coordinates': [ [ [ 592886.1399464327,\n", + " 5752064.1729737045],\n", + " [ 592886.1399464327,\n", + " 5751614.151231687],\n", + " [ 593336.1616884505,\n", + " 5751614.151231687],\n", + " [ 593336.1616884505,\n", + " 5752064.1729737045],\n", + " [ 592886.1399464327,\n", + " 5752064.1729737045]]],\n", + " 'type': 'Polygon'},\n", + " 'proj:shape': [450, 450],\n", + " 'proj:transform': [ 1.0000483155950517,\n", + " 0.0,\n", + " 592886.1399464327,\n", + " 0.0,\n", + " -1.0000483155950517,\n", + " 5752064.1729737045,\n", + " 0.0,\n", + " 0.0,\n", + " 1.0]},\n", + " 'stac_extensions': [ 'https://stac-extensions.github.io/projection/v1.0.0/schema.json'],\n", + " 'stac_version': '1.0.0',\n", + " 'type': 'Feature'}\n" + ] + } + ], + "source": [ + "source_item = create_source_item(label)\n", + "pp.pprint(source_item.to_dict())" + ] }, { "cell_type": "markdown", "id": "66a48b4f-9772-46b9-a96f-ec7591c6b3c8", "metadata": {}, "source": [ - "### Catalog Labels" + "### Create Catalog Label Items" ] }, { "cell_type": "markdown", - "id": "b02acefe-ee14-4dfb-a826-b2af28a46594", + "id": "61847d62-9ee6-4ce6-85c7-16baaa285140", "metadata": {}, "source": [ - "### Submit to Radiant MLHub" + "Similar to the helper functions created above, we need some functions to more easily create a label STAC Item for the catalog." 
] }, { "cell_type": "code", - "execution_count": null, - "id": "85307e44-2bd6-4463-903c-9fea2df3ed48", + "execution_count": 26, + "id": "29098a46-f86a-4439-9538-c351b011ab95", "metadata": {}, "outputs": [], "source": [ - "tar_path.unlink(missing_ok=True)\n", - "shutil.rmtree(data_dir, ignore_errors=True)" + "def get_item_datetime(dt_str: str) -> dt.datetime:\n", + " \"\"\"Returns an items datetime based on ID string pattern\"\"\"\n", + " return dt.datetime.strptime(str(dt_str), '%Y%m%d%H%M%S') #20190804120223" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "d28005c3-feaf-4932-a376-bf3c85cae173", + "metadata": {}, + "outputs": [], + "source": [ + "def get_geojson_extent(fname: str) -> Polygon:\n", + " \"\"\"Takes a path to GeoJSON vector file and returns the Polygon geometry for an Item reprojected\"\"\"\n", + " \n", + " gdf = gpd.read_file(fname)\n", + " gdf = gdf.to_crs(\"EPSG:4326\")\n", + " bounds = gdf.total_bounds\n", + " geometry = Polygon(\n", + " (\n", + " (bounds[0], bounds[1]),\n", + " (bounds[0], bounds[3]),\n", + " (bounds[2], bounds[3]),\n", + " (bounds[2], bounds[1]),\n", + " (bounds[0], bounds[1])\n", + " )\n", + " )\n", + " return geometry" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "fc37d0b5-5e2a-47b5-9837-78aa5f30a9c0", + "metadata": {}, + "outputs": [], + "source": [ + "def add_label_extension(label: Item, label_meta: Dict[str, any]) -> Item:\n", + " \"\"\"This applies the STAC LabelExtension to the label item and related properties\"\"\"\n", + " # apply the Label Extention\n", + " label_ext = LabelExtension.ext(\n", + " label, \n", + " add_if_missing = True\n", + " )\n", + "\n", + " label_ext.apply(\n", + " label_description = \"SpaceNet 6 Building Footprints\",\n", + " label_type = 'vector'\n", + " )\n", + "\n", + " # instantiate GeoJSON Asset\n", + " asset=Asset(\n", + " href = label_meta[\"href\"],\n", + " media_type = MediaType.GEOJSON,\n", + " )\n", + "\n", + " # add GeoTiff Asset to item\n", 
+ " label.add_asset(\n", + " key = 'buildings',\n", + " asset = asset\n", + " )\n", + " \n", + " return label" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "35057b0c-107f-43db-ba38-07d9e0a848e7", + "metadata": {}, + "outputs": [], + "source": [ + "def create_label_item(label_path: str) -> Item:\n", + " \"\"\"Helper function that creates a STAC label item\n", + " from a geojson label path and adds it as the Asset\n", + " \"\"\"\n", + " label_meta = get_label_info(label_path)\n", + " \n", + " # rio-stac by default provides the filepath, so we override the item id\n", + " item_id = get_item_id(label_meta[\"href\"], label_meta[\"type\"], \"labels\").replace('_' + label_meta['type'],'')\n", + " item_geometry = get_geojson_extent(label_meta[\"href\"])\n", + " \n", + " return add_label_extension(\n", + " Item(\n", + " id=item_id,\n", + " datetime = get_item_datetime(label_meta['start_datetime']),\n", + " geometry = mapping(item_geometry),\n", + " bbox = item_geometry.bounds,\n", + " properties = {}\n", + " ), \n", + " label_meta,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "6fe27c8b-d0c6-4d8f-8d02-9a45e2738f26", + "metadata": {}, + "outputs": [], + "source": [ + "def add_label_source_link(source: Item, label: Item) -> Item:\n", + " \"\"\"Takes a 1:1 source to label item relationship, and adds the source link to label Item\"\"\"\n", + " \n", + " source_link = Link(\n", + " rel = 'source',\n", + " target = source,\n", + " media_type = MediaType.COG\n", + " )\n", + " label.add_link(source_link)" + ] + }, + { + "cell_type": "markdown", + "id": "dcf45223-af5b-4c27-b730-ac46a8b6382c", + "metadata": {}, + "source": [ + "Now we can examine the label Item output of our function `create_label_item` above after adding the source Item object reference to the Links in the label Item. This is a necessary step so that the label items can point to the appropriate source imagery Items and related Assets in our Catalog. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "e8b2c289-1787-4235-8ca2-b684f118516e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{ 'assets': { 'buildings': { 'href': 'AOI_11_Rotterdam/geojson_buildings/SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson',\n", + " 'type': }},\n", + " 'bbox': ( 4.350277630686814,\n", + " 51.90744911466251,\n", + " 4.356899148164554,\n", + " 51.91047050708562),\n", + " 'geometry': { 'coordinates': ( ( (4.350277630686814, 51.90744911466251),\n", + " (4.350277630686814, 51.91047050708562),\n", + " (4.356899148164554, 51.91047050708562),\n", + " (4.356899148164554, 51.90744911466251),\n", + " (4.350277630686814, 51.90744911466251)),),\n", + " 'type': 'Polygon'},\n", + " 'id': 'SN6_Train_AOI_11_Rotterdam_20190804120223_20190804120456_tile_55_labels',\n", + " 'links': [ { 'href': None,\n", + " 'rel': 'source',\n", + " 'type': }],\n", + " 'properties': { 'datetime': '2019-08-04T12:02:23Z',\n", + " 'label:description': 'SpaceNet 6 Building Footprints',\n", + " 'label:properties': None,\n", + " 'label:type': 'vector'},\n", + " 'stac_extensions': [ 'https://stac-extensions.github.io/label/v1.0.0/schema.json'],\n", + " 'stac_version': '1.0.0',\n", + " 'type': 'Feature'}\n" + ] + } + ], + "source": [ + "label_item = create_label_item(label)\n", + "add_label_source_link(source_item, label_item)\n", + "pp.pprint(label_item.to_dict())" + ] + }, + { + "cell_type": "markdown", + "id": "f09578e1-b674-463d-98fb-eb29129257db", + "metadata": {}, + "source": [ + "Similar to `EOExtention` there are other best practices that can be employed when creating a STAC Item. 
For example, since this is a label Item, we could add `label:overviews`, `label:classes` and `file:values` properties to store more information about the labels that improve indexing on the Catalog:\n", + "\n", + "* `label:overviews` contains the names of the unique classes in the label file and the [Count Objects](https://github.com/stac-extensions/label#count-object) with associated classes\n", + "* `label:classes` is a list of all [Class Objects](https://github.com/stac-extensions/label#class-object) representing possible classes across the labels found in a dataset\n", + "* `file:values` can be used to store the [Mapping Object](https://github.com/stac-extensions/file#mapping-object) between numeric classification values and the descriptive string text equivalent " + ] + }, + { + "cell_type": "markdown", + "id": "67d44b63-2109-48b1-8955-a4088e6dcd29", + "metadata": {}, + "source": [ + "### Define Catalog and Collection metadata properties" + ] + }, + { + "cell_type": "markdown", + "id": "5f67997e-52e0-4c25-a1a8-9385fb717aea", + "metadata": {}, + "source": [ + "Now that we have all the helper functions in place to create both our source and label Items, we need to create the actual Catalog and its children Collections. There will be two Collections in this Catalog, one for the source imagery and one for the labels. The reason for this is that per [STAC Collection Specification](https://github.com/radiantearth/stac-spec/tree/master/collection-spec), we should use Collections so as to make logically related groups of Items and store the metadata that they share. In this example, the first clear delineation between the Collections is one set is raster source images in `.tif` files, while the other set is vector building footprints in `.geojson` files.
The second is that the rasters are the source data while the vectors are the label data.\n", + "\n", + "All of the metadata information defined below, except for the Catalog and Collection names, all came from the [SpaceNet 6 Challenge](https://spacenet.ai/sn6-challenge/) webpage." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "51d22430-5614-4a16-a9a0-34119aceb0a6", + "metadata": {}, + "outputs": [], + "source": [ + "# catalog specific properties\n", + "catalog_id = \"spacenet_6_rotterdam\"\n", + "catalog_title = \"SpaceNet Multi-Sensor All-Weather Mapping Challenge - Rotterdam\"\n", + "catalog_description = \"\"\"\n", + "In this challenge, the training dataset contained both SAR and EO imagery, however, \n", + "the testing and scoring datasets contained only SAR data. Consequently, the EO data \n", + "could be used for pre-processing the SAR data in some fashion, such as colorization, \n", + "domain adaptation, or image translation, but cannot be used to directly map buildings. 
\n", + "The dataset was structured to mimic real-world scenarios where historical EO data \n", + "may be available, but concurrent EO collection with SAR is often not possible due to \n", + "inconsistent orbits of the sensors, or cloud cover that will render the EO data unusable.\n", + "\"\"\"" + ] + }, + { + "cell_type": "markdown", + "id": "a203e004-dfed-416d-ba37-83606f9d4f03", + "metadata": {}, + "source": [ + "We can create a barebones Catalog with the above required properties" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "8c2fb77d-591d-4553-80f6-4acfcc663ba3", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_catalog = Catalog(\n", + " id=catalog_id,\n", + " title=catalog_title,\n", + " description=catalog_description\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "7835c567-5b97-4bc1-8ff2-a85b7e742f2c", + "metadata": {}, + "outputs": [], + "source": [ + "# collection specific properties\n", + "source_collection_id = \"spacenet_6_rotterdam_source\"\n", + "source_collection_title = \"SpaceNet 6 Rotterdam Source Imagery\"\n", + "\n", + "labels_collection_id = \"spacenet_6_rotterdam_labels\"\n", + "labels_collection_title = \"SpaceNet 6 Rotterdam Labels\"\n", + "\n", + "citation = \"Shermeyer, J., Hogan, D., Brown, J., Etten, A.V., Weir, N., Pacifici, F., Hänsch, R., Bastidas, A., Soenen, S., Bacastow, T.M., & Lewis, R. (2020). SpaceNet 6: Multi-Sensor All Weather Mapping Dataset. 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), 768-777.\"\n", + "license = \"CC-BY-SA-4.0\"" + ] + }, + { + "cell_type": "markdown", + "id": "1c2eaf79-ccb0-45a4-bef7-c81878272d26", + "metadata": {}, + "source": [ + "Here we will define another helper function that loads a default spatial and temporal extent to each Collection as they're being created, as this is a required attribute. 
That can be manually defined if known up front, or it can be implicitly learned from the spatial and temporal attributes of the Items in each Collection using the `Collection.update_extent_from_items` function, as seen below." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "3ed27d56-3bf0-41cf-9368-aebd2c474258", + "metadata": {}, + "outputs": [], + "source": [ + "def get_default_extent():\n", + " \"\"\"Returns a default spatial and temporal Extent STAC object\"\"\"\n", + " # default spatial extent is the entire globe\n", + " default_spatial_extent = SpatialExtent([[-180, -90, 180, 90]])\n", + " \n", + " # default temporal extent is the current date\n", + " right_now = dt.datetime.now().strftime('%Y-%m-%d')\n", + " default_temporal_extent = TemporalExtent([[]])\n", + " \n", + " return Extent(default_spatial_extent, default_temporal_extent)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "e3b0f434-d132-4fd0-9fe1-6e67361b5eb0", + "metadata": {}, + "outputs": [], + "source": [ + "def create_collection(id, description, license, citation):\n", + " \"\"\"Creates a skeleton Collection with required properties\"\"\"\n", + " collection = Collection(\n", + " id=id,\n", + " license=license,\n", + " extent=get_default_extent(),\n", + " description=description\n", + " )\n", + " \n", + " sci_ext = ScientificExtension.ext(collection, add_if_missing=True)\n", + " sci_ext.apply(citation=citation)\n", + " \n", + " return collection" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "3b9d62ea-4070-48e3-9928-f37eb99c45a1", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_source_collection = create_collection(\n", + " source_collection_id, \n", + " source_collection_title,\n", + " license,\n", + " citation\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "ac1d0c86-1197-47a0-ad2d-e60bf158c122", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_labels_collection = 
create_collection(\n", + " labels_collection_id, \n", + " labels_collection_title,\n", + " license,\n", + " citation\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "19e3f7f4-8d53-4098-8e9e-ed0a954ac0a0", + "metadata": {}, + "source": [ + "### Iteratively add items to Source and Label Collections" + ] + }, + { + "cell_type": "markdown", + "id": "165d88e4-352e-4cc2-8305-5342e287703a", + "metadata": {}, + "source": [ + "There are many ways to do this next step, but given our dataset is so small, we can just use a non-parallelized iterative loop to create the related source and label items at the same time, and then add them to their respective Collections." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "02e683c0-f27c-4154-a438-ecf2598aa417", + "metadata": {}, + "outputs": [], + "source": [ + "label_paths = [f for f in os.listdir(aoi_dir / \"geojson_buildings\") if f.endswith('geojson')]" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "02a68a71-f8b8-4279-b0b0-40db5d3115aa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823082625_20190823082938_tile_442.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190822082538_20190822082826_tile_4164.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823091132_20190823091448_tile_7924.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190822091156_20190822091502_tile_108.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823123151_20190823123459_tile_2317.geojson\n", + "Creating source and label items from 
SN6_Train_AOI_11_Rotterdam_Buildings_20190822075219_20190822075510_tile_8137.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804133131_20190804133356_tile_783.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823145306_20190823145612_tile_7218.geojson\n", + "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_69.geojson\n" + ] + } + ], + "source": [ + "for label_path in label_paths:\n", + " # get the geojson label filename\n", + " label_filename = label_path.split('/')[-1]\n", + " print(f'Creating source and label items from {label_filename}')\n", + " \n", + " # create the source and label items for a given label path\n", + " source_item = create_source_item(label_filename)\n", + " label_item = create_label_item(label_filename)\n", + " \n", + " # add the source link to label item\n", + " add_label_source_link(source_item, label_item)\n", + " \n", + " # add the source and label items to collections\n", + " sn6_source_collection.add_item(source_item)\n", + " sn6_labels_collection.add_item(label_item)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "9a0f7839-f6dc-449b-a591-6619e768b9d2", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_source_collection.update_extent_from_items()\n", + "sn6_labels_collection.update_extent_from_items()" + ] + }, + { + "cell_type": "markdown", + "id": "f6a02ea6-0952-4319-9092-6c094afd8a70", + "metadata": {}, + "source": [ + "### Add children Collections to Catalog" + ] + }, + { + "cell_type": "markdown", + "id": "9acb42f9-34ba-4f5d-8ea3-ce1f446c42af", + "metadata": {}, + "source": [ + "With all the Items added to the source and labels Collections, we can add the two Collections as children of the Catalog." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "f0defa15-2428-4a6d-aef0-d4e9b808f1d3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n", + " * \n" + ] + } + ], + "source": [ + "sn6_catalog.add_children([sn6_source_collection, sn6_labels_collection])\n", + "sn6_catalog.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "2f78189b-ecf4-4be0-8c8a-df35d1eb5002", + "metadata": {}, + "source": [ + "### Normalize Links, validate Catalog and save to file" + ] + }, + { + "cell_type": "markdown", + "id": "3a0caf4e-54f7-46e1-a970-6b404b2c5977", + "metadata": {}, + "source": [ + "The last few steps in creating the Catalog are normalizing all of the links between the related Items and Collections, validating that it is a valid STAC Catalog, and then saving it to JSON files in our temporary `spacenet_6_rotterdam` directory."
+ ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "fd3c9feb-7a4f-4865-9344-5f785a5343b5", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_catalog.normalize_hrefs(data_dir.as_posix())" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "9275083f-3595-46ad-9bcd-fea994ed789a", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_catalog.validate_all()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "e44ce22b-4771-4921-8d12-ebd6602f405f", + "metadata": {}, + "outputs": [], + "source": [ + "sn6_catalog.save(catalog_type=CatalogType.SELF_CONTAINED)" + ] + }, + { + "cell_type": "markdown", + "id": "89d5084b-40a7-4dbd-8d72-ddf65f489f34", + "metadata": {}, + "source": [ + "### Compress catalog with dataset source images and labels into single archive" + ] + }, + { + "cell_type": "markdown", + "id": "129e5891-33e4-4205-b704-a4cc58f9b45f", + "metadata": {}, + "source": [ + "The very last step in the Catalog creation process before submitting to Radiant MLHub is compressing the entire archive we just created, so that we have a self-contained catalog bundled with all the source imagery and label files together in a single place. This will speed up processing for the Radiant team downstream." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "23c0eff1-8bc5-4fe6-8230-982cd72cfb24", + "metadata": {}, + "outputs": [], + "source": [ + "def create_tar_gz(archive_name, target_dir):\n", + " with tarfile.open(archive_name, \"w:gz\") as tar:\n", + " tar.add(target_dir)\n", + " print(f\"Archive file {archive_name} created\")" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "54a2bd88-eb2a-4fda-8d6e-d0bace01f7e9", + "metadata": {}, + "outputs": [], + "source": [ + "os.chdir('/home/jovyan/tutorials')" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "2badc04c-59bb-4757-a084-ba6c5ba48d6b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Archive file spacenet_6_rotterdam.tar.gz created\n" + ] + } + ], + "source": [ + "output_archive_filename = f'{data_dir.name}.tar.gz'\n", + "create_tar_gz(output_archive_filename, data_dir.as_posix())" + ] + }, + { + "cell_type": "markdown", + "id": "b02acefe-ee14-4dfb-a826-b2af28a46594", + "metadata": {}, + "source": [ + "### Submit to Radiant MLHub" + ] + }, + { + "cell_type": "markdown", + "id": "c887989c-3202-4325-9cd0-c84f46e58001", + "metadata": {}, + "source": [ + "Now that the archive of your dataset and the Catalog has been created, you should see the tar file in your browser view to the left titled `spacenet_6_rotterdam.tar.gz`. This is the file you will share with the Radiant Earth engineering team to streamline the process of publishing your dataset to Radiant MLHub. To start the process, go to the [General Dataset Inquiry Form](https://radiantearth.typeform.com/to/j0duax) and submit the form with as complete details as possible. This will automatically notify the Radiant team of your request. When we're ready to process and ingest your dataset, we will ask that you share this archive file with us on a cloud storage solution, such as Azure, AWS or Google Cloud."
+ ] + }, + { + "cell_type": "markdown", + "id": "7f7e9b40-4ad5-45a4-b9bd-335ee5fd364b", + "metadata": {}, + "source": [ + "### Garbage Cleanup" + ] + }, + { + "cell_type": "markdown", + "id": "3422890d-7297-41fb-90a0-bdf4ba730e5d", + "metadata": {}, + "source": [ + "The following commands simply clean up the instance environment of all the archive files and directories you created in this notebook. They are not necessary to run, however it should be noted that anything kept in the `tmp` directory will be flushed when the notebook server instance is shut down. Therefore make sure to back up or download any files you wish to keep." + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "85307e44-2bd6-4463-903c-9fea2df3ed48", + "metadata": {}, + "outputs": [], + "source": [ + "tar_path.unlink(missing_ok=True)\n", + "shutil.rmtree(data_dir, ignore_errors=True)\n", + "\n", + "if os.path.exists(tar_path):\n", + " os.remove(tar_path)\n", + "\n", + "if os.path.exists(data_dir):\n", + " os.remove(data_dir)" ] }, { "cell_type": "code", "execution_count": null, - "id": "ff611fad-f7a5-45c3-8108-3035e028777f", + "id": "eab53ca3-1876-434d-8bf6-56d59dd19c5c", "metadata": {}, "outputs": [], "source": [] From 6e6dcd372efd000b7181c969bcb73fa1ccd0b250 Mon Sep 17 00:00:00 2001 From: Hamed Alemohammad Date: Tue, 29 Mar 2022 17:05:56 -0400 Subject: [PATCH 2/2] revised descriptions, and file path for storing the catalog --- tutorials/radiant-mlhub-publish-dataset.ipynb | 439 +++--------------- 1 file changed, 53 insertions(+), 386 deletions(-) diff --git a/tutorials/radiant-mlhub-publish-dataset.ipynb b/tutorials/radiant-mlhub-publish-dataset.ipynb index abc32831..0570312f 100644 --- a/tutorials/radiant-mlhub-publish-dataset.ipynb +++ b/tutorials/radiant-mlhub-publish-dataset.ipynb @@ -15,7 +15,7 @@ "id": "4c593de1-8451-4bfd-b004-a0fc6de1aa27", "metadata": {}, "source": [ - "In this tutorial, we will walk through the process of creating a self-contained STAC Catalog, and its
children Collections for the labels and source imagery in an example machine learning (ML) training dataset. We will then describe the process for getting the dataset read for submission to [Radiant MLHub](https://mlhub.earth/) for manual review and publication.\n", + "In this tutorial, we will walk through the process of creating a self-contained STAC Catalog, and its children Collections for the labels and source imagery in an example machine learning (ML) training dataset. We will then describe the process for getting the dataset read for submission to [Radiant MLHub](https://mlhub.earth/) for review and publication.\n", "\n", "For this example, we will use the sample training dataset from the [SpaceNet 6: Multi-Sensor All-Weather Mapping](https://spacenet.ai/sn6-challenge/)." ] @@ -38,47 +38,17 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "ce566ab8-e215-4367-9fe6-4da05286d6b5", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting rio-stac==0.3.2\n", - " Downloading rio-stac-0.3.2.tar.gz (8.1 kB)\n", - "Requirement already satisfied: rasterio in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.10)\n", - "Requirement already satisfied: pystac<2.0.0,>=1.0.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rio-stac==0.3.2) (1.2.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from pystac<2.0.0,>=1.0.0->rio-stac==0.3.2) (2.8.2)\n", - "Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from python-dateutil>=2.7.0->pystac<2.0.0,>=1.0.0->rio-stac==0.3.2) (1.16.0)\n", - "Requirement already satisfied: setuptools in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (59.8.0)\n", - "Requirement already satisfied: numpy in /srv/conda/envs/notebook/lib/python3.8/site-packages (from 
rasterio->rio-stac==0.3.2) (1.22.3)\n", - "Requirement already satisfied: cligj>=0.5 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (0.7.2)\n", - "Requirement already satisfied: affine in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (2.3.0)\n", - "Requirement already satisfied: attrs in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (21.4.0)\n", - "Requirement already satisfied: click>=4.0 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (7.1.2)\n", - "Requirement already satisfied: certifi in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (2021.10.8)\n", - "Requirement already satisfied: snuggs>=1.4.1 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.4.7)\n", - "Requirement already satisfied: click-plugins in /srv/conda/envs/notebook/lib/python3.8/site-packages (from rasterio->rio-stac==0.3.2) (1.1.1)\n", - "Requirement already satisfied: pyparsing>=2.1.6 in /srv/conda/envs/notebook/lib/python3.8/site-packages (from snuggs>=1.4.1->rasterio->rio-stac==0.3.2) (3.0.7)\n", - "Building wheels for collected packages: rio-stac\n", - " Building wheel for rio-stac (setup.py) ... 
\u001b[?25ldone\n", - "\u001b[?25h Created wheel for rio-stac: filename=rio_stac-0.3.2-py3-none-any.whl size=8279 sha256=13e01dd9e6dcd02c086ddfc260de260387dea212829ab082de08519d764b8999\n", - " Stored in directory: /home/jovyan/.cache/pip/wheels/42/12/1a/677dda98b5bb48936e8636e4e71ddc6ed65ee7f3a849ca2c77\n", - "Successfully built rio-stac\n", - "Installing collected packages: rio-stac\n", - "Successfully installed rio-stac-0.3.2\n" - ] - } - ], + "outputs": [], "source": [ "!pip install rio-stac==0.3.2" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "52420a33-ae4e-4b16-b893-f42dd67909fb", "metadata": {}, "outputs": [], @@ -131,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "022cbe6f-1d8e-4a61-8615-0736926f4a27", "metadata": {}, "outputs": [], @@ -158,20 +128,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "a6be708a-b187-48c9-8d5f-27a896ed10a0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 123M 100 123M 0 0 17.2M 0 0:00:07 0:00:07 --:--:-- 21.9M\n" - ] - } - ], + "outputs": [], "source": [ "if tar_path.exists():\n", " print(f\"File {tar_path} already exists, skipping download\")\n", @@ -189,19 +149,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "d76015d1-794c-412c-9352-6787be3a35f4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Extracted data to /tmp/SN6_buildings_AOI_11_Rotterdam_train_sample\n", - "Renamed folder to /tmp/spacenet_6_rotterdam\n" - ] - } - ], + "outputs": [], "source": [ "if untar_path.exists():\n", " print(f\"Data already extracted from archive; skipping extract.\")\n", @@ -229,87 +180,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 
null, "id": "273826a9-e03a-4124-8b9f-8ceeb61fde51", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/tmp/spacenet_6_rotterdam\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PS-RGB\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGB_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PAN\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PAN_20190823123151_20190823123459_tile_2317.tif\n", - 
"\tSN6_Train_AOI_11_Rotterdam_PAN_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/SAR-Intensity\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/geojson_buildings\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_69.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190804133131_20190804133356_tile_783.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190822075219_20190822075510_tile_8137.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190822082538_20190822082826_tile_4164.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190822091156_20190822091502_tile_108.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823082625_20190823082938_tile_442.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823091132_20190823091448_tile_7924.geojson\n", - "\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823123151_20190823123459_tile_2317.geojson\n", - 
"\tSN6_Train_AOI_11_Rotterdam_Buildings_20190823145306_20190823145612_tile_7218.geojson\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/PS-RGBNIR\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190823145306_20190823145612_tile_7218.tif\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/SummaryData\n", - "\tSN6_TrainSample_AOI_11_Rotterdam_Buildings.csv\n", - "/tmp/spacenet_6_rotterdam/AOI_11_Rotterdam/RGBNIR\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_69.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190804133131_20190804133356_tile_783.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822075219_20190822075510_tile_8137.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822082538_20190822082826_tile_4164.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190822091156_20190822091502_tile_108.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823082625_20190823082938_tile_442.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823091132_20190823091448_tile_7924.tif\n", - "\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823123151_20190823123459_tile_2317.tif\n", - 
"\tSN6_Train_AOI_11_Rotterdam_RGBNIR_20190823145306_20190823145612_tile_7218.tif\n" - ] - } - ], + "outputs": [], "source": [ "for root, _, files in os.walk(data_dir):\n", " print(root)\n", @@ -331,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "a5e16d25-ddf3-45ae-bd49-91888ab5a89c", "metadata": {}, "outputs": [], @@ -419,40 +293,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "dcb37528-fc62-4eeb-904d-704ec85b9695", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'href': 'AOI_11_Rotterdam/RGBNIR/SN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", - " 'type': 'RGBNIR',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'},\n", - " {'href': 'AOI_11_Rotterdam/PS-RGBNIR/SN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", - " 'type': 'PS-RGBNIR',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'},\n", - " {'href': 'AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif',\n", - " 'type': 'SAR-Intensity',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'},\n", - " {'href': 'AOI_11_Rotterdam/PAN/SN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif',\n", - " 'type': 'PAN',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'},\n", - " {'href': 'AOI_11_Rotterdam/PS-RGB/SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif',\n", - " 'type': 'PS-RGB',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'}]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "label = \"SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\"\n", "source_info = 
get_source_info(label)\n", @@ -461,24 +305,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "07aa9f59-7a7c-41d4-a5ce-49c7e2342d50", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'href': 'AOI_11_Rotterdam/geojson_buildings/SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson',\n", - " 'type': 'Buildings',\n", - " 'start_datetime': '20190804120223',\n", - " 'end_datetime': '20190804120456'}" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "label_info = get_label_info(label)\n", "label_info" @@ -502,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "eff8b798-25ca-4a9f-abf8-c8e59f962f2f", "metadata": {}, "outputs": [], @@ -514,7 +344,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "6782d5c1-100b-4f56-960a-844fcbdb0d09", "metadata": {}, "outputs": [], @@ -573,82 +403,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "e769016b-cf1c-4dca-ab42-2e3588f8e668", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{ 'assets': { 'PAN': { 'href': 'AOI_11_Rotterdam/PAN/SN6_Train_AOI_11_Rotterdam_PAN_20190804120223_20190804120456_tile_55.tif',\n", - " 'roles': ['data'],\n", - " 'title': 'PAN',\n", - " 'type': 'image/tiff; application=geotiff; '\n", - " 'profile=cloud-optimized'},\n", - " 'PS-RGB': { 'href': 'AOI_11_Rotterdam/PS-RGB/SN6_Train_AOI_11_Rotterdam_PS-RGB_20190804120223_20190804120456_tile_55.tif',\n", - " 'roles': ['data'],\n", - " 'title': 'PS-RGB',\n", - " 'type': 'image/tiff; application=geotiff; '\n", - " 'profile=cloud-optimized'},\n", - " 'PS-RGBNIR': { 'href': 'AOI_11_Rotterdam/PS-RGBNIR/SN6_Train_AOI_11_Rotterdam_PS-RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", - " 'roles': ['data'],\n", - " 'title': 'PS-RGBNIR',\n", - 
" 'type': 'image/tiff; application=geotiff; '\n", - " 'profile=cloud-optimized'},\n", - " 'RGBNIR': { 'href': 'AOI_11_Rotterdam/RGBNIR/SN6_Train_AOI_11_Rotterdam_RGBNIR_20190804120223_20190804120456_tile_55.tif',\n", - " 'roles': ['data'],\n", - " 'title': 'RGBNIR'},\n", - " 'SAR-Intensity': { 'href': 'AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190804120223_20190804120456_tile_55.tif',\n", - " 'roles': ['data'],\n", - " 'title': 'SAR-Intensity',\n", - " 'type': 'image/tiff; application=geotiff; '\n", - " 'profile=cloud-optimized'}},\n", - " 'bbox': [ 4.350277623237341,\n", - " 51.90744725678807,\n", - " 4.356939496573024,\n", - " 51.9115675425874],\n", - " 'geometry': { 'coordinates': [ [ [4.350277623237341, 51.9115675425874],\n", - " [4.350277623237341, 51.90744725678807],\n", - " [4.356939496573024, 51.90744725678807],\n", - " [4.356939496573024, 51.9115675425874],\n", - " [4.350277623237341, 51.9115675425874]]],\n", - " 'type': 'Polygon'},\n", - " 'id': 'SN6_Train_AOI_11_Rotterdam_20190804120223_20190804120456_tile_55_source',\n", - " 'links': [],\n", - " 'properties': { 'datetime': '2019-08-04T12:02:23Z',\n", - " 'proj:bbox': [ 592886.1399464327,\n", - " 5751614.151231687,\n", - " 593336.1616884505,\n", - " 5752064.1729737045],\n", - " 'proj:epsg': 32631,\n", - " 'proj:geometry': { 'coordinates': [ [ [ 592886.1399464327,\n", - " 5752064.1729737045],\n", - " [ 592886.1399464327,\n", - " 5751614.151231687],\n", - " [ 593336.1616884505,\n", - " 5751614.151231687],\n", - " [ 593336.1616884505,\n", - " 5752064.1729737045],\n", - " [ 592886.1399464327,\n", - " 5752064.1729737045]]],\n", - " 'type': 'Polygon'},\n", - " 'proj:shape': [450, 450],\n", - " 'proj:transform': [ 1.0000483155950517,\n", - " 0.0,\n", - " 592886.1399464327,\n", - " 0.0,\n", - " -1.0000483155950517,\n", - " 5752064.1729737045,\n", - " 0.0,\n", - " 0.0,\n", - " 1.0]},\n", - " 'stac_extensions': [ 
'https://stac-extensions.github.io/projection/v1.0.0/schema.json'],\n", - " 'stac_version': '1.0.0',\n", - " 'type': 'Feature'}\n" - ] - } - ], + "outputs": [], "source": [ "source_item = create_source_item(label)\n", "pp.pprint(source_item.to_dict())" @@ -672,7 +430,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "29098a46-f86a-4439-9538-c351b011ab95", "metadata": {}, "outputs": [], @@ -684,7 +442,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "d28005c3-feaf-4932-a376-bf3c85cae173", "metadata": {}, "outputs": [], @@ -709,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "fc37d0b5-5e2a-47b5-9837-78aa5f30a9c0", "metadata": {}, "outputs": [], @@ -744,7 +502,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "35057b0c-107f-43db-ba38-07d9e0a848e7", "metadata": {}, "outputs": [], @@ -773,7 +531,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "6fe27c8b-d0c6-4d8f-8d02-9a45e2738f26", "metadata": {}, "outputs": [], @@ -799,40 +557,10 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "e8b2c289-1787-4235-8ca2-b684f118516e", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{ 'assets': { 'buildings': { 'href': 'AOI_11_Rotterdam/geojson_buildings/SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson',\n", - " 'type': }},\n", - " 'bbox': ( 4.350277630686814,\n", - " 51.90744911466251,\n", - " 4.356899148164554,\n", - " 51.91047050708562),\n", - " 'geometry': { 'coordinates': ( ( (4.350277630686814, 51.90744911466251),\n", - " (4.350277630686814, 51.91047050708562),\n", - " (4.356899148164554, 51.91047050708562),\n", - " (4.356899148164554, 51.90744911466251),\n", - " (4.350277630686814, 51.90744911466251)),),\n", - " 'type': 'Polygon'},\n", - " 'id': 
'SN6_Train_AOI_11_Rotterdam_20190804120223_20190804120456_tile_55_labels',\n", - " 'links': [ { 'href': None,\n", - " 'rel': 'source',\n", - " 'type': }],\n", - " 'properties': { 'datetime': '2019-08-04T12:02:23Z',\n", - " 'label:description': 'SpaceNet 6 Building Footprints',\n", - " 'label:properties': None,\n", - " 'label:type': 'vector'},\n", - " 'stac_extensions': [ 'https://stac-extensions.github.io/label/v1.0.0/schema.json'],\n", - " 'stac_version': '1.0.0',\n", - " 'type': 'Feature'}\n" - ] - } - ], + "outputs": [], "source": [ "label_item = create_label_item(label)\n", "add_label_source_link(source_item, label_item)\n", @@ -848,7 +576,7 @@ "\n", "* `label:overviews` contain the names of the unique classes in the label file and the [Count Objects](https://github.com/stac-extensions/label#count-object) with associated classes\n", "* `label:classes` is a list of all [Class Objects](https://github.com/stac-extensions/label#count-object) representing possible classes across the labels found in a dataset\n", - "* `file:values` can be used to store the [Mapping Object](https://github.com/stac-extensions/file#mapping-object) between numeric classification values and the descriptive string text equivelant " + "* `file:values` can be used to store the [Mapping Object](https://github.com/stac-extensions/file#mapping-object) between numeric classification values and the descriptive string text equivalent " ] }, { @@ -871,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "51d22430-5614-4a16-a9a0-34119aceb0a6", "metadata": {}, "outputs": [], @@ -900,7 +628,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "8c2fb77d-591d-4553-80f6-4acfcc663ba3", "metadata": {}, "outputs": [], @@ -914,7 +642,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "7835c567-5b97-4bc1-8ff2-a85b7e742f2c", "metadata": {}, "outputs": [], @@ -940,7 +668,7 @@ }, { "cell_type": 
"code", - "execution_count": 35, + "execution_count": null, "id": "3ed27d56-3bf0-41cf-9368-aebd2c474258", "metadata": {}, "outputs": [], @@ -959,7 +687,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "e3b0f434-d132-4fd0-9fe1-6e67361b5eb0", "metadata": {}, "outputs": [], @@ -981,7 +709,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "3b9d62ea-4070-48e3-9928-f37eb99c45a1", "metadata": {}, "outputs": [], @@ -996,7 +724,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "ac1d0c86-1197-47a0-ad2d-e60bf158c122", "metadata": {}, "outputs": [], @@ -1027,7 +755,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "02e683c0-f27c-4154-a438-ecf2598aa417", "metadata": {}, "outputs": [], @@ -1037,27 +765,10 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "02a68a71-f8b8-4279-b0b0-40db5d3115aa", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823082625_20190823082938_tile_442.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_55.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190822082538_20190822082826_tile_4164.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823091132_20190823091448_tile_7924.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190822091156_20190822091502_tile_108.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823123151_20190823123459_tile_2317.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190822075219_20190822075510_tile_8137.geojson\n", - "Creating source 
and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804133131_20190804133356_tile_783.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190823145306_20190823145612_tile_7218.geojson\n", - "Creating source and label items from SN6_Train_AOI_11_Rotterdam_Buildings_20190804120223_20190804120456_tile_69.geojson\n" - ] - } - ], + "outputs": [], "source": [ "for label_path in label_paths:\n", " # get the geojson label filename\n", @@ -1078,7 +789,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "id": "9a0f7839-f6dc-449b-a591-6619e768b9d2", "metadata": {}, "outputs": [], @@ -1105,40 +816,10 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "f0defa15-2428-4a6d-aef0-d4e9b808f1d3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "* \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n", - " * \n" - ] - } - ], + "outputs": [], "source": [ "sn6_catalog.add_children([sn6_source_collection, sn6_labels_collection])\n", "sn6_catalog.describe()" @@ -1162,7 +843,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "id": "fd3c9feb-7a4f-4865-9344-5f785a5343b5", "metadata": {}, "outputs": [], @@ -1172,7 +853,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "id": "9275083f-3595-46ad-9bcd-fea994ed789a", "metadata": {}, "outputs": [], @@ -1182,7 +863,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "id": "e44ce22b-4771-4921-8d12-ebd6602f405f", "metadata": {}, "outputs": [], @@ -1208,7 +889,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "id": "23c0eff1-8bc5-4fe6-8230-982cd72cfb24", "metadata": {}, "outputs": [], 
@@ -1221,28 +902,20 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "id": "54a2bd88-eb2a-4fda-8d6e-d0bace01f7e9", "metadata": {}, "outputs": [], "source": [ - "os.chdir('/home/jovyan/tutorials')" + "os.chdir('/home/jovyan/PlanetaryComputerExamples/tutorials')" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": null, "id": "2badc04c-59bb-4757-a084-ba6c5ba48d6b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Archive file spacenet_6_rotterdam.tar.gz created\n" - ] - } - ], + "outputs": [], "source": [ "output_archive_filename = f'{data_dir.name}.tar.gz'\n", "create_tar_gz(output_archive_filename, data_dir.as_posix())" @@ -1261,7 +934,9 @@ "id": "c887989c-3202-4325-9cd0-c84f46e58001", "metadata": {}, "source": [ - "Now that the archive of your dataset and the Catalog has been created, you should see the tar file in your browser view to the left titled `spacenet_6_rotterdam.tar.gz`. This is the file you will share with the Radiant Earth engineering team to streamline the process of publishing your dataset to Radiant MLHub. To start the process, go to the [General Dataset Inquiry Form](https://radiantearth.typeform.com/to/j0duax) and submit the form with as complete details as possible. This will automatically notify the Radiant team of your request. When we're ready to process and ingest your dataset, we will ask taht you share this archive file with us on a cloud storage solution, such as Azure, AWS or Google Cloud." + "Now that the archive of your dataset and the Catalog has been created, you should see the tar file in your browser view to the left titled `spacenet_6_rotterdam.tar.gz`. You would need to generate a similar archive for your own dataset if you want to publish it on [Radiant MLHub](https://mlhub.earth/). This is the file you will share with the Radiant Earth engineering team to streamline the process of publishing your dataset to Radiant MLHub. 
\n", + "\n", + "To start the process, go to the [Contribute](https://mlhub.earth/contribute) page on the Radiant MLHub website and click on the General Dataset Inquiry Form (you need to create an account on Radiant MLHub to access this page). Submit the form with as much detail as possible. This will automatically notify the Radiant team of your request. When we're ready to process and ingest your dataset, we will ask that you share this archive file with us on a cloud storage solution, such as Azure, AWS, Google Cloud/Drive or Dropbox." ] }, { @@ -1282,7 +957,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": null, "id": "85307e44-2bd6-4463-903c-9fea2df3ed48", "metadata": {}, "outputs": [], @@ -1296,14 +971,6 @@ "if os.path.exists(data_dir):\n", " os.remove(data_dir)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eab53ca3-1876-434d-8bf6-56d59dd19c5c", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {