diff --git a/CHANGELOG.md b/CHANGELOG.md index acf61ac6b..5eec00ec5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- `metadata_from_stac()` now keeps declared STAC `cube:dimensions` as the dimension source of truth and handles STAC 1.1 common `bands` metadata without requiring the datacube extension ([#743](https://github.com/Open-EO/openeo-python-client/issues/743), [#867](https://github.com/Open-EO/openeo-python-client/pull/867)). + ## [0.49.0] - 2026-04-01 diff --git a/openeo/metadata.py b/openeo/metadata.py index 49edef428..25b9dac24 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -35,8 +35,6 @@ class DimensionAlreadyExistsException(MetadataException): # TODO: make these dimension classes immutable data classes -# TODO: align better with STAC datacube extension -# TODO: align/adapt/integrate with pystac's datacube extension implementation? class Dimension: """Base class for dimensions.""" @@ -71,7 +69,6 @@ def rename_labels(self, target, source) -> Dimension: class SpatialDimension(Dimension): # TODO: align better with STAC datacube extension: e.g. support "axis" (x or y) - DEFAULT_CRS = 4326 def __init__( @@ -679,30 +676,16 @@ def metadata_from_stac(url: str) -> CubeMetadata: """ Reads the band metadata a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata` + Policy: + - If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands). + - Otherwise: apply openEO-style defaults (x, y, t) and (for Collection/Item) keep bands dimension even if empty. + :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. """ stac_object = pystac.read_file(href=url) - bands = _StacMetadataParser().bands_from_stac_object(stac_object) - - # At least assume there are spatial dimensions - # TODO #743: are there conditions in which we even should not assume the presence of spatial dimensions? - dimensions = [ - SpatialDimension(name="x", extent=[None, None]), - SpatialDimension(name="y", extent=[None, None]), - ] - - # TODO #743: conditionally include band dimension when there was actual indication of band metadata? - band_dimension = BandDimension(name="bands", bands=bands) - dimensions.append(band_dimension) - - # TODO: is it possible to derive the actual name of temporal dimension that the backend will use? - temporal_dimension = _StacMetadataParser().get_temporal_dimension(stac_object) - if temporal_dimension: - dimensions.append(temporal_dimension) - - metadata = CubeMetadata(dimensions=dimensions) - return metadata + parser = _StacMetadataParser() + return parser.metadata_from_stac_object(stac_object) # Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9) # TODO: remove this once support for Python 3.7 and 3.8 is dropped @@ -760,39 +743,159 @@ def __init__(self, *, logger=_log, log_level=logging.DEBUG, supress_duplicate_wa # Use caching trick to avoid duplicate warnings self._warn = functools.lru_cache(maxsize=1000)(self._warn) + def metadata_from_stac_object(self, stac_object: pystac.STACObject) -> CubeMetadata: + """ + Build cube metadata from a STAC object. + """ + dimensions = self.dimensions_from_stac_object(stac_object=stac_object) + return CubeMetadata(dimensions=dimensions) + + def dimensions_from_stac_object(self, stac_object: pystac.STACObject) -> List[Dimension]: + """ + Build dimension metadata from a STAC object. + + Philosophy: + - If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands). + - Otherwise: apply openEO-style defaults (x, y, bands, optional t). + """ + bands = self.bands_from_stac_object(stac_object) + if self._has_cube_dimensions(stac_object): + return self._parse_declared_dimensions(stac_object=stac_object, bands=bands) + + dimensions: List[Dimension] = [ + SpatialDimension(name="x", extent=[None, None]), + SpatialDimension(name="y", extent=[None, None]), + BandDimension(name="bands", bands=list(bands)), + ] + temporal_dimension = self.get_temporal_dimension(stac_object) + if temporal_dimension: + dimensions.append(temporal_dimension) + return dimensions + def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalDimension, None]: """ Extract the temporal dimension from a STAC Collection/Item (if any) """ - # TODO: also extract temporal dimension from assets? - if _PYSTAC_1_9_EXTENSION_INTERFACE: - if stac_obj.ext.has("cube") and hasattr(stac_obj.ext, "cube"): - temporal_dims = [ - (n, d.extent or [None, None]) - for (n, d) in stac_obj.ext.cube.dimensions.items() - if d.dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL + if self._has_cube_dimensions(stac_obj): + temporal_dimensions = [ + d + for d in self._parse_declared_dimensions(stac_object=stac_obj, bands=_BandList([])) + if isinstance(d, TemporalDimension) + ] + if len(temporal_dimensions) == 1: + return temporal_dimensions[0] + + if isinstance(stac_obj, pystac.Collection) and stac_obj.extent and stac_obj.extent.temporal: + extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]] + return TemporalDimension(name="t", extent=extent) + + if isinstance(stac_obj, pystac.Item): + props = stac_obj.properties + start = props.get("start_datetime") + end = props.get("end_datetime") + if start or end: + extent = [ + Rfc3339(propagate_none=True).normalize(start), + Rfc3339(propagate_none=True).normalize(end), ] - if len(temporal_dims) == 1: - name, extent = temporal_dims[0] - return TemporalDimension(name=name, extent=extent) - elif isinstance(stac_obj, pystac.Collection) and stac_obj.extent.temporal: - # No explicit "cube:dimensions": build fallback from "extent.temporal", - # with dimension name "t" (openEO API recommendation). - extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]] return TemporalDimension(name="t", extent=extent) - else: - if isinstance(stac_obj, pystac.Item): - cube_dimensions = stac_obj.properties.get("cube:dimensions", {}) - elif isinstance(stac_obj, pystac.Collection): - cube_dimensions = stac_obj.extra_fields.get("cube:dimensions", {}) + + dt = props.get("datetime") + if dt: + norm = Rfc3339(propagate_none=True).normalize(dt) + return TemporalDimension(name="t", extent=[norm, norm]) + + def _has_cube_dimensions(self, stac_object: pystac.STACObject) -> bool: + cube_dimensions = self._cube_dimensions_dict(stac_object) + return isinstance(cube_dimensions, dict) and len(cube_dimensions) > 0 + + def _cube_dimensions_dict(self, stac_object: pystac.STACObject) -> Dict[str, dict]: + """ + Return raw cube:dimensions dict from a Collection/Item, or {}. + """ + if isinstance(stac_object, pystac.Item): + return stac_object.properties.get("cube:dimensions", {}) or {} + if isinstance(stac_object, pystac.Collection): + return stac_object.extra_fields.get("cube:dimensions", {}) or {} + return {} + + @staticmethod + def _safe_extent_from_pystac_cube_dim(dim) -> list: + """ + PySTAC cube dimension wrapper may raise if 'extent' is missing. + Also, depending on serialization/version, extent might live in extra_fields. + """ + try: + ext = dim.extent + except Exception: + ext = None + + if not ext: + extra = getattr(dim, "extra_fields", {}) or {} + ext = extra.get("extent") + + return ext or [None, None] + + def _parse_declared_dimensions(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]: + """ + Parse dimensions declared through cube:dimensions. + """ + if ( + _PYSTAC_1_9_EXTENSION_INTERFACE + and getattr(stac_object, "ext", None) is not None + and stac_object.ext.has("cube") + and hasattr(stac_object.ext, "cube") + ): + return self._parse_cube_dimensions_from_pystac_extension(stac_object=stac_object, bands=bands) + return self._parse_cube_dimensions_from_raw_dict(stac_object=stac_object, bands=bands) + + def _parse_cube_dimensions_from_pystac_extension( + self, stac_object: pystac.STACObject, bands: _BandList + ) -> List[Dimension]: + """ + Parse dimensions from PySTAC's cube extension wrapper (when present). + Important: PySTAC DimensionType only has SPATIAL + TEMPORAL. + Everything else is treated as band-like. + """ + dimensions = [] + for name, dim in stac_object.ext.cube.dimensions.items(): + dim_type = getattr(dim, "dim_type", None) + extent = self._safe_extent_from_pystac_cube_dim(dim) + + if dim_type == pystac.extensions.datacube.DimensionType.SPATIAL: + dimensions.append(SpatialDimension(name=name, extent=extent)) + elif dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL: + dimensions.append(TemporalDimension(name=name, extent=extent)) else: - cube_dimensions = {} - temporal_dims = [ - (n, d.get("extent", [None, None])) for (n, d) in cube_dimensions.items() if d.get("type") == "temporal" - ] - if len(temporal_dims) == 1: - name, extent = temporal_dims[0] - return TemporalDimension(name=name, extent=extent) + dimensions.append(BandDimension(name=name, bands=list(bands))) + + return dimensions + + def _parse_cube_dimensions_from_raw_dict(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]: + """ + Parse dimensions from raw cube:dimensions dict. + Supports 'spatial', 'temporal', and ('bands' or 'spectral' as an alias). + """ + dimensions = [] + cube_dimensions = self._cube_dimensions_dict(stac_object) + + for name, dim in cube_dimensions.items(): + if not isinstance(dim, dict): + continue + + dim_type = dim.get("type") + extent = dim.get("extent", [None, None]) + + if dim_type == "spatial": + dimensions.append(SpatialDimension(name=name, extent=extent)) + elif dim_type == "temporal": + dimensions.append(TemporalDimension(name=name, extent=extent)) + elif dim_type in ("bands", "spectral"): + dimensions.append(BandDimension(name=name, bands=list(bands))) + else: + dimensions.append(Dimension(name=name, type=dim_type)) + + return dimensions def _band_from_eo_bands_metadata(self, band: Union[dict, pystac.extensions.eo.Band]) -> Band: """Construct band from metadata in eo v1.1 style""" diff --git a/tests/test_metadata.py b/tests/test_metadata.py index ef8fbf65e..115c7e3ec 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1159,6 +1159,27 @@ def test_metadata_from_stac_bands(tmp_path, test_stac, expected): assert metadata.band_names == expected +def test_metadata_from_stac_stac_1_1_common_bands_without_datacube_extension(tmp_path): + stac_dict = StacDummyBuilder.collection( + stac_version="1.1.0", + bands=[ + {"name": "red", "eo:common_name": "red", "eo:center_wavelength": 0.665}, + {"name": "nir", "eo:common_name": "nir", "eo:center_wavelength": 0.842}, + ], + ) + assert "stac_extensions" not in stac_dict + assert "cube:dimensions" not in stac_dict + + path = tmp_path / "stac.json" + # TODO #738 real request mocking of STAC resources compatible with pystac? + path.write_text(json.dumps(stac_dict)) + metadata = metadata_from_stac(str(path)) + + assert metadata.dimension_names() == ["x", "y", "bands", "t"] + assert metadata.band_names == ["red", "nir"] + assert metadata.band_dimension.bands[0].common_name == "red" + assert metadata.band_dimension.bands[1].wavelength_um == 0.842 + @pytest.mark.skipif(not _PYSTAC_1_9_EXTENSION_INTERFACE, reason="Requires PySTAC 1.9+ extension interface") @pytest.mark.parametrize( @@ -1210,7 +1231,17 @@ def test_metadata_from_stac_collection_bands_from_item_assets( [ ( StacDummyBuilder.item(), - None, + ("t", ["2024-03-08", "2024-03-08"]), + ), + ( + StacDummyBuilder.item( + properties={ + "datetime": "2024-03-08T00:00:00Z", + "start_datetime": "2024-04-04T00:00:00Z", + "end_datetime": "2024-06-06T00:00:00Z", + } + ), + ("t", ["2024-04-04T00:00:00Z", "2024-06-06T00:00:00Z"]), ), ( StacDummyBuilder.item(cube_dimensions={"t": {"type": "temporal", "extent": ["2024-04-04", "2024-06-06"]}}), @@ -1256,6 +1287,69 @@ def test_metadata_from_stac_temporal_dimension(tmp_path, stac_dict, expected): assert not metadata.has_temporal_dimension() + +# Dimension name resolution policy (STAC cube:dimensions vs openEO defaults) +@pytest.mark.parametrize( + ["stac_dict", "expected_dims"], + [ + ( + # No cube:dimensions -> fall back to openEO default naming convention + StacDummyBuilder.collection(summaries={"eo:bands": [{"name": "B01"}]}), + {"t", "bands", "y", "x"}, + ), + ( + # No cube:dimensions (item) -> fall back to openEO default naming convention + StacDummyBuilder.item( + properties={"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "B01"}]} + ), + {"t", "bands", "y", "x"}, + ), + ( + # cube:dimensions present -> use the dimension names as suggested by cube:dimensions keys + StacDummyBuilder.collection( + cube_dimensions={ + "time": {"type": "temporal", "axis": "t", "extent": ["2024-04-04", "2024-06-06"]}, + "band": {"type": "bands", "axis": "bands", "values": ["B01"]}, + "y": {"type": "spatial", "axis": "y", "extent": [0, 1]}, + "x": {"type": "spatial", "axis": "x", "extent": [0, 1]}, + } + ), + {"time", "band", "y", "x"}, + ), + ( + # cube:dimensions present without band dimension -> don't inject an openEO "bands" dimension + StacDummyBuilder.collection( + summaries={"eo:bands": [{"name": "B01"}]}, + cube_dimensions={ + "time": {"type": "temporal", "axis": "t", "extent": ["2024-04-04", "2024-06-06"]}, + "y": {"type": "spatial", "axis": "y", "extent": [0, 1]}, + "x": {"type": "spatial", "axis": "x", "extent": [0, 1]}, + }, + ), + {"time", "y", "x"}, + ), + ], +) +def test_metadata_from_stac_dimension_policy_cube_dimensions_vs_default(tmp_path, stac_dict, expected_dims): + path = tmp_path / "stac.json" + # TODO #738 real request mocking of STAC resources compatible with pystac? + path.write_text(json.dumps(stac_dict)) + metadata = metadata_from_stac(str(path)) + + got = tuple(metadata.dimension_names() or ()) + + # Order-insensitive check: names only + assert set(got) == expected_dims + + # Ensure the policy logic is exercised correctly: + # cube:dimensions can be located at root (collection) or in properties (item) + cube_dims = stac_dict.get("cube:dimensions") or (stac_dict.get("properties") or {}).get("cube:dimensions") + if cube_dims is None: + assert set(got) == {"t", "bands", "y", "x"} + else: + assert set(got) == set(cube_dims.keys()) + + @pytest.mark.parametrize( ["kwargs", "expected_x", "expected_y"], [