Skip to content

stac

This module contains functions that are related to STAC API.

AssetSubItem #

AssetSubItem(asset: Item, item_id: str, band: str, filename: str | Path)

Class that represents a STAC asset sub-item.

Generally represents a single satellite image band.

Initializes an AssetSubItem.

Parameters:

Name Type Description Default
asset Item

The pystac Item this asset belongs to.

required
item_id str

The ID of the item.

required
band str

The band name of this sub-item.

required
filename str | Path

The local filename of the downloaded asset.

required
Source code in src/geospatial_tools/stac.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
def __init__(self, asset: pystac.Item, item_id: str, band: str, filename: str | Path) -> None:
    """
    Build a sub-item describing one downloaded file of a STAC item.

    Args:
        asset: The pystac Item this sub-item belongs to.
        item_id: Identifier of that item.
        band: Name of the band held by this sub-item.
        filename: Local filename of the downloaded asset. A plain string is
            converted to a ``Path``; any other value is stored unchanged.
    """
    # Store the location as a Path when the caller supplied a plain string.
    local_path = Path(filename) if isinstance(filename, str) else filename
    self.asset = asset
    self.item_id: str = item_id
    self.band: str = band
    self.filename: Path = local_path

Asset #

Asset(
    asset_id: str,
    bands: list[str] | None = None,
    asset_item_list: list[AssetSubItem] | None = None,
    merged_asset_path: str | Path | None = None,
    reprojected_asset: str | Path | None = None,
    logger: Logger = LOGGER,
)

Represents a STAC asset, potentially composed of multiple bands/sub-items.

Initializes an Asset object.

Parameters:

Name Type Description Default
asset_id str

Unique ID for the asset (usually the item ID).

required
bands list[str] | None

List of bands this asset contains.

None
asset_item_list list[AssetSubItem] | None

List of AssetSubItem objects belonging to this asset.

None
merged_asset_path str | Path | None

Path to the merged multi-band raster file.

None
reprojected_asset str | Path | None

Path to the reprojected raster file.

None
logger Logger

Logger instance.

LOGGER
Source code in src/geospatial_tools/stac.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def __init__(
    self,
    asset_id: str,
    bands: list[str] | None = None,
    asset_item_list: list[AssetSubItem] | None = None,
    merged_asset_path: str | Path | None = None,
    reprojected_asset: str | Path | None = None,
    logger: logging.Logger = LOGGER,
) -> None:
    """
    Set up an Asset and its (possibly empty) collection of sub-items.

    Args:
        asset_id: Unique ID for the asset (usually the item ID).
        bands: List of bands this asset contains.
        asset_item_list: AssetSubItem objects belonging to this asset. A falsy
            value (``None`` or an empty list) results in a new empty list.
        merged_asset_path: Path to the merged multi-band raster file. Strings
            are converted to ``Path``.
        reprojected_asset: Path to the reprojected raster file. Strings are
            converted to ``Path``; stored as ``reprojected_asset_path``.
        logger: Logger instance.
    """
    # Normalize string locations up front; Path and None values pass through untouched.
    if isinstance(merged_asset_path, str):
        merged_asset_path = Path(merged_asset_path)
    if isinstance(reprojected_asset, str):
        reprojected_asset = Path(reprojected_asset)

    self.asset_id = asset_id
    self.bands = bands
    self.merged_asset_path = merged_asset_path
    self.reprojected_asset_path = reprojected_asset
    self.logger = logger

    self._sub_items: list[AssetSubItem] = asset_item_list if asset_item_list else []

__iter__ #

__iter__() -> Iterator[AssetSubItem]

Allows direct iteration: for item in asset:

Source code in src/geospatial_tools/stac.py
193
194
195
def __iter__(self) -> Iterator[AssetSubItem]:
    """Return an iterator over the sub-items, enabling `for item in asset:`"""
    sub_items = self._sub_items
    return iter(sub_items)

__len__ #

__len__() -> int

Allows checking size: len(asset)

Source code in src/geospatial_tools/stac.py
197
198
199
def __len__(self) -> int:
    """Return the number of sub-items, enabling `len(asset)`"""
    sub_items = self._sub_items
    return len(sub_items)

__contains__ #

__contains__(band_name: str) -> bool

Allows checking for band existence: "B04" in asset

Source code in src/geospatial_tools/stac.py
201
202
203
def __contains__(self, band_name: str) -> bool:
    """Report whether any sub-item carries the given band, enabling `"B04" in asset`"""
    for sub_item in self._sub_items:
        if sub_item.band == band_name:
            return True
    return False

__getitem__ #

__getitem__(index: int) -> AssetSubItem
__getitem__(band_name: str) -> AssetSubItem
__getitem__(key: int | str) -> AssetSubItem

Allows indexing by position or band name: asset[0] or asset["B04"]

Source code in src/geospatial_tools/stac.py
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
def __getitem__(self, key: int | str) -> AssetSubItem:
    """
    Fetch a sub-item either by list position or by band name:
    `asset[0]` or `asset["B04"]`

    Raises:
        IndexError: If an integer key is outside the sub-item list.
        KeyError: If no sub-item carries the requested band name.
        TypeError: If the key is neither an int nor a str.
    """
    # Reject unsupported key types first so the two lookups below stay flat.
    if not isinstance(key, (int, str)):
        raise TypeError(f"Invalid argument type: {type(key)}. Expected int or str.")

    if isinstance(key, int):
        return self._sub_items[key]

    # String key: the first sub-item whose band matches wins.
    for candidate in self._sub_items:
        if candidate.band == key:
            return candidate
    raise KeyError(f"Band '{key}' not found in asset '{self.asset_id}'.")

add_asset_item #

add_asset_item(asset: AssetSubItem) -> None

Adds an AssetSubItem to the asset.

Parameters:

Name Type Description Default
asset AssetSubItem

The AssetSubItem to add.

required
Source code in src/geospatial_tools/stac.py
227
228
229
230
231
232
233
234
235
236
def add_asset_item(self, asset: AssetSubItem) -> None:
    """
    Append an AssetSubItem to this asset's sub-item list.

    When a band list is being tracked (``self.bands`` is not ``None``), the
    sub-item's band is appended to it unless it is already present.

    Args:
      asset: The AssetSubItem to add.
    """
    self._sub_items.append(asset)

    known_bands = self.bands
    if known_bands is None:
        # No band list is tracked for this asset; nothing else to update.
        return
    if asset.band not in known_bands:
        known_bands.append(asset.band)

show_asset_items #

show_asset_items() -> None

Show items that belong to this asset.

Source code in src/geospatial_tools/stac.py
238
239
240
241
242
243
def show_asset_items(self) -> None:
    """Log, at info level, a one-line summary of every sub-item of this asset."""
    asset_list = []
    for sub_item in self._sub_items:
        asset_list.append(
            f"ID: [{sub_item.item_id}], Band: [{sub_item.band}], filename: [{sub_item.filename}]"
        )
    self.logger.info(f"Asset list for asset [{self.asset_id}] :\n\t{asset_list}")

merge_asset #

merge_asset(
    base_directory: str | Path | None = None, delete_sub_items: bool = False
) -> Path | None

Merges individual band rasters into a single multi-band raster file.

Parameters:

Name Type Description Default
base_directory str | Path | None

Directory where the merged file will be saved.

None
delete_sub_items bool

If True, delete individual band files after merging.

False

Returns:

Type Description
Path | None

The Path to the merged file if successful, else None.

Source code in src/geospatial_tools/stac.py
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
def merge_asset(self, base_directory: str | Path | None = None, delete_sub_items: bool = False) -> Path | None:
    """
    Combine the sub-item raster files into one ``<asset_id>_merged.tif`` file.

    The sub-item filenames, the metadata from ``_create_merged_asset_metadata``
    and ``self.bands`` are handed to ``merge_raster_bands``. Success is judged
    by whether the target file exists afterwards.

    Args:
      base_directory: Directory where the merged file will be saved. A falsy
        value falls back to ``Path()``.
      delete_sub_items: If True, delete individual band files after a
        successful merge.

    Returns:
        The Path to the merged file if successful, else None (also when the
        asset has no sub-items).
    """
    if not base_directory:
        output_directory = Path()
    elif isinstance(base_directory, str):
        output_directory = Path(base_directory)
    else:
        output_directory = base_directory

    merged_filename = output_directory / f"{self.asset_id}_merged.tif"

    if not self._sub_items:
        self.logger.error(f"No asset items to merge for asset [{self.asset_id}]")
        return None

    band_files = [sub_item.filename for sub_item in self._sub_items]
    merged_metadata = self._create_merged_asset_metadata()

    merge_raster_bands(
        merged_filename=merged_filename,
        raster_file_list=band_files,
        merged_metadata=merged_metadata,
        merged_band_names=self.bands,
    )

    # The merge helper's return value is not used; the file on disk is the success signal.
    if not merged_filename.exists():
        self.logger.error(f"There was a problem merging asset [{self.asset_id}]")
        return None

    self.logger.info(f"Asset [{self.asset_id}] merged successfully")
    self.logger.info(f"Asset location : [{merged_filename}]")
    self.merged_asset_path = merged_filename
    if delete_sub_items:
        self.delete_asset_sub_items()
    return merged_filename

reproject_merged_asset #

reproject_merged_asset(
    target_projection: str | int,
    base_directory: str | Path | None = None,
    delete_merged_asset: bool = False,
) -> Path | None

Reprojects the merged multi-band raster to a target projection.

Parameters:

Name Type Description Default
target_projection str | int

The target CRS (EPSG code or string).

required
base_directory str | Path | None

Directory where the reprojected file will be saved.

None
delete_merged_asset bool

If True, delete the merged file after reprojection.

False

Returns:

Type Description
Path | None

The Path to the reprojected file if successful, else None.

Source code in src/geospatial_tools/stac.py
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
def reproject_merged_asset(
    self,
    target_projection: str | int,
    base_directory: str | Path | None = None,
    delete_merged_asset: bool = False,
) -> Path | None:
    """
    Reproject the merged raster into ``<asset_id>_reprojected.tif``.

    The work is delegated to ``reproject_raster``; the result counts as a
    success only if it returns a path that exists.

    Args:
      target_projection: The target CRS (EPSG code or string).
      base_directory: Directory where the reprojected file will be saved. A
        falsy value falls back to ``Path()``.
      delete_merged_asset: If True, delete the merged file after a successful
        reprojection.

    Returns:
        The Path to the reprojected file if successful, else None (also when
        no merged asset path is set).
    """
    if not base_directory:
        output_directory = Path()
    elif isinstance(base_directory, str):
        output_directory = Path(base_directory)
    else:
        output_directory = base_directory

    target_path = output_directory / f"{self.asset_id}_reprojected.tif"
    self.logger.info(f"Reprojecting asset [{self.asset_id}] ...")

    source_path = self.merged_asset_path
    if not source_path:
        self.logger.error(f"Merged asset path is missing for asset [{self.asset_id}]")
        return None

    reprojected_filename = reproject_raster(
        dataset_path=source_path,
        target_path=target_path,
        target_crs=target_projection,
        logger=self.logger,
    )

    # A missing return value or a missing file both count as failure.
    if not (reprojected_filename and reprojected_filename.exists()):
        self.logger.error(f"There was a problem reprojecting asset [{self.asset_id}]")
        return None

    self.logger.info(f"Asset location : [{reprojected_filename}]")
    self.reprojected_asset_path = reprojected_filename
    if delete_merged_asset:
        self.delete_merged_asset()
    return reprojected_filename

delete_asset_sub_items #

delete_asset_sub_items() -> None

Delete all asset sub items that belong to this asset.

Source code in src/geospatial_tools/stac.py
331
332
333
334
335
336
def delete_asset_sub_items(self) -> None:
    """Remove from disk the file of every sub-item of this asset (missing files are ignored)."""
    self.logger.info(f"Deleting asset sub items from asset [{self.asset_id}]")
    for sub_item in self._sub_items:
        local_file = sub_item.filename
        self.logger.info(f"Deleting [{local_file}] ...")
        local_file.unlink(missing_ok=True)

delete_merged_asset #

delete_merged_asset() -> None

Delete merged asset.

Source code in src/geospatial_tools/stac.py
338
339
340
341
342
def delete_merged_asset(self) -> None:
    """Remove the merged raster file from disk, if a merged path is set (a missing file is ignored)."""
    merged_file = self.merged_asset_path
    if not merged_file:
        return
    self.logger.info(f"Deleting merged asset file for [{merged_file}]")
    merged_file.unlink(missing_ok=True)

delete_reprojected_asset #

delete_reprojected_asset() -> None

Delete reprojected asset.

Source code in src/geospatial_tools/stac.py
344
345
346
347
348
def delete_reprojected_asset(self) -> None:
    """Remove the reprojected raster file from disk, if a reprojected path is set (a missing file is ignored)."""
    reprojected_file = self.reprojected_asset_path
    if not reprojected_file:
        return
    self.logger.info(f"Deleting reprojected asset file for [{reprojected_file}]")
    reprojected_file.unlink(missing_ok=True)

StacSearch #

StacSearch(catalog_name: str, logger: Logger = LOGGER)

Utility class to help facilitate and automate STAC API searches through the use of pystac_client.Client.

Initializes a StacSearch instance.

Parameters:

Name Type Description Default
catalog_name str

Name of the STAC catalog (e.g., 'planetary_computer', 'copernicus').

required
logger Logger

Logger instance.

LOGGER
Source code in src/geospatial_tools/stac.py
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
def __init__(self, catalog_name: str, logger: logging.Logger = LOGGER) -> None:
    """
    Create a StacSearch bound to one STAC catalog.

    The catalog client comes from ``catalog_generator``. All result and
    download attributes start out as ``None``. An S3 client is created only
    when ``catalog_name`` equals ``COPERNICUS``.

    Args:
        catalog_name: Name of the STAC catalog (e.g., 'planetary_computer', 'copernicus').
        logger: Logger instance.
    """
    self.catalog_name = catalog_name
    self.catalog: pystac_client.Client | None = catalog_generator(catalog_name=catalog_name)

    # Search state: nothing has been searched, sorted or filtered yet.
    self.search_results: list[pystac.Item] | None = None
    self.cloud_cover_sorted_results: list[pystac.Item] | None = None
    self.filtered_results: list[pystac.Item] | None = None

    # Download state: nothing has been downloaded yet.
    self.downloaded_search_assets: list[Asset] | None = None
    self.downloaded_cloud_cover_sorted_assets: list[Asset] | None = None
    self.downloaded_best_sorted_asset: Asset | None = None

    self.logger = logger
    self.s3_client: Any | None = s3_utils.get_s3_client() if catalog_name == COPERNICUS else None

search #

search(
    date_range: DateLike = None,
    max_items: int | None = None,
    limit: int | None = None,
    ids: list[str] | None = None,
    collections: str | list[str] | None = None,
    bbox: BBoxLike | None = None,
    intersects: IntersectsLike | None = None,
    query: dict[str, Any] | None = None,
    sortby: list[dict[str, str]] | str | list[str] | None = None,
    max_retries: int = 3,
    delay: int = 5,
) -> list[Item]

STAC API search that will use search query and parameters. Essentially a wrapper on pystac_client.Client.

Parameter descriptions taken from pystac docs.

Parameters:

Name Type Description Default
date_range DateLike

Either a single datetime or datetime range used to filter results. You may express a single datetime using a :class:datetime.datetime instance, a RFC 3339-compliant <https://tools.ietf.org/html/rfc3339>__ timestamp, or a simple date string (see below). Instances of :class:datetime.datetime may be either timezone aware or unaware. Timezone aware instances will be converted to a UTC timestamp before being passed to the endpoint. Timezone unaware instances are assumed to represent UTC timestamps. You may represent a datetime range using a "/" separated string as described in the spec, or a list, tuple, or iterator of 2 timestamps or datetime instances. For open-ended ranges, use either ".." ('2020-01-01:00:00:00Z/..', ['2020-01-01:00:00:00Z', '..']) or a value of None (['2020-01-01:00:00:00Z', None]). If using a simple date string, the datetime can be specified in YYYY-mm-dd format, optionally truncating to YYYY-mm or just YYYY. Simple date strings will be expanded to include the entire time period, for example: 2017 expands to 2017-01-01T00:00:00Z/2017-12-31T23:59:59Z and 2017-06 expands to 2017-06-01T00:00:00Z/2017-06-30T23:59:59Z If used in a range, the end of the range expands to the end of that day/month/year, for example: 2017-06-10/2017-06-11 expands to 2017-06-10T00:00:00Z/2017-06-11T23:59:59Z (Default value = None)

None
max_items int | None

The maximum number of items to return from the search, even if there are more matching results.

None
limit int | None

A recommendation to the service as to the number of items to return per page of results.

None
ids list[str] | None

List of one or more Item ids to filter on.

None
collections str | list[str] | None

List of one or more Collection IDs or pystac.Collection instances. Only Items in one of the provided Collections will be searched.

None
bbox BBoxLike | None

A list, tuple, or iterator representing a bounding box of 2D or 3D coordinates. Results will be filtered to only those intersecting the bounding box.

None
intersects IntersectsLike | None

A string or dictionary representing a GeoJSON geometry, or an object that implements a `__geo_interface__` property, as supported by several libraries including Shapely, ArcPy, PySAL, and geojson. Results are filtered to only those intersecting the geometry.

None
query dict[str, Any] | None

List or JSON of query parameters as per the STAC API query extension.

None
sortby list[dict[str, str]] | str | list[str] | None

A single field or list of fields to sort the response by

None
max_retries int
3
delay int
5

Returns:

Type Description
list[Item]

A list of pystac.Item objects matching the search criteria.

Source code in src/geospatial_tools/stac.py
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
def search(
    self,
    date_range: DateLike = None,
    max_items: int | None = None,
    limit: int | None = None,
    ids: list[str] | None = None,
    collections: str | list[str] | None = None,
    bbox: geotools_types.BBoxLike | None = None,
    intersects: geotools_types.IntersectsLike | None = None,
    query: dict[str, Any] | None = None,
    sortby: list[dict[str, str]] | str | list[str] | None = None,
    max_retries: int = 3,
    delay: int = 5,
) -> list[pystac.Item]:
    """
    STAC API search that will use search query and parameters. Essentially a wrapper on `pystac_client.Client`.

    The search itself is delegated to ``self._base_catalog_search``; it is re-attempted
    when an ``APIError`` is raised. The returned list is also stored in ``self.search_results``.

    Parameter descriptions taken from pystac docs.

    Args:
      date_range: Either a single datetime or datetime range used to filter results.
            You may express a single datetime using a :class:`datetime.datetime`
            instance, a `RFC 3339-compliant <https://tools.ietf.org/html/rfc3339>`__
            timestamp, or a simple date string (see below). Instances of
            :class:`datetime.datetime` may be either
            timezone aware or unaware. Timezone aware instances will be converted to
            a UTC timestamp before being passed
            to the endpoint. Timezone unaware instances are assumed to represent UTC
            timestamps. You may represent a
            datetime range using a ``"/"`` separated string as described in the
            spec, or a list, tuple, or iterator
            of 2 timestamps or datetime instances. For open-ended ranges, use either
            ``".."`` (``'2020-01-01:00:00:00Z/..'``,
            ``['2020-01-01:00:00:00Z', '..']``) or a value of ``None``
            (``['2020-01-01:00:00:00Z', None]``).
            If using a simple date string, the datetime can be specified in
            ``YYYY-mm-dd`` format, optionally truncating
            to ``YYYY-mm`` or just ``YYYY``. Simple date strings will be expanded to
            include the entire time period, for example: ``2017`` expands to
            ``2017-01-01T00:00:00Z/2017-12-31T23:59:59Z`` and ``2017-06`` expands
            to ``2017-06-01T00:00:00Z/2017-06-30T23:59:59Z``
            If used in a range, the end of the range expands to the end of that
            day/month/year, for example: ``2017-06-10/2017-06-11`` expands to
              ``2017-06-10T00:00:00Z/2017-06-11T23:59:59Z`` (Default value = None)
      max_items: The maximum number of items to return from the search, even if there are
        more matching results.
      limit: A recommendation to the service as to the number of items to return per
        page of results.
      ids: List of one or more Item ids to filter on.
      collections: List of one or more Collection IDs or pystac.Collection instances. Only Items in one of the
        provided Collections will be searched. A single string is wrapped in a list.
      bbox: A list, tuple, or iterator representing a bounding box of 2D or 3D coordinates. Results will be filtered
        to only those intersecting the bounding box.
      intersects: A string or dictionary representing a GeoJSON geometry, or an object that implements a
        __geo_interface__ property, as supported by several libraries including Shapely, ArcPy, PySAL, and geojson.
        Results filtered to only those intersecting the geometry.
      query: List or JSON of query parameters as per the STAC API query extension.
      sortby: A single field or list of fields to sort the response by. A single dict is wrapped in a list.
      max_retries: Maximum number of search attempts. Values below 1 are treated as 1 so
        that the search is always attempted at least once.
      delay: Number of seconds to wait between two attempts.

    Returns:
        A list of pystac.Item objects matching the search criteria. An empty list is
        returned when the STAC client is not initialized.

    Raises:
        APIError: If the last allowed attempt fails.
    """
    if isinstance(collections, str):
        collections = [collections]
    if isinstance(sortby, dict):
        sortby = [sortby]

    if not self.catalog:
        self.logger.error("STAC client is not initialized.")
        return []

    intro_log = "Initiating STAC API search"
    if query:
        intro_log = f"{intro_log} \n\tQuery : [{query}]"
    self.logger.info(intro_log)

    # Guarantee one attempt: with max_retries <= 0 the loop would otherwise never run
    # and an empty result would be returned without ever querying the catalog.
    total_attempts = max(1, max_retries)
    items: list[pystac.Item] = []
    for attempt in range(1, total_attempts + 1):
        try:
            items = self._base_catalog_search(
                date_range=date_range,
                max_items=max_items,
                limit=limit,
                ids=ids,
                collections=collections,
                bbox=bbox,
                intersects=intersects,
                query=query,
                sortby=sortby,
            )
            break
        except APIError as e:
            self.logger.error(f"Attempt {attempt} failed: {e}")
            if attempt >= total_attempts:
                # Out of attempts: re-raise with the original traceback intact.
                raise
            time.sleep(delay)

    self.search_results = items
    return items

search_for_date_ranges #

search_for_date_ranges(
    date_ranges: Sequence[DateLike],
    max_items: int | None = None,
    limit: int | None = None,
    collections: str | list[str] | None = None,
    bbox: BBoxLike | None = None,
    intersects: IntersectsLike | None = None,
    query: dict[str, Any] | None = None,
    sortby: list[dict[str, str]] | str | list[str] | None = None,
    max_retries: int = 3,
    delay: int = 5,
) -> list[Item]

STAC API search that will use search query and parameters for each date range in given list of date_ranges.

Date ranges can be generated with the help of the geospatial_tools.utils.create_date_range_for_specific_period function for more complex ranges.

Parameters:

Name Type Description Default
date_ranges Sequence[DateLike]

List containing datetime date ranges

required
max_items int | None

The maximum number of items to return from the search, even if there are more matching results

None
limit int | None

A recommendation to the service as to the number of items to return per page of results.

None
collections str | list[str] | None

List of one or more Collection IDs or pystac.Collection instances. Only Items in one of the provided Collections will be searched.

None
bbox BBoxLike | None

A list, tuple, or iterator representing a bounding box of 2D or 3D coordinates. Results will be filtered to only those intersecting the bounding box.

None
intersects IntersectsLike | None

A string or dictionary representing a GeoJSON geometry, or an object that implements a `__geo_interface__` property, as supported by several libraries including Shapely, ArcPy, PySAL, and geojson. Results are filtered to only those intersecting the geometry.

None
query dict[str, Any] | None

List or JSON of query parameters as per the STAC API query extension.

None
sortby list[dict[str, str]] | str | list[str] | None

A single field or list of fields to sort the response by

None
max_retries int
3
delay int
5

Returns:

Type Description
list[Item]

A list of pystac.Item objects.

Source code in src/geospatial_tools/stac.py
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
def search_for_date_ranges(
    self,
    date_ranges: Sequence[DateLike],
    max_items: int | None = None,
    limit: int | None = None,
    collections: str | list[str] | None = None,
    bbox: geotools_types.BBoxLike | None = None,
    intersects: geotools_types.IntersectsLike | None = None,
    query: dict[str, Any] | None = None,
    sortby: list[dict[str, str]] | str | list[str] | None = None,
    max_retries: int = 3,
    delay: int = 5,
) -> list[pystac.Item]:
    """
    STAC API search that will use search query and parameters for each date range in given list of `date_ranges`.

    Date ranges can be generated with the help of the `geospatial_tools.utils.create_date_range_for_specific_period`
    function for more complex ranges.

    Each date range is searched through ``self._base_catalog_search`` and the items are
    concatenated in the order of `date_ranges`. If an ``APIError`` is raised, the whole
    pass over `date_ranges` is started again from scratch. The returned list is also
    stored in ``self.search_results``.

    Args:
      date_ranges: List containing datetime date ranges
      max_items: The maximum number of items to return from the search, even if there are more matching results
      limit: A recommendation to the service as to the number of items to return per page of results.
      collections: List of one or more Collection IDs or pystac.Collection instances. Only Items in one of the
        provided Collections will be searched. A single string is wrapped in a list.
      bbox: A list, tuple, or iterator representing a bounding box of 2D or 3D coordinates. Results will be
        filtered to only those intersecting the bounding box.
      intersects: A string or dictionary representing a GeoJSON geometry, or an object that implements
        a __geo_interface__ property, as supported by several libraries including Shapely, ArcPy, PySAL, and
        geojson. Results filtered to only those intersecting the geometry.
      query: List or JSON of query parameters as per the STAC API query extension.
      sortby: A single field or list of fields to sort the response by. A single dict is wrapped in a list.
      max_retries: Maximum number of passes over `date_ranges`. Values below 1 are treated
        as 1 so that the search is always attempted at least once.
      delay: Number of seconds to wait between two attempts.

    Returns:
        A list of pystac.Item objects. An empty list is returned when the STAC client is
        not initialized or when nothing matched.

    Raises:
        APIError: If the last allowed attempt fails.
    """
    if isinstance(collections, str):
        collections = [collections]
    if isinstance(sortby, dict):
        sortby = [sortby]

    if not self.catalog:
        self.logger.error("STAC client is not initialized.")
        return []

    intro_log = f"Running STAC API search for the following parameters: \n\tDate ranges : {date_ranges}"
    if query:
        intro_log = f"{intro_log} \n\tQuery : {query}"
    self.logger.info(intro_log)

    # Guarantee one attempt: with max_retries <= 0 the loop would otherwise never run.
    total_attempts = max(1, max_retries)
    results: list[pystac.Item] = []
    for attempt in range(1, total_attempts + 1):
        # Collect into a fresh list on every attempt. Each retry walks all date ranges
        # again, so keeping items from a failed attempt would duplicate them.
        attempt_results: list[pystac.Item] = []
        try:
            for date_range in date_ranges:
                items = self._base_catalog_search(
                    date_range=date_range,
                    max_items=max_items,
                    limit=limit,
                    collections=collections,
                    bbox=bbox,
                    intersects=intersects,
                    query=query,
                    sortby=sortby,
                )
                attempt_results.extend(items)
        except APIError as e:
            self.logger.error(f"Attempt {attempt} failed: {e}")
            if attempt >= total_attempts:
                # Out of attempts: re-raise with the original traceback intact.
                raise
            time.sleep(delay)
        else:
            results = attempt_results
            break

    if not results:
        self.logger.warning(f"Search for date ranges [{date_ranges}] found no results!")

    self.search_results = results
    return results

sort_results_by_cloud_coverage #

sort_results_by_cloud_coverage() -> list[Item] | None

Sorts the search results by cloud coverage (ascending).

Returns:

Type Description
list[Item] | None

A list of sorted pystac.Item objects, or None if no results exist.

Source code in src/geospatial_tools/stac.py
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
def sort_results_by_cloud_coverage(self) -> list[pystac.Item] | None:
    """
    Sorts the search results by cloud coverage (ascending).

    The sort key is each item's ``eo:cloud_cover`` property. Items where the property
    is absent or set to ``None`` are placed last. The sorted list is also stored in
    ``self.cloud_cover_sorted_results``; ``self.search_results`` is left untouched.

    Returns:
        A list of sorted pystac.Item objects, or None if no results exist.
    """
    if not self.search_results:
        self.logger.warning("No results found: please run a search before trying to sort results")
        return None

    def _cloud_cover(item: pystac.Item) -> float:
        # A property explicitly set to None cannot be compared with numbers,
        # so it gets the same "sort last" value as a missing property.
        value = item.properties.get("eo:cloud_cover")
        return float("inf") if value is None else value

    self.logger.debug("Sorting results by cloud cover (from least to most)")
    self.cloud_cover_sorted_results = sorted(self.search_results, key=_cloud_cover)
    return self.cloud_cover_sorted_results

filter_no_data #

filter_no_data(property_name: str, max_no_data_value: int = 5) -> list[Item] | None

Filter out results whose nodata value is at or above a threshold, keeping only those strictly below it.

Parameters:

Name Type Description Default
property_name str

Name of the property containing nodata percentage.

required
max_no_data_value int

Max allowed percentage of nodata. (Default value = 5)

5

Returns:

Type Description
list[Item] | None

Filtered list of pystac.Item objects.

Source code in src/geospatial_tools/stac.py
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
def filter_no_data(self, property_name: str, max_no_data_value: int = 5) -> list[pystac.Item] | None:
    """
    Keep only the results whose nodata value is strictly below a threshold.

    Works on ``self.cloud_cover_sorted_results``; when that is empty or unset,
    ``sort_results_by_cloud_coverage`` is called first. The kept items are also
    stored in ``self.filtered_results``.

    Args:
      property_name: Name of the property containing nodata percentage.
      max_no_data_value: Threshold; an item is kept when its value is strictly
        below it. (Default value = 5)

    Returns:
        Filtered list of pystac.Item objects, or None when there are no sorted
        results to filter.
    """
    sorted_results = self.cloud_cover_sorted_results or self.sort_results_by_cloud_coverage()
    if not sorted_results:
        return None

    def _no_data(item: pystac.Item) -> float:
        # A missing property counts as 0; a property explicitly set to None is
        # treated the same way instead of failing the comparison below.
        value = item.properties.get(property_name)
        return 0 if value is None else value

    filtered_results = [item for item in sorted_results if _no_data(item) < max_no_data_value]
    self.filtered_results = filtered_results

    return filtered_results

download_search_results #

download_search_results(bands: list[str], base_directory: str | Path) -> list[Asset]

Downloads assets for all search results.

Parameters:

Name Type Description Default
bands list[str]

List of bands to download.

required
base_directory str | Path

The base directory for downloads.

required

Returns:

Type Description
list[Asset]

A list of Asset objects for the downloaded search results.

Source code in src/geospatial_tools/stac.py
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
def download_search_results(self, bands: list[str], base_directory: str | Path) -> list[Asset]:
    """
    Download the requested bands for every item in ``self.search_results``.

    The download is delegated to ``self._download_results`` and its return value
    is also stored in ``self.downloaded_search_assets``.

    Args:
        bands: List of bands to download.
        base_directory: The base directory for downloads.

    Returns:
        A list of Asset objects for the downloaded search results.
    """
    self.downloaded_search_assets = self._download_results(
        results=self.search_results,
        bands=bands,
        base_directory=base_directory,
    )
    return self.downloaded_search_assets

download_sorted_by_cloud_cover_search_results #

download_sorted_by_cloud_cover_search_results(
    bands: list[str],
    base_directory: str | Path,
    first_x_num_of_items: int | None = None,
) -> list[Asset]

Downloads sorted results.

Parameters:

Name Type Description Default
bands list[str]

List of bands to download.

required
base_directory str | Path

The base directory for downloads.

required
first_x_num_of_items int | None

Optional number of top items to download.

None

Returns:

Type Description
list[Asset]

A list of Asset objects for the downloaded items.

Source code in src/geospatial_tools/stac.py
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
def download_sorted_by_cloud_cover_search_results(
    self, bands: list[str], base_directory: str | Path, first_x_num_of_items: int | None = None
) -> list[Asset]:
    """
    Downloads the results returned by ``self._generate_best_results()``.

    Per the method name these are the search results ordered by cloud cover;
    the ordering itself is done by ``_generate_best_results``.

    Args:
        bands: List of bands to download.
        base_directory: The base directory for downloads.
        first_x_num_of_items: Optional number of top items to download.
            ``None`` (the default) downloads every result, ``0`` downloads
            nothing. The value is applied as a slice (``results[:n]``).

    Returns:
        A list of Asset objects for the downloaded items. An empty list is
        returned, and ``self.downloaded_cloud_cover_sorted_assets`` is left
        untouched, when there is nothing to download.
    """
    results = self._generate_best_results()
    # Compare against None explicitly: a plain truthiness test would treat a
    # requested count of 0 as "no limit" and download every result.
    if results and first_x_num_of_items is not None:
        results = results[:first_x_num_of_items]
    if not results:
        return []
    downloaded_search_results = self._download_results(results=results, bands=bands, base_directory=base_directory)
    self.downloaded_cloud_cover_sorted_assets = downloaded_search_results
    return downloaded_search_results

download_best_cloud_cover_result #

download_best_cloud_cover_result(
    bands: list[str], base_directory: str | Path
) -> Asset | None

Downloads the single best result based on cloud cover.

Parameters:

Name Type Description Default
bands list[str]

List of bands to download.

required
base_directory str | Path

The base directory for downloads.

required

Returns:

Type Description
Asset | None

The Asset object for the best result, or None if no results available.

Source code in src/geospatial_tools/stac.py
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
def download_best_cloud_cover_result(self, bands: list[str], base_directory: str | Path) -> Asset | None:
    """
    Download (or reuse) the asset for the first item of the ranked results.

    The ranking comes from ``self._generate_best_results()``. If
    ``self.downloaded_cloud_cover_sorted_assets`` is already populated, its
    first entry is reused and nothing is downloaded. Otherwise only the first
    ranked item is passed to ``self._download_results``. In both cases the
    chosen asset is stored on ``self.downloaded_best_sorted_asset``.

    Args:
        bands: Names of the bands to download.
        base_directory: Base directory passed through to the downloader.

    Returns:
        The selected Asset, or None when there are no ranked results or the
        download returned nothing.
    """
    ranked_items = self._generate_best_results()
    if not ranked_items:
        return None
    best_result = [ranked_items[0]]

    already_downloaded = self.downloaded_cloud_cover_sorted_assets
    if already_downloaded:
        # NOTE(review): this assumes the first cached asset corresponds to the
        # best-ranked item and was fetched with suitable bands - confirm.
        self.logger.info(f"Asset [{best_result[0].id}] is already downloaded")
        cached_asset = already_downloaded[0]
        self.downloaded_best_sorted_asset = cached_asset
        return cached_asset

    fresh_assets = self._download_results(
        bands=bands,
        base_directory=base_directory,
        results=best_result,
    )
    if not fresh_assets:
        return None
    best_asset = fresh_assets[0]
    self.downloaded_best_sorted_asset = best_asset
    return best_asset

create_planetary_computer_catalog #

create_planetary_computer_catalog(
    max_retries: int = 3, delay: int = 5, logger: Logger = LOGGER
) -> Client | None

Creates a Planetary Computer Catalog Client.

Parameters:

Name Type Description Default
max_retries int

The maximum number of retries for the API connection. (Default value = 3)

3
delay int

The delay between retry attempts in seconds. (Default value = 5)

5
logger Logger

The logger instance to use. (Default value = LOGGER)

LOGGER

Returns:

Type Description
Client | None

A pystac_client.Client instance if successful, else None.

Source code in src/geospatial_tools/stac.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def create_planetary_computer_catalog(
    max_retries: int = 3, delay: int = 5, logger: logging.Logger = LOGGER
) -> pystac_client.Client | None:
    """
    Open a STAC client on the Planetary Computer API, retrying on failure.

    Each attempt calls ``pystac_client.Client.open`` on ``PLANETARY_COMPUTER_API``
    with ``sign_inplace`` as the modifier. A failed attempt is logged and,
    unless it was the last one, followed by a pause of ``delay`` seconds.

    Args:
        max_retries: Total number of connection attempts. (Default value = 3)
        delay: Seconds to sleep between two attempts. (Default value = 5)
        logger: Logger receiving debug and error messages. (Default value = LOGGER)

    Returns:
        The connected pystac_client.Client. None is only returned when no
        attempt is made at all (``max_retries`` lower than 1).

    Raises:
        Exception: The error of the final attempt, re-raised once all
            attempts have failed.
    """
    for attempt in range(1, max_retries + 1):
        try:
            catalog = pystac_client.Client.open(PLANETARY_COMPUTER_API, modifier=sign_inplace)
            logger.debug("Successfully connected to the API.")
            return catalog
        except Exception as e:  # pylint: disable=W0718
            logger.error(f"Attempt {attempt} failed: {e}")
            if attempt >= max_retries:
                # Out of attempts: report once more and propagate the failure.
                logger.error(e)
                raise e
            time.sleep(delay)
    return None

create_copernicus_catalog #

create_copernicus_catalog(
    max_retries: int = 3, delay: int = 5, logger: Logger = LOGGER
) -> Client | None

Creates a Copernicus Data Space Ecosystem Catalog Client.

Parameters:

Name Type Description Default
max_retries int

The maximum number of retries for the API connection. (Default value = 3)

3
delay int

The delay between retry attempts in seconds. (Default value = 5)

5
logger Logger

The logger instance to use. (Default value = LOGGER)

LOGGER

Returns:

Type Description
Client | None

A pystac_client.Client instance if successful, else None.

Source code in src/geospatial_tools/stac.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def create_copernicus_catalog(
    max_retries: int = 3, delay: int = 5, logger: logging.Logger = LOGGER
) -> pystac_client.Client | None:
    """
    Open a STAC client on the Copernicus Data Space Ecosystem API, retrying on failure.

    Each attempt calls ``pystac_client.Client.open`` on ``COPERNICUS_API``. A
    failed attempt is logged and, unless it was the last one, followed by a
    pause of ``delay`` seconds.

    Args:
        max_retries: Total number of connection attempts. (Default value = 3)
        delay: Seconds to sleep between two attempts. (Default value = 5)
        logger: Logger receiving debug and error messages. (Default value = LOGGER)

    Returns:
        The connected pystac_client.Client. None is only returned when no
        attempt is made at all (``max_retries`` lower than 1).

    Raises:
        Exception: The error of the final attempt, re-raised once all
            attempts have failed.
    """
    for attempt in range(1, max_retries + 1):
        try:
            catalog = pystac_client.Client.open(COPERNICUS_API)
            logger.debug("Successfully connected to the API.")
            return catalog
        except Exception as e:  # pylint: disable=W0718
            logger.error(f"Attempt {attempt} failed: {e}")
            if attempt >= max_retries:
                # Out of attempts: report once more and propagate the failure.
                logger.error(e)
                raise e
            time.sleep(delay)
    return None

catalog_generator #

catalog_generator(catalog_name: str, logger: Logger = LOGGER) -> Client | None

Generates a STAC Client for the specified catalog.

Parameters:

Name Type Description Default
catalog_name str

The name of the catalog (e.g., 'planetary_computer', 'copernicus').

required
logger Logger

The logger instance to use.

LOGGER

Returns:

Type Description
Client | None

A pystac_client.Client instance for the requested catalog if supported, else None.

Source code in src/geospatial_tools/stac.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def catalog_generator(catalog_name: str, logger: logging.Logger = LOGGER) -> pystac_client.Client | None:
    """
    Generates a STAC Client for the specified catalog.

    The catalog name is looked up in a table of factory functions
    (``create_planetary_computer_catalog`` and ``create_copernicus_catalog``).
    The selected factory is called with the caller's ``logger`` so that
    connection messages go to the same logger as this function's own messages.

    Args:
        catalog_name: The name of the catalog; must equal ``PLANETARY_COMPUTER``
            or ``COPERNICUS``.
        logger: The logger instance to use, also forwarded to the factory.

    Returns:
        The value returned by the selected factory, or None (after logging an
        error) when the catalog name is not supported.
    """
    catalog_factories = {
        PLANETARY_COMPUTER: create_planetary_computer_catalog,
        COPERNICUS: create_copernicus_catalog,
    }
    factory = catalog_factories.get(catalog_name)
    if factory is None:
        logger.error(f"Unsupported catalog name: {catalog_name}")
        return None

    # Forward the logger: calling the factory without it would silently fall
    # back to the module default and ignore the caller's choice.
    return factory(logger=logger)

list_available_catalogs #

list_available_catalogs(logger: Logger = LOGGER) -> frozenset[str]

Lists all available STAC catalogs.

Parameters:

Name Type Description Default
logger Logger

The logger instance to use.

LOGGER

Returns:

Type Description
frozenset[str]

A frozenset of available catalog names.

Source code in src/geospatial_tools/stac.py
121
122
123
124
125
126
127
128
129
130
131
132
def list_available_catalogs(logger: logging.Logger = LOGGER) -> frozenset[str]:
    """
    Return the names of the STAC catalogs declared by this module.

    Args:
        logger: Logger that receives the informational message.

    Returns:
        The module-level ``CATALOG_NAME_LIST`` (annotated as a frozenset of
        catalog names).
    """
    logger.info("Available catalogs")
    catalog_names = CATALOG_NAME_LIST
    return catalog_names

download_stac_asset #

download_stac_asset(
    asset_url: str,
    destination: Path,
    method: str = "http",
    headers: dict[str, str] | None = None,
    s3_client: Any | None = None,
    logger: Logger = LOGGER,
) -> Path | None

Generic dispatcher for downloading STAC assets via HTTP or S3.

Parameters:

Name Type Description Default
asset_url str

URL/HREF of the asset to download.

required
destination Path

Path where the file will be saved.

required
method str

Download method ('http' or 's3').

'http'
headers dict[str, str] | None

Headers for HTTP request.

None
s3_client Any | None

Boto3 S3 client (required for 's3' method).

None
logger Logger

Logger instance.

LOGGER

Returns:

Type Description
Path | None

The Path to the downloaded file if successful, else None.

Source code in src/geospatial_tools/stac.py
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
def download_stac_asset(
    asset_url: str,
    destination: Path,
    method: str = "http",
    headers: dict[str, str] | None = None,
    s3_client: Any | None = None,
    logger: logging.Logger = LOGGER,
) -> Path | None:
    """
    Route a STAC asset download to the S3 or the HTTP downloader.

    Only the exact value ``"s3"`` selects ``download_url_s3``; every other
    ``method`` value falls through to ``download_url``.

    Args:
        asset_url: URL/HREF of the asset to download.
        destination: Path where the file will be saved.
        method: Download method; ``"s3"`` for S3, anything else means HTTP.
        headers: Headers forwarded to the HTTP downloader (unused for S3).
        s3_client: S3 client forwarded to the S3 downloader (unused for HTTP).
        logger: Logger instance forwarded to the selected downloader.

    Returns:
        Whatever the selected downloader returns (annotated as the Path of the
        downloaded file, or None).
    """
    if method != "s3":
        # Default to HTTP
        return download_url(url=asset_url, filename=destination, headers=headers, logger=logger)
    return download_url_s3(asset_url=asset_url, destination=destination, s3_client=s3_client, logger=logger)