Skip to content

s3_utils

Utility module for S3 operations related to Copernicus Data Space Ecosystem.

get_s3_client #

get_s3_client(endpoint_url: str | None = None) -> client

Creates and returns a boto3 S3 client.

Parameters:

Name Type Description Default
endpoint_url str | None

The S3 endpoint URL. If None, it attempts to use the COPERNICUS_S3_ENDPOINT environment variable.

None

Returns:

Type Description
client

A boto3 S3 client.

Source code in src/geospatial_tools/s3_utils.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def get_s3_client(endpoint_url: str | None = None) -> boto3.client:
    """
    Creates and returns a boto3 S3 client.

    Args:
        endpoint_url: The S3 endpoint URL. If None (or empty), falls back to the
                      COPERNICUS_S3_ENDPOINT environment variable, and finally to
                      the CDSE eodata default endpoint.

    Returns:
        A boto3 S3 client bound to the resolved endpoint.
    """
    if not endpoint_url:
        endpoint_url = os.environ.get("COPERNICUS_S3_ENDPOINT", "https://eodata.dataspace.copernicus.eu")

    # Normalize empty strings to None so boto3 falls back to its default
    # credential chain rather than receiving an empty credential.
    access_key = os.environ.get("AWS_ACCESS_KEY_ID") or None
    secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY") or None

    if not access_key or not secret_key:
        LOGGER.warning("AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY not found in environment.")

    # Lazy %-style args: the message is only formatted if INFO is enabled.
    LOGGER.info("Creating S3 client with endpoint: [%s]", endpoint_url)

    # Note: boto3 automatically picks up AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
    # from the environment, but we can also pass them explicitly if needed.
    return boto3.client(
        "s3",
        endpoint_url=endpoint_url,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )

parse_s3_url #

parse_s3_url(url: str) -> tuple[str, str]

Parses an S3 URL or a CDSE STAC href to extract the bucket and key.

Expected formats:

- s3://bucket/key
- https://eodata.dataspace.copernicus.eu/bucket/key
- https://zipper.dataspace.copernicus.eu/download/uuid (this might not be a direct S3 key)

Parameters:

Name Type Description Default
url str

The URL to parse.

required

Returns:

Type Description
tuple[str, str]

A tuple of (bucket, key).

Raises:

Type Description
ValueError

If the URL cannot be parsed into a bucket and key.

Source code in src/geospatial_tools/s3_utils.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def parse_s3_url(url: str) -> tuple[str, str]:
    """
    Parses an S3 URL or a CDSE STAC href to extract the bucket and key.

    Expected formats:
    - s3://bucket/key
    - https://eodata.dataspace.copernicus.eu/bucket/key
    - https://zipper.dataspace.copernicus.eu/download/uuid (this might not be a direct S3 key)

    Args:
        url: The URL to parse.

    Returns:
        A tuple of (bucket, key).

    Raises:
        ValueError: If the URL cannot be parsed into a bucket and key.
    """
    components = urlparse(url)
    scheme = components.scheme

    # Native S3 URLs: netloc is the bucket, path (minus leading slash) is the key.
    if scheme == "s3":
        return components.netloc, components.path.lstrip("/")

    if scheme not in ("http", "https"):
        raise ValueError(f"Unsupported URL scheme: {components.scheme} in URL: {url}")

    # For CDSE eodata endpoint, the path starts with the bucket
    # e.g., /Sentinel-2/MSI/L2A/2023/01/01/...
    # Checking the separator (not the key) preserves "bucket/" -> (bucket, "").
    bucket, separator, key = components.path.lstrip("/").partition("/")
    if not separator:
        raise ValueError(f"URL path does not contain enough parts to determine bucket and key: {url}")

    return bucket, key