From 948b55c7ffbb804192aa8408c492c1d80ce0323a Mon Sep 17 00:00:00 2001 From: Brandon Jackson Date: Fri, 8 May 2026 19:36:01 -0500 Subject: [PATCH 1/4] feat(catalog_integration): add IcebergRestCatalogIntegration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds support for `CATALOG_SOURCE = ICEBERG_REST` catalog integrations, which are the AWS-recommended path for accessing Amazon S3 Tables federated catalogs from Snowflake (and any other Iceberg REST-compatible catalog). Previously snowcap only modeled `GLUE` and `OBJECT_STORE`, and the legacy GLUE source rejects the federated S3 Tables form ':s3tablescatalog/' for `GLUE_CATALOG_ID`, leaving S3 Tables-managed iceberg unmodelable in YAML. Resource shape: catalog_integrations: - name: ci_s3_tables_dev catalog_source: ICEBERG_REST catalog_namespace: my_namespace rest_config: catalog_uri: https://glue.us-east-1.amazonaws.com/iceberg catalog_api_type: AWS_GLUE catalog_name: '123456789012:s3tablescatalog/my_table_bucket' access_delegation_mode: VENDED_CREDENTIALS rest_authentication: type: SIGV4 sigv4_iam_role: arn:aws:iam::123456789012:role/snowflake-s3-tables-read sigv4_signing_region: us-east-1 enabled: true `rest_config` and `rest_authentication` are nested PropSet blocks, emitting `REST_CONFIG = (KEY = VAL ...)` / `REST_AUTHENTICATION = (...)` in the CREATE DDL. Both fields are marked `fetchable: False` because Snowflake auto-populates `WAREHOUSE` (echoes `CATALOG_NAME` when not explicitly set) and `PREFIX` (null) — so YAML is the source of truth for nested block contents (matches Stage.encryption precedent). Adds a `_parse_enum_map` helper in `data_provider.py` for the `{KEY=VAL, KEY=VAL}` EnumMap response type that DESC CATALOG INTEGRATION uses for nested config blocks. Adds `CatalogApiType` (AWS_GLUE, AWS_API_GATEWAY, PUBLIC), `AccessDelegationMode` (VENDED_CREDENTIALS), and `RestAuthenticationType` (SIGV4, OAUTH, BEARER, NONE) enums to validate nested block values. 
Validated end-to-end against a live Snowflake account with an existing S3 Tables federated catalog: `snowcap plan` no longer flags the existing integration (no false diff), and `create_sql()` emits a DDL that Snowflake accepts. Adds a fetch test for the new resource type alongside the existing GlueCatalogIntegration test. Refs: https://docs.snowflake.com/en/user-guide/tables-iceberg-configure-catalog-integration-vended-credentials Co-Authored-By: Claude Opus 4.7 (1M context) --- snowcap/data_provider.py | 53 +++++ snowcap/resources/__init__.py | 7 +- snowcap/resources/catalog_integration.py | 191 +++++++++++++++++- .../test_fetch_resource_simple.py | 17 ++ 4 files changed, 265 insertions(+), 3 deletions(-) diff --git a/snowcap/data_provider.py b/snowcap/data_provider.py index 06f36d3..13ed9ec 100644 --- a/snowcap/data_provider.py +++ b/snowcap/data_provider.py @@ -1410,10 +1410,63 @@ def fetch_catalog_integration(session: SnowflakeConnection, fqn: FQN): "owner": owner, "comment": data["comment"] or None, } + elif properties["catalog_source"] == "ICEBERG_REST": + # REST_CONFIG / REST_AUTHENTICATION come back from DESC as EnumMap-typed + # values. They're declared non-fetchable on the dataclass (YAML is the + # source of truth), but we still parse them so consumers introspecting + # the fetched state can see what's there. 
+ return { + "name": _quote_snowflake_identifier(data["name"]), + "catalog_source": properties["catalog_source"], + "table_format": properties["table_format"], + "catalog_namespace": properties.get("catalog_namespace"), + "rest_config": _parse_enum_map(properties.get("rest_config")), + "rest_authentication": _parse_enum_map(properties.get("rest_authentication")), + "enabled": properties["enabled"], + "refresh_interval_seconds": properties.get("refresh_interval_seconds"), + "owner": owner, + "comment": data["comment"] or None, + } else: raise Exception(f"Unsupported catalog integration: {properties['catalog_source']}") +def _parse_enum_map(value): + """Parse Snowflake DESC EnumMap values like '{KEY=VAL, KEY=VAL}' into a dict. + + Values may contain '=' (e.g., URLs, base64) so we split each key=value pair + on the first '=' only and lowercase the key. None returns None, '{}' returns + an empty dict, and pairs whose value is 'null' or empty are dropped. + """ + if value is None: + return None + if isinstance(value, dict): + return value + if not isinstance(value, str): + return value + body = value.strip() + if not body.startswith("{") or not body.endswith("}"): + return value + body = body[1:-1].strip() + if not body: + return {} + out = {} + # Snowflake separates pairs with ", " — values don't contain literal ", " so + # this is safe enough; falls back to single-pair if no comma. Drop null + # values so the parsed dict only contains explicitly-set fields (Snowflake + # echoes optional fields as null when unset). 
+ for pair in body.split(", "): + if "=" not in pair: + continue + k, v = pair.split("=", 1) + k = k.strip().lower() + v = v.strip() + if v == "null" or v == "": + continue + out[k] = v + return out + + def fetch_columns(session: SnowflakeConnection, resource_type: str, fqn: FQN): desc_result = execute(session, f"DESC {resource_type} {fqn}") columns = [] diff --git a/snowcap/resources/__init__.py b/snowcap/resources/__init__.py index a6d7983..d8d5f99 100644 --- a/snowcap/resources/__init__.py +++ b/snowcap/resources/__init__.py @@ -4,7 +4,11 @@ from .alert import Alert from .api_integration import APIIntegration from .authentication_policy import AuthenticationPolicy -from .catalog_integration import GlueCatalogIntegration, ObjectStoreCatalogIntegration +from .catalog_integration import ( + GlueCatalogIntegration, + IcebergRestCatalogIntegration, + ObjectStoreCatalogIntegration, +) from .column import Column from .compute_pool import ComputePool from .database import Database @@ -102,6 +106,7 @@ "GlueCatalogIntegration", "Grant", "HybridTable", + "IcebergRestCatalogIntegration", "ImageRepository", "InternalStage", "JavascriptUDF", diff --git a/snowcap/resources/catalog_integration.py b/snowcap/resources/catalog_integration.py index 7a44673..a530116 100644 --- a/snowcap/resources/catalog_integration.py +++ b/snowcap/resources/catalog_integration.py @@ -1,9 +1,9 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from .resource import Resource, ResourceSpec, NamedResource from .role import Role from ..enums import ParseableEnum, ResourceType -from ..props import Props, EnumProp, StringProp, BoolProp +from ..props import Props, EnumProp, StringProp, BoolProp, IntProp, PropSet from ..resource_name import ResourceName from ..scope import AccountScope @@ -11,6 +11,24 @@ class CatalogSource(ParseableEnum): GLUE = "GLUE" OBJECT_STORE = "OBJECT_STORE" + ICEBERG_REST = "ICEBERG_REST" + + +class CatalogApiType(ParseableEnum): + AWS_GLUE = 
"AWS_GLUE" + AWS_API_GATEWAY = "AWS_API_GATEWAY" + PUBLIC = "PUBLIC" + + +class AccessDelegationMode(ParseableEnum): + VENDED_CREDENTIALS = "VENDED_CREDENTIALS" + + +class RestAuthenticationType(ParseableEnum): + SIGV4 = "SIGV4" + OAUTH = "OAUTH" + BEARER = "BEARER" + NONE = "NONE" class CatalogTableFormat(ParseableEnum): @@ -212,9 +230,178 @@ def __init__( ) +@dataclass(unsafe_hash=True) +class _IcebergRestCatalogIntegration(ResourceSpec): + name: ResourceName + # rest_config / rest_authentication contain auto-populated fields when + # fetched (Snowflake echoes WAREHOUSE = CATALOG_NAME when not explicitly + # set, etc.) so we mark them non-fetchable: YAML is authoritative. Same + # precedent as Stage.encryption. + rest_config: dict = field(default=None, metadata={"fetchable": False}) + rest_authentication: dict = field(default=None, metadata={"fetchable": False}) + catalog_namespace: str = None + enabled: bool = True + refresh_interval_seconds: int = None + catalog_source: CatalogSource = CatalogSource.ICEBERG_REST + table_format: CatalogTableFormat = CatalogTableFormat.ICEBERG + owner: Role = "ACCOUNTADMIN" + comment: str = None + + def __post_init__(self): + super().__post_init__() + if self.catalog_source not in [CatalogSource.ICEBERG_REST]: + raise ValueError(f"Invalid catalog source: {self.catalog_source}") + if self.table_format not in [CatalogTableFormat.ICEBERG]: + raise ValueError(f"Invalid table format: {self.table_format}") + if not self.rest_config: + raise ValueError("rest_config is required for IcebergRestCatalogIntegration") + if "catalog_uri" not in self.rest_config: + raise ValueError("rest_config.catalog_uri is required") + if not self.rest_authentication: + raise ValueError("rest_authentication is required for IcebergRestCatalogIntegration") + if "type" not in self.rest_authentication: + raise ValueError("rest_authentication.type is required (e.g., SIGV4, OAUTH, BEARER, NONE)") + + +class IcebergRestCatalogIntegration(NamedResource, Resource): + 
""" + Description: + Manages an Apache Iceberg REST catalog integration in Snowflake. This is the + right choice for AWS S3 Tables (federated catalogs reachable via the Glue + Iceberg REST endpoint) and any other Iceberg REST-compatible catalog. + + Snowflake Docs: + https://docs.snowflake.com/en/sql-reference/sql/create-catalog-integration-rest + + Fields: + name (string, required): The name of the catalog integration. + rest_config (dict, required): Iceberg REST configuration. Required keys: + ``catalog_uri``. Common keys: ``catalog_api_type`` (AWS_GLUE, + AWS_API_GATEWAY, PUBLIC), ``catalog_name``, ``warehouse``, ``prefix``, + ``access_delegation_mode`` (VENDED_CREDENTIALS). + rest_authentication (dict, required): Authentication block. Required key: + ``type`` (SIGV4, OAUTH, BEARER, NONE). For SIGV4 against AWS, also set + ``sigv4_iam_role`` (and optionally ``sigv4_signing_region``, + ``sigv4_external_id``). For OAUTH set ``oauth_token_uri``, + ``oauth_client_id``, ``oauth_client_secret``, optionally + ``oauth_allowed_scopes``. For BEARER set ``bearer_token``. + catalog_namespace (string): The default namespace for tables that reference + this catalog integration (e.g., a Glue database / S3 Tables namespace). + enabled (bool): Whether the catalog integration is enabled. Defaults to True. + refresh_interval_seconds (int): Optional metadata refresh interval. + table_format (string): The table format. Only ICEBERG is supported. + owner (string or Role): The owner role. Defaults to ``ACCOUNTADMIN``. + comment (string): Optional comment. 
+ + Python: + + ```python + catalog = IcebergRestCatalogIntegration( + name="ci_s3_tables_dev", + catalog_namespace="my_namespace", + rest_config={ + "catalog_uri": "https://glue.us-east-1.amazonaws.com/iceberg", + "catalog_api_type": "AWS_GLUE", + "catalog_name": "123456789012:s3tablescatalog/my_table_bucket", + "access_delegation_mode": "VENDED_CREDENTIALS", + }, + rest_authentication={ + "type": "SIGV4", + "sigv4_iam_role": "arn:aws:iam::123456789012:role/snowflake-s3-tables-read", + "sigv4_signing_region": "us-east-1", + }, + enabled=True, + ) + ``` + + Yaml: + + ```yaml + catalog_integrations: + - name: ci_s3_tables_dev + catalog_source: ICEBERG_REST + catalog_namespace: my_namespace + rest_config: + catalog_uri: https://glue.us-east-1.amazonaws.com/iceberg + catalog_api_type: AWS_GLUE + catalog_name: '123456789012:s3tablescatalog/my_table_bucket' + access_delegation_mode: VENDED_CREDENTIALS + rest_authentication: + type: SIGV4 + sigv4_iam_role: arn:aws:iam::123456789012:role/snowflake-s3-tables-read + sigv4_signing_region: us-east-1 + enabled: true + ``` + """ + + resource_type = ResourceType.CATALOG_INTEGRATION + props = Props( + catalog_source=EnumProp("catalog_source", CatalogSource), + table_format=EnumProp("table_format", CatalogTableFormat), + catalog_namespace=StringProp("catalog_namespace"), + rest_config=PropSet( + "rest_config", + Props( + catalog_uri=StringProp("catalog_uri"), + catalog_api_type=EnumProp("catalog_api_type", CatalogApiType), + catalog_name=StringProp("catalog_name"), + warehouse=StringProp("warehouse"), + prefix=StringProp("prefix"), + access_delegation_mode=EnumProp("access_delegation_mode", AccessDelegationMode), + ), + ), + rest_authentication=PropSet( + "rest_authentication", + Props( + type=EnumProp("type", RestAuthenticationType), + sigv4_iam_role=StringProp("sigv4_iam_role"), + sigv4_signing_region=StringProp("sigv4_signing_region"), + sigv4_external_id=StringProp("sigv4_external_id"), + 
oauth_token_uri=StringProp("oauth_token_uri"), + oauth_client_id=StringProp("oauth_client_id"), + oauth_client_secret=StringProp("oauth_client_secret"), + bearer_token=StringProp("bearer_token"), + ), + ), + enabled=BoolProp("enabled"), + refresh_interval_seconds=IntProp("refresh_interval_seconds"), + comment=StringProp("comment"), + ) + scope = AccountScope() + spec = _IcebergRestCatalogIntegration + + def __init__( + self, + name: str, + rest_config: dict, + rest_authentication: dict, + catalog_namespace: str = None, + enabled: bool = True, + refresh_interval_seconds: int = None, + table_format: CatalogTableFormat = CatalogTableFormat.ICEBERG, + owner: str = "ACCOUNTADMIN", + comment: str = None, + **kwargs, + ): + kwargs.pop("catalog_source", None) + super().__init__(name, **kwargs) + self._data: _IcebergRestCatalogIntegration = _IcebergRestCatalogIntegration( + name=self._name, + rest_config=rest_config, + rest_authentication=rest_authentication, + catalog_namespace=catalog_namespace, + enabled=enabled, + refresh_interval_seconds=refresh_interval_seconds, + table_format=table_format, + owner=owner, + comment=comment, + ) + + CatalogIntegrationMap = { CatalogSource.GLUE: GlueCatalogIntegration, CatalogSource.OBJECT_STORE: ObjectStoreCatalogIntegration, + CatalogSource.ICEBERG_REST: IcebergRestCatalogIntegration, } diff --git a/tests/integration/data_provider/test_fetch_resource_simple.py b/tests/integration/data_provider/test_fetch_resource_simple.py index 120e3d1..249da98 100644 --- a/tests/integration/data_provider/test_fetch_resource_simple.py +++ b/tests/integration/data_provider/test_fetch_resource_simple.py @@ -126,6 +126,23 @@ def resource_fixtures() -> list: comment="Integration for AWS Glue with Snowflake.", owner=TEST_ROLE, ), + res.IcebergRestCatalogIntegration( + name="TEST_FETCH_ICEBERG_REST_CATALOG_INTEGRATION", + catalog_namespace="some_namespace", + rest_config={ + "catalog_uri": "https://glue.us-east-1.amazonaws.com/iceberg", + 
"catalog_api_type": "AWS_GLUE", + "catalog_name": "123456789012", + }, + rest_authentication={ + "type": "SIGV4", + "sigv4_iam_role": "arn:aws:iam::123456789012:role/SnowflakeAccess", + "sigv4_signing_region": "us-east-1", + }, + enabled=True, + comment="Iceberg REST catalog integration for AWS Glue.", + owner=TEST_ROLE, + ), res.ImageRepository( name="TEST_FETCH_IMAGE_REPOSITORY", owner=TEST_ROLE, From 13da5b6118e3ac5075f4d6afb1d6fa235ed5f159 Mon Sep 17 00:00:00 2001 From: Brandon Jackson Date: Sat, 9 May 2026 16:45:14 -0500 Subject: [PATCH 2/4] feat(catalog_integration): address PR #16 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per @noel's review on https://github.com/datacoves/snowcap/pull/16: 1. Add tests/fixtures/json/iceberg_rest_catalog_integration.json so test_polymorphic_resources can resolve the new ICEBERG_REST subtype from a fixture (matches the reviewer's suggested template). 2. Add oauth_allowed_scopes to the rest_authentication PropSet — was mentioned in the docstring but missing from the actual props. Now round-trips correctly: tested via create_sql() with type=OAUTH. Verified locally: all 28 tests in test_polymorphic_resources pass, including the new IcebergRestCatalogIntegration polymorphic resolution. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) --- snowcap/resources/catalog_integration.py | 1 + .../iceberg_rest_catalog_integration.json | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tests/fixtures/json/iceberg_rest_catalog_integration.json diff --git a/snowcap/resources/catalog_integration.py b/snowcap/resources/catalog_integration.py index a530116..5311a73 100644 --- a/snowcap/resources/catalog_integration.py +++ b/snowcap/resources/catalog_integration.py @@ -360,6 +360,7 @@ class IcebergRestCatalogIntegration(NamedResource, Resource): oauth_token_uri=StringProp("oauth_token_uri"), oauth_client_id=StringProp("oauth_client_id"), oauth_client_secret=StringProp("oauth_client_secret"), + oauth_allowed_scopes=StringProp("oauth_allowed_scopes"), bearer_token=StringProp("bearer_token"), ), ), diff --git a/tests/fixtures/json/iceberg_rest_catalog_integration.json b/tests/fixtures/json/iceberg_rest_catalog_integration.json new file mode 100644 index 0000000..bbb254f --- /dev/null +++ b/tests/fixtures/json/iceberg_rest_catalog_integration.json @@ -0,0 +1,19 @@ +{ + "name": "ICEBERGRESTCATALOGINT", + "catalog_source": "ICEBERG_REST", + "catalog_namespace": "some_namespace", + "table_format": "ICEBERG", + "rest_config": { + "catalog_uri": "https://glue.us-east-1.amazonaws.com/iceberg", + "catalog_api_type": "AWS_GLUE", + "catalog_name": "123456789012" + }, + "rest_authentication": { + "type": "SIGV4", + "sigv4_iam_role": "arn:aws:iam::123456789012:role/SnowflakeAccess", + "sigv4_signing_region": "us-east-1" + }, + "enabled": true, + "owner": "ACCOUNTADMIN", + "comment": "This is a test Iceberg REST catalog integration" +} From 0c39d9370fec5717aeb475b4ad4da4132469e05c Mon Sep 17 00:00:00 2001 From: Brandon Jackson Date: Mon, 11 May 2026 16:57:50 -0500 Subject: [PATCH 3/4] docs(catalog_integration): IcebergRestCatalogIntegration docs + fixture fix Address @noel's second-round PR #16 feedback: - 
tests/fixtures/json/iceberg_rest_catalog_integration.json: add "refresh_interval_seconds": null to match resource serialization pattern used by other fixtures. Full unit suite (1480 tests) now passes. - docs/resources/iceberg_rest_catalog_integration.md: new page following glue_catalog_integration.md template. Covers YAML + Python examples for the AWS S3 Tables (SIGV4 + AWS_GLUE) production path and lists rest_authentication fields per auth type (SIGV4/OAUTH/BEARER). - mkdocs.yml: register IcebergRestCatalogIntegration under Integrations > Catalog between Glue and ObjectStore. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../iceberg_rest_catalog_integration.md | 67 +++++++++++++++++++ mkdocs.yml | 1 + .../iceberg_rest_catalog_integration.json | 1 + 3 files changed, 69 insertions(+) create mode 100644 docs/resources/iceberg_rest_catalog_integration.md diff --git a/docs/resources/iceberg_rest_catalog_integration.md b/docs/resources/iceberg_rest_catalog_integration.md new file mode 100644 index 0000000..31b24a5 --- /dev/null +++ b/docs/resources/iceberg_rest_catalog_integration.md @@ -0,0 +1,67 @@ +--- +description: >- + An Iceberg REST catalog integration in Snowflake. +--- + +# IcebergRestCatalogIntegration + +[Snowflake Documentation](https://docs.snowflake.com/en/sql-reference/sql/create-catalog-integration-rest) | Snowcap CLI label: `iceberg_rest_catalog_integration` + +Manages an Apache Iceberg REST catalog integration in Snowflake. This is the right choice for AWS S3 Tables (federated catalogs reachable via the Glue Iceberg REST endpoint) and any other Iceberg REST-compatible catalog. 
+ + +## Examples + +### YAML + +```yaml +catalog_integrations: + - name: ci_s3_tables_dev + catalog_source: ICEBERG_REST + catalog_namespace: my_namespace + rest_config: + catalog_uri: https://glue.us-east-1.amazonaws.com/iceberg + catalog_api_type: AWS_GLUE + catalog_name: '123456789012:s3tablescatalog/my_table_bucket' + access_delegation_mode: VENDED_CREDENTIALS + rest_authentication: + type: SIGV4 + sigv4_iam_role: arn:aws:iam::123456789012:role/snowflake-s3-tables-read + sigv4_signing_region: us-east-1 + enabled: true +``` + + +### Python + +```python +catalog = IcebergRestCatalogIntegration( + name="ci_s3_tables_dev", + catalog_namespace="my_namespace", + rest_config={ + "catalog_uri": "https://glue.us-east-1.amazonaws.com/iceberg", + "catalog_api_type": "AWS_GLUE", + "catalog_name": "123456789012:s3tablescatalog/my_table_bucket", + "access_delegation_mode": "VENDED_CREDENTIALS", + }, + rest_authentication={ + "type": "SIGV4", + "sigv4_iam_role": "arn:aws:iam::123456789012:role/snowflake-s3-tables-read", + "sigv4_signing_region": "us-east-1", + }, + enabled=True, +) +``` + + +## Fields + +* `name` (string, required) - The name of the catalog integration. +* `rest_config` (dict, required) - Iceberg REST configuration. Required key: `catalog_uri`. Optional keys: `catalog_api_type`, `catalog_name`, `warehouse`, `prefix`, `access_delegation_mode`. +* `rest_authentication` (dict, required) - Authentication block. Required key: `type` (one of `SIGV4`, `OAUTH`, `BEARER`, `NONE`). Auth-specific fields: `sigv4_iam_role`, `sigv4_signing_region`, `sigv4_external_id` (SIGV4); `oauth_client_id`, `oauth_client_secret`, `oauth_token_uri`, `oauth_allowed_scopes` (OAUTH); `bearer_token` (BEARER). +* `catalog_namespace` (string) - Default namespace for tables referencing this catalog. +* `enabled` (bool) - Whether the integration is enabled. Defaults to True. +* `refresh_interval_seconds` (int) - Optional metadata refresh interval. 
+* `table_format` (string or CatalogTableFormat) - Table format. Only `ICEBERG` is supported. Defaults to `ICEBERG`. +* `owner` (string or [Role](role.md)) - The owner role of the catalog integration. Defaults to "ACCOUNTADMIN". +* `comment` (string) - An optional comment describing the catalog integration. diff --git a/mkdocs.yml b/mkdocs.yml index 0ab4523..2a84e6b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -154,6 +154,7 @@ nav: - Integrations: - Catalog: - GlueCatalogIntegration: resources/glue_catalog_integration.md + - IcebergRestCatalogIntegration: resources/iceberg_rest_catalog_integration.md - ObjectStoreCatalogIntegration: resources/object_store_catalog_integration.md - Other: - APIIntegration: resources/api_integration.md diff --git a/tests/fixtures/json/iceberg_rest_catalog_integration.json b/tests/fixtures/json/iceberg_rest_catalog_integration.json index bbb254f..2077ac7 100644 --- a/tests/fixtures/json/iceberg_rest_catalog_integration.json +++ b/tests/fixtures/json/iceberg_rest_catalog_integration.json @@ -14,6 +14,7 @@ "sigv4_signing_region": "us-east-1" }, "enabled": true, + "refresh_interval_seconds": null, "owner": "ACCOUNTADMIN", "comment": "This is a test Iceberg REST catalog integration" } From 8eb07b3e12a7ca5d8e1c0a5cde6a08851cd565a5 Mon Sep 17 00:00:00 2001 From: Brandon Jackson Date: Mon, 11 May 2026 17:02:07 -0500 Subject: [PATCH 4/4] docs(catalog_integration): add minimal AWS S3 Tables + Lake Formation example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an end-to-end "Minimal example: AWS S3 Tables behind Lake Formation" section to the IcebergRestCatalogIntegration doc, distilled from the BCS production onboarding for our Epicor P21 → S3 Tables → Snowflake pipeline. 
Covers the four AWS-side pieces (S3 Tables bucket, Lake Formation integration, LF grants, cross-account IAM role) and the two Snowcap resources that bind them to Snowflake (catalog_integrations + storage_integrations together), plus the post-deploy CREATE ICEBERG TABLE + SYSTEM$VERIFY_CATALOG_INTEGRATION verification step. Includes the gotchas we hit during onboarding: - GlueCatalogIntegration/CATALOG_SOURCE=GLUE rejects the federated `<account_id>:s3tablescatalog/<bucket_name>` form for GLUE_CATALOG_ID with SQL compilation error 22023/1008 — this is the failure mode that motivated adding IcebergRestCatalogIntegration in the first place. - Lake Formation DESCRIBE/SELECT grants are easy to forget; DESC CATALOG INTEGRATION succeeds without them but CREATE ICEBERG TABLE fails with a vague 403. - access_delegation_mode=VENDED_CREDENTIALS is required when the writer (e.g., pyiceberg-rest on EC2) relies on Lake Formation to vend temporary S3 credentials. - sigv4_signing_region must match the S3 Tables bucket region. Test suite (tests/test_polymorphic_resources.py) still passes (28/28). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../iceberg_rest_catalog_integration.md | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/docs/resources/iceberg_rest_catalog_integration.md b/docs/resources/iceberg_rest_catalog_integration.md index 31b24a5..b2ba4b0 100644 --- a/docs/resources/iceberg_rest_catalog_integration.md +++ b/docs/resources/iceberg_rest_catalog_integration.md @@ -54,6 +54,81 @@ catalog = IcebergRestCatalogIntegration( ``` +### Minimal end-to-end example: AWS S3 Tables behind Lake Formation + +This is the configuration we run in production at Building Controls & Solutions for our Epicor P21 → S3 Tables → Snowflake pipeline. It documents the four AWS-side pieces a Snowflake reader needs *outside* Snowcap, and the two Snowcap resources that bind them to Snowflake. + +**The pieces (AWS side, configured once per environment):** + +1. 
**S3 Tables bucket** — e.g., `bcs-iceberg-raw-prd`. Created via `aws s3tables create-table-bucket`. +2. **Lake Formation S3 Tables integration** — enabling it auto-creates a *federated* Glue catalog named `s3tablescatalog/<bucket_name>` under the bucket-owner account. This is what Snowflake's REST catalog will talk to over HTTPS. +3. **Lake Formation grants** — the IAM role below needs at minimum `DESCRIBE` on the federated catalog and `SELECT` (and `DESCRIBE`) on every namespace/table you want Snowflake to read. +4. **Cross-account IAM role** (e.g., `snowflake-s3-tables-read`) — Snowflake's account assumes this via SIGV4. It needs `glue:GetCatalog`, `glue:GetDatabase*`, `glue:GetTable*`, `lakeformation:GetDataAccess`, and `s3tables:Get*`/`s3tables:List*` on the bucket, with a trust policy that lets the Snowflake account assume it (use the `STORAGE_AWS_EXTERNAL_ID` Snowflake gives you after `CREATE STORAGE INTEGRATION`). + +**Snowcap side (declarative):** + +```yaml +# catalog_integrations: tells Snowflake where Iceberg *metadata* lives. +# CATALOG_NAME is the federated `<account_id>:s3tablescatalog/<bucket_name>` form +# that Lake Formation auto-creates — note this DOES NOT work with the +# legacy GlueCatalogIntegration / CATALOG_SOURCE=GLUE path; you must use +# ICEBERG_REST + CATALOG_API_TYPE=AWS_GLUE for S3 Tables. +catalog_integrations: + - name: ci_p21_iceberg_prd + catalog_source: ICEBERG_REST + table_format: ICEBERG + catalog_namespace: p21 + rest_config: + catalog_uri: https://glue.us-east-1.amazonaws.com/iceberg + catalog_api_type: AWS_GLUE + catalog_name: '123456789012:s3tablescatalog/bcs-iceberg-raw-prd' + access_delegation_mode: VENDED_CREDENTIALS + rest_authentication: + type: SIGV4 + sigv4_iam_role: arn:aws:iam::123456789012:role/snowflake-s3-tables-read + sigv4_signing_region: us-east-1 + enabled: true + comment: 'P21 raw Iceberg tables (PRD) - S3 Tables federated catalog via ICEBERG_REST.' + +# storage_integrations: tells Snowflake where the Iceberg *data files* live. 
+# Bucket-level allow so any namespace (p21, spire, future sources) under +# the same bucket is reachable without per-namespace edits. The catalog +# integration's CATALOG_NAMESPACE controls which tables are actually exposed. +storage_integrations: + - name: si_p21_raw_prd + storage_provider: S3 + enabled: true + storage_aws_role_arn: arn:aws:iam::123456789012:role/snowflake-s3-tables-read + storage_allowed_locations: + - 's3://bcs-iceberg-raw-prd/' + storage_aws_object_acl: bucket-owner-full-control + comment: 'Snowflake read access to PRD raw Iceberg bucket (all namespaces).' +``` + +**Using it from Snowflake** (post-deploy, in a SQL worksheet — these statements are not managed by Snowcap): + +```sql +CREATE OR REPLACE ICEBERG TABLE raw_prd.p21.oe_hdr + CATALOG = 'CI_P21_ICEBERG_PRD' -- catalog_integration name, uppercased + EXTERNAL_VOLUME = 'SI_P21_RAW_PRD' -- storage_integration name, uppercased + CATALOG_TABLE_NAME = 'oe_hdr'; -- table inside namespace `p21` +``` + +**Verifying the integration is reachable** before pointing tables at it: + +```sql +DESC CATALOG INTEGRATION ci_p21_iceberg_prd; +SELECT SYSTEM$VERIFY_CATALOG_INTEGRATION('CI_P21_ICEBERG_PRD'); +``` + +**Gotchas we hit during onboarding:** + +* `CATALOG_SOURCE = GLUE` (the legacy `GlueCatalogIntegration` path) rejects the federated `<account_id>:s3tablescatalog/<bucket_name>` form for `GLUE_CATALOG_ID` with SQL compilation error 22023/1008. S3 Tables *must* go through `ICEBERG_REST` with `CATALOG_API_TYPE = AWS_GLUE` — that's why this resource exists. +* Lake Formation grants are easy to forget. Without `DESCRIBE` on the federated catalog and `SELECT` on the namespace, `DESC CATALOG INTEGRATION` succeeds but `CREATE ICEBERG TABLE ... FROM CATALOG` fails with a vague 403. +* `access_delegation_mode: VENDED_CREDENTIALS` is required if the writer (e.g., a `pyiceberg-rest` loader on EC2) relies on Lake Formation to vend temporary S3 credentials; without it the catalog returns data-file URIs the SIGV4 role can't read. 
+* The Glue Iceberg REST endpoint is regional — match `sigv4_signing_region` to the S3 Tables bucket region. + + ## Fields * `name` (string, required) - The name of the catalog integration.