Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .github/workflows/example.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name: Run examples

on:
pull_request:
push:
branches: [ main ]

permissions:
contents: read

jobs:
example:
name: Run ${{ matrix.example }}
runs-on: ubuntu-latest
strategy:
matrix:
example:
- builder_example.py
- duckdb_example.py
- adbc_example.py
- pyarrow_example.py
steps:
- name: Checkout code
uses: actions/checkout@v5
with:
submodules: recursive
- name: Install uv with python
uses: astral-sh/setup-uv@v7
with:
python-version: "3.10"
- name: Install package dependencies
run: |
uv sync --frozen --extra extensions
- name: Run ${{ matrix.example }}
run: |
uv run examples/${{ matrix.example }}
2 changes: 1 addition & 1 deletion examples/adbc_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def read_adbc_named_table(name: str, conn):
table = filter(
table,
expression=scalar_function(
"functions_comparison.yaml",
"extension:io.substrait:functions_comparison",
"gte",
expressions=[column("ints"), literal(3, i64())],
),
Expand Down
59 changes: 33 additions & 26 deletions examples/builder_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@
def basic_example():
ns = named_struct(
names=["id", "is_applicable"],
struct=struct(types=[i64(nullable=False), boolean()]),
struct=struct(types=[i64(nullable=False), boolean()], nullable=False),
)

table = read_named_table("example_table", ns)
table = filter(table, expression=column("is_applicable"))
table = filter(
table,
expression=scalar_function(
"functions_comparison.yaml",
"extension:io.substrait:functions_comparison",
"lt",
expressions=[column("id"), literal(100, i64())],
expressions=[column("id"), literal(100, i64(nullable=False))],
),
)
table = project(table, expressions=[column("id")])
Expand All @@ -41,14 +41,15 @@ def basic_example():

"""
extension_uris {
extension_uri_anchor: 13
uri: "functions_comparison.yaml"
extension_uri_anchor: 2
uri: "https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml"
}
extensions {
extension_function {
extension_uri_reference: 13
function_anchor: 495
name: "lt"
extension_uri_reference: 2
function_anchor: 124
name: "lt:any_any"
extension_urn_reference: 2
}
}
relations {
Expand Down Expand Up @@ -84,7 +85,7 @@ def basic_example():
nullability: NULLABILITY_NULLABLE
}
}
nullability: NULLABILITY_NULLABLE
nullability: NULLABILITY_REQUIRED
}
}
named_table {
Expand All @@ -107,10 +108,10 @@ def basic_example():
}
condition {
scalar_function {
function_reference: 495
function_reference: 124
output_type {
bool {
nullability: NULLABILITY_NULLABLE
nullability: NULLABILITY_REQUIRED
}
}
arguments {
Expand All @@ -129,7 +130,6 @@ def basic_example():
value {
literal {
i64: 100
nullable: true
}
}
}
Expand All @@ -152,25 +152,29 @@ def basic_example():
names: "id"
}
}
"""
extension_urns {
extension_urn_anchor: 2
urn: "extension:io.substrait:functions_comparison"
}
"""


def advanced_example():
print("=== Simple Example ===")
# Simple example (original)
ns = named_struct(
names=["id", "is_applicable"],
struct=struct(types=[i64(nullable=False), boolean()]),
struct=struct(types=[i64(nullable=False), boolean()], nullable=False),
)

table = read_named_table("example_table", ns)
table = filter(table, expression=column("is_applicable"))
table = filter(
table,
expression=scalar_function(
"functions_comparison.yaml",
"extension:io.substrait:functions_comparison",
"lt",
expressions=[column("id"), literal(100, i64())],
expressions=[column("id"), literal(100, i64(nullable=False))],
),
)
table = project(table, expressions=[column("id")])
Expand All @@ -190,7 +194,8 @@ def advanced_example():
string(nullable=False), # name
i64(nullable=False), # age
fp64(nullable=False), # salary
]
],
nullable=False,
),
)

Expand All @@ -200,7 +205,7 @@ def advanced_example():
adult_users = filter(
users,
expression=scalar_function(
"functions_comparison.yaml",
"extension:io.substrait:functions_comparison",
"gt",
expressions=[column("age"), literal(25, i64())],
),
Expand All @@ -216,7 +221,7 @@ def advanced_example():
column("salary"),
# Add a calculated field (this would show function options if available)
scalar_function(
"functions_arithmetic.yaml",
"extension:io.substrait:functions_arithmetic",
"multiply",
expressions=[column("salary"), literal(1.1, fp64())],
alias="salary_with_bonus",
Expand All @@ -238,7 +243,8 @@ def advanced_example():
i64(nullable=False), # order_id
fp64(nullable=False), # amount
string(nullable=False), # status
]
],
nullable=False,
),
)

Expand All @@ -248,7 +254,7 @@ def advanced_example():
high_value_orders = filter(
orders,
expression=scalar_function(
"functions_comparison.yaml",
"extension:io.substrait:functions_comparison",
"gt",
expressions=[column("amount"), literal(50.0, fp64())],
),
Expand Down Expand Up @@ -280,16 +286,16 @@ def expression_only_example():
print("=== Expression-Only Example ===")
# Show complex expression structure
complex_expr = scalar_function(
"functions_arithmetic.yaml",
"extension:io.substrait:functions_arithmetic",
"multiply",
expressions=[
scalar_function(
"functions_arithmetic.yaml",
"extension:io.substrait:functions_arithmetic",
"add",
expressions=[
column("base_salary"),
scalar_function(
"functions_arithmetic.yaml",
"extension:io.substrait:functions_arithmetic",
"multiply",
expressions=[
column("base_salary"),
Expand All @@ -299,7 +305,7 @@ def expression_only_example():
],
),
scalar_function(
"functions_arithmetic.yaml",
"extension:io.substrait:functions_arithmetic",
"subtract",
expressions=[
literal(1.0, fp64()),
Expand All @@ -312,7 +318,8 @@ def expression_only_example():
print("Complex salary calculation expression:")
# Create a simple plan to wrap the expression
dummy_schema = named_struct(
names=["base_salary"], struct=struct(types=[fp64(nullable=False)])
names=["base_salary"],
struct=struct(types=[fp64(nullable=False)], nullable=False),
)
dummy_table = read_named_table("dummy", dummy_schema)
dummy_plan = project(dummy_table, expressions=[complex_expr])
Expand Down
8 changes: 3 additions & 5 deletions examples/duckdb_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from substrait.builders.extended_expression import column, scalar_function, literal
from substrait.builders.type import i32
from substrait.extension_registry import ExtensionRegistry
from substrait.json import dump_json
import pyarrow.substrait as pa_substrait

try:
Expand Down Expand Up @@ -42,14 +41,13 @@ def read_duckdb_named_table(name: str, conn):
table = filter(
table,
expression=scalar_function(
"functions_comparison.yaml",
"extension:io.substrait:functions_comparison",
"equal",
expressions=[column("c_nationkey"), literal(3, i32())],
),
)
table = project(
table, expressions=[column("c_name"), column("c_address"), column("c_nationkey")]
)

sql = f"CALL from_substrait_json('{dump_json(table(registry))}')"
print(duckdb.sql(sql))
sql = "CALL from_substrait(?)"
print(duckdb.sql(sql, params=[table(registry).SerializeToString()]))
4 changes: 2 additions & 2 deletions src/substrait/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
except ImportError:
pass

__substrait_version__ = "0.74.0"
__substrait_hash__ = "793c64b"
__substrait_version__ = "0.77.0"
__substrait_hash__ = "3c25b1b"
__minimum_substrait_version__ = "0.30.0"
102 changes: 102 additions & 0 deletions src/substrait/bimap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""
Bidirectional map for URI <-> URN conversion during the migration period.

This module provides a UriUrnBiDiMap class that maintains a bidirectional mapping
between URIs and URNs.

NOTE: This file is temporary and can be removed once the URI -> URN migration
is complete across all Substrait implementations. At that point, only URN-based
extension references will be used.
"""

from typing import Optional


class UriUrnBiDiMap:
"""Bidirectional map for URI <-> URN mappings.

Maintains two internal dictionaries to enable O(1) lookups in both directions.
Enforces that each URI maps to exactly one URN and vice versa.
"""

def __init__(self):
self._uri_to_urn: dict[str, str] = {}
self._urn_to_uri: dict[str, str] = {}

def put(self, uri: str, urn: str) -> None:
"""Add a bidirectional URI <-> URN mapping.

Args:
uri: The extension URI (e.g., "https://github.com/.../functions_arithmetic.yaml")
urn: The extension URN (e.g., "extension:io.substrait:functions_arithmetic")

Raises:
ValueError: If the URI or URN already exists with a different mapping
"""
# Check for conflicts
if self.contains_uri(uri):
existing_urn = self.get_urn(uri)
if existing_urn != urn:
raise ValueError(
f"URI '{uri}' is already mapped to URN '{existing_urn}', "
f"cannot remap to '{urn}'"
)
# Already have this exact mapping, nothing to do
return

if self.contains_urn(urn):
existing_uri = self.get_uri(urn)
if existing_uri != uri:
raise ValueError(
f"URN '{urn}' is already mapped to URI '{existing_uri}', "
f"cannot remap to '{uri}'"
)
# Already have this exact mapping, nothing to do
return

self._uri_to_urn[uri] = urn
self._urn_to_uri[urn] = uri

def get_urn(self, uri: str) -> Optional[str]:
"""Convert a URI to its corresponding URN.

Args:
uri: The extension URI to look up

Returns:
The corresponding URN, or None if the URI is not in the map
"""
return self._uri_to_urn.get(uri)

def get_uri(self, urn: str) -> Optional[str]:
"""Convert a URN to its corresponding URI.

Args:
urn: The extension URN to look up

Returns:
The corresponding URI, or None if the URN is not in the map
"""
return self._urn_to_uri.get(urn)

def contains_uri(self, uri: str) -> bool:
"""Check if a URI exists in the map.

Args:
uri: The URI to check

Returns:
True if the URI is in the map, False otherwise
"""
return uri in self._uri_to_urn

def contains_urn(self, urn: str) -> bool:
"""Check if a URN exists in the map.

Args:
urn: The URN to check

Returns:
True if the URN is in the map, False otherwise
"""
return urn in self._urn_to_uri
Loading