Skip to content

Commit 674f8e7

Browse files
committed
feat: add substrait spec version to plan
This PR retrieves the Substrait spec version from the Git submodule and writes it into the plan when using the builders. Signed-off-by: Niels Pardon <[email protected]>
1 parent fa12088 commit 674f8e7

File tree

17 files changed

+96
-102
lines changed

17 files changed

+96
-102
lines changed

.github/workflows/codegen-check.yml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ jobs:
2020
uses: devcontainers/[email protected]
2121
with:
2222
runCmd: |
23+
# fetch submodule tags since actions/checkout does not
24+
git submodule foreach 'git fetch --unshallow || true'
2325
# Ensure dependencies are installed
2426
uv sync --extra test --extra gen_proto
2527
# Run all code generation steps
26-
make antlr
27-
./gen_proto.sh
28-
make codegen-extensions
28+
make codegen
2929
3030
- name: Check for uncommitted changes
3131
run: |
@@ -36,9 +36,7 @@ jobs:
3636
git diff src/substrait/gen/
3737
echo ""
3838
echo "To fix this, run:"
39-
echo " make antlr"
40-
echo " ./gen_proto.sh"
41-
echo " make codegen-extensions"
39+
echo " make codegen"
4240
echo "Then commit the changes."
4341
exit 1
4442
fi

CONTRIBUTING.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ git submodule update --init --recursive
2222

2323
# Code generation
2424

25+
You can run the full code generation with the following command, or use the individual commands below to regenerate only part of the generated code. `make codegen` does not update the Substrait Git submodule.
26+
27+
```
28+
make codegen
29+
```
30+
2531
## Protobuf stubs
2632

2733
Run the upgrade script to upgrade the submodule and regenerate the protobuf stubs.
@@ -31,6 +37,12 @@ uv sync --extra gen_proto
3137
uv run ./update_proto.sh <version>
3238
```
3339

40+
Or run the proto codegen without updating the Substrait Git submodule:
41+
42+
```
43+
make codegen-proto
44+
```
45+
3446
## Antlr grammar
3547

3648
Substrait uses antlr grammar to derive output types of extension functions. Make sure java is installed and ANTLR_JAR environment variable is set. Take a look at .devcontainer/Dockerfile for example setup.

Makefile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,20 @@
1+
codegen: antlr codegen-proto codegen-extensions codegen-version
2+
3+
14
antlr:
25
cd third_party/substrait/grammar \
36
&& java -jar ${ANTLR_JAR} -o ../../../src/substrait/gen/antlr -Dlanguage=Python3 SubstraitType.g4 \
47
&& rm ../../../src/substrait/gen/antlr/*.tokens \
58
&& rm ../../../src/substrait/gen/antlr/*.interp
69

10+
# Write the Substrait spec version of the third_party/substrait checkout into
# src/substrait/gen/version.py as a single line: substrait_version = "<version>".
# printf is used instead of `echo -n` because the handling of -n differs
# between /bin/sh implementations; sed strips only the leading "v" of the tag
# rather than every "v" in the `git describe` output.
codegen-version:
	printf 'substrait_version = "%s"\n' \
		"$$(cd third_party/substrait && git describe --tags | sed 's/^v//')" \
		> src/substrait/gen/version.py
14+
15+
codegen-proto:
16+
./gen_proto.sh
17+
718
codegen-extensions:
819
uv run --with datamodel-code-generator datamodel-codegen \
920
--input-file-type jsonschema \

src/substrait/builders/plan.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"""
77

88
from typing import Iterable, Optional, Union, Callable
9+
import re
910

1011
import substrait.gen.proto.algebra_pb2 as stalg
1112
from substrait.gen.proto.extensions.extensions_pb2 import AdvancedExtension
@@ -23,11 +24,21 @@
2324
merge_extension_urns,
2425
merge_extension_uris,
2526
)
27+
from substrait.gen.version import substrait_version
2628

2729
UnboundPlan = Callable[[ExtensionRegistry], stp.Plan]
2830

2931
PlanOrUnbound = Union[stp.Plan, UnboundPlan]
3032

33+
# Derive the default plan version from the generated Substrait spec version
# string. Only the leading "major.minor.patch" digits are used; match() anchors
# at the start of the string, so any trailing text after the patch number is
# ignored.
p = re.compile(r"(\d+)\.(\d+)\.(\d+)")
m = p.match(substrait_version)
if m is None:
    # Without this guard a malformed or empty version string surfaces as an
    # opaque AttributeError on `None.group` while importing this module.
    raise ValueError(
        "substrait_version must start with 'major.minor.patch', "
        f"got {substrait_version!r}"
    )
major = int(m.group(1))
minor = int(m.group(2))
patch = int(m.group(3))
# Version message attached to every plan produced by the builders below.
default_version = stp.Version(
    major_number=major, minor_number=minor, patch_number=patch
)
41+
3142

3243
def _merge_extensions(*objs):
3344
"""Merge extension URIs, URNs, and declarations from multiple plan/expression objects.
@@ -65,9 +76,10 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
6576
)
6677

6778
return stp.Plan(
79+
version=default_version,
6880
relations=[
6981
stp.PlanRel(root=stalg.RelRoot(input=rel, names=named_struct.names))
70-
]
82+
],
7183
)
7284

7385
return resolve
@@ -107,6 +119,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
107119
)
108120

109121
return stp.Plan(
122+
version=default_version,
110123
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=names))],
111124
**_merge_extensions(_plan, *bound_expressions),
112125
)
@@ -137,6 +150,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
137150
names = ns.names
138151

139152
return stp.Plan(
153+
version=default_version,
140154
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=names))],
141155
**_merge_extensions(bound_plan, bound_expression),
142156
)
@@ -183,6 +197,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
183197
)
184198

185199
return stp.Plan(
200+
version=default_version,
186201
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=ns.names))],
187202
**_merge_extensions(bound_plan, *[e[0] for e in bound_expressions]),
188203
)
@@ -200,6 +215,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
200215
)
201216

202217
return stp.Plan(
218+
version=default_version,
203219
relations=[
204220
stp.PlanRel(
205221
root=stalg.RelRoot(
@@ -238,6 +254,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
238254
)
239255

240256
return stp.Plan(
257+
version=default_version,
241258
relations=[
242259
stp.PlanRel(
243260
root=stalg.RelRoot(
@@ -286,6 +303,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
286303
)
287304

288305
return stp.Plan(
306+
version=default_version,
289307
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=ns.names))],
290308
**_merge_extensions(bound_left, bound_right, bound_expression),
291309
)
@@ -321,6 +339,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
321339
)
322340

323341
return stp.Plan(
342+
version=default_version,
324343
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=ns.names))],
325344
**_merge_extensions(bound_left, bound_right),
326345
)
@@ -372,6 +391,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
372391
] + [e.referred_expr[0].output_names[0] for e in bound_measures]
373392

374393
return stp.Plan(
394+
version=default_version,
375395
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=names))],
376396
**_merge_extensions(
377397
bound_input, *bound_grouping_expressions, *bound_measures

src/substrait/gen/__init__.pyi

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/substrait/gen/version.py

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/builders/plan/test_aggregate.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import substrait.gen.proto.algebra_pb2 as stalg
44
import substrait.gen.proto.extensions.extensions_pb2 as ste
55
from substrait.builders.type import boolean, i64
6-
from substrait.builders.plan import read_named_table, aggregate
6+
from substrait.builders.plan import read_named_table, aggregate, default_version
77
from substrait.builders.extended_expression import column, aggregate_function
88
from substrait.extension_registry import ExtensionRegistry
99
from substrait.type_inference import infer_plan_schema
@@ -56,6 +56,7 @@ def test_aggregate():
5656
ns = infer_plan_schema(table(None))
5757

5858
expected = stp.Plan(
59+
version=default_version,
5960
extension_urns=[
6061
ste.SimpleExtensionURN(extension_urn_anchor=1, urn="extension:test:urn")
6162
],

tests/builders/plan/test_cross.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import substrait.gen.proto.plan_pb2 as stp
33
import substrait.gen.proto.algebra_pb2 as stalg
44
from substrait.builders.type import boolean, i64, string
5-
from substrait.builders.plan import read_named_table, cross
5+
from substrait.builders.plan import read_named_table, cross, default_version
66
from substrait.extension_registry import ExtensionRegistry
77

88
registry = ExtensionRegistry(load_default_extensions=False)
@@ -28,6 +28,7 @@ def test_cross_join():
2828
actual = cross(table, table2)(registry)
2929

3030
expected = stp.Plan(
31+
version=default_version,
3132
relations=[
3233
stp.PlanRel(
3334
root=stalg.RelRoot(
@@ -40,7 +41,7 @@ def test_cross_join():
4041
names=["id", "is_applicable", "fk_id", "name"],
4142
)
4243
)
43-
]
44+
],
4445
)
4546

4647
assert actual == expected

tests/builders/plan/test_fetch.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import substrait.gen.proto.plan_pb2 as stp
33
import substrait.gen.proto.algebra_pb2 as stalg
44
from substrait.builders.type import boolean, i64
5-
from substrait.builders.plan import read_named_table, fetch
5+
from substrait.builders.plan import read_named_table, fetch, default_version
66
from substrait.builders.extended_expression import literal
77
from substrait.extension_registry import ExtensionRegistry
88

@@ -24,6 +24,7 @@ def test_fetch():
2424
actual = fetch(table, offset=offset, count=count)(registry)
2525

2626
expected = stp.Plan(
27+
version=default_version,
2728
relations=[
2829
stp.PlanRel(
2930
root=stalg.RelRoot(
@@ -37,7 +38,7 @@ def test_fetch():
3738
names=["id", "is_applicable"],
3839
)
3940
)
40-
]
41+
],
4142
)
4243

4344
assert actual == expected

tests/builders/plan/test_filter.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import substrait.gen.proto.plan_pb2 as stp
33
import substrait.gen.proto.algebra_pb2 as stalg
44
from substrait.builders.type import boolean, i64
5-
from substrait.builders.plan import read_named_table, filter
5+
from substrait.builders.plan import read_named_table, filter, default_version
66
from substrait.builders.extended_expression import literal
77
from substrait.extension_registry import ExtensionRegistry
88

@@ -21,6 +21,7 @@ def test_filter():
2121
actual = filter(table, literal(True, boolean()))(registry)
2222

2323
expected = stp.Plan(
24+
version=default_version,
2425
relations=[
2526
stp.PlanRel(
2627
root=stalg.RelRoot(
@@ -37,7 +38,7 @@ def test_filter():
3738
names=["id", "is_applicable"],
3839
)
3940
)
40-
]
41+
],
4142
)
4243

4344
assert actual == expected

0 commit comments

Comments
 (0)