Skip to content

Commit 9bb6bb9

Browse files
feat: add substrait spec version to plan (#132)
This PR retrieves the Substrait spec version from the Git submodule and writes it into the plan when using the builders. Signed-off-by: Niels Pardon <[email protected]> Co-authored-by: Andrew Coleman <[email protected]>
1 parent ddede68 commit 9bb6bb9

File tree

17 files changed

+101
-102
lines changed

17 files changed

+101
-102
lines changed

.github/workflows/codegen-check.yml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ jobs:
2020
uses: devcontainers/[email protected]
2121
with:
2222
runCmd: |
23+
# Fetch submodule tags, since actions/checkout does not fetch them
24+
git submodule foreach 'git fetch --unshallow || true'
2325
# Ensure dependencies are installed
2426
uv sync --extra test --extra gen_proto
2527
# Run all code generation steps
26-
make antlr
27-
./gen_proto.sh
28-
make codegen-extensions
28+
make codegen
2929
3030
- name: Check for uncommitted changes
3131
run: |
@@ -36,9 +36,7 @@ jobs:
3636
git diff src/substrait/gen/
3737
echo ""
3838
echo "To fix this, run:"
39-
echo " make antlr"
40-
echo " ./gen_proto.sh"
41-
echo " make codegen-extensions"
39+
echo " make codegen"
4240
echo "Then commit the changes."
4341
exit 1
4442
fi

CONTRIBUTING.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ git submodule update --init --recursive
2222

2323
# Code generation
2424

25+
You can run the full code generation with the following command, or use the individual commands below to regenerate only parts of the generated code. This command does not update the Substrait Git submodule.
26+
27+
```
28+
make codegen
29+
```
30+
2531
## Protobuf stubs
2632

2733
Run the upgrade script to upgrade the submodule and regenerate the protobuf stubs.
@@ -31,6 +37,12 @@ uv sync --extra gen_proto
3137
uv run ./update_proto.sh <version>
3238
```
3339

40+
Or run the proto codegen without updating the Substrait Git submodule:
41+
42+
```
43+
make codegen-proto
44+
```
45+
3446
## Antlr grammar
3547

3648
Substrait uses an ANTLR grammar to derive the output types of extension functions. Make sure Java is installed and the ANTLR_JAR environment variable is set. Take a look at .devcontainer/Dockerfile for an example setup.

Makefile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,20 @@
1+
codegen: antlr codegen-proto codegen-extensions codegen-version
2+
3+
14
antlr:
25
cd third_party/substrait/grammar \
36
&& java -jar ${ANTLR_JAR} -o ../../../src/substrait/gen/antlr -Dlanguage=Python3 SubstraitType.g4 \
47
&& rm ../../../src/substrait/gen/antlr/*.tokens \
58
&& rm ../../../src/substrait/gen/antlr/*.interp
69

10+
codegen-version:
11+
echo -n 'substrait_version = "' > src/substrait/gen/version.py \
12+
&& cd third_party/substrait && git describe --tags | tr -d 'v\n' >> ../../src/substrait/gen/version.py && cd ../.. \
13+
&& echo '"' >> src/substrait/gen/version.py
14+
15+
codegen-proto:
16+
./gen_proto.sh
17+
718
codegen-extensions:
819
uv run --with datamodel-code-generator datamodel-codegen \
920
--input-file-type jsonschema \

src/substrait/builders/plan.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
See `examples/builder_example.py` for usage.
66
"""
77

8+
import re
89
from typing import Callable, Iterable, Optional, Union
910

1011
import substrait.gen.proto.algebra_pb2 as stalg
@@ -23,12 +24,27 @@
2324
merge_extension_uris,
2425
merge_extension_urns,
2526
)
27+
from substrait.gen.version import substrait_version
2628

2729
UnboundPlan = Callable[[ExtensionRegistry], stp.Plan]
2830

2931
PlanOrUnbound = Union[stp.Plan, UnboundPlan]
3032

3133

34+
def _create_default_version():
35+
p = re.compile(r"(\d+)\.(\d+)\.(\d+)")
36+
m = p.match(substrait_version)
37+
global default_version
38+
default_version = stp.Version(
39+
major_number=int(m.group(1)),
40+
minor_number=int(m.group(2)),
41+
patch_number=int(m.group(3)),
42+
)
43+
44+
45+
_create_default_version()
46+
47+
3248
def _merge_extensions(*objs):
3349
"""Merge extension URIs, URNs, and declarations from multiple plan/expression objects.
3450
@@ -65,9 +81,10 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
6581
)
6682

6783
return stp.Plan(
84+
version=default_version,
6885
relations=[
6986
stp.PlanRel(root=stalg.RelRoot(input=rel, names=named_struct.names))
70-
]
87+
],
7188
)
7289

7390
return resolve
@@ -107,6 +124,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
107124
)
108125

109126
return stp.Plan(
127+
version=default_version,
110128
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=names))],
111129
**_merge_extensions(_plan, *bound_expressions),
112130
)
@@ -137,6 +155,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
137155
names = ns.names
138156

139157
return stp.Plan(
158+
version=default_version,
140159
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=names))],
141160
**_merge_extensions(bound_plan, bound_expression),
142161
)
@@ -183,6 +202,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
183202
)
184203

185204
return stp.Plan(
205+
version=default_version,
186206
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=ns.names))],
187207
**_merge_extensions(bound_plan, *[e[0] for e in bound_expressions]),
188208
)
@@ -200,6 +220,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
200220
)
201221

202222
return stp.Plan(
223+
version=default_version,
203224
relations=[
204225
stp.PlanRel(
205226
root=stalg.RelRoot(
@@ -238,6 +259,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
238259
)
239260

240261
return stp.Plan(
262+
version=default_version,
241263
relations=[
242264
stp.PlanRel(
243265
root=stalg.RelRoot(
@@ -286,6 +308,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
286308
)
287309

288310
return stp.Plan(
311+
version=default_version,
289312
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=ns.names))],
290313
**_merge_extensions(bound_left, bound_right, bound_expression),
291314
)
@@ -321,6 +344,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
321344
)
322345

323346
return stp.Plan(
347+
version=default_version,
324348
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=ns.names))],
325349
**_merge_extensions(bound_left, bound_right),
326350
)
@@ -372,6 +396,7 @@ def resolve(registry: ExtensionRegistry) -> stp.Plan:
372396
] + [e.referred_expr[0].output_names[0] for e in bound_measures]
373397

374398
return stp.Plan(
399+
version=default_version,
375400
relations=[stp.PlanRel(root=stalg.RelRoot(input=rel, names=names))],
376401
**_merge_extensions(
377402
bound_input, *bound_grouping_expressions, *bound_measures

src/substrait/gen/__init__.pyi

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/substrait/gen/version.py

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/builders/plan/test_aggregate.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import substrait.gen.proto.algebra_pb2 as stalg
44
import substrait.gen.proto.extensions.extensions_pb2 as ste
55
from substrait.builders.type import boolean, i64
6-
from substrait.builders.plan import read_named_table, aggregate
6+
from substrait.builders.plan import read_named_table, aggregate, default_version
77
from substrait.builders.extended_expression import column, aggregate_function
88
from substrait.extension_registry import ExtensionRegistry
99
from substrait.type_inference import infer_plan_schema
@@ -56,6 +56,7 @@ def test_aggregate():
5656
ns = infer_plan_schema(table(None))
5757

5858
expected = stp.Plan(
59+
version=default_version,
5960
extension_urns=[
6061
ste.SimpleExtensionURN(extension_urn_anchor=1, urn="extension:test:urn")
6162
],

tests/builders/plan/test_cross.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import substrait.gen.proto.plan_pb2 as stp
33
import substrait.gen.proto.algebra_pb2 as stalg
44
from substrait.builders.type import boolean, i64, string
5-
from substrait.builders.plan import read_named_table, cross
5+
from substrait.builders.plan import read_named_table, cross, default_version
66
from substrait.extension_registry import ExtensionRegistry
77

88
registry = ExtensionRegistry(load_default_extensions=False)
@@ -28,6 +28,7 @@ def test_cross_join():
2828
actual = cross(table, table2)(registry)
2929

3030
expected = stp.Plan(
31+
version=default_version,
3132
relations=[
3233
stp.PlanRel(
3334
root=stalg.RelRoot(
@@ -40,7 +41,7 @@ def test_cross_join():
4041
names=["id", "is_applicable", "fk_id", "name"],
4142
)
4243
)
43-
]
44+
],
4445
)
4546

4647
assert actual == expected

tests/builders/plan/test_fetch.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import substrait.gen.proto.plan_pb2 as stp
33
import substrait.gen.proto.algebra_pb2 as stalg
44
from substrait.builders.type import boolean, i64
5-
from substrait.builders.plan import read_named_table, fetch
5+
from substrait.builders.plan import read_named_table, fetch, default_version
66
from substrait.builders.extended_expression import literal
77
from substrait.extension_registry import ExtensionRegistry
88

@@ -24,6 +24,7 @@ def test_fetch():
2424
actual = fetch(table, offset=offset, count=count)(registry)
2525

2626
expected = stp.Plan(
27+
version=default_version,
2728
relations=[
2829
stp.PlanRel(
2930
root=stalg.RelRoot(
@@ -37,7 +38,7 @@ def test_fetch():
3738
names=["id", "is_applicable"],
3839
)
3940
)
40-
]
41+
],
4142
)
4243

4344
assert actual == expected

tests/builders/plan/test_filter.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import substrait.gen.proto.plan_pb2 as stp
33
import substrait.gen.proto.algebra_pb2 as stalg
44
from substrait.builders.type import boolean, i64
5-
from substrait.builders.plan import read_named_table, filter
5+
from substrait.builders.plan import read_named_table, filter, default_version
66
from substrait.builders.extended_expression import literal
77
from substrait.extension_registry import ExtensionRegistry
88

@@ -21,6 +21,7 @@ def test_filter():
2121
actual = filter(table, literal(True, boolean()))(registry)
2222

2323
expected = stp.Plan(
24+
version=default_version,
2425
relations=[
2526
stp.PlanRel(
2627
root=stalg.RelRoot(
@@ -37,7 +38,7 @@ def test_filter():
3738
names=["id", "is_applicable"],
3839
)
3940
)
40-
]
41+
],
4142
)
4243

4344
assert actual == expected

0 commit comments

Comments
 (0)