Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ USER vscode
RUN curl -s "https://get.sdkman.io" | bash
RUN bash -c "source $HOME/.sdkman/bin/sdkman-init.sh && \
sdk install java 25-graalce"
RUN mkdir -p ~/lib && cd ~/lib && curl -L -O http://www.antlr.org/download/antlr-4.13.2-complete.jar
ENV ANTLR_JAR="~/lib/antlr-4.13.2-complete.jar"
COPY scripts/setup_antlr.sh /tmp/setup_antlr.sh
RUN bash /tmp/setup_antlr.sh ~/lib
ENV ANTLR_JAR="~/lib/antlr-complete.jar"
# protoc 29.5 is the last version with protobuf python v5 which is compatible with protoletariat v3
RUN cd ~ && curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v29.5/protoc-29.5-linux-x86_64.zip && \
unzip protoc-29.5-linux-x86_64.zip -d ~/.local && \
Expand Down
23 changes: 21 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Run the upgrade script to upgrade the submodule and regenerate the protobuf stub

```
uv sync --extra gen_proto
uv run ./update_proto.sh <version>
uv run scripts/update_proto.sh <version>
```

Or run the proto codegen without updating the Substrait Git submodule:
Expand All @@ -45,7 +45,7 @@ make codegen-proto

## Antlr grammar

Substrait uses antlr grammar to derive output types of extension functions. Make sure java is installed and ANTLR_JAR environment variable is set. Take a look at .devcontainer/Dockerfile for example setup.
Substrait uses an ANTLR grammar to derive the output types of extension functions. Make sure Java is installed, then set up ANTLR by running `make setup-antlr`.

```
make antlr
Expand Down Expand Up @@ -74,3 +74,22 @@ Run tests in the project's root dir.
uv sync --extra test
uv run pytest
```

# Pre-Push Checklist

Before pushing your changes, run the following command to ensure all requirements are met:

```
make pre_push
```

This command performs the following checks and updates:
1. Sets up ANTLR dependencies (`setup-antlr`)
2. Formats code with ruff (`format`)
3. Fixes linting issues with ruff (`lint_fix`)
4. Regenerates ANTLR grammar (`antlr`)
5. Regenerates extension stubs (`codegen-extensions`)
6. Syncs dependencies (`uv sync --extra test`)
7. Runs tests (`uv run pytest`)

This ensures your code is properly formatted, linted, all generated files are up-to-date, and all tests pass.
14 changes: 11 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
setup-antlr:
@bash scripts/setup_antlr.sh > /dev/null


codegen: antlr codegen-proto codegen-extensions codegen-version


antlr:
antlr: setup-antlr
cd third_party/substrait/grammar \
&& java -jar ${ANTLR_JAR} -o ../../../src/substrait/gen/antlr -Dlanguage=Python3 SubstraitType.g4 \
&& java -jar ../../../lib/antlr-complete.jar -o ../../../src/substrait/gen/antlr -Dlanguage=Python3 SubstraitType.g4 \
&& rm ../../../src/substrait/gen/antlr/*.tokens \
&& rm ../../../src/substrait/gen/antlr/*.interp

Expand All @@ -13,7 +17,7 @@ codegen-version:
&& echo '"' >> src/substrait/gen/version.py

codegen-proto:
./gen_proto.sh
./scripts/gen_proto.sh

codegen-extensions:
uv run --with datamodel-code-generator datamodel-codegen \
Expand All @@ -32,3 +36,7 @@ lint_fix:

format:
uvx [email protected] format

pre_push: format lint_fix antlr codegen-extensions
uv sync --extra test
uv run pytest
File renamed without changes.
47 changes: 47 additions & 0 deletions scripts/setup_antlr.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# Setup ANTLR for Substrait Python.
#
# Usage: setup_antlr.sh [ANTLR_JAR_DIR]
#   ANTLR_JAR_DIR  Directory that receives antlr-complete.jar. When omitted
#                  (or empty) it defaults to "lib", resolved against the
#                  current working directory.
#
# Outputs:     the path of the jar on stdout, so callers can capture it;
#              every progress/diagnostic message goes to stderr.
# Exit status: 0 when the required ANTLR version is in place,
#              non-zero when the download (or any other step) fails.

set -euo pipefail

readonly ANTLR_VERSION="4.13.2"

# Determine ANTLR_JAR_DIR: first argument if given, otherwise "lib".
# The ":-" form keeps this safe under `set -u` when no argument is passed.
readonly ANTLR_JAR_DIR="${1:-lib}"

readonly ANTLR_JAR="${ANTLR_JAR_DIR}/antlr-complete.jar"
readonly ANTLR_URL="https://www.antlr.org/download/antlr-${ANTLR_VERSION}-complete.jar"
# Marker file recording which ANTLR version the jar on disk corresponds to.
readonly VERSION_FILE="${ANTLR_JAR_DIR}/.antlr_version"
# The download lands here first so a truncated transfer never sits at ANTLR_JAR.
readonly PARTIAL_JAR="${ANTLR_JAR}.part"

echo "Setting up ANTLR ${ANTLR_VERSION}..." >&2

# Create directory if it doesn't exist
mkdir -p -- "${ANTLR_JAR_DIR}"

# Check if installed version matches required version
INSTALLED_VERSION=""
if [[ -f "${VERSION_FILE}" ]]; then
  INSTALLED_VERSION=$(cat "${VERSION_FILE}")
fi

if [[ "${INSTALLED_VERSION}" == "${ANTLR_VERSION}" && -f "${ANTLR_JAR}" ]]; then
  echo "ANTLR ${ANTLR_VERSION} is already installed" >&2
else
  echo "Downloading ANTLR ${ANTLR_VERSION}..." >&2

  # Drop the old jar AND its version marker up front: if the download below
  # fails, no stale marker can later vouch for a missing or mismatched jar.
  rm -f -- "${ANTLR_JAR}" "${VERSION_FILE}"

  # Remove a leftover partial download on every exit path (failure, Ctrl-C).
  trap 'rm -f -- "${PARTIAL_JAR}"' EXIT

  # -f: fail on HTTP errors, -sS: no progress meter but still print errors,
  # -L: follow redirects.
  if ! curl -fsSL -o "${PARTIAL_JAR}" "${ANTLR_URL}"; then
    echo "Failed to download ANTLR from ${ANTLR_URL}" >&2
    exit 1
  fi

  # Publish the jar only once it is complete, then record its version.
  mv -f -- "${PARTIAL_JAR}" "${ANTLR_JAR}"
  echo "${ANTLR_VERSION}" > "${VERSION_FILE}"
  echo "ANTLR ${ANTLR_VERSION} downloaded successfully" >&2
fi

# Output the path so it can be captured
echo "${ANTLR_JAR}"
File renamed without changes.