diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 8d3354b..a04e35f 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -3,8 +3,9 @@ USER vscode
 RUN curl -s "https://get.sdkman.io" | bash
 RUN bash -c "source $HOME/.sdkman/bin/sdkman-init.sh && \
     sdk install java 25-graalce"
-RUN mkdir -p ~/lib && cd ~/lib && curl -L -O http://www.antlr.org/download/antlr-4.13.2-complete.jar
-ENV ANTLR_JAR="~/lib/antlr-4.13.2-complete.jar"
+COPY scripts/setup_antlr.sh /tmp/setup_antlr.sh
+RUN bash /tmp/setup_antlr.sh ~/lib
+ENV ANTLR_JAR="~/lib/antlr-complete.jar"
 # protoc 29.5 is the last version with protobuf python v5 which is compatible with protoletariat v3
 RUN cd ~ && curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v29.5/protoc-29.5-linux-x86_64.zip && \
     unzip protoc-29.5-linux-x86_64.zip -d ~/.local && \
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 39d0164..d0d3b95 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -34,7 +34,7 @@ Run the upgrade script to upgrade the submodule and regenerate the protobuf stub
 
 ```
 uv sync --extra gen_proto
-uv run ./update_proto.sh
+uv run scripts/update_proto.sh
 ```
 
 Or run the proto codegen without updating the Substrait Git submodule:
@@ -45,7 +45,7 @@ make codegen-proto
 
 ## Antlr grammar
 
-Substrait uses antlr grammar to derive output types of extension functions. Make sure java is installed and ANTLR_JAR environment variable is set. Take a look at .devcontainer/Dockerfile for example setup.
+Substrait uses antlr grammar to derive output types of extension functions. Make sure Java is installed, then set up ANTLR by running `make setup-antlr`.
 
 ```
 make antlr
@@ -74,3 +74,22 @@ Run tests in the project's root dir.
 uv sync --extra test
 uv run pytest
 ```
+
+# Pre-Push Checklist
+
+Before pushing your changes, run the following command to ensure all requirements are met:
+
+```
+make pre_push
+```
+
+This command performs the following checks and updates:
+1. Sets up ANTLR dependencies (`setup-antlr`)
+2. Formats code with ruff (`format`)
+3. Fixes linting issues with ruff (`lint_fix`)
+4. Regenerates ANTLR grammar (`antlr`)
+5. Regenerates extension stubs (`codegen-extensions`)
+6. Syncs dependencies (`uv sync --extra test`)
+7. Runs tests (`uv run pytest`)
+
+This ensures your code is properly formatted, linted, all generated files are up-to-date, and all tests pass.
diff --git a/Makefile b/Makefile
index 865b20f..0d12557 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,13 @@
+setup-antlr:
+	@bash scripts/setup_antlr.sh > /dev/null
+
+
 codegen: antlr codegen-proto codegen-extensions codegen-version
 
 
-antlr:
+antlr: setup-antlr
 	cd third_party/substrait/grammar \
-	&& java -jar ${ANTLR_JAR} -o ../../../src/substrait/gen/antlr -Dlanguage=Python3 SubstraitType.g4 \
+	&& java -jar ../../../lib/antlr-complete.jar -o ../../../src/substrait/gen/antlr -Dlanguage=Python3 SubstraitType.g4 \
 	&& rm ../../../src/substrait/gen/antlr/*.tokens \
 	&& rm ../../../src/substrait/gen/antlr/*.interp
 
@@ -13,7 +17,7 @@ codegen-version:
 	&& echo '"' >> src/substrait/gen/version.py
 
 codegen-proto:
-	./gen_proto.sh
+	./scripts/gen_proto.sh
 
 codegen-extensions:
 	uv run --with datamodel-code-generator datamodel-codegen \
@@ -32,3 +36,7 @@ lint_fix:
 
 format:
 	uvx ruff@0.11.11 format
+
+pre_push: setup-antlr format lint_fix antlr codegen-extensions
+	uv sync --extra test
+	uv run pytest
diff --git a/gen_proto.sh b/scripts/gen_proto.sh
similarity index 100%
rename from gen_proto.sh
rename to scripts/gen_proto.sh
diff --git a/scripts/setup_antlr.sh b/scripts/setup_antlr.sh
new file mode 100644
index 0000000..c8df094
--- /dev/null
+++ b/scripts/setup_antlr.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Setup ANTLR for Substrait Python
+# Usage: setup_antlr.sh [ANTLR_JAR_DIR]
+# If ANTLR_JAR_DIR is not provided, defaults to <project root>/lib, where the
+# project root is the parent directory of this script's directory.
+#
+# Progress messages go to stderr; the only stdout output is the jar path.
+
+set -euo pipefail
+
+ANTLR_VERSION="4.13.2"
+
+# Determine ANTLR_JAR_DIR
+if [ -n "${1:-}" ]; then
+    # Use provided argument
+    ANTLR_JAR_DIR="$1"
+else
+    # Anchor the default to the repository root rather than the caller's
+    # working directory, so the jar lands where the Makefile looks for it.
+    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+    ANTLR_JAR_DIR="$(dirname "${SCRIPT_DIR}")/lib"
+fi
+
+ANTLR_JAR="${ANTLR_JAR_DIR}/antlr-complete.jar"
+ANTLR_URL="https://www.antlr.org/download/antlr-${ANTLR_VERSION}-complete.jar"
+VERSION_FILE="${ANTLR_JAR_DIR}/.antlr_version"
+
+echo "Setting up ANTLR ${ANTLR_VERSION}..." >&2
+
+# Create directory if it doesn't exist
+mkdir -p "${ANTLR_JAR_DIR}"
+
+# Check if installed version matches required version
+INSTALLED_VERSION=""
+if [ -f "${VERSION_FILE}" ]; then
+    INSTALLED_VERSION=$(cat "${VERSION_FILE}")
+fi
+
+if [ "${INSTALLED_VERSION}" = "${ANTLR_VERSION}" ] && [ -f "${ANTLR_JAR}" ]; then
+    echo "ANTLR ${ANTLR_VERSION} is already installed" >&2
+else
+    echo "Downloading ANTLR ${ANTLR_VERSION}..." >&2
+    # Download to a temporary file and rename on success, so an interrupted
+    # transfer never leaves a truncated jar next to a matching version file.
+    TMP_JAR="${ANTLR_JAR}.download.$$"
+    if ! curl -sS -L -f -o "${TMP_JAR}" "${ANTLR_URL}"; then
+        rm -f "${TMP_JAR}"
+        echo "Failed to download ANTLR from ${ANTLR_URL}" >&2
+        exit 1
+    fi
+    mv -f "${TMP_JAR}" "${ANTLR_JAR}"
+    echo "${ANTLR_VERSION}" > "${VERSION_FILE}"
+    echo "ANTLR ${ANTLR_VERSION} downloaded successfully" >&2
+fi
+
+# Output the path so it can be captured
+echo "${ANTLR_JAR}"
diff --git a/update_proto.sh b/scripts/update_proto.sh
similarity index 100%
rename from update_proto.sh
rename to scripts/update_proto.sh