338 changes: 338 additions & 0 deletions .github/workflows/build-hsfs-jars.yml
@@ -0,0 +1,338 @@
name: Build hsfs jars and Hudi/Spark dependencies with hopsfs
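# Reusable workflow: builds the Hudi and Spark dependencies against hopsfs,
# then packages the hsfs / hsfs-spark / hsfs-utils jars from hopsworks-api
# and uploads everything to Nexus. It can be called from other workflows
# (workflow_call), dispatched manually, or exercised on pull requests.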

on:
workflow_call:
inputs:
ref:
description: 'The ref to checkout for the hopsworks-api repo, default is main'
required: false
type: string
default: 'main'
hopsfs-url:
description: 'The URL from which to fetch the hopsfs package'
required: false
type: string
default: ''
jira_tag:
description: 'The tag to use for the jira release, default is NOJIRA'
required: false
type: string
default: 'NOJIRA'
runner:
description: 'The runner to use, default is ubuntu-latest'
required: false
type: string
default: 'ubuntu-latest'
secrets:
NEXUS_HARBOR_PASSWORD:
description: 'Nexus harbor password'
required: true
outputs:
pom_version:
value: ${{ jobs.build-hsfs-jars.outputs.pom_version }}
description: 'The version from the pom.xml'
commit_hash:
value: ${{ jobs.build-hsfs-jars.outputs.commit_hash }}
description: 'The commit hash of the hopsworks-api repo'
jira_tag:
value: ${{ jobs.build-hsfs-jars.outputs.jira_tag }}
description: 'The jira tag used for the build'
hsfs_jar_with_dependencies_url:
value: ${{ jobs.build-hsfs-jars.outputs.hsfs_jar_with_dependencies_url }}
description: 'The URL of the hsfs jar with dependencies file'
hsfs_jar_url:
value: ${{ jobs.build-hsfs-jars.outputs.hsfs_jar_url }}
description: 'The URL of the hsfs jar file'
hsfs_spark_jar_with_dependencies_url:
value: ${{ jobs.build-hsfs-jars.outputs.hsfs_spark_jar_with_dependencies_url }}
description: 'The URL of the hsfs spark jar with dependencies file'
hsfs_spark_jar_url:
value: ${{ jobs.build-hsfs-jars.outputs.hsfs_spark_jar_url }}
description: 'The URL of the hsfs spark jar file'
hsfs_utils_jar_with_dependencies_url:
value: ${{ jobs.build-hsfs-jars.outputs.hsfs_utils_jar_with_dependencies_url }}
description: 'The URL of the hsfs utils jar with dependencies file'
hsfs_utils_jar_url:
value: ${{ jobs.build-hsfs-jars.outputs.hsfs_utils_jar_url }}
description: 'The URL of the hsfs utils jar file'
hudi_pom_version_no_jira:
value: ${{ jobs.build-hudi.outputs.pom_version_no_jira }}
description: 'The version from the pom.xml without the jira tag'
hudi_pom_version:
value: ${{ jobs.build-hudi.outputs.pom_version }}
description: 'The version from the pom.xml with the jira tag'
hudi_commit_hash:
value: ${{ jobs.build-hudi.outputs.commit_hash }}
description: 'The commit hash of the hudi repo that was built'
hudi_utils_jar_name:
value: ${{ jobs.build-hudi.outputs.hudi_utils_jar_name }}
description: 'The name of the hudi utilities jar that was built and uploaded to nexus'
hudi_utils_jar_url:
value: ${{ jobs.build-hudi.outputs.hudi_utils_jar_url }}
description: 'The url of the hudi utilities jar that was built and uploaded to nexus'
hudi_spark_jar_name:
value: ${{ jobs.build-hudi.outputs.hudi_spark_jar_name }}
description: 'The name of the hudi spark jar that was built and uploaded to nexus'
hudi_spark_jar_url:
value: ${{ jobs.build-hudi.outputs.hudi_spark_jar_url }}
description: 'The url of the hudi spark jar that was built and uploaded to nexus'
hopsfs_version:
value: ${{ jobs.build-hudi.outputs.hopsfs_version }}
description: 'The version of hopsfs that was used to build hudi'
spark_pom_version_no_jira:
value: ${{ jobs.build-spark.outputs.pom_version_no_jira }}
description: 'The pom version without the jira tag'
spark_pom_version:
value: ${{ jobs.build-spark.outputs.pom_version }}
description: 'The pom version with the jira tag'
spark_commit_hash:
value: ${{ jobs.build-spark.outputs.commit_hash }}
description: 'The commit hash of the spark repo'
spark_tar_name:
value: ${{ jobs.build-spark.outputs.spark_tar_name }}
description: 'The name of the spark tar file'
spark_tar_url:
value: ${{ jobs.build-spark.outputs.spark_tar_url }}
description: 'The url of the spark tar file'
spark_hopsfs_version:
value: ${{ jobs.build-spark.outputs.hopsfs_version }}
description: 'The version of hopsfs used in the build'
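# workflow_dispatch mirrors the workflow_call inputs so the same build can be
# triggered manually from the Actions tab.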
workflow_dispatch:
inputs:
ref:
description: 'The ref to checkout for the hopsworks-api repo, default is main'
required: false
type: string
default: 'main'
hopsfs-url:
description: 'The URL from which to fetch the hopsfs package'
required: false
type: string
default: ''
jira_tag:
description: 'The tag to use for the jira release, default is NOJIRA'
required: false
type: string
default: 'NOJIRA'
runner:
description: 'The runner to use, default is ubuntu-latest'
required: false
type: string
default: 'ubuntu-latest'
pull_request:


# Defaults below avoid errors on pull_request runs, where no workflow inputs are provided
env:
HUDI_VERSION: "1.0.2"
DEFAULT_SPARK_URL: https://repo.hops.works/master
DEFAULT_HUDI_URL: https://repo.hops.works/master
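# HUDI_VERSION is the base Hudi version that the pom rewrite below looks for;
# the DEFAULT_* URLs appear unused in this file and are presumably fallback
# artifact locations (treat that as an assumption).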

jobs:
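# build-hudi and build-spark call pinned reusable workflows in the
# logicalclocks/hudi and logicalclocks/spark repos; on pull requests they only
# actually build when the PR carries the matching 'build-hudi' / 'build-spark'
# label. build-hsfs-jars waits for both before packaging the hsfs jars.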
build-hudi:
uses: logicalclocks/hudi/.github/workflows/build_hudi_with_hopsfs.yml@HWORKS-2204-vatj
with:
ref: HWORKS-2204-vatj
runner: ${{ inputs.runner || 'ubuntu-latest' }}
jira_tag: ${{ inputs.jira_tag || 'NOJIRA' }}
build: ${{ (github.event_name == 'pull_request' && contains(join(github.event.pull_request.labels.*.name, ','), 'build-hudi')) }}
secrets:
NEXUS_HARBOR_PASSWORD: ${{ secrets.NEXUS_HARBOR_PASSWORD }}

build-spark:
uses: logicalclocks/spark/.github/workflows/build_spark_with_hopsfs.yaml@IS-153
with:
ref: IS-153
runner: ${{ inputs.runner || 'ubuntu-latest' }}
jira_tag: ${{ inputs.jira_tag || 'NOJIRA' }}
build: ${{ (github.event_name == 'pull_request' && contains(join(github.event.pull_request.labels.*.name, ','), 'build-spark')) }}
secrets:
NEXUS_HARBOR_PASSWORD: ${{ secrets.NEXUS_HARBOR_PASSWORD }}

build-hsfs-jars:
needs:
- build-hudi
- build-spark
runs-on: ${{ inputs.runner || 'ubuntu-latest' }}
outputs:
pom_version: ${{ steps.prep_version.outputs.pom_version }}
commit_hash: ${{ steps.prep_version.outputs.commit_hash }}
jira_tag: ${{ steps.extract_jira_tag.outputs.jira_tag }}
hsfs_utils_python_name: ${{ steps.prep_version.outputs.hsfs_utils_python_name }}
hsfs_utils_python_url: ${{ steps.prep_version.outputs.hsfs_utils_python_url }}
hsfs_jar_url: ${{ steps.prep_version.outputs.hsfs_jar_url }}
hsfs_jar_with_dependencies_url: ${{ steps.prep_version.outputs.hsfs_jar_with_dependencies_url }}
hsfs_spark_jar_url: ${{ steps.prep_version.outputs.hsfs_spark_jar_url }}
hsfs_spark_jar_with_dependencies_url: ${{ steps.prep_version.outputs.hsfs_spark_jar_with_dependencies_url }}
hsfs_utils_jar_url: ${{ steps.prep_version.outputs.hsfs_utils_jar_url }}
hsfs_utils_jar_with_dependencies_url: ${{ steps.prep_version.outputs.hsfs_utils_jar_with_dependencies_url }}
steps:
- name: Checkout hopsworks-api repo
uses: actions/checkout@v4
with:
repository: logicalclocks/hopsworks-api
ref: ${{ inputs.ref || 'main' }}
path: ${{ github.workspace }}/hopsworks-api

- name: Extract jira tag
id: extract_jira_tag
uses: logicalclocks/hosted-runners/.github/actions/extract-jira-tag@main
with:
branch_name: ${{ inputs.ref || github.head_ref || github.ref_name }}
jira_tag: ${{ inputs.jira_tag || 'NOJIRA' }}
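# The extract-jira-tag action is expected to resolve the JIRA tag from the
# branch name (or the jira_tag input) and export it as JIRA_TAG; the prep
# step below reads ${JIRA_TAG} when it assembles the Nexus upload URLs.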

- name: Set up .m2 settings.xml
shell: bash
working-directory: ${{ github.workspace }}/hopsworks-api/java/hsfs
env:
M2_HOME: ~/.m2
run: |
echo "M2_HOME var is $M2_HOME" >> $GITHUB_STEP_SUMMARY
mkdir -p ~/.m2
echo "<settings><servers>" > ~/.m2/settings.xml
echo "<server><id>HopsEE</id><username>${{ vars.NEXUS_HARBOR_USER }}</username><password>${{ secrets.NEXUS_HARBOR_PASSWORD }}</password><configuration></configuration></server>" >> ~/.m2/settings.xml
echo "<server><id>HiveEE</id><username>${{ vars.NEXUS_HARBOR_USER }}</username><password>${{ secrets.NEXUS_HARBOR_PASSWORD }}</password><configuration></configuration></server>" >> ~/.m2/settings.xml
echo "</servers></settings>" >> ~/.m2/settings.xml

- name: Cache maven
id: cache-maven
uses: actions/cache@v4
with:
path: |
~/.m2
!~/.m2/settings.xml
key: ${{ runner.os }}-maven-hsfs-${{ hashFiles('hopsworks-api/**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-hsfs-
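# settings.xml (written above with the Nexus credentials) is explicitly
# excluded from the cache path, so credentials are never stored in the
# shared Maven cache.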

- name: Prep version and artifact URLs
shell: bash
id: prep_version
working-directory: ${{ github.workspace }}/hopsworks-api/java/hsfs
run: |
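# Resolve the short commit hash and the Maven project version, then derive
# the artifact names and Nexus upload URLs for every hsfs jar. JIRA_TAG is
# assumed to have been exported to the environment by the extract-jira-tag
# step above.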
COMMIT_HASH=$(git rev-parse --short HEAD)
mvn -q -Dexec.executable="echo" -Dexec.args='${project.version}' --non-recursive exec:exec -l version.log
POM_VERSION=$(cat version.log)
HSFS_JAR_WITH_DEPENDENCIES_NAME=hsfs-${POM_VERSION}-jar-with-dependencies.jar
HSFS_UTILS_JAR_WITH_DEPENDENCIES_NAME=hsfs-utils-${POM_VERSION}-jar-with-dependencies.jar
HSFS_JAR_NAME=hsfs-${POM_VERSION}.jar
HSFS_SPARK_JAR_NAME=hsfs-spark-spark3.5-${POM_VERSION}.jar
HSFS_SPARK_JAR_WITH_DEPENDENCIES_NAME=hsfs-spark-spark3.5-${POM_VERSION}-jar-with-dependencies.jar
HSFS_UTILS_JAR_NAME=hsfs-utils-${POM_VERSION}.jar
HSFS_UTILS_PYTHON_NAME=hsfs_utils.py
HSFS_JAR_URL="${{ vars.NEXUS_DEV_SPARK_URL }}/${JIRA_TAG}/${HSFS_JAR_NAME}"
HSFS_JAR_WITH_DEPENDENCIES_URL="${{ vars.NEXUS_DEV_SPARK_URL }}/${JIRA_TAG}/${HSFS_JAR_WITH_DEPENDENCIES_NAME}"
HSFS_SPARK_JAR_URL="${{ vars.NEXUS_DEV_SPARK_URL }}/${JIRA_TAG}/${HSFS_SPARK_JAR_NAME}"
HSFS_SPARK_JAR_WITH_DEPENDENCIES_URL="${{ vars.NEXUS_DEV_SPARK_URL }}/${JIRA_TAG}/${HSFS_SPARK_JAR_WITH_DEPENDENCIES_NAME}"
HSFS_UTILS_JAR_URL="${{ vars.NEXUS_DEV_SPARK_URL }}/${JIRA_TAG}/${HSFS_UTILS_JAR_NAME}"
HSFS_UTILS_JAR_WITH_DEPENDENCIES_URL="${{ vars.NEXUS_DEV_SPARK_URL }}/${JIRA_TAG}/${HSFS_UTILS_JAR_WITH_DEPENDENCIES_NAME}"
HSFS_UTILS_PYTHON_URL="${{ vars.NEXUS_DEV_SPARK_URL }}/${JIRA_TAG}/${HSFS_UTILS_PYTHON_NAME}"
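# Expose the values three ways: as env vars for the later build/upload steps,
# in the step summary for readability, and as step outputs that back the
# workflow_call outputs declared at the top. GitHub expression lookups are
# case-insensitive, so the uppercase keys written to GITHUB_OUTPUT satisfy
# the lowercase outputs.* names on the job.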

echo "POM_VERSION=${POM_VERSION}" >> $GITHUB_ENV
echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_ENV
echo "HSFS_JAR_NAME=${HSFS_JAR_NAME}" >> $GITHUB_ENV
echo "HSFS_JAR_URL=${HSFS_JAR_URL}" >> $GITHUB_ENV
echo "HSFS_JAR_WITH_DEPENDENCIES_NAME=${HSFS_JAR_WITH_DEPENDENCIES_NAME}" >> $GITHUB_ENV
echo "HSFS_JAR_WITH_DEPENDENCIES_URL=${HSFS_JAR_WITH_DEPENDENCIES_URL}" >> $GITHUB_ENV
echo "HSFS_UTILS_PYTHON_NAME=${HSFS_UTILS_PYTHON_NAME}" >> $GITHUB_ENV
echo "HSFS_UTILS_PYTHON_URL=${HSFS_UTILS_PYTHON_URL}" >> $GITHUB_ENV
echo "HSFS_UTILS_JAR_NAME=${HSFS_UTILS_JAR_NAME}" >> $GITHUB_ENV
echo "HSFS_UTILS_JAR_URL=${HSFS_UTILS_JAR_URL}" >> $GITHUB_ENV
echo "HSFS_UTILS_JAR_WITH_DEPENDENCIES_NAME=${HSFS_UTILS_JAR_WITH_DEPENDENCIES_NAME}" >> $GITHUB_ENV
echo "HSFS_UTILS_JAR_WITH_DEPENDENCIES_URL=${HSFS_UTILS_JAR_WITH_DEPENDENCIES_URL}" >> $GITHUB_ENV
echo "HSFS_SPARK_JAR_NAME=${HSFS_SPARK_JAR_NAME}" >> $GITHUB_ENV
echo "HSFS_SPARK_JAR_URL=${HSFS_SPARK_JAR_URL}" >> $GITHUB_ENV
echo "HSFS_SPARK_JAR_WITH_DEPENDENCIES_NAME=${HSFS_SPARK_JAR_WITH_DEPENDENCIES_NAME}" >> $GITHUB_ENV
echo "HSFS_SPARK_JAR_WITH_DEPENDENCIES_URL=${HSFS_SPARK_JAR_WITH_DEPENDENCIES_URL}" >> $GITHUB_ENV

echo "# Build hsfs-java and hsfs-utils jars" >> $GITHUB_STEP_SUMMARY
echo "POM_VERSION=${POM_VERSION}" >> $GITHUB_STEP_SUMMARY
echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_STEP_SUMMARY
echo "HSFS_JAR_NAME=${HSFS_JAR_NAME}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_JAR_URL=${HSFS_JAR_URL}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_JAR_WITH_DEPENDENCIES_NAME=${HSFS_JAR_WITH_DEPENDENCIES_NAME}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_JAR_WITH_DEPENDENCIES_URL=${HSFS_JAR_WITH_DEPENDENCIES_URL}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_UTILS_PYTHON_NAME=${HSFS_UTILS_PYTHON_NAME}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_UTILS_PYTHON_URL=${HSFS_UTILS_PYTHON_URL}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_UTILS_JAR_NAME=${HSFS_UTILS_JAR_NAME}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_UTILS_JAR_URL=${HSFS_UTILS_JAR_URL}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_UTILS_JAR_WITH_DEPENDENCIES_NAME=${HSFS_UTILS_JAR_WITH_DEPENDENCIES_NAME}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_UTILS_JAR_WITH_DEPENDENCIES_URL=${HSFS_UTILS_JAR_WITH_DEPENDENCIES_URL}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_SPARK_JAR_NAME=${HSFS_SPARK_JAR_NAME}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_SPARK_JAR_URL=${HSFS_SPARK_JAR_URL}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_SPARK_JAR_WITH_DEPENDENCIES_NAME=${HSFS_SPARK_JAR_WITH_DEPENDENCIES_NAME}" >> $GITHUB_STEP_SUMMARY
echo "HSFS_SPARK_JAR_WITH_DEPENDENCIES_URL=${HSFS_SPARK_JAR_WITH_DEPENDENCIES_URL}" >> $GITHUB_STEP_SUMMARY

echo "POM_VERSION=${POM_VERSION}" >> $GITHUB_OUTPUT
echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_OUTPUT
echo "HSFS_JAR_NAME=${HSFS_JAR_NAME}" >> $GITHUB_OUTPUT
echo "HSFS_JAR_URL=${HSFS_JAR_URL}" >> $GITHUB_OUTPUT
echo "HSFS_JAR_WITH_DEPENDENCIES_NAME=${HSFS_JAR_WITH_DEPENDENCIES_NAME}" >> $GITHUB_OUTPUT
echo "HSFS_JAR_WITH_DEPENDENCIES_URL=${HSFS_JAR_WITH_DEPENDENCIES_URL}" >> $GITHUB_OUTPUT
echo "HSFS_UTILS_PYTHON_NAME=${HSFS_UTILS_PYTHON_NAME}" >> $GITHUB_OUTPUT
echo "HSFS_UTILS_PYTHON_URL=${HSFS_UTILS_PYTHON_URL}" >> $GITHUB_OUTPUT
echo "HSFS_UTILS_JAR_NAME=${HSFS_UTILS_JAR_NAME}" >> $GITHUB_OUTPUT
echo "HSFS_UTILS_JAR_URL=${HSFS_UTILS_JAR_URL}" >> $GITHUB_OUTPUT
echo "HSFS_UTILS_JAR_WITH_DEPENDENCIES_NAME=${HSFS_UTILS_JAR_WITH_DEPENDENCIES_NAME}" >> $GITHUB_OUTPUT
echo "HSFS_UTILS_JAR_WITH_DEPENDENCIES_URL=${HSFS_UTILS_JAR_WITH_DEPENDENCIES_URL}" >> $GITHUB_OUTPUT
echo "HSFS_SPARK_JAR_NAME=${HSFS_SPARK_JAR_NAME}" >> $GITHUB_OUTPUT
echo "HSFS_SPARK_JAR_URL=${HSFS_SPARK_JAR_URL}" >> $GITHUB_OUTPUT
echo "HSFS_SPARK_JAR_WITH_DEPENDENCIES_NAME=${HSFS_SPARK_JAR_WITH_DEPENDENCIES_NAME}" >> $GITHUB_OUTPUT
echo "HSFS_SPARK_JAR_WITH_DEPENDENCIES_URL=${HSFS_SPARK_JAR_WITH_DEPENDENCIES_URL}" >> $GITHUB_OUTPUT

- name: Use JIRA-tagged Hudi if it exists
working-directory: ${{ github.workspace }}/hopsworks-api
run: |
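# Probe Nexus for a JIRA-tagged Hudi snapshot. If it exists, rewrite
# <hudi.version> in the hsfs poms to point at it and register the HopsEE
# repository so Maven can resolve the snapshot; otherwise fall back to the
# stock HUDI_VERSION. Note: the check matches a literal "200 OK" status
# line, which assumes an HTTP/1.x response from Nexus.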
hudi_exist=$(curl -u ${{ vars.NEXUS_HARBOR_USER }}:${{ secrets.NEXUS_HARBOR_PASSWORD }} \
-I "https://nexus.hops.works/repository/hops-artifacts/io/hops/hudi/hudi/${{ env.HUDI_VERSION }}-${JIRA_TAG}-SNAPSHOT/maven-metadata.xml")
if [[ $hudi_exist == *"200 OK"* ]]; then
echo "JIRA tagged Hudi found"
HUDI_JIRA_VERSION=${{ env.HUDI_VERSION }}-${JIRA_TAG}-SNAPSHOT
echo "HUDI_JIRA_VERSION=${HUDI_JIRA_VERSION}" >> $GITHUB_ENV
echo "HUDI_JIRA_VERSION=${HUDI_JIRA_VERSION}" >> $GITHUB_OUTPUT
echo "HUDI_JIRA_VERSION=${HUDI_JIRA_VERSION}" >> $GITHUB_STEP_SUMMARY

find ./java -name pom.xml -exec sed -i "s|<hudi.version>${{ env.HUDI_VERSION }}</hudi.version>|<hudi.version>${HUDI_JIRA_VERSION}</hudi.version>|g" {} \;
find ./utils/java -name pom.xml -exec sed -i "s|<hudi.version>${{ env.HUDI_VERSION }}-SNAPSHOT</hudi.version>|<hudi.version>${HUDI_JIRA_VERSION}</hudi.version>|g" {} \;
sed -i "s|</repositories>|<repository><id>HopsEE</id><name>HopsEE Repo</name><url>https://nexus.hops.works/repository/hops-artifacts</url><releases><enabled>true</enabled></releases><snapshots><enabled>true</enabled></snapshots></repository></repositories>|g" ./java/pom.xml
else
echo "No JIRA tagged Hudi found, using default"
echo "HUDI_JIRA_VERSION=${{ env.HUDI_VERSION }}" >> $GITHUB_ENV
fi


- name: Build hsfs jars
shell: bash
working-directory: ${{ github.workspace }}/hopsworks-api/java
env:
POM_VERSION: ${{ env.POM_VERSION }}
M2_HOME: ~/.m2
run: |
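# Package every hsfs module with the spark-3.5 profile (this produces the
# hsfs-spark-spark3.5 artifacts named in prep_version); tests are skipped.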
mvn -s ~/.m2/settings.xml clean package generate-sources -Pspark-3.5 -DskipTests

- name: Upload hsfs artifacts to Nexus
shell: bash
working-directory: ${{ github.workspace }}/hopsworks-api/java
run: |
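# PUT each jar into the JIRA-tagged folder on the Nexus dev-spark repository,
# using the URLs computed in prep_version.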
curl -u ${{ vars.NEXUS_HARBOR_USER }}:${{ secrets.NEXUS_HARBOR_PASSWORD }} --upload-file spark/target/${HSFS_SPARK_JAR_WITH_DEPENDENCIES_NAME} "${HSFS_SPARK_JAR_WITH_DEPENDENCIES_URL}"
curl -u ${{ vars.NEXUS_HARBOR_USER }}:${{ secrets.NEXUS_HARBOR_PASSWORD }} --upload-file spark/target/${HSFS_SPARK_JAR_NAME} "${HSFS_SPARK_JAR_URL}"
curl -u ${{ vars.NEXUS_HARBOR_USER }}:${{ secrets.NEXUS_HARBOR_PASSWORD }} --upload-file hsfs/target/${HSFS_JAR_WITH_DEPENDENCIES_NAME} "${HSFS_JAR_WITH_DEPENDENCIES_URL}"
curl -u ${{ vars.NEXUS_HARBOR_USER }}:${{ secrets.NEXUS_HARBOR_PASSWORD }} --upload-file hsfs/target/${HSFS_JAR_NAME} "${HSFS_JAR_URL}"

- name: Build hsfs-utils jars
shell: bash
working-directory: ${{ github.workspace }}/hopsworks-api/utils/java
env:
POM_VERSION: ${{ env.POM_VERSION }}
M2_HOME: ~/.m2
run: |
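# Build the hsfs-utils jar and its jar-with-dependencies assembly, as named
# in prep_version.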
mvn -s ~/.m2/settings.xml clean generate-sources package -DskipTests

- name: Upload hsfs-utils artifacts to Nexus
shell: bash
working-directory: ${{ github.workspace }}/hopsworks-api/utils/java
run: |
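# Upload the hsfs_utils.py helper from utils/python alongside the utils jars.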
curl -u ${{ vars.NEXUS_HARBOR_USER }}:${{ secrets.NEXUS_HARBOR_PASSWORD }} --upload-file ../python/${HSFS_UTILS_PYTHON_NAME} "${HSFS_UTILS_PYTHON_URL}"
curl -u ${{ vars.NEXUS_HARBOR_USER }}:${{ secrets.NEXUS_HARBOR_PASSWORD }} --upload-file target/${HSFS_UTILS_JAR_WITH_DEPENDENCIES_NAME} "${HSFS_UTILS_JAR_WITH_DEPENDENCIES_URL}"
curl -u ${{ vars.NEXUS_HARBOR_USER }}:${{ secrets.NEXUS_HARBOR_PASSWORD }} --upload-file target/${HSFS_UTILS_JAR_NAME} "${HSFS_UTILS_JAR_URL}"
