Skip to content

Commit 826ec29

Browse files
authored
[LIVY-1011] Upgrade CI image, fix Spark 3 tests
## What changes were proposed in this pull request?

The CI environment is broken due to library version conflicts between the image and the checkout action. The livy-ci Docker image needs to be upgraded to a more recent version of Ubuntu, ensuring that the unit tests and integration tests all pass. Due to a bug in Livy's GitHub workflows, tests had only been executed with Spark 2. Now that this is fixed, we also need to make updates to some of the Python-based tests to work with Python 3 because Spark 3.2 does not support Python 2. Two R-based integration tests are ignored in the Spark 2 integration test runs because SparkR 2 does not work with R 4, and the last version of Ubuntu with R 3.6 is Ubuntu 20. If we want to re-enable these tests, we may need to build R 3.6 for Ubuntu 24.

## How was this patch tested?

CI and unit test runs in a private fork of the repo.
1 parent 1f6bd7a commit 826ec29

File tree

8 files changed

+89
-39
lines changed

8 files changed

+89
-39
lines changed

.github/workflows/build-ci-image.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
name: 'Build CI images'
1818
on:
1919
push:
20-
branches: ["main"]
20+
branches: ["master"]
2121
paths:
2222
- 'dev/docker/livy-dev-base/Dockerfile'
2323
jobs:
@@ -43,6 +43,6 @@ jobs:
4343
uses: docker/build-push-action@v4
4444
with:
4545
push: true
46-
context: ./dev/docker
46+
context: ./dev/docker/livy-dev-base
4747
tags: |
48-
ghcr.io/${{ github.repository_owner }}/livy-ci:latest
48+
ghcr.io/${{ github.repository_owner }}/livy-ci:latest

.github/workflows/integration-tests.yaml

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,13 @@ env:
2323
MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
2424
jobs:
2525
build:
26-
runs-on: ubuntu-20.04
26+
runs-on: ubuntu-24.04
2727
# TODO: Possibly point to the ./build-ci-image.yaml with the "uses" key
2828
container: ghcr.io/${{ github.repository_owner }}/livy-ci:latest
2929
strategy:
3030
matrix:
3131
maven_profile:
32-
- "-Pscala-2.11 -Pspark2"
33-
- "-Pscala-2.12 -Pspark2"
32+
- "-Pscala-2.11 -Pspark2 -DskipRTests"
3433
- "-Pscala-2.12 -Pspark3"
3534
steps:
3635
-
@@ -46,9 +45,13 @@ jobs:
4645
key: ${{ runner.os }}-maven-${{ hashFiles('pom.xml', '*/pom.xml', 'thriftserver/*/pom.xml', 'core/*/pom.xml', 'repl/*/pom.xml', 'scala-api/*/pom.xml') }}
4746
restore-keys: |
4847
${{ runner.os }}-maven-
48+
-
49+
name: Set Python 3 as default for Spark 3 builds
50+
if: ${{ contains(matrix.maven_profile, 'spark3') }}
51+
run: pyenv global 3
4952
-
5053
name: Build with Maven
51-
run: mvn -Pthriftserver ${{ matrix.mvn_profile }} -DskipTests -Dmaven.javadoc.skip=true -B -V -e verify
54+
run: mvn -Pthriftserver ${{ matrix.maven_profile }} -DskipTests -Dmaven.javadoc.skip=true -B -V -e verify
5255
-
5356
name: Upload coverage to codecov
5457
uses: codecov/codecov-action@v3

.github/workflows/unit-tests.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ env:
2121
MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
2222
jobs:
2323
build:
24-
runs-on: ubuntu-20.04
24+
runs-on: ubuntu-24.04
2525
# TODO: Possibly point to the ./build-ci-image.yaml with the "uses" key
2626
container: ghcr.io/${{ github.repository_owner }}/livy-ci:latest
2727
strategy:
@@ -46,7 +46,7 @@ jobs:
4646
${{ runner.os }}-maven-
4747
-
4848
name: Build with Maven
49-
run: mvn -Pthriftserver ${{ matrix.mvn_profile }} -DskipITs -Dmaven.javadoc.skip=true -B -V -e verify
49+
run: mvn -Pthriftserver ${{ matrix.maven_profile }} -DskipITs -Dmaven.javadoc.skip=true -B -V -e verify
5050
-
5151
name: Upload coverage to codecov
5252
uses: codecov/codecov-action@v3

dev/docker/livy-dev-base/Dockerfile

Lines changed: 54 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,17 @@
1515
# limitations under the License.
1616
#
1717

18-
FROM ubuntu:xenial
18+
FROM ubuntu:noble
1919

2020
# configure locale
2121
RUN apt-get update -qq > /dev/null && apt-get install -qq --yes --no-install-recommends \
2222
locales && \
2323
locale-gen en_US.UTF-8
2424
ENV LANG="en_US.UTF-8" \
2525
LANGUAGE="en_US.UTF-8" \
26-
LC_ALL="en_US.UTF-8"
26+
LC_ALL="en_US.UTF-8" \
27+
TZ=US \
28+
DEBIAN_FRONTEND=noninteractive
2729

2830
# Install necessary dependencies for build/test/debug
2931
# Use `lsof -i -P -n` to find open ports
@@ -32,31 +34,54 @@ RUN apt-get install -qq \
3234
curl \
3335
git \
3436
libkrb5-dev \
37+
openjdk-8-jdk-headless \
38+
r-base \
3539
maven \
36-
openjdk-8-jdk \
37-
python-dev \
38-
python-pip \
39-
python3-pip \
4040
software-properties-common \
4141
vim \
4242
wget \
4343
telnet \
4444
lsof
4545

46-
# R 3.x install - ensure to add the signing key per https://cran.r-project.org/bin/linux/ubuntu/olderreleasesREADME.html
47-
RUN add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu xenial-cran35/' && \
48-
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \
49-
apt-get update && \
50-
apt-get -qq install r-base
46+
# This fixes integration tests
47+
# If setsid is available, signals are sent to containers in MiniYarnCluster using negative pids, however
48+
# in the Docker container this results in a kill(0) system call which triggers an ExitCodeException in
49+
# the kill function that breaks test execution. If setsid is removed, pids < 0 are not used.
50+
# See https://github.com/apache/hadoop/blob/rel/release-2.7.3/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java#L238
51+
RUN rm /usr/bin/setsid
52+
53+
# python build
54+
RUN apt-get install -y \
55+
build-essential \
56+
libbz2-dev \
57+
libffi-dev \
58+
liblzma-dev \
59+
libncurses-dev \
60+
libreadline-dev \
61+
libsqlite3-dev \
62+
libssl-dev \
63+
zlib1g-dev
64+
65+
ENV HOME=/root
66+
67+
RUN git clone https://github.com/pyenv/pyenv.git $HOME/pyenv
68+
69+
ENV PYENV_ROOT=$HOME/pyenv
70+
ENV PATH="$HOME/pyenv/shims:$HOME/pyenv/bin:$HOME/bin:$PATH"
71+
72+
RUN pyenv install -v 2.7.18 && \
73+
pyenv install -v 3.9.21 && \
74+
pyenv global 2.7.18 3.9.21 && \
75+
pyenv rehash
5176

5277
# Add build dependencies for python2
5378
# - First we upgrade pip because that makes a lot of things better
5479
# - Then we remove the provided version of setuptools and install a different version
5580
# - Then we install additional dependencies
56-
RUN python -m pip install -U "pip < 21.0" && \
57-
apt-get remove -y python-setuptools && \
58-
python -m pip install "setuptools < 36" && \
59-
python -m pip install \
81+
RUN python2 -m pip install -U "pip < 21.0" && \
82+
apt-get remove -y python-setuptools && \
83+
python2 -m pip install "setuptools < 36" && \
84+
python2 -m pip install \
6085
cloudpickle \
6186
codecov \
6287
flake8 \
@@ -70,7 +95,20 @@ RUN python -m pip install -U "pip < 21.0" && \
7095
"responses >= 0.5.1"
7196

7297
# Now do the same for python3
73-
RUN python3 -m pip install -U pip
98+
RUN python3 -m pip install -U pip && pip3 install \
99+
cloudpickle \
100+
codecov \
101+
flake8 \
102+
flaky \
103+
pytest \
104+
pytest-runner \
105+
requests-kerberos \
106+
requests \
107+
responses
108+
109+
RUN pyenv rehash
110+
111+
RUN apt remove -y openjdk-11-jre-headless
74112

75113
WORKDIR /workspace
76114

integration-test/src/test/resources/test_python_api.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,18 @@
1818
import base64
1919
import json
2020
import time
21-
from urlparse import urlparse
21+
try:
22+
from urllib.parse import urlparse
23+
except ImportError:
24+
from urlparse import urlparse
2225
import requests
2326
from requests_kerberos import HTTPKerberosAuth, REQUIRED, OPTIONAL
2427
import cloudpickle
2528
import pytest
26-
import httplib
29+
try:
30+
import httplib
31+
except ImportError:
32+
from http import HTTPStatus as httplib
2733
from flaky import flaky
2834

2935
global session_id, job_id
@@ -144,7 +150,7 @@ def error_job(context):
144150
return "hello" + 1
145151

146152
process_job(error_job,
147-
"TypeError: cannot concatenate 'str' and 'int' objects", True)
153+
"TypeError: ", True)
148154

149155

150156
def test_reconnect():

integration-test/src/test/scala/org/apache/livy/test/BatchIT.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ class BatchIT extends BaseIntegrationTestSuite with BeforeAndAfterAll {
7676
}
7777

7878
test("submit a SparkR application") {
79+
assume(!sys.props.getOrElse("skipRTests", "false").toBoolean, "Skipping R tests.")
7980
val hdfsPath = uploadResource("rtest.R")
8081
withScript(hdfsPath, List.empty) { s =>
8182
s.verifySessionSuccess()

integration-test/src/test/scala/org/apache/livy/test/InteractiveIT.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,13 @@ class InteractiveIT extends BaseIntegrationTestSuite {
9393
}
9494
s.run("%table x").verifyResult(".*headers.*type.*name.*data.*")
9595
s.run("abcde").verifyError(ename = "NameError", evalue = "name 'abcde' is not defined")
96-
s.run("raise KeyError, 'foo'").verifyError(ename = "KeyError", evalue = "'foo'")
96+
s.run("raise KeyError('foo')").verifyError(ename = "KeyError", evalue = "'foo'")
9797
s.run("print(1)\r\nprint(1)").verifyResult("1\n1")
9898
}
9999
}
100100

101101
test("R interactive session") {
102+
assume(!sys.props.getOrElse("skipRTests", "false").toBoolean, "Skipping R tests.")
102103
withNewSession(SparkR) { s =>
103104
// R's output sometimes includes the count of statements, which makes it annoying to test
104105
// things. This helps a bit.

repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import org.apache.spark.SparkConf
2121
import org.json4s.{DefaultFormats, JNull, JValue}
2222
import org.json4s.JsonDSL._
2323
import org.scalatest._
24+
import org.scalatest.Inside.inside
2425

2526
import org.apache.livy.rsc.driver.SparkEntries
2627
import org.apache.livy.sessions._
@@ -228,16 +229,16 @@ abstract class PythonBaseInterpreterSpec extends BaseInterpreterSpec {
228229
|'
229230
""".stripMargin)
230231

231-
response should equal(Interpreter.ExecuteError(
232-
"SyntaxError",
233-
"EOL while scanning string literal (<stdin>, line 2)",
234-
List(
235-
" File \"<stdin>\", line 2\n",
236-
" '\n",
237-
" ^\n",
238-
"SyntaxError: EOL while scanning string literal\n"
239-
)
240-
))
232+
inside (response) {
233+
case Interpreter.ExecuteError(ename, evalue, traceback) => {
234+
ename shouldBe "SyntaxError"
235+
evalue should (startWith("EOL while scanning string literal")
236+
or startWith("unterminated string literal"))
237+
traceback.last should (startWith("SyntaxError: EOL while scanning string literal")
238+
or startWith("SyntaxError: unterminated string literal"))
239+
}
240+
case _ => fail()
241+
}
241242

242243
response = intp.execute("x")
243244
response should equal(Interpreter.ExecuteError(

0 commit comments

Comments (0)