apache · ArnavBalyan · Sep 28, 2025 · Nov 15, 2025 · Nov 15, 2025 · Nov 15, 2025
diff --git a/README.md b/README.md
@@ -29,20 +29,20 @@ To build Livy, you will need:
 Debian/Ubuntu:
   * mvn (from ``maven`` package or maven3 tarball)
   * openjdk-8-jdk (or Oracle JDK 8)
-  * Python 2.7+
+  * Python 3.x+
   * R 3.x
 
 Redhat/CentOS:
   * mvn (from ``maven`` package or maven3 tarball)
   * java-1.8.0-openjdk (or Oracle JDK 8)
-  * Python 2.7+
+  * Python 3.x+
   * R 3.x
 
 MacOS:
   * Xcode command line tools
   * Oracle's JDK 1.8
   * Maven (Homebrew)
-  * Python 2.7+
+  * Python 3.x+
   * R 3.x
 
 Required python packages for building Livy:

diff --git a/dev/docker/livy-dev-base/Dockerfile b/dev/docker/livy-dev-base/Dockerfile
@@ -70,32 +70,11 @@ RUN git clone https://github.com/pyenv/pyenv.git $HOME/pyenv
 ENV PYENV_ROOT=$HOME/pyenv
 ENV PATH="$HOME/pyenv/shims:$HOME/pyenv/bin:$HOME/bin:$PATH"
 
-RUN pyenv install -v 2.7.18 && \
-  pyenv install -v 3.9.21 && \
-  pyenv global 2.7.18 3.9.21 && \
+RUN pyenv install -v 3.9.21 && \
+  pyenv global 3.9.21 && \
   pyenv rehash
 
-# Add build dependencies for python2
-# - First we upgrade pip because that makes a lot of things better
-# - Then we remove the provided version of setuptools and install a different version
-# - Then we install additional dependencies
-RUN python2 -m pip install -U "pip < 21.0" && \
-        apt-get remove -y python-setuptools && \
-        python2 -m pip install "setuptools < 36" && \
-        python2 -m pip install \
-        cloudpickle \
-        codecov \
-        flake8 \
-        flaky \
-        "future>=0.15.2" \
-        "futures>=3.0.5" \
-        pytest \
-        pytest-runner \
-        requests-kerberos \
-        "requests >= 2.10.0" \
-        "responses >= 0.5.1"
-
-# Now do the same for python3
+# Install build dependencies for python3
 RUN python3 -m pip install -U pip && pip3 install \
         cloudpickle \
         codecov \
@@ -112,4 +91,3 @@ RUN pyenv rehash
 RUN apt remove -y openjdk-11-jre-headless
 
 WORKDIR /workspace
-
diff --git a/dev/merge_livy_pr.py b/dev/merge_livy_pr.py
@@ -33,21 +33,15 @@
 #   usage: ./merge_livy_pr.py    (see config env vars below)
 #
 
-
 import json
 import os
 import re
 import subprocess
 import sys
+import urllib.request
+from urllib.error import HTTPError
 
-if sys.version_info[0] < 3:
-    import urllib2
-    from urllib2 import HTTPError
-    input_prompt_fn = raw_input
-else:
-    import urllib.request as urllib2
-    from urllib.error import HTTPError
-    input_prompt_fn = input
+input_prompt_fn = input
 
 try:
     import jira.client
@@ -71,21 +65,19 @@
 # https://github.com/settings/tokens. This script only requires the "public_repo" scope.
 GITHUB_OAUTH_KEY = os.environ.get("GITHUB_OAUTH_KEY")
 
-
 GITHUB_BASE = "https://github.com/apache/incubator-livy/pull"
 GITHUB_API_BASE = "https://api.github.com/repos/apache/incubator-livy"
 JIRA_BASE = "https://issues.apache.org/jira/browse"
 JIRA_API_BASE = "https://issues.apache.org/jira"
 # Prefix added to temporary branches
 BRANCH_PREFIX = "PR_TOOL"
 
-
 def get_json(url):
     try:
-        request = urllib2.Request(url)
+        request = urllib.request.Request(url)
         if GITHUB_OAUTH_KEY:
             request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY)
-        return json.load(urllib2.urlopen(request))
+        return json.load(urllib.request.urlopen(request))
     except HTTPError as e:
         if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0':
             print("Exceeded the GitHub API rate limit; see the instructions in " +
@@ -95,42 +87,34 @@ def get_json(url):
             print("Unable to fetch URL, exiting: %s" % url)
         sys.exit(-1)
 
-
 def fail(msg):
     print(msg)
     clean_up()
     sys.exit(-1)
 
-
 def run_cmd(cmd):
     print(cmd)
     if isinstance(cmd, list):
         out_bytes = subprocess.check_output(cmd)
     else:
         out_bytes = subprocess.check_output(cmd.split(" "))
-    if sys.version_info[0] > 2:
-        return out_bytes.decode()
-    else:
-        return out_bytes
-
+    return out_bytes.decode()
 
 def continue_maybe(prompt):
     result = input_prompt_fn("\n%s (y/n): " % prompt)
     if result.lower() != "y":
         fail("Okay, exiting")
 
-
 def clean_up():
     print("Restoring head pointer to %s" % original_head)
     run_cmd("git checkout %s" % original_head)
 
     branches = run_cmd("git branch").replace(" ", "").split("\n")
 
-    for branch in filter(lambda x: x.startswith(BRANCH_PREFIX), branches):
+    for branch in [x for x in branches if x.startswith(BRANCH_PREFIX)]:
         print("Deleting local branch %s" % branch)
         run_cmd("git branch -D %s" % branch)
 
-
 # merge the requested PR and return the merge hash
 def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
     pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num)
@@ -201,7 +185,6 @@ def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
     print("Merge hash: %s" % merge_hash)
     return merge_hash
 
-
 def cherry_pick(pr_num, merge_hash, default_branch):
     pick_ref = input_prompt_fn("Enter a branch name [%s]: " % default_branch)
     if pick_ref == "":
@@ -236,15 +219,13 @@ def cherry_pick(pr_num, merge_hash, default_branch):
     print("Pick hash: %s" % pick_hash)
     return pick_ref
 
-
 def fix_version_from_branch(branch, versions):
     # Note: Assumes this is a sorted (newest->oldest) list of un-released versions
     if branch == "master":
         return versions[0]
     else:
         branch_ver = branch.replace("branch-", "")
-        return filter(lambda x: x.name.startswith(branch_ver), versions)[-1]
-
+        return [x for x in versions if x.name.startswith(branch_ver)][-1]
 
 def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
     asf_jira = jira.client.JIRA({'server': JIRA_API_BASE},
@@ -275,11 +256,11 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
 
     versions = asf_jira.project_versions("LIVY")
     versions = sorted(versions, key=lambda x: x.name, reverse=True)
-    versions = filter(lambda x: x.raw['released'] is False, versions)
+    versions = [x for x in versions if x.raw['released'] is False]
     # Consider only x.y.z versions
-    versions = filter(lambda x: re.match('\d+\.\d+\.\d+', x.name), versions)
+    versions = [x for x in versions if re.match(r'\d+\.\d+\.\d+', x.name)]
 
-    default_fix_versions = map(lambda x: fix_version_from_branch(x, versions).name, merge_branches)
+    default_fix_versions = [fix_version_from_branch(x, versions).name for x in merge_branches]
     for v in default_fix_versions:
         # Handles the case where we have forked a release branch but not yet made the release.
         # In this case, if the PR is committed to the master branch and the release branch, we
@@ -289,7 +270,7 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
         if patch == "0":
             previous = "%s.%s.%s" % (major, int(minor) - 1, 0)
             if previous in default_fix_versions:
-                default_fix_versions = filter(lambda x: x != v, default_fix_versions)
+                default_fix_versions = [x for x in default_fix_versions if x != v]
     default_fix_versions = ",".join(default_fix_versions)
 
     fix_versions = input_prompt_fn(
@@ -299,19 +280,18 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
     fix_versions = fix_versions.replace(" ", "").split(",")
 
     def get_version_json(version_str):
-        return filter(lambda v: v.name == version_str, versions)[0].raw
+        return [v for v in versions if v.name == version_str][0].raw
 
-    jira_fix_versions = map(lambda v: get_version_json(v), fix_versions)
+    jira_fix_versions = [get_version_json(v) for v in fix_versions]
 
-    resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0]
-    resolution = filter(lambda r: r.raw['name'] == "Fixed", asf_jira.resolutions())[0]
+    resolve = [a for a in asf_jira.transitions(jira_id) if a['name'] == "Resolve Issue"][0]
+    resolution = [r for r in asf_jira.resolutions() if r.raw['name'] == "Fixed"][0]
     asf_jira.transition_issue(
         jira_id, resolve["id"], fixVersions=jira_fix_versions,
         comment=comment, resolution={'id': resolution.raw['id']})
 
     print("Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions))
 
-
 def resolve_jira_issues(title, merge_branches, comment):
     jira_ids = re.findall("LIVY-[0-9]{3,6}", title)
 
@@ -320,7 +300,6 @@ def resolve_jira_issues(title, merge_branches, comment):
     for jira_id in jira_ids:
         resolve_jira_issue(merge_branches, comment, jira_id)
 
-
 def standardize_jira_ref(text):
     """
     Standardize the [LIVY-XXXXX] [MODULE] prefix
@@ -362,7 +341,6 @@ def standardize_jira_ref(text):
 
     return clean_text
 
-
 def get_current_ref():
     ref = run_cmd("git rev-parse --abbrev-ref HEAD").strip()
     if ref == 'HEAD':
@@ -371,15 +349,14 @@ def get_current_ref():
     else:
         return ref
 
-
 def main():
     global original_head
 
     os.chdir(LIVY_HOME)
     original_head = get_current_ref()
 
     branches = get_json("%s/branches" % GITHUB_API_BASE)
-    branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
+    branch_names = [x for x in [x['name'] for x in branches] if x.startswith("branch-")]
     # Assumes branch names can be sorted lexicographically
     latest_branch = sorted(branch_names, reverse=True)[0]
 
@@ -462,7 +439,6 @@ def main():
         print("Could not find jira-python library. Run 'sudo pip install jira' to install.")
         print("Exiting without trying to close the associated JIRA.")
 
-
 if __name__ == "__main__":
     import doctest
     (failure_count, test_count) = doctest.testmod()

diff --git a/examples/src/main/python/pi_app.py b/examples/src/main/python/pi_app.py
@@ -15,8 +15,6 @@
 # limitations under the License.
 #
 
-from __future__ import print_function
-
 import sys
 from random import random
 from operator import add
@@ -48,7 +46,7 @@ def f(_):
         return 1 if x ** 2 + y ** 2 <= 1 else 0
 
     def pi_job(context):
-        count = context.sc.parallelize(range(1, samples + 1), slices).map(f).reduce(add)
+        count = context.sc.parallelize(list(range(1, samples + 1)), slices).map(f).reduce(add)
         return 4.0 * count / samples
 
     pi = client.submit(pi_job).result()

diff --git a/integration-test/src/test/resources/batch.py b/integration-test/src/test/resources/batch.py
@@ -22,6 +22,6 @@
 output = sys.argv[1]
 sc = SparkContext(appName="PySpark Test")
 try:
-  sc.parallelize(range(100), 10).map(lambda x: (x, x * 2)).saveAsTextFile(output)
+  sc.parallelize(list(range(100)), 10).map(lambda x: (x, x * 2)).saveAsTextFile(output)
 finally:
   sc.stop()