Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,20 @@ To build Livy, you will need:
Debian/Ubuntu:
* mvn (from ``maven`` package or maven3 tarball)
* openjdk-8-jdk (or Oracle JDK 8)
* Python 2.7+
* Python 3.x+
* R 3.x

Redhat/CentOS:
* mvn (from ``maven`` package or maven3 tarball)
* java-1.8.0-openjdk (or Oracle JDK 8)
* Python 2.7+
* Python 3.x+
* R 3.x

MacOS:
* Xcode command line tools
* Oracle's JDK 1.8
* Maven (Homebrew)
* Python 2.7+
* Python 3.x+
* R 3.x

Required python packages for building Livy:
Expand Down
28 changes: 3 additions & 25 deletions dev/docker/livy-dev-base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -70,32 +70,11 @@ RUN git clone https://github.com/pyenv/pyenv.git $HOME/pyenv
ENV PYENV_ROOT=$HOME/pyenv
ENV PATH="$HOME/pyenv/shims:$HOME/pyenv/bin:$HOME/bin:$PATH"

RUN pyenv install -v 2.7.18 && \
pyenv install -v 3.9.21 && \
pyenv global 2.7.18 3.9.21 && \
RUN pyenv install -v 3.9.21 && \
pyenv global 3.9.21 && \
pyenv rehash

# Add build dependencies for python2
# - First we upgrade pip because that makes a lot of things better
# - Then we remove the provided version of setuptools and install a different version
# - Then we install additional dependencies
RUN python2 -m pip install -U "pip < 21.0" && \
apt-get remove -y python-setuptools && \
python2 -m pip install "setuptools < 36" && \
python2 -m pip install \
cloudpickle \
codecov \
flake8 \
flaky \
"future>=0.15.2" \
"futures>=3.0.5" \
pytest \
pytest-runner \
requests-kerberos \
"requests >= 2.10.0" \
"responses >= 0.5.1"

# Now do the same for python3
# Install build dependencies for python3
RUN python3 -m pip install -U pip && pip3 install \
cloudpickle \
codecov \
Expand All @@ -112,4 +91,3 @@ RUN pyenv rehash
RUN apt remove -y openjdk-11-jre-headless

WORKDIR /workspace

58 changes: 17 additions & 41 deletions dev/merge_livy_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,15 @@
# usage: ./merge_livy_pr.py (see config env vars below)
#


import json
import os
import re
import subprocess
import sys
import urllib.request
from urllib.error import HTTPError

if sys.version_info[0] < 3:
import urllib2
from urllib2 import HTTPError
input_prompt_fn = raw_input
else:
import urllib.request as urllib2
from urllib.error import HTTPError
input_prompt_fn = input
input_prompt_fn = input

try:
import jira.client
Expand All @@ -71,21 +65,19 @@
# https://github.com/settings/tokens. This script only requires the "public_repo" scope.
GITHUB_OAUTH_KEY = os.environ.get("GITHUB_OAUTH_KEY")


GITHUB_BASE = "https://github.com/apache/incubator-livy/pull"
GITHUB_API_BASE = "https://api.github.com/repos/apache/incubator-livy"
JIRA_BASE = "https://issues.apache.org/jira/browse"
JIRA_API_BASE = "https://issues.apache.org/jira"
# Prefix added to temporary branches
BRANCH_PREFIX = "PR_TOOL"


def get_json(url):
try:
request = urllib2.Request(url)
request = urllib.request.Request(url)
if GITHUB_OAUTH_KEY:
request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY)
return json.load(urllib2.urlopen(request))
return json.load(urllib.request.urlopen(request))
except HTTPError as e:
if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0':
print("Exceeded the GitHub API rate limit; see the instructions in " +
Expand All @@ -95,42 +87,34 @@ def get_json(url):
print("Unable to fetch URL, exiting: %s" % url)
sys.exit(-1)


def fail(msg):
print(msg)
clean_up()
sys.exit(-1)


def run_cmd(cmd):
print(cmd)
if isinstance(cmd, list):
out_bytes = subprocess.check_output(cmd)
else:
out_bytes = subprocess.check_output(cmd.split(" "))
if sys.version_info[0] > 2:
return out_bytes.decode()
else:
return out_bytes

return out_bytes.decode()

def continue_maybe(prompt):
result = input_prompt_fn("\n%s (y/n): " % prompt)
if result.lower() != "y":
fail("Okay, exiting")


def clean_up():
print("Restoring head pointer to %s" % original_head)
run_cmd("git checkout %s" % original_head)

branches = run_cmd("git branch").replace(" ", "").split("\n")

for branch in filter(lambda x: x.startswith(BRANCH_PREFIX), branches):
for branch in [x for x in branches if x.startswith(BRANCH_PREFIX)]:
print("Deleting local branch %s" % branch)
run_cmd("git branch -D %s" % branch)


# merge the requested PR and return the merge hash
def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num)
Expand Down Expand Up @@ -201,7 +185,6 @@ def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
print("Merge hash: %s" % merge_hash)
return merge_hash


def cherry_pick(pr_num, merge_hash, default_branch):
pick_ref = input_prompt_fn("Enter a branch name [%s]: " % default_branch)
if pick_ref == "":
Expand Down Expand Up @@ -236,15 +219,13 @@ def cherry_pick(pr_num, merge_hash, default_branch):
print("Pick hash: %s" % pick_hash)
return pick_ref


def fix_version_from_branch(branch, versions):
# Note: Assumes this is a sorted (newest->oldest) list of un-released versions
if branch == "master":
return versions[0]
else:
branch_ver = branch.replace("branch-", "")
return filter(lambda x: x.name.startswith(branch_ver), versions)[-1]

return [x for x in versions if x.name.startswith(branch_ver)][-1]

def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
asf_jira = jira.client.JIRA({'server': JIRA_API_BASE},
Expand Down Expand Up @@ -275,11 +256,11 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):

versions = asf_jira.project_versions("LIVY")
versions = sorted(versions, key=lambda x: x.name, reverse=True)
versions = filter(lambda x: x.raw['released'] is False, versions)
versions = [x for x in versions if x.raw['released'] is False]
# Consider only x.y.z versions
versions = filter(lambda x: re.match('\d+\.\d+\.\d+', x.name), versions)
versions = [x for x in versions if re.match(r'\d+\.\d+\.\d+', x.name)]

default_fix_versions = map(lambda x: fix_version_from_branch(x, versions).name, merge_branches)
default_fix_versions = [fix_version_from_branch(x, versions).name for x in merge_branches]
for v in default_fix_versions:
# Handles the case where we have forked a release branch but not yet made the release.
# In this case, if the PR is committed to the master branch and the release branch, we
Expand All @@ -289,7 +270,7 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
if patch == "0":
previous = "%s.%s.%s" % (major, int(minor) - 1, 0)
if previous in default_fix_versions:
default_fix_versions = filter(lambda x: x != v, default_fix_versions)
default_fix_versions = [x for x in default_fix_versions if x != v]
default_fix_versions = ",".join(default_fix_versions)

fix_versions = input_prompt_fn(
Expand All @@ -299,19 +280,18 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
fix_versions = fix_versions.replace(" ", "").split(",")

def get_version_json(version_str):
return filter(lambda v: v.name == version_str, versions)[0].raw
return [v for v in versions if v.name == version_str][0].raw

jira_fix_versions = map(lambda v: get_version_json(v), fix_versions)
jira_fix_versions = [get_version_json(v) for v in fix_versions]

resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0]
resolution = filter(lambda r: r.raw['name'] == "Fixed", asf_jira.resolutions())[0]
resolve = [a for a in asf_jira.transitions(jira_id) if a['name'] == "Resolve Issue"][0]
resolution = [r for r in asf_jira.resolutions() if r.raw['name'] == "Fixed"][0]
asf_jira.transition_issue(
jira_id, resolve["id"], fixVersions=jira_fix_versions,
comment=comment, resolution={'id': resolution.raw['id']})

print("Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions))


def resolve_jira_issues(title, merge_branches, comment):
jira_ids = re.findall("LIVY-[0-9]{3,6}", title)

Expand All @@ -320,7 +300,6 @@ def resolve_jira_issues(title, merge_branches, comment):
for jira_id in jira_ids:
resolve_jira_issue(merge_branches, comment, jira_id)


def standardize_jira_ref(text):
"""
Standardize the [LIVY-XXXXX] [MODULE] prefix
Expand Down Expand Up @@ -362,7 +341,6 @@ def standardize_jira_ref(text):

return clean_text


def get_current_ref():
ref = run_cmd("git rev-parse --abbrev-ref HEAD").strip()
if ref == 'HEAD':
Expand All @@ -371,15 +349,14 @@ def get_current_ref():
else:
return ref


def main():
global original_head

os.chdir(LIVY_HOME)
original_head = get_current_ref()

branches = get_json("%s/branches" % GITHUB_API_BASE)
branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
branch_names = [x for x in [x['name'] for x in branches] if x.startswith("branch-")]
# Assumes branch names can be sorted lexicographically
latest_branch = sorted(branch_names, reverse=True)[0]

Expand Down Expand Up @@ -462,7 +439,6 @@ def main():
print("Could not find jira-python library. Run 'sudo pip install jira' to install.")
print("Exiting without trying to close the associated JIRA.")


if __name__ == "__main__":
import doctest
(failure_count, test_count) = doctest.testmod()
Expand Down
4 changes: 1 addition & 3 deletions examples/src/main/python/pi_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
# limitations under the License.
#

from __future__ import print_function

import sys
from random import random
from operator import add
Expand Down Expand Up @@ -48,7 +46,7 @@ def f(_):
return 1 if x ** 2 + y ** 2 <= 1 else 0

def pi_job(context):
count = context.sc.parallelize(range(1, samples + 1), slices).map(f).reduce(add)
count = context.sc.parallelize(list(range(1, samples + 1)), slices).map(f).reduce(add)
return 4.0 * count / samples

pi = client.submit(pi_job).result()
Expand Down
2 changes: 1 addition & 1 deletion integration-test/src/test/resources/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@
output = sys.argv[1]
sc = SparkContext(appName="PySpark Test")
try:
sc.parallelize(range(100), 10).map(lambda x: (x, x * 2)).saveAsTextFile(output)
sc.parallelize(list(range(100)), 10).map(lambda x: (x, x * 2)).saveAsTextFile(output)
finally:
sc.stop()
Loading