From 03ab06963badb64f494a1636a4c07238ea4be011 Mon Sep 17 00:00:00 2001
From: "C.J. Collier"
Date: Tue, 24 Sep 2024 12:06:48 -0700
Subject: [PATCH] initial commit of ipex action + README

---
 ipex/README.md    | 33 +++++++++++++++++
 ipex/ipex-init.sh | 90 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+)
 create mode 100644 ipex/README.md
 create mode 100644 ipex/ipex-init.sh

diff --git a/ipex/README.md b/ipex/README.md
new file mode 100644
index 000000000..b7a82ce5e
--- /dev/null
+++ b/ipex/README.md
@@ -0,0 +1,33 @@
+
+## In Intel's own words:
+
+Intel® Extension for PyTorch* extends PyTorch* with the latest
+performance optimizations for Intel hardware. Optimizations take
+advantage of Intel® Advanced Vector Extensions 512 (Intel® AVX-512)
+Vector Neural Network Instructions (VNNI) and Intel® Advanced Matrix
+Extensions (Intel® AMX) on Intel CPUs as well as Intel XeMatrix
+Extensions (XMX) AI engines on Intel discrete GPUs. Moreover, Intel®
+Extension for PyTorch* provides easy GPU acceleration for Intel
+discrete GPUs through the PyTorch* xpu device.
+
+The extension can be loaded as a Python module for Python programs or
+linked as a C++ library for C++ programs. In Python scripts, users can
+enable it dynamically by importing intel_extension_for_pytorch.
+
+## This action:
+
+The ipex initialization action installs the appropriate python
+libraries and installs the spark-tfrecord jar for the current scala
+runtime into the cluster's classpath.
+
+### Metadata arguments
+* metadata name: spark-tfrecord-version
+* default value: 0.4.0
+  Used to specify the version of the spark-tfrecord jar. See url
+  https://central.sonatype.com/artifact/com.linkedin.sparktfrecord/spark-tfrecord_2.12/versions
+  https://central.sonatype.com/artifact/com.linkedin.sparktfrecord/spark-tfrecord_2.13/versions
+
+* metadata name: scala-version
+* default value: 2.12
+  Used to specify the version of scala. See url
+  https://github.com/scala/scala/releases
diff --git a/ipex/ipex-init.sh b/ipex/ipex-init.sh
new file mode 100644
index 000000000..527cc4749
--- /dev/null
+++ b/ipex/ipex-init.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This script installs the ipex (Intel Extension for PyTorch) python
+# libraries and the spark-tfrecord jar. It does not collect GPU utilization
+# metrics at this time.
+#
+# Metadata attributes read by this script:
+#   scala-version           scala version used in the jar name (default 2.12)
+#   spark-tfrecord-version  spark-tfrecord jar version (default 0.4.0)
+
+set -euxo pipefail
+
+readonly DEFAULT_SCALA_VER=2.12
+readonly DEFAULT_TFRECORD_VER=0.4.0
+readonly SPARK_JARS_DIR=/usr/lib/spark/jars
+readonly MAVEN_BASE_URL='https://repo1.maven.org/maven2/com/linkedin/sparktfrecord'
+readonly METADATA_BASE_URL='http://metadata.google.internal/computeMetadata/v1/instance/attributes'
+
+# Prints the value of instance metadata attribute $1, or the fallback $2
+# (empty when omitted) if the attribute cannot be read from the metadata
+# server.
+get_metadata_attribute() {
+  local -r attribute_name=$1
+  local -r default_value=${2:-}
+  wget -q -O - --tries=3 --timeout=10 --header='Metadata-Flavor: Google' \
+    "${METADATA_BASE_URL}/${attribute_name}" \
+    || printf '%s' "${default_value}"
+}
+
+# Installs the python packages used by this action.
+install_python_packages() {
+  # NOTE(review): presumably a cache directory shared by all users, hence the
+  # world-writable mode - confirm.
+  mkdir -p /home/.cache
+  chmod -R 777 /home/.cache
+
+  pip install tiktoken
+  pip install tldextract
+  pip install zstandard
+  pip install transformers
+  # Quoted so the shell cannot expand [neural-compressor] as a glob pattern.
+  pip install -U 'optimum[neural-compressor]' intel-extension-for-transformers
+  pip install intel_extension_for_pytorch
+  pip install fasttext
+}
+
+# Downloads the spark-tfrecord jar matching the requested scala and
+# spark-tfrecord versions and places it in ${SPARK_JARS_DIR}.
+install_spark_tfrecord_jar() {
+  local scala_ver tfrecord_ver jar_filename tmp_dir
+  scala_ver=$(get_metadata_attribute 'scala-version' "${DEFAULT_SCALA_VER}")
+  tfrecord_ver=$(get_metadata_attribute 'spark-tfrecord-version' "${DEFAULT_TFRECORD_VER}")
+  jar_filename="spark-tfrecord_${scala_ver}-${tfrecord_ver}.jar"
+
+  # Download into a private temporary directory so that a failed or partial
+  # download never lands in the Spark jars directory.
+  tmp_dir=$(mktemp -d)
+  if ! wget -nv -O "${tmp_dir}/${jar_filename}" \
+    "${MAVEN_BASE_URL}/spark-tfrecord_${scala_ver}/${tfrecord_ver}/${jar_filename}"; then
+    rm -rf -- "${tmp_dir}"
+    echo "ERROR: unable to download ${jar_filename}" >&2
+    return 1
+  fi
+  mv "${tmp_dir}/${jar_filename}" "${SPARK_JARS_DIR}/"
+  rmdir "${tmp_dir}"
+}
+
+# Entry point: installs the python packages, then the spark-tfrecord jar.
+main() {
+  install_python_packages
+  # NOTE(review): this export only affects this script and its child
+  # processes; confirm whether a persistent setting was intended.
+  export CUDA_VISIBLE_DEVICES=""
+  install_spark_tfrecord_jar
+}
+
+main "$@"