Skip to content

Commit 62574da

Browse files
authored
Alloc/Free Job Resource done by Prolog/Epilog phase of Plugin (#28)
1 parent be4fa16 commit 62574da

File tree

10 files changed

+203
-66
lines changed

10 files changed

+203
-66
lines changed

commands/qrun/daapi/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ Once you specify all required configuration parameters, you can create a Client
8787
let client = ClientBuilder::new("http://localhost:8290")
8888
.with_auth(AuthMethod::IbmCloudIam {
8989
apikey: "your_iam_apikey".to_string(),
90+
service_crn: "your_service_crn".to_string(),
91+
iam_endpoint_url: "your_iam_endpoint_url".to_string(),
9092
})
9193
.with_timeout(Duration::from_secs(60))
9294
.with_retry_policy(retry_policy)

commands/qrun/daapi_c/build.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
//
2+
// (C) Copyright IBM 2025
3+
//
4+
// This code is licensed under the Apache License, Version 2.0. You may
5+
// obtain a copy of this license in the LICENSE.txt file in the root directory
6+
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
7+
//
8+
// Any modifications or derivative works of this code must retain this
9+
// copyright notice, and modified files need to carry a notice indicating
10+
// that they have been altered from the originals.
11+
112
fn main() {
213
for (key, value) in std::env::vars() {
314
eprintln!("{key}: {value}");

commands/qrun/qrun/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ edition = "2021"
66
[features]
77
default = ["ibmcloud_appid_auth"]
88
ibmcloud_appid_auth = []
9+
job_cleanup = []
910

1011
[dependencies]
1112
direct-access-api = { workspace = true }

commands/qrun/qrun/src/main.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// copyright notice, and modified files need to carry a notice indicating
1010
// that they have been altered from the originals.
1111

12+
#![allow(unused_imports)]
1213
use std::fs::File;
1314
use std::fs::OpenOptions;
1415
use std::io::prelude::*;
@@ -61,6 +62,7 @@ struct Args {
6162
}
6263

6364
// Handle signals, and cancel QPU job if SIGTERM is received.
65+
#[cfg(feature = "job_cleanup")]
6466
async fn handle_signals(mut signals: Signals, job: PrimitiveJob) {
6567
while let Some(signal) = signals.next().await {
6668
// To cancel a job, invoke scancel without --signal option. This will send
@@ -205,7 +207,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
205207
.unwrap();
206208

207209
// scancel related signals
210+
#[cfg(feature = "job_cleanup")]
208211
let signals = Signals::new([SIGTERM, SIGCONT])?;
212+
#[cfg(feature = "job_cleanup")]
209213
let handle = signals.handle();
210214

211215
let f = File::open(args.input).expect("file not found");
@@ -225,6 +229,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
225229
)
226230
.await?;
227231

232+
#[cfg(feature = "job_cleanup")]
228233
let signals_task = tokio::spawn(handle_signals(signals, primitive_job.clone()));
229234

230235
let mut succeeded: bool = true;
@@ -250,6 +255,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
250255
"Error occurred while waiting for final state: {:?}",
251256
e.to_string()
252257
);
258+
#[cfg(feature = "job_cleanup")]
253259
let _ = primitive_job.cancel(false).await;
254260
succeeded = false;
255261
}
@@ -289,10 +295,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
289295
}
290296
}
291297

292-
client.delete_job(&primitive_job.job_id).await?;
298+
#[cfg(feature = "job_cleanup")]
299+
{
300+
client.delete_job(&primitive_job.job_id).await?;
293301

294-
handle.close();
295-
signals_task.await?;
302+
handle.close();
303+
signals_task.await?;
304+
}
296305

297306
Ok(())
298307
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
YOUR_PROVISIONED_INSTANCE
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
YOUR_IAM_APIKEY
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/bin/bash
# /etc/slurm/prolog.d/set_cloud_env.sh
#
# Slurm TaskProlog script. Each "export NAME=value" line printed on stdout is
# picked up by Slurm and added to the environment of the task being launched.
# The set of IBMQRUN_* variables emitted depends on the partition the job was
# submitted to; jobs in any other partition get no extra variables.
#
# Input:  SLURM_JOB_PARTITION (set by Slurm for the job).
# Output: zero or more "export IBMQRUN_*=..." lines on stdout.

# The expansions are quoted on purpose: unquoted, an empty/unset value turns
# `[ -n $VAR ]` into `[ -n ]`, which is always true, and the comparisons below
# into `[ == "staging" ]`, which fails with "unary operator expected".
if [ -n "${SLURM_JOB_PARTITION:-}" ]
then
    if [ "$SLURM_JOB_PARTITION" = "staging" ]
    then
        # Staging: credentials are read from files under /etc/cloud_secrets.
        echo "export IBMQRUN_AWS_ACCESS_KEY_ID=$(cat /etc/cloud_secrets/aws_access_key)"
        echo "export IBMQRUN_AWS_SECRET_ACCESS_KEY=$(cat /etc/cloud_secrets/aws_secret_access_key)"
        echo "export IBMQRUN_IAM_APIKEY=$(cat /etc/cloud_secrets/ibmcloud_iam_apikey)"
        echo "export IBMQRUN_SERVICE_CRN=$(cat /etc/cloud_secrets/directaccess_service_crn)"
        echo "export IBMQRUN_IAM_ENDPOINT=https://iam.test.cloud.ibm.com"
        echo "export IBMQRUN_DAAPI_ENDPOINT=http://127.0.0.1:8080"
        echo "export IBMQRUN_S3_ENDPOINT=https://s3.us-east.cloud-object-storage.appdomain.cloud"
        # Placeholder: replace with the real bucket name before deploying.
        echo "export IBMQRUN_S3_BUCKET=<your bucket name>"
        echo "export IBMQRUN_S3_REGION=us-east"
    elif [ "$SLURM_JOB_PARTITION" = "normal" ]
    then
        # Normal: fixed demo values pointing at the daapi and minio hosts.
        echo "export IBMQRUN_AWS_ACCESS_KEY_ID=minioadmin"
        echo "export IBMQRUN_AWS_SECRET_ACCESS_KEY=minioadmin"
        echo "export IBMQRUN_IAM_APIKEY=demoapikey1"
        echo "export IBMQRUN_SERVICE_CRN=crn:v1:local:daa_sim"
        echo "export IBMQRUN_IAM_ENDPOINT=http://daapi:8290"
        echo "export IBMQRUN_DAAPI_ENDPOINT=http://daapi:8290"
        echo "export IBMQRUN_S3_ENDPOINT=http://minio:9000"
        echo "export IBMQRUN_S3_BUCKET=test"
        echo "export IBMQRUN_S3_REGION=us-east"
    fi
fi

demo/qrun/etc/slurm/slurm.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ ReturnToService=0
3636
#SrunProlog=
3737
#SrunEpilog=
3838
TaskProlog=/etc/slurm/prolog.d/set_cloud_env.sh
39-
TaskEpilog=/etc/slurm/epilog.d/unset_cloud_env.sh
39+
#TaskEpilog=
4040
#TaskPlugin=
4141
#TrackWCKey=no
4242
#TreeWidth=50

plugins/spank_ibm_qrun/CMakeLists.txt

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ project (spank_ibm_qrun C)
88
include(CheckIncludeFiles)
99
include(FindPackageHandleStandardArgs)
1010

11+
set(TARGET_NAME spank_ibm_qrun)
12+
set(DAAPI_C_LIB
13+
-L${CMAKE_CURRENT_SOURCE_DIR}/../../commands/qrun/target/release
14+
-ldirect_access_capi
15+
)
16+
1117
find_path(SLURM_INCLUDE_DIR NAMES slurm/slurm.h)
1218
find_library(SLURM_LIBRARY NAMES libslurm.so)
1319
find_package_handle_standard_args(SLURM DEFAULT_MSG SLURM_LIBRARY SLURM_INCLUDE_DIR)
@@ -23,6 +29,13 @@ else (NOT SLURM_FOUND)
2329
endif (NOT SLURM_FOUND)
2430
mark_as_advanced (SLURM_LIBRARIES SLURM_INCLUDE_DIRS)
2531

26-
add_library (spank_ibm_qrun MODULE spank_ibm_qrun.c)
32+
add_library (${TARGET_NAME} MODULE spank_ibm_qrun.c)
2733
include_directories (BEFORE ${SLURM_INCLUDE_DIRS})
28-
set_target_properties (spank_ibm_qrun PROPERTIES PREFIX "" SUFFIX "" OUTPUT_NAME "spank_ibm_qrun.so")
34+
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../commands/qrun/daapi_c/)
35+
target_link_libraries(${TARGET_NAME} PRIVATE ${DAAPI_C_LIB})
36+
target_link_libraries(${TARGET_NAME} PUBLIC "-lssl")
37+
target_link_libraries(${TARGET_NAME} PUBLIC "-lcrypto")
38+
target_link_libraries(${TARGET_NAME} PUBLIC "-ldl")
39+
target_link_libraries(${TARGET_NAME} PUBLIC "-lm")
40+
target_link_libraries(${TARGET_NAME} PUBLIC "-lpthread")
41+
set_target_properties (${TARGET_NAME} PROPERTIES PREFIX "" SUFFIX "" OUTPUT_NAME "spank_ibm_qrun.so")

0 commit comments

Comments
 (0)