diff --git a/deploy/sast-ai-chart/templates/deployment.yaml b/deploy/sast-ai-chart/templates/deployment.yaml
index 27743d1..e75aba2 100644
--- a/deploy/sast-ai-chart/templates/deployment.yaml
+++ b/deploy/sast-ai-chart/templates/deployment.yaml
@@ -54,6 +54,22 @@ spec:
secretKeyRef:
name: {{ include "sast-ai.postgresql.secretName" . }}
key: {{ include "sast-ai.postgresql.secretKey" . }}
+ # AWS S3 credentials for DVC (MinIO)
+ - name: AWS_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: sast-ai-s3-credentials
+ key: access_key_id
+ - name: AWS_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: sast-ai-s3-credentials
+ key: secret_access_key
+ - name: AWS_S3_ENDPOINT_URL
+ valueFrom:
+ secretKeyRef:
+ name: sast-ai-s3-credentials
+ key: endpoint_url
# Application environment variables
{{- range $key, $value := .Values.app.env }}
- name: {{ $key }}
diff --git a/pom.xml b/pom.xml
index eea9339..1728363 100644
--- a/pom.xml
+++ b/pom.xml
@@ -162,6 +162,12 @@
quarkus-jacoco
test
+
+
+ org.yaml
+ snakeyaml
+ 2.5
+
org.jsoup
jsoup
diff --git a/src/main/docker/Dockerfile.jvm b/src/main/docker/Dockerfile.jvm
index 69c976e..bea6962 100644
--- a/src/main/docker/Dockerfile.jvm
+++ b/src/main/docker/Dockerfile.jvm
@@ -82,6 +82,14 @@ FROM registry.access.redhat.com/ubi9/openjdk-21:1.21
ENV LANGUAGE='en_US:en'
+# Install DVC CLI and dependencies for data version control
+# Switch to UID 0 (root) for the package-install steps below.
+USER 0
+# Installs python3, pip and git with microdnf, then the dvc and dvc-s3 packages with pip,
+# cleans the microdnf cache, and finally hands /usr/local/bin/dvc to 185:185 with mode 700.
+# NOTE(review): mode 700 with owner 185 means only UID 185 can execute dvc; if the
+# platform runs the container under a different (e.g. randomly assigned) UID the
+# `dvc` call will fail with "permission denied" - confirm the runtime UID is always 185.
+RUN microdnf install -y python3 python3-pip git && \
+ pip3 install --no-cache-dir dvc dvc-s3 && \
+ microdnf clean all && \
+ chown 185:185 /usr/local/bin/dvc && \
+ chmod 700 /usr/local/bin/dvc
+# Switch back to UID 185 for the remaining layers.
+USER 185
# We make four distinct layers so if there are application changes the library layers can be re-used
COPY --chown=185 target/quarkus-app/lib/ /deployments/lib/
diff --git a/src/main/java/com/redhat/sast/api/exceptions/DvcException.java b/src/main/java/com/redhat/sast/api/exceptions/DvcException.java
new file mode 100644
index 0000000..d17da6f
--- /dev/null
+++ b/src/main/java/com/redhat/sast/api/exceptions/DvcException.java
@@ -0,0 +1,28 @@
+package com.redhat.sast.api.exceptions;
+
+/**
+ * Unchecked exception signalling that a DVC operation could not be completed.
+ *
+ * <p>Typical triggers are a failed fetch from the DVC repository, a DVC YAML
+ * file that cannot be parsed, or an error while running the DVC command itself.
+ */
+public class DvcException extends RuntimeException {
+
+    /**
+     * Creates a DvcException that wraps an underlying failure.
+     *
+     * @param message human-readable description of what went wrong
+     * @param cause the original exception that triggered this failure
+     */
+    public DvcException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * Creates a DvcException carrying only a description of the failure.
+     *
+     * @param message human-readable description of what went wrong
+     */
+    public DvcException(String message) {
+        super(message);
+    }
+}
diff --git a/src/main/java/com/redhat/sast/api/service/DvcService.java b/src/main/java/com/redhat/sast/api/service/DvcService.java
new file mode 100644
index 0000000..f76a948
--- /dev/null
+++ b/src/main/java/com/redhat/sast/api/service/DvcService.java
@@ -0,0 +1,121 @@
+package com.redhat.sast.api.service;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.*;
+
+import org.eclipse.microprofile.config.inject.ConfigProperty;
+import org.yaml.snakeyaml.LoaderOptions;
+import org.yaml.snakeyaml.Yaml;
+import org.yaml.snakeyaml.constructor.SafeConstructor;
+
+import com.redhat.sast.api.exceptions.DvcException;
+import com.redhat.sast.api.util.dvc.ProcessExecutor;
+
+import jakarta.annotation.Nonnull;
+import jakarta.enterprise.context.ApplicationScoped;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Reads the list of package NVRs for a given DVC revision.
+ *
+ * <p>The service fetches the configured YAML file from the configured DVC repository
+ * via {@link ProcessExecutor#runDvcCommand}, parses it with SnakeYAML's
+ * {@link SafeConstructor} and flattens the result into a list of unique strings.
+ */
+@ApplicationScoped
+@Slf4j
+public class DvcService {
+
+    /** Upper bound on the accepted version length, checked before the regex is applied. */
+    private static final int MAX_VERSION_LENGTH = 100;
+
+    /**
+     * Accepted revision formats: a semantic version (optional leading "v", optional
+     * "-suffix" and "+build" parts), an identifier starting with a letter (max 50 chars),
+     * or a date in the form NNNN-NN-NN. Compiled once instead of on every call.
+     */
+    private static final java.util.regex.Pattern VERSION_PATTERN = java.util.regex.Pattern.compile(
+            "^(v?\\d+\\.\\d+\\.\\d+(?:-[a-zA-Z0-9]+)?(?:\\+[a-zA-Z0-9]+)?|[a-zA-Z][a-zA-Z0-9_-]{0,49}|\\d{4}-\\d{2}-\\d{2})$");
+
+    @ConfigProperty(name = "dvc.repo.url")
+    String dvcRepoUrl;
+
+    @ConfigProperty(name = "dvc.batch.yaml.path")
+    String batchYamlPath;
+
+    /**
+     * Get list of NVRs from DVC repository by version tag.
+     *
+     * <p>The YAML document may be either a plain list of strings or a map whose values
+     * are lists of strings (the map keys are ignored). Duplicates are removed and the
+     * order of first appearance in the document is preserved. Any other top-level
+     * document (including an empty one) yields an empty list.
+     *
+     * @param version DVC version tag (e.g., "1.0.0" or "v1.0.0")
+     * @return unmodifiable list of package NVR strings (empty list if no NVRs found)
+     * @throws DvcException if the DVC fetch fails, the YAML cannot be parsed, or the
+     *         YAML contains a map value that is not a list or a list element that is not a string
+     * @throws IllegalArgumentException if version is null, empty, too long or not in an accepted format
+     */
+    public List<String> getNvrList(@Nonnull String version) {
+
+        String yamlContent = fetchNvrConfigFromDvc(version);
+        LOGGER.debug("Raw YAML content from DVC ({} bytes)", yamlContent.length());
+
+        Object parsed;
+        try {
+            // SafeConstructor restricts the loader to plain maps, lists and scalars.
+            Yaml yaml = new Yaml(new SafeConstructor(new LoaderOptions()));
+            parsed = yaml.load(yamlContent);
+        } catch (RuntimeException e) {
+            throw new DvcException("Failed to parse YAML content for version " + version, e);
+        }
+
+        // LinkedHashSet: de-duplicates while keeping the order in which NVRs appear in the file.
+        Set<String> nvrSet = new LinkedHashSet<>();
+        if (parsed instanceof Map<?, ?> map) {
+            // YAML has a map structure: every value must be a list of NVR strings
+            for (Object value : map.values()) {
+                collectNvrs(value, nvrSet, version);
+            }
+        } else if (parsed instanceof List<?>) {
+            // YAML is just a list of NVRs
+            collectNvrs(parsed, nvrSet, version);
+        }
+
+        if (nvrSet.isEmpty()) {
+            LOGGER.warn("No NVRs found in YAML for DVC version {}", version);
+            return Collections.emptyList();
+        }
+        return List.copyOf(nvrSet);
+    }
+
+    /**
+     * Adds every element of {@code candidate} to {@code target}, verifying the runtime types
+     * explicitly instead of relying on unchecked casts.
+     *
+     * @throws DvcException if {@code candidate} is not a list or contains a non-string element
+     */
+    private static void collectNvrs(Object candidate, Set<String> target, String version) {
+        if (!(candidate instanceof List<?> entries)) {
+            throw unexpectedYamlShape(version);
+        }
+        for (Object entry : entries) {
+            if (!(entry instanceof String nvr)) {
+                throw unexpectedYamlShape(version);
+            }
+            target.add(nvr);
+        }
+    }
+
+    /** Builds the exception reported when the YAML document does not have the expected shape. */
+    private static DvcException unexpectedYamlShape(String version) {
+        return new DvcException("Unexpected data type while parsing YAML for version " + version
+                + " (expected Map<String, List<String>> or List<String>)");
+    }
+
+    /**
+     * Validates repo tag version - expected semantic version (v1.0.0), identifier or date.
+     *
+     * @throws IllegalArgumentException if the version is null, longer than
+     *         {@value #MAX_VERSION_LENGTH} characters, or does not match the accepted formats
+     */
+    private void validateDvcInputs(String version) {
+        if (version == null) {
+            throw new IllegalArgumentException("DVC version must not be null");
+        }
+
+        // Bound the input length before running the regex against it
+        if (version.length() > MAX_VERSION_LENGTH) {
+            String displayVersion = version.substring(0, 50) + "...";
+            throw new IllegalArgumentException("DVC version too long (max 100 characters): " + displayVersion);
+        }
+
+        if (!VERSION_PATTERN.matcher(version).matches()) {
+            throw new IllegalArgumentException("Invalid DVC version format: '" + version
+                    + "' - expected semantic version (v1.0.0) or valid identifier");
+        }
+    }
+
+    /**
+     * Downloads the configured YAML file at the given revision into a temporary file
+     * and returns its content as a UTF-8 string. The temporary file is always removed.
+     *
+     * @throws DvcException if the fetch fails with an I/O error or the thread is interrupted
+     * @throws IllegalArgumentException if the version fails validation
+     */
+    private String fetchNvrConfigFromDvc(@Nonnull String version) {
+        validateDvcInputs(version);
+        LOGGER.debug("Executing DVC get command: repo={}, path={}, version={}", dvcRepoUrl, batchYamlPath, version);
+        Path tempFile = null;
+        try {
+            tempFile = Files.createTempFile("dvc-fetch-", ".tmp");
+            ProcessExecutor.runDvcCommand(dvcRepoUrl, batchYamlPath, version, tempFile);
+            // read the content that the DVC command wrote into the temp file
+            return Files.readString(tempFile, java.nio.charset.StandardCharsets.UTF_8);
+        } catch (IOException e) {
+            throw new DvcException("I/O error during DVC fetch operation", e);
+        } catch (InterruptedException e) {
+            // restore the interrupt flag so callers further up can observe it
+            Thread.currentThread().interrupt();
+            throw new DvcException("DVC fetch operation was interrupted", e);
+        } finally {
+            // clean up temp file
+            if (tempFile != null) {
+                try {
+                    Files.deleteIfExists(tempFile);
+                } catch (IOException e) {
+                    LOGGER.warn("[ACTION REQUIRED] Failed to delete temp file: {}", tempFile, e);
+                }
+            }
+        }
+    }
+}
diff --git a/src/main/java/com/redhat/sast/api/util/dvc/ProcessExecutor.java b/src/main/java/com/redhat/sast/api/util/dvc/ProcessExecutor.java
new file mode 100644
index 0000000..5104bb4
--- /dev/null
+++ b/src/main/java/com/redhat/sast/api/util/dvc/ProcessExecutor.java
@@ -0,0 +1,53 @@
+package com.redhat.sast.api.util.dvc;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import com.redhat.sast.api.exceptions.DvcException;
+
+import jakarta.enterprise.context.ApplicationScoped;
+import lombok.extern.slf4j.Slf4j;
+
+@ApplicationScoped
+@Slf4j
+public class ProcessExecutor {
+
+ private static final AtomicBoolean isProcessRunning = new AtomicBoolean(false);
+
+ public static void runDvcCommand(String dvcRepoUrl, String batchYamlPath, String version, Path tempFile)
+ throws InterruptedException, IOException {
+
+ if (isProcessRunning.get()) {
+ throw new DvcException("DVC command is already running...");
+ }
+ isProcessRunning.set(true);
+
+ try {
+ ProcessBuilder processBuilder = new ProcessBuilder(
+ "dvc", "get", dvcRepoUrl, batchYamlPath, "--rev", version, "-o", tempFile.toString(), "--force");
+ Process process = processBuilder.start();
+ // read stderr for error messages
+ String error = new String(process.getErrorStream().readAllBytes(), java.nio.charset.StandardCharsets.UTF_8);
+ boolean finished = process.waitFor(60, TimeUnit.SECONDS);
+ if (!finished) {
+ LOGGER.error("DVC command timed out after 60 seconds");
+ throw new DvcException("DVC command timed out after 60 seconds");
+ }
+ int exitCode = process.exitValue();
+
+ if (exitCode != 0) {
+ LOGGER.error("DVC command failed with exit code {}: {}", exitCode, error);
+ throw new DvcException("Failed to fetch data from DVC (exit code " + exitCode + "): " + error);
+ }
+
+ // force kill process if still running
+ if (process.isAlive()) {
+ process.destroyForcibly();
+ }
+ } finally {
+ isProcessRunning.set(false);
+ }
+ }
+}
diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties
index 3becf57..779daec 100644
--- a/src/main/resources/application.properties
+++ b/src/main/resources/application.properties
@@ -40,6 +40,10 @@ url-validation.request-timeout=30s
sast.ai.batch.job.polling.interval=5000
sast.ai.batch.job.timeout=3600000
+# DVC configuration
+dvc.repo.url=https://github.com/RHEcosystemAppEng/sast-ai-dvc
+# Path of the batch NVR YAML file inside the DVC repository (default value; may change in the future)
+dvc.batch.yaml.path=testing-data-nvrs.yaml
# OSH (Open Scan Hub) Integration Configuration
# Master toggle for OSH integration
osh.integration.enabled=false