diff --git a/deploy/sast-ai-chart/templates/deployment.yaml b/deploy/sast-ai-chart/templates/deployment.yaml
index 27743d1..e75aba2 100644
--- a/deploy/sast-ai-chart/templates/deployment.yaml
+++ b/deploy/sast-ai-chart/templates/deployment.yaml
@@ -54,6 +54,22 @@ spec:
                 secretKeyRef:
                   name: {{ include "sast-ai.postgresql.secretName" . }}
                   key: {{ include "sast-ai.postgresql.secretKey" . }}
+            # AWS S3 credentials for DVC (MinIO)
+            - name: AWS_ACCESS_KEY_ID
+              valueFrom:
+                secretKeyRef:
+                  name: sast-ai-s3-credentials
+                  key: access_key_id
+            - name: AWS_SECRET_ACCESS_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: sast-ai-s3-credentials
+                  key: secret_access_key
+            - name: AWS_S3_ENDPOINT_URL
+              valueFrom:
+                secretKeyRef:
+                  name: sast-ai-s3-credentials
+                  key: endpoint_url
             # Application environment variables
             {{- range $key, $value := .Values.app.env }}
             - name: {{ $key }}
diff --git a/pom.xml b/pom.xml
index eea9339..1728363 100644
--- a/pom.xml
+++ b/pom.xml
@@ -162,6 +162,12 @@
             <artifactId>quarkus-jacoco</artifactId>
             <scope>test</scope>
         </dependency>
+        <!-- SnakeYAML: parses the NVR batch YAML fetched from DVC -->
+        <dependency>
+            <groupId>org.yaml</groupId>
+            <artifactId>snakeyaml</artifactId>
+            <version>2.5</version>
+        </dependency>
         <dependency>
             <groupId>org.jsoup</groupId>
             <artifactId>jsoup</artifactId>
diff --git a/src/main/docker/Dockerfile.jvm b/src/main/docker/Dockerfile.jvm
index 69c976e..bea6962 100644
--- a/src/main/docker/Dockerfile.jvm
+++ b/src/main/docker/Dockerfile.jvm
@@ -82,6 +82,14 @@ FROM registry.access.redhat.com/ubi9/openjdk-21:1.21
 
 ENV LANGUAGE='en_US:en'
 
+# Install DVC CLI and dependencies for data version control
+USER 0
+RUN microdnf install -y python3 python3-pip git && \
+    pip3 install --no-cache-dir dvc dvc-s3 && \
+    microdnf clean all && \
+    chown 185:185 /usr/local/bin/dvc && \
+    chmod 700 /usr/local/bin/dvc
+USER 185
 
 # We make four distinct layers so if there are application changes the library layers can be re-used
 COPY --chown=185 target/quarkus-app/lib/ /deployments/lib/
diff --git a/src/main/java/com/redhat/sast/api/exceptions/DvcException.java b/src/main/java/com/redhat/sast/api/exceptions/DvcException.java
new file mode 100644
index 0000000..d17da6f
--- /dev/null
+++ b/src/main/java/com/redhat/sast/api/exceptions/DvcException.java
@@ -0,0 +1,28 @@
+package com.redhat.sast.api.exceptions;
+
+/**
+ * Exception thrown when DVC operations fail.
+ * This includes failures in fetching data from DVC repositories,
+ * parsing DVC YAML files, or DVC command execution errors.
+ */
+public class DvcException extends RuntimeException {
+
+    /**
+     * Constructs a new DvcException with the specified detail message.
+     *
+     * @param message the detail message
+     */
+    public DvcException(String message) {
+        super(message);
+    }
+
+    /**
+     * Constructs a new DvcException with the specified detail message and cause.
+     *
+     * @param message the detail message
+     * @param cause the cause of this exception
+     */
+    public DvcException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
diff --git a/src/main/java/com/redhat/sast/api/service/DvcService.java b/src/main/java/com/redhat/sast/api/service/DvcService.java
new file mode 100644
--- /dev/null
+++ b/src/main/java/com/redhat/sast/api/service/DvcService.java
@@ -0,0 +1,160 @@
+package com.redhat.sast.api.service;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.*;
+import java.util.regex.Pattern;
+
+import org.eclipse.microprofile.config.inject.ConfigProperty;
+import org.yaml.snakeyaml.LoaderOptions;
+import org.yaml.snakeyaml.Yaml;
+import org.yaml.snakeyaml.constructor.SafeConstructor;
+
+import com.redhat.sast.api.exceptions.DvcException;
+import com.redhat.sast.api.util.dvc.ProcessExecutor;
+
+import jakarta.annotation.Nonnull;
+import jakarta.enterprise.context.ApplicationScoped;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Reads the list of package NVRs for a given DVC revision by fetching a YAML file with the DVC CLI.
+ */
+@ApplicationScoped
+@Slf4j
+public class DvcService {
+
+    /** Longest accepted version tag; checked before the regex so matching stays cheap. */
+    private static final int MAX_VERSION_LENGTH = 100;
+
+    /** Accepted tag shapes: semantic version (optional "v"), plain identifier, or yyyy-MM-dd date. */
+    private static final Pattern VERSION_PATTERN = Pattern.compile(
+            "^(v?\\d+\\.\\d+\\.\\d+(?:-[a-zA-Z0-9]+)?(?:\\+[a-zA-Z0-9]+)?|[a-zA-Z][a-zA-Z0-9_-]{0,49}|\\d{4}-\\d{2}-\\d{2})$");
+
+    @ConfigProperty(name = "dvc.repo.url")
+    String dvcRepoUrl;
+
+    @ConfigProperty(name = "dvc.batch.yaml.path")
+    String batchYamlPath;
+
+    /**
+     * Gets the list of NVRs stored in the DVC repository at the given version tag.
+     *
+     * <p>The YAML may be either a flat list of NVR strings or a map whose values are lists of NVR
+     * strings; duplicates are removed and first-seen order is kept.
+     *
+     * @param version DVC version tag (e.g., "1.0.0" or "v1.0.0")
+     * @return unmodifiable list of package NVR strings (empty list if no NVRs found)
+     * @throws DvcException if the DVC fetch fails, the YAML cannot be parsed, or it has an unexpected shape
+     * @throws IllegalArgumentException if version is null, empty, or not a valid tag
+     */
+    public List<String> getNvrList(@Nonnull String version) {
+        String yamlContent = fetchNvrConfigFromDvc(version);
+        LOGGER.debug("Raw YAML content from DVC ({} bytes)", yamlContent.length());
+
+        Object parsed;
+        try {
+            // SafeConstructor restricts the loader to plain maps, lists and scalars.
+            Yaml yaml = new Yaml(new SafeConstructor(new LoaderOptions()));
+            parsed = yaml.load(yamlContent);
+        } catch (RuntimeException e) {
+            throw new DvcException("Failed to parse YAML content for version " + version, e);
+        }
+
+        // LinkedHashSet: de-duplicates while keeping the order the NVRs appear in the file.
+        Set<String> nvrSet = new LinkedHashSet<>();
+        if (parsed instanceof Map<?, ?> map) {
+            // YAML has a map structure: every value must be a list of NVR strings
+            for (Object value : map.values()) {
+                collectNvrs(value, nvrSet, version);
+            }
+        } else if (parsed instanceof List<?>) {
+            // YAML is just a list of NVRs
+            collectNvrs(parsed, nvrSet, version);
+        }
+
+        if (nvrSet.isEmpty()) {
+            LOGGER.warn("No NVRs found in YAML for DVC version {}", version);
+            return List.of();
+        }
+        return List.copyOf(nvrSet);
+    }
+
+    /**
+     * Adds every element of {@code candidate} to {@code target}, checking the runtime types explicitly
+     * because generic casts are erased and would let non-string values through unnoticed.
+     *
+     * @throws DvcException if {@code candidate} is not a list or contains a non-string element
+     */
+    private static void collectNvrs(Object candidate, Set<String> target, String version) {
+        if (!(candidate instanceof List<?> entries)) {
+            throw unexpectedStructure(version);
+        }
+        for (Object entry : entries) {
+            if (!(entry instanceof String nvr)) {
+                throw unexpectedStructure(version);
+            }
+            target.add(nvr);
+        }
+    }
+
+    /** Builds the exception reported when the YAML does not have one of the supported shapes. */
+    private static DvcException unexpectedStructure(String version) {
+        return new DvcException("Unexpected data type while parsing YAML for version " + version
+                + " (expected Map<String, List<String>> or List<String>)");
+    }
+
+    /**
+     * Validates repo tag version - expected semantic version (v1.0.0) or valid identifier.
+     *
+     * @throws IllegalArgumentException if the version is null, blank, too long, or malformed
+     */
+    private void validateDvcInputs(String version) {
+        if (version == null || version.isBlank()) {
+            throw new IllegalArgumentException("DVC version must not be null or empty");
+        }
+        // Prevent ReDoS by limiting input length
+        if (version.length() > MAX_VERSION_LENGTH) {
+            String displayVersion = version.substring(0, 50) + "...";
+            throw new IllegalArgumentException("DVC version too long (max 100 characters): " + displayVersion);
+        }
+
+        if (!VERSION_PATTERN.matcher(version).matches()) {
+            throw new IllegalArgumentException("Invalid DVC version format: '" + version
+                    + "' - expected semantic version (v1.0.0) or valid identifier");
+        }
+    }
+
+    /**
+     * Runs {@code dvc get} for the configured repo/path at {@code version} and returns the file content.
+     *
+     * @throws DvcException on I/O failure, interruption, or a failing DVC command
+     */
+    private String fetchNvrConfigFromDvc(@Nonnull String version) {
+        validateDvcInputs(version);
+        LOGGER.debug("Executing DVC get command: repo={}, path={}, version={}", dvcRepoUrl, batchYamlPath, version);
+        Path tempFile = null;
+        try {
+            tempFile = Files.createTempFile("dvc-fetch-", ".tmp");
+            ProcessExecutor.runDvcCommand(dvcRepoUrl, batchYamlPath, version, tempFile);
+            // read the content that the DVC command wrote into the temp file
+            return Files.readString(tempFile, StandardCharsets.UTF_8);
+        } catch (IOException e) {
+            throw new DvcException("I/O error during DVC fetch operation", e);
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            throw new DvcException("DVC fetch operation was interrupted", e);
+        } finally {
+            // clean up temp file
+            if (tempFile != null) {
+                try {
+                    Files.deleteIfExists(tempFile);
+                } catch (IOException e) {
+                    LOGGER.warn("[ACTION REQUIRED] Failed to delete temp file: {}", tempFile, e);
+                }
+            }
+        }
+    }
+}
diff --git a/src/main/java/com/redhat/sast/api/util/dvc/ProcessExecutor.java b/src/main/java/com/redhat/sast/api/util/dvc/ProcessExecutor.java
new file mode 100644
--- /dev/null
+++ b/src/main/java/com/redhat/sast/api/util/dvc/ProcessExecutor.java
@@ -0,0 +1,87 @@
+package com.redhat.sast.api.util.dvc;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import com.redhat.sast.api.exceptions.DvcException;
+
+import jakarta.enterprise.context.ApplicationScoped;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Runs the DVC CLI as a child process; at most one DVC command runs at a time per JVM.
+ */
+@ApplicationScoped
+@Slf4j
+public class ProcessExecutor {
+
+    /** Maximum time to wait for the DVC command before it is killed. */
+    private static final long TIMEOUT_SECONDS = 60;
+
+    /** Guard flag: true while a DVC command is in flight. */
+    private static final AtomicBoolean isProcessRunning = new AtomicBoolean(false);
+
+    /**
+     * Executes {@code dvc get <repo> <path> --rev <version> -o <tempFile> --force}.
+     *
+     * @param dvcRepoUrl URL of the DVC repository
+     * @param batchYamlPath path of the file inside the repository
+     * @param version revision (tag) to fetch
+     * @param tempFile destination file that DVC overwrites with the fetched content
+     * @throws DvcException if another DVC command is running, the command times out, or it exits non-zero
+     * @throws InterruptedException if the calling thread is interrupted while waiting
+     * @throws IOException if the process cannot be started or its output cannot be read
+     */
+    public static void runDvcCommand(String dvcRepoUrl, String batchYamlPath, String version, Path tempFile)
+            throws InterruptedException, IOException {
+
+        // Atomic test-and-set; a separate get() and set(true) would let two threads both pass the check.
+        if (!isProcessRunning.compareAndSet(false, true)) {
+            throw new DvcException("DVC command is already running...");
+        }
+
+        Path stderrFile = null;
+        Process process = null;
+        try {
+            stderrFile = Files.createTempFile("dvc-stderr-", ".log");
+            ProcessBuilder processBuilder = new ProcessBuilder(
+                    "dvc", "get", dvcRepoUrl, batchYamlPath, "--rev", version, "-o", tempFile.toString(), "--force");
+            // Never read the pipes synchronously: draining stderr to EOF before waitFor() blocks until the
+            // process exits (so the timeout never fires), and an unread stdout pipe can stall the child.
+            processBuilder.redirectOutput(ProcessBuilder.Redirect.DISCARD);
+            processBuilder.redirectError(stderrFile.toFile());
+            process = processBuilder.start();
+
+            if (!process.waitFor(TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
+                LOGGER.error("DVC command timed out after 60 seconds");
+                throw new DvcException("DVC command timed out after 60 seconds");
+            }
+
+            int exitCode = process.exitValue();
+            if (exitCode != 0) {
+                // read stderr for error messages
+                String error = new String(Files.readAllBytes(stderrFile), StandardCharsets.UTF_8);
+                LOGGER.error("DVC command failed with exit code {}: {}", exitCode, error);
+                throw new DvcException("Failed to fetch data from DVC (exit code " + exitCode + "): " + error);
+            }
+        } finally {
+            try {
+                // force kill process if still running (timeout or interruption)
+                if (process != null && process.isAlive()) {
+                    process.destroyForcibly();
+                }
+                if (stderrFile != null) {
+                    Files.deleteIfExists(stderrFile);
+                }
+            } catch (IOException e) {
+                LOGGER.warn("Failed to delete temp file: {}", stderrFile, e);
+            } finally {
+                isProcessRunning.set(false);
+            }
+        }
+    }
+}
diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties
index 3becf57..779daec 100644
--- a/src/main/resources/application.properties
+++ b/src/main/resources/application.properties
@@ -40,6 +40,10 @@ url-validation.request-timeout=30s
 sast.ai.batch.job.polling.interval=5000
 sast.ai.batch.job.timeout=3600000
 
+# DVC configuration
+dvc.repo.url=https://github.com/RHEcosystemAppEng/sast-ai-dvc
+# default value - might change in future
+dvc.batch.yaml.path=testing-data-nvrs.yaml
 # OSH (Open Scan Hub) Integration Configuration
 # Master toggle for OSH integration
 osh.integration.enabled=false