Skip to content

Commit 12e6153

Browse files
committed
feat: rename TRUE/FALSE POSITIVE terminology to ISSUE/NON_ISSUE
This change addresses the confusion between business domain terminology and ML classification metrics by introducing a clear separation between the security analysis (ISSUE/NON_ISSUE) and ML evaluation (TRUE_POSITIVE/FALSE_POSITIVE) contexts.
1 parent 151cdcc commit 12e6153

33 files changed

+650
-487
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ HuggingFace model ([all-mpnet-base-v2](https://huggingface.co/sentence-transform
4848

4949
#### Evaluation
5050
- Applies metrics (from Ragas library) to assess the quality of model outputs.
51-
- **Note:** SAST-AI-Workflow is primarily focused on identifying false alarms (False Positives).
51+
- **Note:** SAST-AI-Workflow is primarily focused on identifying false alarms (Non-Issues).
5252

5353

5454
## 🔌 Installation & Setup

config/default_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ EMBEDDINGS_LLM_URL: "http://<<please-set-embedding-llm-url>>"
88
EMBEDDINGS_LLM_MODEL_NAME: "embedding-llm-model"
99

1010
INPUT_REPORT_FILE_PATH: "/path/to/report.html"
11-
KNOWN_FALSE_POSITIVE_FILE_PATH: "/path/to/known_false_positives_file"
11+
KNOWN_NON_ISSUES_FILE_PATH: "/path/to/known_non_issues_file"
1212
OUTPUT_FILE_PATH: "/path/to/output_excel.xlsx"
1313
AGGREGATE_RESULTS_G_SHEET: ""
1414
HUMAN_VERIFIED_FILE_PATH: ""

deploy/Makefile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ TK := tkn --context $(CONTEXT)
1212

1313
# Pipeline parameters (overrideable on the CLI):
1414
REPO_REMOTE_URL ?= source/code/url
15-
FALSE_POSITIVES_URL ?= false/positives/url
15+
NON_ISSUES_URL ?= false/positives/url
1616

1717
LLM_URL ?= http://<<please-set-llm-url>>
1818
LLM_MODEL_NAME ?= llm-model
@@ -163,7 +163,7 @@ run:
163163
$(TK) pipeline start sast-ai-workflow-pipeline \
164164
-n $(NAMESPACE) \
165165
-p REPO_REMOTE_URL="$(REPO_REMOTE_URL)" \
166-
-p FALSE_POSITIVES_URL="$(FALSE_POSITIVES_URL)" \
166+
-p NON_ISSUES_URL="$(NON_ISSUES_URL)" \
167167
-p LLM_URL="$(LLM_URL)" \
168168
-p LLM_MODEL_NAME="$(LLM_MODEL_NAME)" \
169169
-p EMBEDDINGS_LLM_URL="$(EMBEDDINGS_LLM_URL)" \
@@ -172,6 +172,7 @@ run:
172172
-p PROJECT_VERSION="$(PROJECT_VERSION)" \
173173
-p INPUT_REPORT_FILE_PATH="$(INPUT_REPORT_FILE_PATH)" \
174174
-p AGGREGATE_RESULTS_G_SHEET="$(AGGREGATE_RESULTS_G_SHEET)" \
175+
-p USE_KNOWN_NON_ISSUES_FILE="true" \
175176
--workspace name=shared-workspace,claimName=sast-ai-workflow-pvc \
176177
--workspace name=gitlab-token-ws,secret=gitlab-token-secret \
177178
--workspace name=llm-credentials-ws,secret=sast-ai-default-llm-creds \
@@ -187,7 +188,7 @@ run:
187188
echo " tkn --context $(CONTEXT) pipeline start sast-ai-workflow-pipeline \\"; \
188189
echo " -n $(NAMESPACE) \\"; \
189190
echo " -p REPO_REMOTE_URL=\"$(REPO_REMOTE_URL)\" \\"; \
190-
echo " -p FALSE_POSITIVES_URL=\"$(FALSE_POSITIVES_URL)\" \\"; \
191+
echo " -p NON_ISSUES_URL=\"$(NON_ISSUES_URL)\" \\"; \
191192
echo " -p LLM_URL=\"$(LLM_URL)\" \\"; \
192193
echo " -p LLM_MODEL_NAME=\"$(LLM_MODEL_NAME)\" \\"; \
193194
echo " -p EMBEDDINGS_LLM_URL=\"$(EMBEDDINGS_LLM_URL)\" \\"; \
@@ -196,6 +197,7 @@ run:
196197
echo " -p PROJECT_VERSION=\"$(PROJECT_VERSION)\" \\"; \
197198
echo " -p INPUT_REPORT_FILE_PATH=\"$(INPUT_REPORT_FILE_PATH)\" \\"; \
198199
echo " -p AGGREGATE_RESULTS_G_SHEET=\"$(AGGREGATE_RESULTS_G_SHEET)\" \\"; \
200+
echo " -p USE_KNOWN_NON_ISSUES_FILE=\"true\" \\"; \
199201
echo " --workspace name=shared-workspace,claimName=sast-ai-workflow-pvc \\"; \
200202
echo " --workspace name=gitlab-token-ws,secret=gitlab-token-secret \\"; \
201203
echo " --workspace name=llm-credentials-ws,secret=sast-ai-default-llm-creds \\"; \

deploy/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ make all PROJECT_NAME="systemd" \
121121
PROJECT_VERSION="257-9" \
122122
REPO_REMOTE_URL="https://download.devel.redhat.com/brewroot/vol/rhel-10/packages/systemd/257/9.el10/src/systemd-257-9.el10.src.rpm" \
123123
INPUT_REPORT_FILE_PATH="https://docs.google.com/spreadsheets/d/1NPGmERBsSTdHjQK2vEocQ-PvQlRGGLMds02E_RGF8vY/export?format=csv" \
124-
FALSE_POSITIVES_URL="https://gitlab.cee.redhat.com/osh/known-false-positives/-/raw/master/systemd/ignore.err"
124+
NON_ISSUES_URL="https://gitlab.cee.redhat.com/osh/known-false-positives/-/raw/master/systemd/ignore.err"
125125
```
126126

127127
### 6. Step-by-Step Alternative

deploy/tekton/pipeline.yaml

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ spec:
1313
- name: REPO_REMOTE_URL
1414
type: string
1515
description: "Source code URL (RPM package or Git repository URL)"
16-
- name: FALSE_POSITIVES_URL
16+
- name: NON_ISSUES_URL
1717
type: string
18-
description: "GitLab repository URL for known false positives (optional)"
18+
description: "GitLab repository URL for known non-issues (optional)"
1919
- name: INPUT_REPORT_FILE_PATH
2020
type: string
2121
description: "Google Spreadsheet URL or OSH report .html file path"
@@ -39,6 +39,10 @@ spec:
3939
- name: AGGREGATE_RESULTS_G_SHEET
4040
type: string
4141
default: ""
42+
- name: USE_KNOWN_NON_ISSUES_FILE
43+
type: string
44+
description: "Whether to use known non-issues file for filtering (true/false)"
45+
default: "true"
4246
- name: GDRIVE_FOLDER_ID
4347
type: string
4448
description: "Google Drive folder ID for uploading SAST results (optional)"
@@ -63,8 +67,8 @@ spec:
6367
value: "$(params.REPO_REMOTE_URL)"
6468
- name: INPUT_REPORT_FILE_PATH
6569
value: "$(params.INPUT_REPORT_FILE_PATH)"
66-
- name: FALSE_POSITIVES_URL
67-
value: "$(params.FALSE_POSITIVES_URL)"
70+
- name: NON_ISSUES_URL
71+
value: "$(params.NON_ISSUES_URL)"
6872
workspaces:
6973
- name: google-sa-json-ws
7074
workspace: google-sa-json-ws
@@ -87,8 +91,8 @@ spec:
8791
taskRef:
8892
name: fetch-false-positives
8993
params:
90-
- name: FALSE_POSITIVES_URL
91-
value: "$(params.FALSE_POSITIVES_URL)"
94+
- name: NON_ISSUES_URL
95+
value: "$(params.NON_ISSUES_URL)"
9296
workspaces:
9397
- name: false-positives-workspace
9498
workspace: shared-workspace
@@ -121,6 +125,8 @@ spec:
121125
value: "$(params.EMBEDDINGS_LLM_MODEL_NAME)"
122126
- name: AGGREGATE_RESULTS_G_SHEET
123127
value: "$(params.AGGREGATE_RESULTS_G_SHEET)"
128+
- name: USE_KNOWN_NON_ISSUES_FILE
129+
value: "$(params.USE_KNOWN_NON_ISSUES_FILE)"
124130
workspaces:
125131
- name: source-workspace
126132
workspace: shared-workspace

deploy/tekton/pipelinerun.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ spec:
1212
value: "project-version"
1313
- name: REPO_REMOTE_URL
1414
value: "rpm-package-url"
15-
- name: FALSE_POSITIVES_URL
15+
- name: NON_ISSUES_URL
1616
value: "git-lab-url"
1717
- name: LLM_URL
1818
value: ""
@@ -24,6 +24,8 @@ spec:
2424
value: "embedding-llm-model"
2525
- name: INPUT_REPORT_FILE_PATH
2626
value: "input-report"
27+
- name: USE_KNOWN_NON_ISSUES_FILE
28+
value: "true"
2729
workspaces:
2830
- name: shared-workspace
2931
persistentVolumeClaim:

deploy/tekton/tasks/execute_sast_ai_workflow.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ spec:
3333
- name: EMBEDDINGS_LLM_MODEL_NAME
3434
type: string
3535
default: ""
36-
- name: USE_KNOWN_FALSE_POSITIVE_FILE
36+
- name: USE_KNOWN_NON_ISSUES_FILE
3737
type: string
38-
description: "Whether to use known false positive file for filtering (true/false)"
38+
description: "Whether to use known non-issues file for filtering (true/false)"
3939
default: "true"
4040
workspaces:
4141
- name: source-workspace
@@ -53,10 +53,10 @@ spec:
5353
value: "$(params.PROJECT_VERSION)"
5454
- name: REPO_LOCAL_PATH
5555
value: "$(params.REPO_LOCAL_PATH)"
56-
- name: KNOWN_FALSE_POSITIVE_FILE_PATH
56+
- name: KNOWN_NON_ISSUES_FILE_PATH
5757
value: "$(workspaces.false-positives-workspace.path)/ignore.err"
58-
- name: USE_KNOWN_FALSE_POSITIVE_FILE
59-
value: "$(params.USE_KNOWN_FALSE_POSITIVE_FILE)"
58+
- name: USE_KNOWN_NON_ISSUES_FILE
59+
value: "$(params.USE_KNOWN_NON_ISSUES_FILE)"
6060
- name: INPUT_REPORT_FILE_PATH
6161
value: "$(params.INPUT_REPORT_FILE_PATH)"
6262
- name: AGGREGATE_RESULTS_G_SHEET

deploy/tekton/tasks/fetch_false_positives.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ metadata:
44
name: fetch-false-positives
55
spec:
66
params:
7-
- name: FALSE_POSITIVES_URL
7+
- name: NON_ISSUES_URL
88
type: string
9-
description: "Optional GitLab URL containing known false positives"
9+
description: "Optional GitLab URL containing known non-issues"
1010
workspaces:
1111
- name: false-positives-workspace
1212
description: "Workspace to store the downloaded ignore.err"
@@ -19,7 +19,7 @@ spec:
1919
script: |
2020
#!/usr/bin/env sh
2121
set -euo pipefail
22-
FP_URL="$(params.FALSE_POSITIVES_URL)"
22+
FP_URL="$(params.NON_ISSUES_URL)"
2323
if [ -z "$FP_URL" ]; then
2424
echo "No falsePositivesUrl provided; skipping fetch..."
2525
exit 0
@@ -31,11 +31,11 @@ spec:
3131
GITLAB_TOKEN=$(cat "$TOKEN_FILE")
3232
echo "GitLab token found. Fetching file with authentication..."
3333
curl --retry 3 --retry-delay 5 -k -H "PRIVATE-TOKEN: $GITLAB_TOKEN" -fL "$FP_URL" -o "$(workspaces.false-positives-workspace.path)/ignore.err" \
34-
|| (echo "Error: Could not fetch false positives file with token." && exit 1)
34+
|| (echo "Error: Could not fetch non-issues file with token." && exit 1)
3535
else
3636
echo "No GitLab token file found; attempting unauthenticated fetch..."
3737
curl --retry 3 --retry-delay 5 -k -fL "$FP_URL" -o "$(workspaces.false-positives-workspace.path)/ignore.err" \
38-
|| (echo "Error: Could not fetch false positives file unauthenticated." && exit 1)
38+
|| (echo "Error: Could not fetch non-issues file unauthenticated." && exit 1)
3939
fi
4040
if [ -f "$(workspaces.false-positives-workspace.path)/ignore.err" ]; then
4141
echo "Success"

deploy/tekton/tasks/validate_urls.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ spec:
1010
- name: INPUT_REPORT_FILE_PATH
1111
type: string
1212
description: "Google Spreadsheet URL or OSH report .html file path"
13-
- name: FALSE_POSITIVES_URL
13+
- name: NON_ISSUES_URL
1414
type: string
15-
description: "GitLab URL containing known false positive issues"
15+
description: "GitLab URL containing known non-issue cases"
1616
workspaces:
1717
- name: google-sa-json-ws
1818
description: "Workspace containing Google service account key file"
@@ -29,14 +29,14 @@ spec:
2929
image: curlimages/curl:latest
3030
script: |
3131
#!/usr/bin/env sh
32-
FP_URL="$(params.FALSE_POSITIVES_URL)"
33-
echo "Validating false positives URL: $FP_URL"
32+
FP_URL="$(params.NON_ISSUES_URL)"
33+
echo "Validating non-issues URL: $FP_URL"
3434
# If blank, skip. If not, ensure it's valid
3535
if [ -z "$FP_URL" ]; then
36-
echo "No false positives URL provided; skipping validation."
36+
echo "No non-issues URL provided; skipping validation."
3737
exit 0
3838
fi
39-
curl -ksSfL "$FP_URL" || (echo "Error: False positives URL is invalid" && exit 1)
39+
curl -ksSfL "$FP_URL" || (echo "Error: Non-issues URL is invalid" && exit 1)
4040
4141
- name: validate-report-file
4242
image: 'python:3.11-slim'

docs/setup.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,14 @@ podman run -d --name sast-ai-app \
8585
-e EMBEDDINGS_API_KEY=<your_key> \
8686
-e EMBEDDINGS_LLM_MODEL_NAME=<<embeddings-llm-model-name>> \
8787
-e INPUT_REPORT_FILE_PATH=https://docs.google.com/spreadsheets/d/<sheet-id> \
88-
-e KNOWN_FALSE_POSITIVE_FILE_PATH=/path/to/ignore.err \
88+
-e KNOWN_NON_ISSUES_FILE_PATH=/path/to/ignore.err \
8989
-e OUTPUT_FILE_PATH=https://docs.google.com/spreadsheets/d/<sheet-id> \
9090
quay.io/ecosystem-appeng/sast-ai-workflow:latest
9191
```
9292
Replace <your_key> with the actual LLM API key.
9393
9494
> **Note:**
95-
> Make sure the file paths required by the application (e.g., the HTML report, known false positives, etc.) point to the correct locations inside the container. For instance, if these files are copied into `/app`, update your configuration to reference `/app/<filename>` rather than the host paths.
95+
> Make sure the file paths required by the application (e.g., the HTML report, known non-issues, etc.) point to the correct locations inside the container. For instance, if these files are copied into `/app`, update your configuration to reference `/app/<filename>` rather than the host paths.
9696
>
9797
> If you ever need to run an interactive shell in your container (overriding the default entrypoint), use:
9898
>
@@ -117,7 +117,7 @@ environment variables.
117117
| EMBEDDINGS_LLM_URL | http://\<<please-set-embedding-llm-url\>> || https://integrate.api.nvidia.com/v1 | URL of the embedding model endpoint. |
118118
| EMBEDDINGS_LLM_MODEL_NAME | \<<please-set-embeddings-llm-model-name\>>|| all-mpnet-base-v2 | Model used for generating embeddings. |
119119
| INPUT_REPORT_FILE_PATH | /path/to/report.html || /path/to/report.html or https://docs.google.com/spreadsheets/d/\<sheet-id\> | Path to the SAST HTML report or URL of a Google Sheet containing the report. |
120-
| KNOWN_FALSE_POSITIVE_FILE_PATH | /path/to/ignore.err || /path/to/ignore.err | Path to the file containing known false positives data. |
120+
| KNOWN_NON_ISSUES_FILE_PATH | /path/to/ignore.err || /path/to/ignore.err | Path to the file containing known non-issues data. |
121121
| OUTPUT_FILE_PATH | /path/to/output_excel.xlsx || /path/to/output.xlsx | Path where the generated Excel report will be saved. |
122122
| LIBCLANG_PATH | /path/to/libclang || /usr/lib/llvm-12/lib/libclang.so | Path to your libclang location. |
123123
| COMPILE_COMMANDS_JSON_PATH | /path/to/compile_commands.json | | /path/to/compile_commands.json | Path to the generated `compile_commands.json` file for the analyzed project. Required only for C projects. |
@@ -126,7 +126,7 @@ environment variables.
126126
| CHUNK_SEPARATORS | ["\n\n", "\n", ".", ";", ",", " ", ""] | | ["\n\n", "\n", ".", ";"] | Ordered list of separators to use when splitting text into chunks. |
127127
| CONFIG_H_PATH | /path/to/config.h | | /path/to/config.h | *(Optional)* Path to the generated `config.h` containing macro definitions. Used for accurate Clang parsing, but not strictly required. |
128128
| SERVICE_ACCOUNT_JSON_PATH | "" | | /path/to/sheet-access-bot-abc123.json | Path to the JSON file for the Google service account used to access Google Sheets. Mandatory only if using a Google Sheet as input. |
129-
| USE_KNOWN_FALSE_POSITIVE_FILE | true | | true | Flag indicating whether to use the known false positives file in the pipeline as an input. |
129+
| USE_KNOWN_NON_ISSUES_FILE | true | | true | Flag indicating whether to use the known non-issues file in the pipeline as an input. |
130130
| SIMILARITY_ERROR_THRESHOLD | 2 | | 3 | Number of Documents to return from known issues DB. |
131131
| MAX_ANALYSIS_ITERATIONS | 2 | | 3 | Maximum number of analysis loops allowed for any single issue. |
132132
| WRITE_RESULTS_INCLUDE_NON_FINAL | true | | true | Whether to include issues with is_final="FALSE" |

0 commit comments

Comments
 (0)