Skip to content

Commit 55482ee

Browse files
committed
feat: rename TRUE/FALSE POSITIVE terminology to ISSUE/NON_ISSUE
This change addresses the confusion between business domain terminology and ML classification metrics by introducing a clear separation between the security analysis context (ISSUE/NON_ISSUE) and the ML evaluation context (TRUE_POSITIVE/FALSE_POSITIVE).
1 parent 7463e43 commit 55482ee

34 files changed

+722
-491
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ HuggingFace model ([all-mpnet-base-v2](https://huggingface.co/sentence-transform
4848

4949
#### Evaluation
5050
- Applies metrics (from Ragas library) to assess the quality of model outputs.
51-
- **Note:** SAST-AI-Workflow is primarily focused on identifying false alarms (False Positives).
51+
- **Note:** SAST-AI-Workflow is primarily focused on identifying false alarms (Non-Issues).
5252

5353

5454
## 🔌 Installation & Setup

config/default_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ EMBEDDINGS_LLM_URL: "http://<<please-set-embedding-llm-url>>"
88
EMBEDDINGS_LLM_MODEL_NAME: "embedding-llm-model"
99

1010
INPUT_REPORT_FILE_PATH: "/path/to/report.html"
11-
KNOWN_FALSE_POSITIVE_FILE_PATH: "/path/to/known_false_positives_file"
11+
KNOWN_NON_ISSUES_FILE_PATH: "/path/to/known_non_issues_file"
1212
OUTPUT_FILE_PATH: "/path/to/output_excel.xlsx"
1313
AGGREGATE_RESULTS_G_SHEET: ""
1414
HUMAN_VERIFIED_FILE_PATH: ""

deploy/Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ CO := oc --context $(CONTEXT)
1111

1212
# Pipeline parameters (overrideable on the CLI):
1313
REPO_REMOTE_URL ?= source/code/url
14-
FALSE_POSITIVES_URL ?= false/positives/url
14+
NON_ISSUES_URL ?= false/positives/url
1515

1616
LLM_URL ?= http://<<please-set-llm-url>>
1717
LLM_MODEL_NAME ?= llm-model
@@ -168,13 +168,14 @@ run:
168168
-e 's|PROJECT_NAME_PLACEHOLDER|$(PROJECT_NAME)|g' \
169169
-e 's|PROJECT_VERSION_PLACEHOLDER|$(PROJECT_VERSION)|g' \
170170
-e 's|REPO_REMOTE_URL_PLACEHOLDER|$(REPO_REMOTE_URL)|g' \
171-
-e 's|FALSE_POSITIVES_URL_PLACEHOLDER|$(FALSE_POSITIVES_URL)|g' \
171+
-e 's|NON_ISSUES_URL_PLACEHOLDER|$(NON_ISSUES_URL)|g' \
172172
-e 's|LLM_URL_PLACEHOLDER|$(LLM_URL)|g' \
173173
-e 's|LLM_MODEL_NAME_PLACEHOLDER|$(LLM_MODEL_NAME)|g' \
174174
-e 's|EMBEDDINGS_LLM_URL_PLACEHOLDER|$(EMBEDDINGS_LLM_URL)|g' \
175175
-e 's|EMBEDDINGS_LLM_MODEL_NAME_PLACEHOLDER|$(EMBEDDINGS_LLM_MODEL_NAME)|g' \
176176
-e 's|INPUT_REPORT_FILE_PATH_PLACEHOLDER|$(INPUT_REPORT_FILE_PATH)|g' \
177177
-e 's|AGGREGATE_RESULTS_G_SHEET_PLACEHOLDER|$(AGGREGATE_RESULTS_G_SHEET)|g' \
178+
-e 's|USE_KNOWN_NON_ISSUES_FILE_PLACEHOLDER|$(USE_KNOWN_NON_ISSUES_FILE)|g' \
178179
tekton/pipelinerun.yaml > tekton/pipelinerun-temp.yaml
179180
@$(CO) apply -n $(NAMESPACE) -f tekton/pipelinerun-temp.yaml > /dev/null 2>&1
180181
@rm -f tekton/pipelinerun-temp.yaml

deploy/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ make all PROJECT_NAME="systemd" \
117117
PROJECT_VERSION="257-9" \
118118
REPO_REMOTE_URL="https://download.devel.redhat.com/brewroot/vol/rhel-10/packages/systemd/257/9.el10/src/systemd-257-9.el10.src.rpm" \
119119
INPUT_REPORT_FILE_PATH="https://docs.google.com/spreadsheets/d/1NPGmERBsSTdHjQK2vEocQ-PvQlRGGLMds02E_RGF8vY/export?format=csv" \
120-
FALSE_POSITIVES_URL="https://gitlab.cee.redhat.com/osh/known-false-positives/-/raw/master/systemd/ignore.err"
120+
NON_ISSUES_URL="https://gitlab.cee.redhat.com/osh/known-false-positives/-/raw/master/systemd/ignore.err"
121121
```
122122

123123
### 6. Step-by-Step Alternative

deploy/tekton/pipeline.yaml

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ spec:
1313
- name: REPO_REMOTE_URL
1414
type: string
1515
description: "Source code URL (RPM package or Git repository URL)"
16-
- name: FALSE_POSITIVES_URL
16+
- name: NON_ISSUES_URL
1717
type: string
18-
description: "GitLab repository URL for known false positives (optional)"
18+
description: "GitLab repository URL for known non-issues (optional)"
1919
- name: INPUT_REPORT_FILE_PATH
2020
type: string
2121
description: "Google Spreadsheet URL or OSH report .html file path"
@@ -39,6 +39,10 @@ spec:
3939
- name: AGGREGATE_RESULTS_G_SHEET
4040
type: string
4141
default: ""
42+
- name: USE_KNOWN_NON_ISSUES_FILE
43+
type: string
44+
description: "Whether to use known non-issues file for filtering (true/false)"
45+
default: "true"
4246
- name: GDRIVE_FOLDER_ID
4347
type: string
4448
description: "Google Drive folder ID for uploading SAST results (optional)"
@@ -67,8 +71,8 @@ spec:
6771
value: "$(params.REPO_REMOTE_URL)"
6872
- name: INPUT_REPORT_FILE_PATH
6973
value: "$(params.INPUT_REPORT_FILE_PATH)"
70-
- name: FALSE_POSITIVES_URL
71-
value: "$(params.FALSE_POSITIVES_URL)"
74+
- name: NON_ISSUES_URL
75+
value: "$(params.NON_ISSUES_URL)"
7276
workspaces:
7377
- name: google-sa-json-ws
7478
workspace: google-sa-json-ws
@@ -91,8 +95,8 @@ spec:
9195
taskRef:
9296
name: fetch-false-positives
9397
params:
94-
- name: FALSE_POSITIVES_URL
95-
value: "$(params.FALSE_POSITIVES_URL)"
98+
- name: NON_ISSUES_URL
99+
value: "$(params.NON_ISSUES_URL)"
96100
workspaces:
97101
- name: false-positives-workspace
98102
workspace: shared-workspace
@@ -125,8 +129,8 @@ spec:
125129
value: "$(params.EMBEDDINGS_LLM_MODEL_NAME)"
126130
- name: AGGREGATE_RESULTS_G_SHEET
127131
value: "$(params.AGGREGATE_RESULTS_G_SHEET)"
128-
- name: USE_KNOWN_FALSE_POSITIVE_FILE
129-
value: "$(params.USE_KNOWN_FALSE_POSITIVE_FILE)"
132+
- name: USE_KNOWN_NON_ISSUES_FILE
133+
value: "$(params.USE_KNOWN_NON_ISSUES_FILE)"
130134
workspaces:
131135
- name: source-workspace
132136
workspace: shared-workspace

deploy/tekton/pipelinerun.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ spec:
1212
value: "PROJECT_VERSION_PLACEHOLDER"
1313
- name: REPO_REMOTE_URL
1414
value: "REPO_REMOTE_URL_PLACEHOLDER"
15-
- name: FALSE_POSITIVES_URL
16-
value: "FALSE_POSITIVES_URL_PLACEHOLDER"
15+
- name: NON_ISSUES_URL
16+
value: "NON_ISSUES_URL_PLACEHOLDER"
1717
- name: LLM_URL
1818
value: "LLM_URL_PLACEHOLDER"
1919
- name: LLM_MODEL_NAME

deploy/tekton/tasks/execute_sast_ai_workflow.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ spec:
3333
- name: EMBEDDINGS_LLM_MODEL_NAME
3434
type: string
3535
default: ""
36-
- name: USE_KNOWN_FALSE_POSITIVE_FILE
36+
- name: USE_KNOWN_NON_ISSUES_FILE
3737
type: string
38-
description: "Whether to use known false positive file for filtering (true/false)"
38+
description: "Whether to use known non-issues file for filtering (true/false)"
3939
default: "true"
4040
workspaces:
4141
- name: source-workspace
@@ -53,10 +53,10 @@ spec:
5353
value: "$(params.PROJECT_VERSION)"
5454
- name: REPO_LOCAL_PATH
5555
value: "$(params.REPO_LOCAL_PATH)"
56-
- name: KNOWN_FALSE_POSITIVE_FILE_PATH
56+
- name: KNOWN_NON_ISSUES_FILE_PATH
5757
value: "$(workspaces.false-positives-workspace.path)/ignore.err"
58-
- name: USE_KNOWN_FALSE_POSITIVE_FILE
59-
value: "$(params.USE_KNOWN_FALSE_POSITIVE_FILE)"
58+
- name: USE_KNOWN_NON_ISSUES_FILE
59+
value: "$(params.USE_KNOWN_NON_ISSUES_FILE)"
6060
- name: INPUT_REPORT_FILE_PATH
6161
value: "$(params.INPUT_REPORT_FILE_PATH)"
6262
- name: AGGREGATE_RESULTS_G_SHEET

deploy/tekton/tasks/fetch_false_positives.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ metadata:
44
name: fetch-false-positives
55
spec:
66
params:
7-
- name: FALSE_POSITIVES_URL
7+
- name: NON_ISSUES_URL
88
type: string
9-
description: "Optional GitLab URL containing known false positives"
9+
description: "Optional GitLab URL containing known non-issues"
1010
workspaces:
1111
- name: false-positives-workspace
1212
description: "Workspace to store the downloaded ignore.err"
@@ -19,7 +19,7 @@ spec:
1919
script: |
2020
#!/usr/bin/env sh
2121
set -euo pipefail
22-
FP_URL="$(params.FALSE_POSITIVES_URL)"
22+
FP_URL="$(params.NON_ISSUES_URL)"
2323
if [ -z "$FP_URL" ]; then
2424
echo "No falsePositivesUrl provided; skipping fetch..."
2525
exit 0
@@ -31,11 +31,11 @@ spec:
3131
GITLAB_TOKEN=$(cat "$TOKEN_FILE")
3232
echo "GitLab token found. Fetching file with authentication..."
3333
curl --retry 3 --retry-delay 5 -k -H "PRIVATE-TOKEN: $GITLAB_TOKEN" -fL "$FP_URL" -o "$(workspaces.false-positives-workspace.path)/ignore.err" \
34-
|| (echo "Error: Could not fetch false positives file with token." && exit 1)
34+
|| (echo "Error: Could not fetch non-issues file with token." && exit 1)
3535
else
3636
echo "No GitLab token file found; attempting unauthenticated fetch..."
3737
curl --retry 3 --retry-delay 5 -k -fL "$FP_URL" -o "$(workspaces.false-positives-workspace.path)/ignore.err" \
38-
|| (echo "Error: Could not fetch false positives file unauthenticated." && exit 1)
38+
|| (echo "Error: Could not fetch non-issues file unauthenticated." && exit 1)
3939
fi
4040
if [ -f "$(workspaces.false-positives-workspace.path)/ignore.err" ]; then
4141
echo "Success"

deploy/tekton/tasks/validate_urls.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ spec:
1010
- name: INPUT_REPORT_FILE_PATH
1111
type: string
1212
description: "Google Spreadsheet URL or OSH report .html file path"
13-
- name: FALSE_POSITIVES_URL
13+
- name: NON_ISSUES_URL
1414
type: string
15-
description: "GitLab URL containing known false positive issues"
15+
description: "GitLab URL containing known non-issue cases"
1616
workspaces:
1717
- name: google-sa-json-ws
1818
description: "Workspace containing Google service account key file"
@@ -29,14 +29,14 @@ spec:
2929
image: curlimages/curl:latest
3030
script: |
3131
#!/usr/bin/env sh
32-
FP_URL="$(params.FALSE_POSITIVES_URL)"
33-
echo "Validating false positives URL: $FP_URL"
32+
FP_URL="$(params.NON_ISSUES_URL)"
33+
echo "Validating non-issues URL: $FP_URL"
3434
# If blank, skip. If not, ensure it's valid
3535
if [ -z "$FP_URL" ]; then
36-
echo "No false positives URL provided; skipping validation."
36+
echo "No non-issues URL provided; skipping validation."
3737
exit 0
3838
fi
39-
curl -ksSfL "$FP_URL" || (echo "Error: False positives URL is invalid" && exit 1)
39+
curl -ksSfL "$FP_URL" || (echo "Error: Non-issues URL is invalid" && exit 1)
4040
4141
- name: validate-report-file
4242
image: 'python:3.11-slim'

docs/setup.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,14 +114,14 @@ podman run -d --name sast-ai-app \
114114
-e EMBEDDINGS_API_KEY=<your_key> \
115115
-e EMBEDDINGS_LLM_MODEL_NAME=<<embeddings-llm-model-name>> \
116116
-e INPUT_REPORT_FILE_PATH=https://docs.google.com/spreadsheets/d/<sheet-id> \
117-
-e KNOWN_FALSE_POSITIVE_FILE_PATH=/path/to/ignore.err \
117+
-e KNOWN_NON_ISSUES_FILE_PATH=/path/to/ignore.err \
118118
-e OUTPUT_FILE_PATH=https://docs.google.com/spreadsheets/d/<sheet-id> \
119119
quay.io/ecosystem-appeng/sast-ai-workflow:latest
120120
```
121121
Replace <your_key> with the actual LLM API key.
122122
123123
> **Note:**
124-
> Make sure the file paths required by the application (e.g., the HTML report, known false positives, etc.) point to the correct locations inside the container. For instance, if these files are copied into `/app`, update your configuration to reference `/app/<filename>` rather than the host paths.
124+
> Make sure the file paths required by the application (e.g., the HTML report, known non-issues, etc.) point to the correct locations inside the container. For instance, if these files are copied into `/app`, update your configuration to reference `/app/<filename>` rather than the host paths.
125125
>
126126
> If you ever need to run an interactive shell in your container (overriding the default entrypoint), use:
127127
>
@@ -146,7 +146,7 @@ environment variables.
146146
| EMBEDDINGS_LLM_URL | http://\<<please-set-embedding-llm-url\>> || https://integrate.api.nvidia.com/v1 | URL of the embedding model endpoint. |
147147
| EMBEDDINGS_LLM_MODEL_NAME | \<<please-set-embeddings-llm-model-name\>>|| all-mpnet-base-v2 | Model used for generating embeddings. |
148148
| INPUT_REPORT_FILE_PATH | /path/to/report.html || /path/to/report.html or https://docs.google.com/spreadsheets/d/\<sheet-id\> | Path to the SAST HTML report or URL of a Google Sheet containing the report. |
149-
| KNOWN_FALSE_POSITIVE_FILE_PATH | /path/to/ignore.err || /path/to/ignore.err | Path to the file containing known false positives data. |
149+
| KNOWN_NON_ISSUES_FILE_PATH | /path/to/ignore.err || /path/to/ignore.err | Path to the file containing known non-issues data. |
150150
| OUTPUT_FILE_PATH | /path/to/output_excel.xlsx || /path/to/output.xlsx | Path where the generated Excel report will be saved. |
151151
| LIBCLANG_PATH | /path/to/libclang || /usr/lib/llvm-12/lib/libclang.so | Path to your libclang location. |
152152
| COMPILE_COMMANDS_JSON_PATH | /path/to/compile_commands.json | | /path/to/compile_commands.json | Path to the generated `compile_commands.json` file for the analyzed project. Required only for C projects. |
@@ -155,7 +155,7 @@ environment variables.
155155
| CHUNK_SEPARATORS | ["\n\n", "\n", ".", ";", ",", " ", ""] | | ["\n\n", "\n", ".", ";"] | Ordered list of separators to use when splitting text into chunks. |
156156
| CONFIG_H_PATH | /path/to/config.h | | /path/to/config.h | *(Optional)* Path to the generated `config.h` containing macro definitions. Used for accurate Clang parsing, but not strictly required. |
157157
| SERVICE_ACCOUNT_JSON_PATH | "" | | /path/to/sheet-access-bot-abc123.json | Path to the JSON file for the Google service account used to access Google Sheets. Mandatory only if using a Google Sheet as input. |
158-
| USE_KNOWN_FALSE_POSITIVE_FILE | true | | true | Flag indicating whether to use the known false positives file in the pipeline as an input. |
158+
| USE_KNOWN_NON_ISSUES_FILE | true | | true | Flag indicating whether to use the known non-issues file in the pipeline as an input. |
159159
| SIMILARITY_ERROR_THRESHOLD | 2 | | 3 | Number of Documents to return from known issues DB. |
160160
| MAX_ANALYSIS_ITERATIONS | 2 | | 3 | Maximum number of analysis loops allowed for any single issue. |
161161
| WRITE_RESULTS_INCLUDE_NON_FINAL | true | | true | Whether to include issues with is_final="FALSE" |

0 commit comments

Comments
 (0)