Skip to content

Commit ab3c135

Browse files
mariusarvintedxoigmnmzweilin
authored
Release 2025.10 (#15)
# 🎉 Major Updates - Added support for automated C and Rust (using `cargo test`) test instrumentation and execution. - Added `ideas.repair` LLM-based Rust code repair module using `cargo test` execution feedback. - Added initial support for [AFL](https://en.wikipedia.org/wiki/American_Fuzzy_Lop_(software))-based fuzzing. - Added `ideas.translate` support for ingesting CMake `compile_commands.json` and parallelized C translation unit. --------- Co-authored-by: Cory Cornelius <[email protected]> Co-authored-by: Weilin Xu <[email protected]>
1 parent 295f23e commit ab3c135

30 files changed

+1285
-704
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Ignore examples
2+
examples/
3+
14
.DS_Store
25

36
# Byte-compiled / optimized / DLL files

.pre-commit-config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
default_language_version:
22
python: python3.11
33

4+
exclude: 'examples|lib|test/fixtures/.*\.json'
5+
46
repos:
57
- repo: https://github.com/pre-commit/pre-commit-hooks
68
rev: v5.0.0

IDEAS.mk

Lines changed: 187 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -6,96 +6,189 @@
66

77
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
88
MAKEFILE_DIR := $(realpath $(dir $(MAKEFILE_PATH)))
9+
PIPELINE_DIR := ${MAKEFILE_DIR}/lib/pipeline_automation
10+
PIPELINE_TAG := ideas/$(shell git rev-list -1 HEAD -- ${PIPELINE_DIR})
911
CARGO_TOML_CMAKE := ${MAKEFILE_DIR}/cargo_toml.cmake
12+
IDEAS_MAKEFILE := $(MAKEFILE_DIR)/IDEAS.mk
1013

1114
PROVIDER ?= hosted_vllm
1215
MODEL ?= Qwen/Qwen3-Coder-30B-A3B-Instruct
1316
PORT ?= 8000
1417
BASE_URL ?= http://localhost:${PORT}/v1
1518
TRANSLATION_DIR ?= translation.$(shell git --git-dir=${MAKEFILE_DIR}/.git rev-parse HEAD)
16-
TRANSLATE_ARGS ?= algorithm.preproc_strategy=clang generate.max_new_tokens=10000
19+
ifeq (${PROVIDER},hosted_vllm)
20+
TRANSLATE_ARGS = model.base_url=${BASE_URL}
21+
REPAIR_ARGS = model.base_url=${BASE_URL}
22+
endif
1723
RUSTFLAGS ?= -Awarnings## Ignore Rust compiler warnings
1824
CFLAGS ?= -w## Ignore C compiler warnings
1925
VERBOSE ?= 0## This is for verbose output in IDEAS.mk
2026

21-
PROJECT_C_FILES := $(wildcard project/src/*.c)
22-
C_FILES := $(notdir ${PROJECT_C_FILES})
23-
RUST_FILES := $(patsubst %.c,%.rs,${C_FILES})
24-
TEST_FILES := $(wildcard test_cases/*.json)
27+
PROJECT_C_FILES = $(shell jq -r 'map(.file) | .[] | @text' test_case/build/compile_commands.json)
28+
C_FILES = $(subst ${CURDIR}/test_case/,,${PROJECT_C_FILES})
29+
TEST_FILES := $(wildcard test_vectors/*.json)
2530
TEST_TIMEOUT ?= 5
2631

27-
# This makefile assumes CURDIR is an example
28-
ifeq (${PROJECT_C_FILES},)
29-
$(error ${CURDIR} does not contain project/src/*.c files! Use -C to specify example directory)
30-
endif
32+
AFL_TAG = aflplusplus/aflplusplus:stable
33+
FUZZING_TIMEOUT ?= 60
34+
# Make treats ':' as the target/prerequisite separator, so escape colons in the AFL queue file names.
35+
FUZZING_TEST_VECTORS := $(subst :,\:, $(wildcard afl/out/default/queue/*))
36+
37+
38+
.PHONY: FORCE
39+
FORCE:
3140

3241

3342
# project
34-
project/translate.log: ;
43+
# Stamp file for the C "translation" of the reference project: re-invoke this
# makefile to preprocess every translation unit listed in
# compile_commands.json into test_case/<file>.i, then touch the stamp.
# The sub-make is needed because C_FILES is only known once
# compile_commands.json exists.
test_case/translate.log: test_case/build/compile_commands.json
	@$(MAKE) --no-print-directory -f ${IDEAS_MAKEFILE} $(C_FILES:%=test_case/%.i)
	@touch $@
46+
47+
.PRECIOUS: test_case/build/%
48+
test_case/build/%: test_case/build/CMakeCache.txt ;
49+
50+
# Configure the reference C project with CMake.
# The build tree is removed first so that every (re)configure starts clean;
# cargo_toml.cmake is injected via CMAKE_PROJECT_INCLUDE and
# compile_commands.json is exported for the later translation steps.
.PRECIOUS: test_case/build/CMakeCache.txt
test_case/build/CMakeCache.txt: test_case/CMakeLists.txt ${CARGO_TOML_CMAKE}
	@rm -rf $(@D)
	cmake -S $(<D) -B $(@D) --log-level=ERROR -DCMAKE_PROJECT_INCLUDE=${CARGO_TOML_CMAKE} -DCMAKE_C_FLAGS="${CFLAGS}" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
54+
55+
.PRECIOUS: test_case/build/compile_commands.json
56+
test_case/build/compile_commands.json: test_case/build/CMakeCache.txt ;
57+
58+
# Build the reference C project; stderr of the build is captured as the log.
# The leading '-' lets make continue even when the C build fails, so the log
# is still available to later steps.
.PRECIOUS: test_case/build.log
test_case/build.log: test_case/translate.log
	-cmake --build $(@D)/build --target all -- --no-print-directory ${CMAKE_BUILD_ARGS} 2> $@
61+
62+
# Preprocess one C translation unit.
# The recipe text is produced by $(shell ...): look up this file's compile
# command in compile_commands.json, strip its "-o <output>" argument, and
# append "-E -o $@" so the compiler only preprocesses into the target.
# NOTE(review): if no entry matches, the generated recipe is empty and the
# target is not created — confirm this is acceptable.
.PRECIOUS: test_case/%.c.i
test_case/%.c.i: test_case/build/compile_commands.json
	$(shell jq -r '.[] | select(.file == "${CURDIR}/test_case/$*.c") | .command' $< | \
		sed -e 's/-o [^ ]*//g' | \
		xargs -I{} echo "{} -E -o $@")
68+
69+
# Add more tests from fuzzing. The procedure is
70+
# 1. Copy test input from the initial JSON test cases;
71+
# 2. Use afl-cmin to minimize the test corpus so they all have unique execution paths;
72+
# 3. Run afl-fuzz with the minimized seeds with a timeout=FUZZING_TIMEOUT;
73+
# 4. Collect the interesting test cases from the fuzzing output that provide unique execution paths as JSON files.
74+
.PHONY: add_test_cases
75+
add_test_cases: afl/executable afl/seeds afl/fuzzing.log
76+
77+
.PRECIOUS: afl/build/%
78+
afl/build/%: afl/build/CMakeCache.txt ;
79+
80+
# Use the same source file, but override CC and CXX with AFL's.
81+
# TODO: Use the modified source code in afl/ rather than in test_case/ to fuzz programs with different arguments.
82+
# Configure an AFL-instrumented build tree inside the AFL++ container.
# The working directory is bind-mounted at the same absolute path
# (${CURDIR}) in the container, so every path handed to cmake must be built
# from ${CURDIR} as well: `$(shell pwd)` may return a logical (symlinked)
# path that differs from make's CURDIR and would not exist in the container.
# The leading '-' keeps make going if configuration fails.
.PRECIOUS: afl/build/CMakeCache.txt
afl/build/CMakeCache.txt: test_case/CMakeLists.txt
	@rm -rf afl/build
	-docker run \
		--user $(shell id -u):$(shell id -g) \
		-v ${CURDIR}:${CURDIR} \
		${AFL_TAG} \
		cmake -DCMAKE_C_COMPILER=afl-cc -DCMAKE_CXX_COMPILER=afl-c++ -S ${CURDIR}/test_case -B ${CURDIR}/afl/build --log-level=ERROR -DCMAKE_C_FLAGS="${CFLAGS}"
90+
91+
# Build the AFL-instrumented binary inside the AFL++ container; stderr of
# the docker invocation is captured as the log.
# Paths are derived from ${CURDIR} (not `$(shell pwd)`) so they always match
# the bind mount `-v ${CURDIR}:${CURDIR}`, even when the directory was
# entered through a symlink.
# The leading '-' keeps make going if the build fails.
.PRECIOUS: afl/build.log
afl/build.log: afl/build/CMakeCache.txt
	-docker run \
		--user $(shell id -u):$(shell id -g) \
		-v ${CURDIR}:${CURDIR} \
		${AFL_TAG} \
		cmake --build ${CURDIR}/afl/build --target all -- --no-print-directory 2> $@
98+
99+
# Copy the instrumented binary to a fixed name. The binary's file name is
# taken from the CMAKE_PROJECT_NAME entry of the AFL build's CMakeCache.txt.
# '-@' : silent, and a failed copy does not abort make.
.PRECIOUS: afl/executable
afl/executable: afl/build.log
	-@cp $(@D)/build/$(shell grep -E "CMAKE_PROJECT_NAME:STATIC=.*" afl/build/CMakeCache.txt | cut -d= -f2) $@
102+
103+
# Generate seeds for AFL from test cases by copying the input from all test cases.
104+
# One AFL seed per JSON test vector: the elements of the vector's "in" array
# (or nothing when "in" is absent) joined with newlines.
afl/test_input_orig/%: test_vectors/%.json
	@mkdir -p $(dir $@)
	@jq -r '(.in // []) | join("\n")' $< > $@
107+
108+
# Minimize the seed corpus by keeping inputs that activate unique execution paths.
109+
# Run afl-cmin in the AFL++ container over the directory of original test
# inputs and write the minimized corpus to afl/seeds.
# All container paths are built from ${CURDIR} so they match the bind mount
# `-v ${CURDIR}:${CURDIR}`; `$(shell pwd)` can yield a different (logical,
# symlinked) path that does not exist inside the container.
# NOTE(review): afl/executable is used here but is not a prerequisite of
# this rule; it is only ordered before afl/seeds by `add_test_cases`.
afl/seeds: $(patsubst test_vectors/%.json, afl/test_input_orig/%, ${TEST_FILES})
	@echo "--- Starting with $(words $^) test cases ---"
	docker run \
		--user $(shell id -u):$(shell id -g) \
		-v ${CURDIR}:${CURDIR} \
		${AFL_TAG} \
		afl-cmin -i ${CURDIR}/$(dir $<) -o ${CURDIR}/$@ -- ${CURDIR}/afl/executable > /dev/null 2>&1
116+
117+
# Fuzzing, then recursively call make to extract test cases,
118+
# because we don't know the file names before fuzzing.
119+
# Fuzz the instrumented binary for at most FUZZING_TIMEOUT seconds, starting
# from the minimized seeds; afl-fuzz's stderr becomes the log. Afterwards
# re-invoke this makefile (serially) to turn the queue entries into JSON
# test vectors via the `extract` target.
# Container paths use ${CURDIR} so they match the bind mount
# `-v ${CURDIR}:${CURDIR}`; `$(shell pwd)` may differ from CURDIR when the
# directory is reached through a symlink.
# The leading '-' is required: `timeout` ending the fuzzer yields a non-zero
# exit status, which must not abort make.
afl/fuzzing.log: afl/seeds afl/executable
	@echo "--- Minimized to $(shell find $< -maxdepth 1 -type f | wc -l) test cases ---"
	-docker run \
		--user $(shell id -u):$(shell id -g) \
		-v ${CURDIR}:${CURDIR} \
		-e AFL_SKIP_CPUFREQ=1 \
		-e AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 \
		-e AFL_SHA1_FILENAMES=1 \
		${AFL_TAG} \
		timeout ${FUZZING_TIMEOUT} afl-fuzz -i ${CURDIR}/$< -o ${CURDIR}/$(@D)/out -- ${CURDIR}/afl/executable > /dev/null 2> ${CURDIR}/$@
	$(MAKE) -j1 -f $(IDEAS_MAKEFILE) extract
130+
131+
# Extract interesting test cases from fuzzing that increase test coverage.
132+
# Convert every file in the AFL queue into test_vectors/<name>.json and
# report how many vectors were requested.
.PHONY: extract
extract: $(FUZZING_TEST_VECTORS:afl/out/default/queue/%=test_vectors/%.json)
	@echo "--- Added $(words $^) test vectors in ---"
	@echo "--- $(CURDIR)/test_vectors/ ---"
136+
137+
# FIXME: This needs to be updated to the new JSON schemas
138+
# Build a JSON test vector from one AFL queue entry: run the instrumented
# binary on the entry and record the entry's lines as "in" and the program's
# stdout lines as "out" (trailing newline trimmed in both), with empty "args".
test_vectors/%.json: afl/out/default/queue/%
	@mkdir -p $(dir $@)
	@./afl/executable < $< \
	| jq -n --rawfile input $< --rawfile output /dev/stdin '{args: [], in: ($$input | rtrimstr("\n") | split("\n")), out: ($$output | rtrimstr("\n") | split("\n")) }' > $@
142+
143+
144+
# c2rust
145+
# FIXME: We are using a hotfix to run `cargo generate-lockfile` on the host.
146+
# Translate the C project with the c2rust pipeline script.
# Arguments passed to the script: the source directory (test_case) and the
# output directory (c2rust). stderr is first written to c2rust.log in the
# working directory and only then moved to the target.
# NOTE(review): presumably the log is written outside c2rust/ because the
# script creates or resets that directory — confirm.
# The leading '-' keeps make going when the translation fails.
.PRECIOUS: c2rust/translate.log
c2rust/translate.log: test_case/CMakeLists.txt
	-uv run --with-requirements $(PIPELINE_DIR)/requirements.txt \
		python $(PIPELINE_DIR)/c2rust/invoke_c2rust.py \
		--container-name $(PIPELINE_TAG)/c2rust \
		--stream-docker-output $(<D) $(@D) 2> c2rust.log
	mv c2rust.log $@
35153

36-
.PRECIOUS: project/build/%
37-
project/build/%: project/build/CMakeCache.txt ;
154+
# init
155+
# Scaffold an empty Rust translation: install Cargo.toml, make sure
# compile_commands.json exists, then re-invoke this makefile to create one
# stub .rs file per C source. The sub-make is needed because C_FILES can
# only be evaluated after compile_commands.json has been generated.
.PHONY: init
init: ${TRANSLATION_DIR}/Cargo.toml test_case/build/compile_commands.json
	@$(MAKE) --no-print-directory -f${IDEAS_MAKEFILE} $(addprefix ${TRANSLATION_DIR}/,$(C_FILES:.c=.rs))
43159

44-
project/build/compile_commands.json: project/build/CMakeCache.txt ;
160+
# Install the Cargo.toml found in the CMake build tree into the translation
# directory, creating that directory if necessary.
.PRECIOUS: ${TRANSLATION_DIR}/Cargo.toml
${TRANSLATION_DIR}/Cargo.toml: test_case/build/Cargo.toml
	@mkdir -p $(dir $@)
	cp $< $@
45164

46-
.PRECIOUS: project/build.log
47-
project/build.log: project/build/CMakeCache.txt
48-
-cmake --build project/build --target all -- --no-print-directory 2> $@
165+
# Create a placeholder Rust source file (a "Hello, world!" main).
# printf is used instead of echo because echo's handling of "\n" is
# shell-dependent: dash expands it, bash (as /bin/sh on many systems) prints
# it literally, which would produce an invalid .rs file.
${TRANSLATION_DIR}/%.rs:
	@mkdir -p $(@D)
	printf 'fn main() {\n    println!("Hello, world!");\n}\n' > $@
49168

50-
.PRECIOUS: project/executable
51-
project/executable: project/build/CMakeCache.txt
52-
-@cp project/build/$(shell grep -E "CMAKE_PROJECT_NAME:STATIC=.*" project/build/CMakeCache.txt | cut -f2 -d"=") $@
53169

54170
# translate
55171
.PHONY: translate
56172
translate: ${TRANSLATION_DIR}/translate.log ;
57173

58-
ifeq (${TRANSLATION_DIR},project)
174+
ifeq (${TRANSLATION_DIR},test_case)
175+
else ifeq (${TRANSLATION_DIR},c2rust)
176+
else ifeq (${TRANSLATION_DIR},afl)
59177
else
60-
${TRANSLATION_DIR}/translate.log: ${TRANSLATION_DIR}/compile_commands.json \
61-
${TRANSLATION_DIR}/Cargo.toml \
62-
$(addprefix ${TRANSLATION_DIR}/src/,$(addsuffix .i,${C_FILES}))
63-
@mkdir -p $(@D)
64-
ifeq (${PROVIDER},hosted_vllm)
65-
-uv run python -m ideas model.name=${PROVIDER}/${MODEL} model.base_url=${BASE_URL} filename=${TRANSLATION_DIR} $(TRANSLATE_ARGS) 2> $@
66-
else
67-
-uv run python -m ideas model.name=${PROVIDER}/${MODEL} filename=${TRANSLATION_DIR} $(TRANSLATE_ARGS) 2> $@
178+
${TRANSLATION_DIR}/translate.log: test_case/build/compile_commands.json
179+
-uv run python -m ideas.translate model.name=${PROVIDER}/${MODEL} filename=test_case/build/compile_commands.json hydra.run.dir=${TRANSLATION_DIR} ${TRANSLATE_ARGS}
68180
endif
69-
endif
70-
.PRECIOUS: ${TRANSLATION_DIR}/compile_commands.json
71-
${TRANSLATION_DIR}/compile_commands.json: project/build/compile_commands.json
72-
@mkdir -p $(@D)
73-
@cp $^ $@
74-
75-
.PRECIOUS: ${TRANSLATION_DIR}/Cargo.toml
76-
${TRANSLATION_DIR}/Cargo.toml: project/build/Cargo.toml
77-
mkdir -p $(@D)
78-
sed -e "s/\.c\.rs/.rs/g" project/build/Cargo.toml > $@
79-
80-
.PRECIOUS: ${TRANSLATION_DIR}/src/%.c
81-
${TRANSLATION_DIR}/src/%.c: project/src/%.c
82-
mkdir -p $(@D)
83-
cp $^ $@
84-
85-
.PRECIOUS: ${TRANSLATION_DIR}/src/%.c.i
86-
${TRANSLATION_DIR}/src/%.c.i: ${TRANSLATION_DIR}/src/%.c project/build/CMakeFiles/TargetDirectories.txt
87-
cmake --build project/build --target src/$(@F)
88-
mkdir -p ${TRANSLATION_DIR}/src
89-
cp $(shell head -n1 project/build/CMakeFiles/TargetDirectories.txt)/src/$(@F) $@
90181

91182

92183
# build
93184
.PHONY: build
94185
build: ${TRANSLATION_DIR}/build.log ;
95186

96-
ifneq (${TRANSLATION_DIR},project)
187+
ifeq (${TRANSLATION_DIR},test_case)
188+
else ifeq (${TRANSLATION_DIR},afl)
189+
else
97190
.PRECIOUS: ${TRANSLATION_DIR}/build.log
98-
${TRANSLATION_DIR}/build.log: ${TRANSLATION_DIR}/translate.log
191+
${TRANSLATION_DIR}/build.log: ${TRANSLATION_DIR}/translate.log ${TRANSLATION_DIR}/Cargo.toml FORCE
99192
-export RUSTFLAGS=${RUSTFLAGS} && cargo build --quiet --manifest-path $(@D)/Cargo.toml 2> $@
100193
endif
101194

@@ -106,47 +199,52 @@ RED_COL := \033[1;31m
106199
GREEN_COL := \033[1;32m
107200

108201
.PHONY: test
109-
test: ${TRANSLATION_DIR}/test.log
110-
@grep -E "^(PASS|FAIL) " ${TRANSLATION_DIR}/test.log | cut -d" " -f1 | sort | uniq -c
111-
112-
.PRECIOUS: ${TRANSLATION_DIR}/test.log
113-
${TRANSLATION_DIR}/test.log: $(addprefix ${TRANSLATION_DIR}/,${TEST_FILES})
114-
@echo "# ${CURDIR}/${TRANSLATION_DIR}" > $@
115-
@echo "## ${GREY_COL}test_cases${RESET}" >> $@
116-
@for test in $(abspath $(addprefix ${TRANSLATION_DIR}/,${TEST_FILES})); \
117-
do \
118-
if [ $$(jq -e "(.out | if type == \"boolean\" then [] elif type==\"string\" then [.] else . end) == .ret" $$test) = "true" ] ; then \
119-
echo "PASS ${GREEN_COL}$$test${RESET}" >> $@ ; \
120-
else \
121-
echo "FAIL ${RED_COL}$$test${RESET}" >> $@; \
122-
if [ ${VERBOSE} -ne 0 ] ; then jq -cM "." $$test >> $@ ; fi ; \
123-
fi ; \
124-
done
125-
126-
ifneq (${TRANSLATION_DIR},project)
127-
.PRECIOUS: ${TRANSLATION_DIR}/executable
128-
${TRANSLATION_DIR}/executable: ${TRANSLATION_DIR}/build.log
129-
-@cp $(shell find $(@D)/target/debug -maxdepth 1 -type f -executable | head -n1) $@
130-
endif
131-
132-
.PRECIOUS: ${TRANSLATION_DIR}/test_cases/%.json
133-
${TRANSLATION_DIR}/test_cases/%.json: ${TRANSLATION_DIR}/executable test_cases/%.json
202+
test: ${TRANSLATION_DIR}/cargo_test.log ;
203+
204+
# Evaluation reports for a built translation. Both rules run a pipeline
# script through `uv`, passing a container name, the translation directory
# (the directory of build.log) and the report path to write.

# Report produced by the evaluate_unsafe_usage pipeline step.
.PRECIOUS: ${TRANSLATION_DIR}/unsafety.json
${TRANSLATION_DIR}/unsafety.json: ${TRANSLATION_DIR}/build.log
	uv run --with-requirements $(PIPELINE_DIR)/requirements.txt \
		python $(PIPELINE_DIR)/evaluate_unsafe_usage/invoke_unsafety.py \
		--container-name $(PIPELINE_TAG)/unsafety \
		$(<D) $@


# Report produced by the idiomaticity pipeline step.
.PRECIOUS: ${TRANSLATION_DIR}/idiomaticity.json
${TRANSLATION_DIR}/idiomaticity.json: ${TRANSLATION_DIR}/build.log
	uv run --with-requirements $(PIPELINE_DIR)/requirements.txt \
		python $(PIPELINE_DIR)/idiomaticity/invoke_idiomaticity.py \
		--container-name $(PIPELINE_TAG)/idiomaticity \
		$(<D) $@
218+
219+
# Run the generated Rust test suite and store its stdout as the log.
# build.log holds the stderr of `cargo build`; an existing, empty build.log
# is treated as a clean build and `cargo test` is run. Otherwise every test
# vector is recorded as FAILED without running anything.
# The emptiness check uses POSIX `test -e` / `test -s` instead of
# `stat -c %s`, which is GNU-specific and fails on BSD/macOS (there the old
# check always took the "all FAILED" branch).
.PRECIOUS: ${TRANSLATION_DIR}/cargo_test.log
${TRANSLATION_DIR}/cargo_test.log: ${TRANSLATION_DIR}/Cargo.toml \
		${TRANSLATION_DIR}/tests/test_cases.rs \
		${TRANSLATION_DIR}/build.log
	@if [ -e ${TRANSLATION_DIR}/build.log ] && [ ! -s ${TRANSLATION_DIR}/build.log ]; then \
		cargo test --manifest-path ${TRANSLATION_DIR}/Cargo.toml --test test_cases > $@ ; \
	else \
		find test_vectors -name '*.json' -exec echo "test {} ... FAILED" \; > $@ ; \
	fi
228+
229+
.PRECIOUS: ${TRANSLATION_DIR}/tests/test_cases.rs
230+
${TRANSLATION_DIR}/tests/test_cases.rs: ${TEST_FILES}
134231
@mkdir -p $(@D)
135-
@jq -r "(.in // []) | join(\"\n\")" test_cases/$(@F) \
136-
| (timeout ${TEST_TIMEOUT} ${TRANSLATION_DIR}/executable $$(jq -r "(.args // []) | join(\"\\n\")" test_cases/$(@F))) 2>&1 \
137-
| jq --rawfile output /dev/stdin ".ret = (\$$output | rtrimstr(\"\n\") | split(\"\n\"))" test_cases/$(@F) > $@
232+
-uv run python -m ideas.convert_tests $^ | rustfmt > $@
138233

139-
.PRECIOUS: test_cases/%.json
140-
test_cases/%.json:
234+
.PRECIOUS: test_vectors/%.json
235+
test_vectors/%.json:
141236
$(error $@ not found)
142237

238+
# repair
239+
# Run the ideas.repair module on the translated crate. It is given the model
# name and the crate's Cargo.toml, plus any extra REPAIR_ARGS (which carry
# model.base_url when PROVIDER is hosted_vllm).
# The leading '-' keeps make going when the repair run exits non-zero.
.PHONY: repair
repair: ${TRANSLATION_DIR}/translate.log \
		${TRANSLATION_DIR}/Cargo.toml \
		${TRANSLATION_DIR}/tests/test_cases.rs
	-uv run python -m ideas.repair model.name=$(PROVIDER)/$(MODEL) cargo_toml=$(TRANSLATION_DIR)/Cargo.toml $(REPAIR_ARGS)
143242

144243
# clean
145244
.PHONY: clean
146245
clean:
147-
rm -rf outputs
148-
rm -rf project/build project/translate.log project/build.log project/*.json project/executable project/test_cases project/test.log
149-
rm -rf test.log
150-
ifneq (${TRANSLATION_DIR},project)
246+
rm -rf test_case/build test_case/translate.log test_case/build.log test_case/*.json
247+
rm -rf c2rust
248+
ifneq (${TRANSLATION_DIR},test_case)
151249
rm -rf ${TRANSLATION_DIR}
152250
endif

0 commit comments

Comments
 (0)