Skip to content

Commit 8bfa689

Browse files
committed
✨ Add RegexSearchTool
1 parent 45139a4 commit 8bfa689

File tree

4 files changed

+395
-1
lines changed

4 files changed

+395
-1
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
package ai.koog.agents.ext.tool.search
2+
3+
import ai.koog.agents.core.tools.Tool
4+
import ai.koog.agents.core.tools.annotations.LLMDescription
5+
import ai.koog.agents.ext.tool.file.model.FileSystemEntry
6+
import ai.koog.agents.ext.tool.file.model.buildFileSize
7+
import ai.koog.prompt.text.text
8+
import ai.koog.rag.base.files.DocumentProvider
9+
import ai.koog.rag.base.files.FileMetadata
10+
import ai.koog.rag.base.files.FileSystemProvider
11+
import ai.koog.rag.base.files.readText
12+
import ai.koog.rag.base.files.extendRangeByLines
13+
import ai.koog.rag.base.files.toPosition
14+
import kotlinx.coroutines.flow.Flow
15+
import kotlinx.coroutines.flow.drop
16+
import kotlinx.coroutines.flow.emitAll
17+
import kotlinx.coroutines.flow.flow
18+
import kotlinx.coroutines.flow.mapNotNull
19+
import kotlinx.coroutines.flow.take
20+
import kotlinx.coroutines.flow.toList
21+
import kotlinx.serialization.KSerializer
22+
import kotlinx.serialization.SerialName
23+
import kotlinx.serialization.Serializable
24+
25+
/**
 * Regular expression based content search tool.
 *
 * Use to find occurrences of a regex pattern across text files under a path. The tree below the
 * requested path is walked through [fs]; only files whose content type is reported as
 * [FileMetadata.FileContentType.Text] are scanned. Every matching file is returned as a
 * [FileSystemEntry.File] whose content is an excerpt holding one snippet per regex match.
 *
 * @param Path path type of the backing file system provider.
 * @param fs read-only file system used to resolve, list, and read files.
 */
public class RegexSearchTool<Path>(
    private val fs: FileSystemProvider.ReadOnly<Path>,
) : Tool<RegexSearchTool.Args, RegexSearchTool.Result>() {

    override val name: String = "__search_contents_by_regex__"
    override val description: String = text {
        +"Executes a regular expression search on folder or file contents within the specified path."
        +"The tool returns structured results with file paths, line numbers, positions, and excerpts where the text was found."
        +"The tool will solely return search results and does not modify any files."
    }

    /**
     * Arguments accepted by [RegexSearchTool].
     *
     * @property path absolute starting directory or file path.
     * @property regex regular expression pattern, compiled with [Regex].
     * @property limit maximum number of matching files to return; forwarded to `Flow.take`,
     *   which rejects non-positive values with an [IllegalArgumentException].
     * @property skip number of matching files to skip; forwarded to `Flow.drop`,
     *   which rejects negative values with an [IllegalArgumentException].
     * @property caseSensitive when `false`, [RegexOption.IGNORE_CASE] is applied.
     */
    @Serializable
    public data class Args(
        @param:LLMDescription("Absolute starting directory or file path.")
        val path: String,
        @param:LLMDescription("Regular expression pattern.")
        val regex: String,
        @param:LLMDescription("Maximum number of matching files to return (pagination).")
        val limit: Int = 25,
        @param:LLMDescription("Number of matching files to skip (pagination).")
        val skip: Int = 0,
        @SerialName("case_sensitive")
        @param:LLMDescription("If false, performs case-insensitive matching.")
        val caseSensitive: Boolean = false,
    )

    /**
     * Search outcome.
     *
     * @property entries matching files, each carrying the excerpt snippets for its matches.
     * @property original the regex pattern the search was executed with.
     */
    @Serializable
    public data class Result(val entries: List<FileSystemEntry.File>, val original: String)

    override val argsSerializer: KSerializer<Args> = Args.serializer()
    override val resultSerializer: KSerializer<Result> = Result.serializer()

    /**
     * Resolves [Args.path] through [fs], runs the search and collects the resulting page of files.
     */
    override suspend fun execute(args: Args): Result {
        val path = fs.fromAbsolutePathString(args.path)
        val matches = search(path, args.regex, args.limit, args.skip, args.caseSensitive).toList()
        return Result(matches, original = args.regex)
    }

    /**
     * Builds the cold flow of result entries for one search request.
     *
     * Pagination ([skip] / [limit]) is applied to the stream of matching files before the entries
     * are built, so a file whose metadata cannot be obtained still consumes a slot of the page.
     *
     * @param path file or directory to start from.
     * @param pattern regular expression source.
     * @param limit maximum number of matching files to keep.
     * @param skip number of matching files to drop first.
     * @param caseSensitive when `false`, matching ignores case.
     * @param linesAroundSnippet value handed twice to [extendRangeByLines]; presumably the number
     *   of context lines before and after each match (confirm against that helper).
     */
    private fun search(
        path: Path,
        pattern: String,
        limit: Int,
        skip: Int,
        caseSensitive: Boolean,
        linesAroundSnippet: Int = 2,
    ): Flow<FileSystemEntry.File> {
        val options: Set<RegexOption> = if (caseSensitive) emptySet() else setOf(RegexOption.IGNORE_CASE)

        return searchByRegex(fs = fs, start = path, regex = Regex(pattern, options))
            .drop(skip)
            .take(limit)
            .mapNotNull { match ->
                // Snippets are cut from the very text the ranges were computed on, so the ranges
                // can never point outside the content (no second read of the file).
                val snippets = match.ranges.map { range ->
                    val extended = extendRangeByLines(match.content, range, linesAroundSnippet, linesAroundSnippet)
                    FileSystemEntry.File.Content.Excerpt.Snippet(
                        text = extended.substring(match.content),
                        range = extended,
                    )
                }
                if (snippets.isEmpty()) return@mapNotNull null
                val metadata = fs.metadata(match.file) ?: return@mapNotNull null
                val contentType = fs.getFileContentType(match.file)
                FileSystemEntry.File(
                    name = fs.name(match.file),
                    extension = fs.extension(match.file),
                    path = fs.toAbsolutePathString(match.file),
                    hidden = metadata.hidden,
                    size = buildFileSize(fs, match.file, contentType),
                    contentType = contentType,
                    content = FileSystemEntry.File.Content.Excerpt(snippets),
                )
            }
    }

    /**
     * A match of one file: the text that was scanned and the ranges within it that matched a regex.
     */
    private data class ContentMatch<P>(
        val file: P,
        val content: String,
        val ranges: List<DocumentProvider.DocumentRange>,
    )

    /**
     * Recursively searches starting at [start] for text files whose contents match [regex].
     * Returns a flow of [ContentMatch] where each item corresponds to a file and its matched ranges.
     *
     * Emission happens outside of any `try`/`catch`: a failure raised by a downstream operator must
     * travel through `emit` untouched (Flow exception transparency). Only the file-system access
     * itself is best-effort.
     */
    private fun <P> searchByRegex(
        fs: FileSystemProvider.ReadOnly<P>,
        start: P,
        regex: Regex,
    ): Flow<ContentMatch<P>> = flow {
        when (fs.metadata(start)?.type) {
            FileMetadata.FileType.File -> {
                val match = findMatches(fs, start, regex)
                if (match != null) emit(match)
            }
            FileMetadata.FileType.Directory -> {
                val children = try {
                    fs.list(start)
                } catch (e: kotlinx.coroutines.CancellationException) {
                    throw e
                } catch (_: Throwable) {
                    // ignore unreadable directories
                    emptyList()
                }
                for (child in children) emitAll(searchByRegex(fs, child, regex))
            }
            else -> { /* ignore */ }
        }
    }

    /**
     * Scans a single [file] and returns its matches, or `null` when the file is not text, has no
     * match, or cannot be read. Cancellation is always rethrown.
     */
    private suspend fun <P> findMatches(
        fs: FileSystemProvider.ReadOnly<P>,
        file: P,
        regex: Regex,
    ): ContentMatch<P>? = try {
        if (fs.getFileContentType(file) != FileMetadata.FileContentType.Text) {
            null
        } else {
            val content = fs.readText(file)
            val ranges = regex.findAll(content).map { mr ->
                val s = mr.range.first
                val e = mr.range.last + 1 // exclusive
                DocumentProvider.DocumentRange(s.toPosition(content), e.toPosition(content))
            }.toList()
            if (ranges.isEmpty()) null else ContentMatch(file, content, ranges)
        }
    } catch (e: kotlinx.coroutines.CancellationException) {
        throw e
    } catch (_: Throwable) {
        // ignore unreadable files
        null
    }
}
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
package ai.koog.agents.ext.tool.search
2+
3+
import ai.koog.agents.ext.tool.file.model.FileSystemEntry
4+
import ai.koog.rag.base.files.FileSystemProvider
5+
import ai.koog.rag.base.files.JVMFileSystemProvider
6+
import kotlinx.coroutines.runBlocking
7+
import java.nio.file.Files
8+
import java.nio.file.Path
9+
import kotlin.io.path.createDirectories
10+
import kotlin.io.path.writeText
11+
import kotlin.test.Test
12+
import kotlin.test.assertEquals
13+
import kotlin.test.assertTrue
14+
15+
/**
 * Tests for [RegexSearchTool] running against a small sample project created on the real file
 * system. Every temporary project root is deleted again after the test that created it.
 */
class RegexSearchToolTest {
    /** Temporary project roots created by the current test; removed in [cleanUp]. */
    private val tempDirs = mutableListOf<Path>()

    /** Deletes every temporary directory tree created by [buildFsWithSampleProject]. */
    @kotlin.test.AfterTest
    fun cleanUp() {
        tempDirs.forEach { it.toFile().deleteRecursively() }
        tempDirs.clear()
    }

    /**
     * Creates a temporary sample project (Kotlin, Java, test and docs files) and returns the
     * read-only JVM file system provider together with the project root.
     */
    private fun buildFsWithSampleProject(): Pair<FileSystemProvider.ReadOnly<Path>, Path> {
        val tempDir = Files.createTempDirectory("regex-search-tool-test").toAbsolutePath()
        tempDirs += tempDir // registered first so cleanup happens even if setup fails half-way
        val fs: FileSystemProvider.ReadOnly<Path> = JVMFileSystemProvider.ReadOnly

        // project layout
        val srcMainKotlin = tempDir.resolve("src/main/kotlin").createDirectories()
        val srcMainJava = tempDir.resolve("src/main/java").createDirectories()
        val srcTestKotlin = tempDir.resolve("src/test/kotlin").createDirectories()
        val docs = tempDir.resolve("docs").createDirectories()

        // kotlin files
        srcMainKotlin.resolve("File1.kt").writeText("fun main() { println(\"Hello, World!\") }")
        srcMainKotlin.resolve("File2.kt").writeText("class User(val name: String, val age: Int)")

        // java files
        srcMainJava.resolve("File1.java").writeText("public class File1 { public static void main(String[] args) { System.out.println(\"Hello, Java!\"); } }")
        srcMainJava.resolve("File2.java").writeText("public class User { private String name; private int age; }")

        // test files
        srcTestKotlin.resolve("Test1.kt").writeText("fun testFunction() { assertEquals(expected, actual) }")
        srcTestKotlin.resolve("TestUtils.kt").writeText("fun assertSomething() { assertTrue(condition) }")

        // docs
        docs.resolve("readme.txt").writeText("This is a sample project with Kotlin and Java files.")
        docs.resolve("api.txt").writeText("API Documentation: Use the User class to create user instances.")
        docs.resolve("multiline.txt").writeText(
            """line 0 This is a multiline string.
                |Line 1 has some content.
                |Line 2 has different content.
                |Line 3 ends with a number: 42
                |Line 4 starts with a number: 100 and continues.
                |Line 5 is also there.
                |Line 6 is the last line.
                |""".trimMargin()
        )

        return fs to tempDir
    }

    /** Runs the tool with `skip = 0` and returns its result. */
    private suspend fun runSearch(
        fs: FileSystemProvider.ReadOnly<Path>,
        path: String,
        regex: String,
        limit: Int = 10,
        caseSensitive: Boolean = true,
    ): RegexSearchTool.Result = RegexSearchTool(fs).execute(
        RegexSearchTool.Args(
            path = path,
            regex = regex,
            limit = limit,
            skip = 0,
            caseSensitive = caseSensitive,
        )
    )

    /** Normalizes Windows path separators so suffix checks work on every platform. */
    private fun String.norm(): String = replace('\\', '/')

    private fun List<String>.containsSuffix(suffix: String): Boolean = any { it.norm().endsWith(suffix) }

    private fun List<String>.containsAllSuffixes(vararg suffixes: String): Boolean = suffixes.all { containsSuffix(it) }

    private fun List<String>.containsNoneOfSuffixes(vararg suffixes: String): Boolean = suffixes.none { containsSuffix(it) }

    @Test
    fun testRegexSearchBasic(): Unit = runBlocking {
        val (fs, root) = buildFsWithSampleProject()

        val result = runSearch(fs, path = fs.toAbsolutePathString(root), regex = "class\\s+User")

        assertEquals("class\\s+User", result.original)
        val paths = result.entries.map { it.path }
        assertTrue(paths.containsAllSuffixes("/src/main/java/File2.java", "/src/main/kotlin/File2.kt"))
    }

    @Test
    fun testRegexSearchLimit(): Unit = runBlocking {
        val (fs, root) = buildFsWithSampleProject()

        val result = runSearch(fs, path = fs.toAbsolutePathString(root), regex = "class", limit = 1)

        assertEquals(1, result.entries.size)
    }

    @Test
    fun testRegexSearchNoResults(): Unit = runBlocking {
        val (fs, root) = buildFsWithSampleProject()

        val result = runSearch(fs, path = fs.toAbsolutePathString(root), regex = "non-existent-pattern")

        assertTrue(result.entries.isEmpty())
    }

    @Test
    fun testRegexSearchCaseInsensitive(): Unit = runBlocking {
        val (fs, root) = buildFsWithSampleProject()

        val result = runSearch(
            fs,
            path = fs.toAbsolutePathString(root),
            regex = "CLASS",
            caseSensitive = false,
        )

        val paths = result.entries.map { it.path }
        assertTrue(
            paths.containsAllSuffixes(
                "/src/main/kotlin/File2.kt",
                "/src/main/java/File1.java",
                "/src/main/java/File2.java",
            )
        )
    }

    @Test
    fun testRegexSearchSpecificDirectory(): Unit = runBlocking {
        val (fs, root) = buildFsWithSampleProject()

        val result = runSearch(fs, path = root.resolve("src/main/kotlin").toString(), regex = "fun|class")

        val paths = result.entries.map { it.path }
        assertTrue(
            paths.containsAllSuffixes(
                "/src/main/kotlin/File1.kt",
                "/src/main/kotlin/File2.kt",
            )
        )
        assertTrue(
            paths.containsNoneOfSuffixes(
                "/src/main/java/File1.java",
                "/src/main/java/File2.java",
            )
        )
    }

    @Test
    fun testRegexSearchComplexPattern(): Unit = runBlocking {
        val (fs, root) = buildFsWithSampleProject()

        val result = runSearch(fs, path = fs.toAbsolutePathString(root), regex = "assert\\w+\\(")

        val paths = result.entries.map { it.path }
        assertTrue(
            paths.containsAllSuffixes(
                "/src/test/kotlin/Test1.kt",
                "/src/test/kotlin/TestUtils.kt",
            )
        )
    }

    @Test
    fun testRegexSearchMultilineMatches(): Unit = runBlocking {
        val (fs, root) = buildFsWithSampleProject()

        val result = runSearch(
            fs,
            path = root.resolve("docs").toString(),
            regex = "Line.*?\\d+.*?\\n.*?Line",
        )

        val entry = result.entries.firstOrNull { it.path.norm().endsWith("/docs/multiline.txt") }
        assertTrue(entry != null, "docs/multiline.txt should be in results")
        val content = entry.content
        assertTrue(content is FileSystemEntry.File.Content.Excerpt)
        // The match spans lines 3 and 4, so a snippet must contain both of them.
        val hasContext = content.snippets.any { snippet ->
            val t = snippet.text
            t.contains("Line 3 ends with a number: 42") && t.contains("Line 4 starts with a number: 100 and continues.")
        }
        assertTrue(hasContext)
    }
}

examples/code-agent/step-04-add-subagent/src/main/kotlin/FindAgent.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import ai.koog.agents.ext.tool.file.ReadFileTool
88
import ai.koog.prompt.executor.clients.openai.OpenAIModels
99
import ai.koog.prompt.executor.llms.all.simpleOpenAIExecutor
1010
import ai.koog.rag.base.files.JVMFileSystemProvider
11+
import ai.koog.agents.ext.tool.search.RegexSearchTool
1112

1213
val findAgent = AIAgent(
1314
promptExecutor = simpleOpenAIExecutor(System.getenv("OPENAI_API_KEY")),
@@ -32,6 +33,7 @@ val findAgent = AIAgent(
3233
toolRegistry = ToolRegistry {
3334
tool(ListDirectoryTool(JVMFileSystemProvider.ReadOnly))
3435
tool(ReadFileTool(JVMFileSystemProvider.ReadOnly))
36+
tool(RegexSearchTool(JVMFileSystemProvider.ReadOnly))
3537
},
3638
maxIterations = 400
3739
) {

0 commit comments

Comments
 (0)