-
Notifications
You must be signed in to change notification settings - Fork 14
feat: draft file filtering [PS-105] #487
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,11 +15,43 @@ import ( | |
| "gopkg.in/yaml.v3" | ||
| ) | ||
|
|
||
| type FileFilterStrategy interface { | ||
| FilterOut(path string) bool | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nitpick: I'd call it |
||
| } | ||
|
|
||
| type GlobFileFilter struct { | ||
| ignores *gitignore.GitIgnore | ||
| } | ||
|
|
||
| func NewGlobFileFilter(path string, ruleFiles []string, logger *zerolog.Logger) (FileFilterStrategy, error) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd call it |
||
| ff := FileFilter{ | ||
| path: path, | ||
| defaultRules: []string{"**/.git/**"}, | ||
| logger: logger, | ||
| max_threads: int64(runtime.NumCPU()), | ||
| } | ||
|
|
||
| rules, err := ff.GetRules(ruleFiles) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| return &GlobFileFilter{ignores: gitignore.CompileIgnoreLines(rules...)}, nil | ||
| } | ||
|
|
||
| func (ff *GlobFileFilter) FilterOut(path string) bool { | ||
| if ff.ignores == nil { | ||
| return false | ||
| } | ||
| return ff.ignores.MatchesPath(path) | ||
| } | ||
|
|
||
| type FileFilter struct { | ||
| path string | ||
| defaultRules []string | ||
| logger *zerolog.Logger | ||
| max_threads int64 | ||
| path string | ||
| defaultRules []string | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this may not be needed anymore, as it could be a strategy, right? |
||
| logger *zerolog.Logger | ||
| filterStrategies []FileFilterStrategy | ||
| max_threads int64 | ||
| } | ||
|
|
||
| type FileFilterOption func(*FileFilter) error | ||
|
|
@@ -35,6 +67,26 @@ func WithThreadNumber(maxThreadCount int) FileFilterOption { | |
| } | ||
| } | ||
|
|
||
| func WithFileFilterStrategies(strategies []FileFilterStrategy) FileFilterOption { | ||
| return func(filter *FileFilter) error { | ||
| filter.filterStrategies = append(filter.filterStrategies, strategies...) | ||
| return nil | ||
| } | ||
| } | ||
|
|
||
| func WithSecretsFileFilterStrategies(path string, ruleFiles []string, logger *zerolog.Logger) FileFilterOption { | ||
| globFilter, err := NewGlobFileFilter(path, ruleFiles, logger) | ||
| if err != nil { | ||
| return nil //TODO handle error | ||
|
|
||
| } | ||
|
|
||
| return func(filter *FileFilter) error { | ||
| filter.filterStrategies = append(filter.filterStrategies, globFilter) // TODO add other filters here such as null byte filter | ||
| return nil | ||
| } | ||
| } | ||
|
|
||
| func NewFileFilter(path string, logger *zerolog.Logger, options ...FileFilterOption) *FileFilter { | ||
| filter := &FileFilter{ | ||
| path: path, | ||
|
|
@@ -103,11 +155,9 @@ func (fw *FileFilter) GetRules(ruleFiles []string) ([]string, error) { | |
| } | ||
|
|
||
| // GetFilteredFiles returns a filtered channel of filepaths from a given channel of filespaths and glob patterns to filter on | ||
| func (fw *FileFilter) GetFilteredFiles(filesCh chan string, globs []string) chan string { | ||
| func (fw *FileFilter) GetFilteredFiles(filesCh chan string) chan string { | ||
| var filteredFilesCh = make(chan string) | ||
|
|
||
| // create pattern matcher used to match filesToFilter to glob patterns | ||
| globPatternMatcher := gitignore.CompileIgnoreLines(globs...) | ||
| go func() { | ||
| ctx := context.Background() | ||
| availableThreads := semaphore.NewWeighted(fw.max_threads) | ||
|
|
@@ -122,8 +172,16 @@ func (fw *FileFilter) GetFilteredFiles(filesCh chan string, globs []string) chan | |
| } | ||
| go func(f string) { | ||
| defer availableThreads.Release(1) | ||
| // filesToFilter that do not match the glob pattern are filtered | ||
| if !globPatternMatcher.MatchesPath(f) { | ||
| // filesToFilter that do not match the filter list are excluded | ||
| keepFile := true | ||
| for _, filter := range fw.filterStrategies { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A thought: we could also have a |
||
| if filter.FilterOut(f) { | ||
| keepFile = false | ||
| break | ||
| } | ||
| } | ||
|
Comment on lines
+178
to
+182
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, exactly like I was thinking how it could/would work. |
||
|
|
||
| if keepFile { | ||
| filteredFilesCh <- f | ||
| } | ||
| }(file) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ package utils | |
|
|
||
| import ( | ||
| "fmt" | ||
| "github.com/stretchr/testify/require" | ||
| "os" | ||
| "path/filepath" | ||
| "runtime" | ||
|
|
@@ -133,14 +134,13 @@ func TestFileFilter_GetFilteredFiles(t *testing.T) { | |
| t.Run(testCase.name, func(t *testing.T) { | ||
| setupTestFileSystem(t, testCase) | ||
|
|
||
| fileFilter := NewFileFilter(testCase.repoPath, &log.Logger) | ||
| globFileFilter, err := NewGlobFileFilter(testCase.repoPath, testCase.ruleFiles, &log.Logger) | ||
| require.NoError(t, err) | ||
|
|
||
| fileFilter := NewFileFilter(testCase.repoPath, &log.Logger, WithFileFilterStrategies([]FileFilterStrategy{globFileFilter})) | ||
| files := fileFilter.GetAllFiles() | ||
|
|
||
| globs, err := fileFilter.GetRules(testCase.ruleFiles) | ||
| assert.NoError(t, err) | ||
|
|
||
| filteredFiles := fileFilter.GetFilteredFiles(files, globs) | ||
| filteredFiles := fileFilter.GetFilteredFiles(files) | ||
| expectedFilePaths := make([]string, 0) | ||
| for _, file := range testCase.expectedFiles { | ||
| expectedFilePaths = append(expectedFilePaths, filepath.Join(testCase.repoPath, file)) | ||
|
|
@@ -151,7 +151,7 @@ func TestFileFilter_GetFilteredFiles(t *testing.T) { | |
| } | ||
|
|
||
| t.Run("2nd call should return the same filesToFilter", func(t *testing.T) { | ||
| filteredFiles := fileFilter.GetFilteredFiles(files, globs) | ||
| filteredFiles := fileFilter.GetFilteredFiles(files) | ||
| for filteredFile := range filteredFiles { | ||
| assert.Contains(t, expectedFilePaths, filteredFile) | ||
| } | ||
|
|
@@ -184,12 +184,13 @@ func BenchmarkFileFilter_GetFilteredFiles(b *testing.B) { | |
|
|
||
| b.ResetTimer() | ||
| for n := 0; n < b.N; n++ { | ||
| fileFilter := NewFileFilter(rootDir, &log.Logger, WithThreadNumber(runtime.NumCPU())) | ||
| globFileFilter, err := NewGlobFileFilter(rootDir, ruleFiles, &log.Logger) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably we should leave that test to ensure that clients that use the NewFileFilter functions right now have a smooth upgrade path. |
||
| assert.NoError(b, err) | ||
|
|
||
| fileFilter := NewFileFilter(rootDir, &log.Logger, WithFileFilterStrategies([]FileFilterStrategy{globFileFilter}), WithThreadNumber(runtime.NumCPU())) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should probably be a new test |
||
|
|
||
| b.StartTimer() | ||
| globs, err := fileFilter.GetRules(ruleFiles) | ||
| assert.NoError(b, err) | ||
| filteredFiles := fileFilter.GetFilteredFiles(fileFilter.GetAllFiles(), globs) | ||
| filteredFiles := fileFilter.GetFilteredFiles(fileFilter.GetAllFiles()) | ||
| b.StopTimer() | ||
|
|
||
| var actualFilteredFilesLen int | ||
|
|
@@ -472,7 +473,10 @@ func TestFileFilter_SlashPatternInGitIgnore(t *testing.T) { | |
| createFileInPath(t, gitignorePath, []byte("/")) | ||
|
|
||
| // Test file filtering | ||
| fileFilter := NewFileFilter(tempDir, &log.Logger) | ||
| globFileFilter, err := NewGlobFileFilter(tempDir, []string{".gitignore"}, &log.Logger) | ||
| assert.NoError(t, err) | ||
|
|
||
| fileFilter := NewFileFilter(tempDir, &log.Logger, WithFileFilterStrategies([]FileFilterStrategy{globFileFilter})) | ||
| rules, err := fileFilter.GetRules([]string{".gitignore"}) | ||
| assert.NoError(t, err) | ||
|
|
||
|
|
@@ -481,7 +485,7 @@ func TestFileFilter_SlashPatternInGitIgnore(t *testing.T) { | |
|
|
||
| // Get all files and filter them | ||
| allFiles := fileFilter.GetAllFiles() | ||
| filteredFiles := fileFilter.GetFilteredFiles(allFiles, rules) | ||
| filteredFiles := fileFilter.GetFilteredFiles(allFiles) | ||
|
|
||
| // Collect filtered files | ||
| var filteredFilesList []string | ||
|
|
@@ -517,13 +521,14 @@ func TestFileFilter_SlashPatternInGitIgnore(t *testing.T) { | |
| createFileInPath(t, gitignorePath, []byte("/*")) | ||
|
|
||
| // Test file filtering | ||
| fileFilter := NewFileFilter(tempDir, &log.Logger) | ||
| rules, err := fileFilter.GetRules([]string{".gitignore"}) | ||
| globFileFilter, err := NewGlobFileFilter(tempDir, []string{".gitignore"}, &log.Logger) | ||
| assert.NoError(t, err) | ||
|
|
||
| fileFilter := NewFileFilter(tempDir, &log.Logger, WithFileFilterStrategies([]FileFilterStrategy{globFileFilter})) | ||
|
|
||
| // Get all files and filter them | ||
| allFiles := fileFilter.GetAllFiles() | ||
| filteredFiles := fileFilter.GetFilteredFiles(allFiles, rules) | ||
| filteredFiles := fileFilter.GetFilteredFiles(allFiles) | ||
|
|
||
| // Collect filtered files | ||
| var filteredFilesList []string | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Potentially more go idiomatic different name (don't care much):
FilterableThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Something to consider would be something like this: