Skip to content

Commit 03c62d9

Browse files
committed
Added some strong boundaries around examples_schema
this rule has been known to spin the fudge out at scale, it will chew up gigs of memory and chew up CPU spinning out like nuts. This only matters to platforms using vacuum programatically. This change puts in some configurable and tunable controls to stop this happening in the fiuture,
1 parent 2ae9ec1 commit 03c62d9

File tree

3 files changed

+212
-27
lines changed

3 files changed

+212
-27
lines changed

functions/openapi/examples_schema.go

Lines changed: 157 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44
package openapi
55

66
import (
7+
"context"
78
"fmt"
89
"strings"
910
"sync"
11+
"sync/atomic"
12+
"time"
1013

1114
"github.com/daveshanley/vacuum/model"
1215
vacuumUtils "github.com/daveshanley/vacuum/utils"
@@ -17,7 +20,6 @@ import (
1720
"github.com/pb33f/libopenapi/datamodel/low"
1821
"github.com/pb33f/libopenapi/orderedmap"
1922
"github.com/pb33f/libopenapi/utils"
20-
"github.com/sourcegraph/conc"
2123
"go.yaml.in/yaml/v4"
2224
)
2325

@@ -38,27 +40,49 @@ func (es ExamplesSchema) GetCategory() string {
3840
var bannedErrors = []string{"if-then failed", "if-else failed", "allOf failed", "oneOf failed"}
3941

4042
// RunRule will execute the ComponentDescription rule, based on supplied context and a supplied []*yaml.Node slice.
41-
func (es ExamplesSchema) RunRule(_ []*yaml.Node, context model.RuleFunctionContext) []model.RuleFunctionResult {
43+
func (es ExamplesSchema) RunRule(_ []*yaml.Node, ruleContext model.RuleFunctionContext) []model.RuleFunctionResult {
4244

4345
var results []model.RuleFunctionResult
4446

45-
if context.DrDocument == nil {
47+
if ruleContext.DrDocument == nil {
4648
return results
4749
}
4850

51+
// Get configuration values from context, use defaults if not set
52+
maxConcurrentValidations := ruleContext.MaxConcurrentValidations
53+
if maxConcurrentValidations <= 0 {
54+
maxConcurrentValidations = 10 // Default: 10 parallel validations
55+
}
56+
57+
validationTimeout := ruleContext.ValidationTimeout
58+
if validationTimeout <= 0 {
59+
validationTimeout = 10 * time.Second // Default: 10 seconds
60+
}
61+
62+
// Create a timeout context for the entire validation process
63+
ctx, cancel := context.WithTimeout(context.Background(), validationTimeout)
64+
defer cancel()
65+
66+
// Create semaphore for concurrency limiting
67+
sem := make(chan struct{}, maxConcurrentValidations)
68+
69+
// Track active workers
70+
var activeWorkers int32
71+
var completedWorkers int32
72+
4973
buildResult := func(message, path string, key, node *yaml.Node, component v3.AcceptsRuleResults) model.RuleFunctionResult {
5074
// Try to find all paths for this node if it's a schema
5175
var allPaths []string
5276
if schema, ok := component.(*v3.Schema); ok {
53-
_, allPaths = vacuumUtils.LocateSchemaPropertyPaths(context, schema, key, node)
77+
_, allPaths = vacuumUtils.LocateSchemaPropertyPaths(ruleContext, schema, key, node)
5478
}
5579

5680
result := model.RuleFunctionResult{
5781
Message: message,
5882
StartNode: key,
5983
EndNode: vacuumUtils.BuildEndNode(key),
6084
Path: path,
61-
Rule: context.Rule,
85+
Rule: ruleContext.Rule,
6286
}
6387

6488
// Set the Paths array if we found multiple locations
@@ -69,11 +93,58 @@ func (es ExamplesSchema) RunRule(_ []*yaml.Node, context model.RuleFunctionConte
6993
component.AddRuleFunctionResult(v3.ConvertRuleResult(&result))
7094
return result
7195
}
72-
wg := conc.WaitGroup{}
96+
7397
var expLock sync.Mutex
98+
var wg sync.WaitGroup
99+
100+
// Helper function to spawn workers with context and concurrency control
101+
spawnWorker := func(work func()) {
102+
// Check if context is already cancelled before spawning
103+
select {
104+
case <-ctx.Done():
105+
return
106+
default:
107+
}
108+
109+
atomic.AddInt32(&activeWorkers, 1)
110+
wg.Add(1)
111+
112+
go func() {
113+
defer wg.Done()
114+
defer atomic.AddInt32(&completedWorkers, 1)
115+
defer atomic.AddInt32(&activeWorkers, -1)
116+
117+
// Recover from panics to prevent crashes
118+
defer func() {
119+
if r := recover(); r != nil {
120+
// Log panic if logger available
121+
if ruleContext.Logger != nil {
122+
ruleContext.Logger.Error("ExamplesSchema validation panic", "error", r)
123+
}
124+
}
125+
}()
126+
127+
// Try to acquire semaphore with context
128+
select {
129+
case sem <- struct{}{}:
130+
defer func() { <-sem }()
131+
case <-ctx.Done():
132+
// Context cancelled while waiting for semaphore
133+
return
134+
}
135+
136+
// Check context again before starting work
137+
select {
138+
case <-ctx.Done():
139+
return
140+
default:
141+
work()
142+
}
143+
}()
144+
}
74145

75146
validator := schema_validation.NewSchemaValidator()
76-
version := context.Document.GetSpecInfo().VersionNumeric
147+
version := ruleContext.Document.GetSpecInfo().VersionNumeric
77148
validateSchema := func(iKey *int,
78149
sKey, label string,
79150
s *v3.Schema,
@@ -104,7 +175,7 @@ func (es ExamplesSchema) RunRule(_ []*yaml.Node, context model.RuleFunctionConte
104175
}
105176
for _, r := range validationErrors {
106177
for _, err := range r.SchemaValidationErrors {
107-
result := buildResult(vacuumUtils.SuppliedOrDefault(context.Rule.Message, err.Reason),
178+
result := buildResult(vacuumUtils.SuppliedOrDefault(ruleContext.Rule.Message, err.Reason),
108179
path, keyNode, node, s)
109180

110181
banned := false
@@ -124,18 +195,31 @@ func (es ExamplesSchema) RunRule(_ []*yaml.Node, context model.RuleFunctionConte
124195
return rx
125196
}
126197

127-
if context.DrDocument != nil && context.DrDocument.Schemas != nil {
128-
for i := range context.DrDocument.Schemas {
129-
s := context.DrDocument.Schemas[i]
130-
wg.Go(func() {
198+
if ruleContext.DrDocument != nil && ruleContext.DrDocument.Schemas != nil {
199+
for i := range ruleContext.DrDocument.Schemas {
200+
s := ruleContext.DrDocument.Schemas[i]
201+
spawnWorker(func() {
202+
// Check context at start of work
203+
select {
204+
case <-ctx.Done():
205+
return
206+
default:
207+
}
208+
131209
if s.Value.Examples != nil {
132210
for x, ex := range s.Value.Examples {
211+
// Check context in loop
212+
select {
213+
case <-ctx.Done():
214+
return
215+
default:
216+
}
133217

134218
isRef, _, _ := utils.IsNodeRefValue(ex)
135219
if isRef {
136220
// extract node
137221
fNode, _, _, _ := low.LocateRefNodeWithContext(s.Value.ParentProxy.GoLow().GetContext(),
138-
ex, context.Index)
222+
ex, ruleContext.Index)
139223
if fNode != nil {
140224
ex = fNode
141225
} else {
@@ -164,7 +248,7 @@ func (es ExamplesSchema) RunRule(_ []*yaml.Node, context model.RuleFunctionConte
164248
if isRef {
165249
// extract node
166250
fNode, _, _, _ := low.LocateRefNodeWithContext(s.Value.ParentProxy.GoLow().GetContext(),
167-
s.Value.Example, context.Index)
251+
s.Value.Example, ruleContext.Index)
168252
if fNode != nil {
169253
ref = fNode
170254
}
@@ -221,10 +305,17 @@ func (es ExamplesSchema) RunRule(_ []*yaml.Node, context model.RuleFunctionConte
221305
return rx
222306
}
223307

224-
if context.DrDocument != nil && context.DrDocument.Parameters != nil {
225-
for i := range context.DrDocument.Parameters {
226-
p := context.DrDocument.Parameters[i]
227-
wg.Go(func() {
308+
if ruleContext.DrDocument != nil && ruleContext.DrDocument.Parameters != nil {
309+
for i := range ruleContext.DrDocument.Parameters {
310+
p := ruleContext.DrDocument.Parameters[i]
311+
spawnWorker(func() {
312+
// Check context at start of work
313+
select {
314+
case <-ctx.Done():
315+
return
316+
default:
317+
}
318+
228319
if p.Value.Examples.Len() >= 1 && p.SchemaProxy != nil {
229320
expLock.Lock()
230321
if p.Value.Examples != nil && p.Value.Examples.Len() > 0 {
@@ -245,10 +336,17 @@ func (es ExamplesSchema) RunRule(_ []*yaml.Node, context model.RuleFunctionConte
245336
}
246337
}
247338

248-
if context.DrDocument != nil && context.DrDocument.Headers != nil {
249-
for i := range context.DrDocument.Headers {
250-
h := context.DrDocument.Headers[i]
251-
wg.Go(func() {
339+
if ruleContext.DrDocument != nil && ruleContext.DrDocument.Headers != nil {
340+
for i := range ruleContext.DrDocument.Headers {
341+
h := ruleContext.DrDocument.Headers[i]
342+
spawnWorker(func() {
343+
// Check context at start of work
344+
select {
345+
case <-ctx.Done():
346+
return
347+
default:
348+
}
349+
252350
if h.Value.Examples.Len() >= 1 && h.Schema != nil {
253351
expLock.Lock()
254352
results = append(results, parseExamples(h.Schema.Schema, h, h.Value.Examples)...)
@@ -265,11 +363,18 @@ func (es ExamplesSchema) RunRule(_ []*yaml.Node, context model.RuleFunctionConte
265363
}
266364
}
267365

268-
if context.DrDocument != nil && context.DrDocument.MediaTypes != nil {
366+
if ruleContext.DrDocument != nil && ruleContext.DrDocument.MediaTypes != nil {
367+
368+
for i := range ruleContext.DrDocument.MediaTypes {
369+
mt := ruleContext.DrDocument.MediaTypes[i]
370+
spawnWorker(func() {
371+
// Check context at start of work
372+
select {
373+
case <-ctx.Done():
374+
return
375+
default:
376+
}
269377

270-
for i := range context.DrDocument.MediaTypes {
271-
mt := context.DrDocument.MediaTypes[i]
272-
wg.Go(func() {
273378
if mt.Value.Examples.Len() >= 1 && mt.SchemaProxy != nil {
274379
expLock.Lock()
275380
results = append(results, parseExamples(mt.SchemaProxy.Schema, mt, mt.Value.Examples)...)
@@ -286,7 +391,32 @@ func (es ExamplesSchema) RunRule(_ []*yaml.Node, context model.RuleFunctionConte
286391
}
287392

288393
}
289-
wg.Wait()
394+
395+
// Wait for all workers to complete or context to timeout
396+
done := make(chan struct{})
397+
go func() {
398+
wg.Wait()
399+
close(done)
400+
}()
401+
402+
select {
403+
case <-done:
404+
// All workers completed normally
405+
if ruleContext.Logger != nil && atomic.LoadInt32(&completedWorkers) > 0 {
406+
ruleContext.Logger.Debug("ExamplesSchema completed validations",
407+
"completed", atomic.LoadInt32(&completedWorkers))
408+
}
409+
case <-ctx.Done():
410+
// Timeout occurred - return whatever results we have
411+
if ruleContext.Logger != nil {
412+
ruleContext.Logger.Warn("ExamplesSchema validation timeout",
413+
"timeout", validationTimeout,
414+
"active", atomic.LoadInt32(&activeWorkers),
415+
"completed", atomic.LoadInt32(&completedWorkers),
416+
"results", len(results))
417+
}
418+
}
419+
290420
return results
291421
}
292422

functions/openapi/examples_schema_test.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package openapi
66
import (
77
"fmt"
88
"testing"
9+
"time"
910

1011
"github.com/daveshanley/vacuum/model"
1112
drModel "github.com/pb33f/doctor/model"
@@ -772,3 +773,49 @@ components:
772773
assert.Contains(t, res[0].Message, "subschemas 0, 1 matched")
773774
assert.Equal(t, "$.components.schemas['Test'].example", res[0].Path)
774775
}
776+
777+
func TestExamplesSchema_CustomConfiguration(t *testing.T) {
778+
// This test demonstrates how to use the new MaxConcurrentValidations and ValidationTimeout
779+
// configuration fields in RuleFunctionContext for programmatic control
780+
yml := `openapi: 3.1
781+
components:
782+
schemas:
783+
Product:
784+
type: object
785+
properties:
786+
name:
787+
type: string
788+
price:
789+
type: number
790+
example:
791+
name: "Widget"
792+
price: 19.99`
793+
794+
document, err := libopenapi.NewDocument([]byte(yml))
795+
if err != nil {
796+
panic(fmt.Sprintf("cannot create new document: %e", err))
797+
}
798+
799+
m, _ := document.BuildV3Model()
800+
path := "$"
801+
802+
drDocument := drModel.NewDrDocument(m)
803+
804+
rule := buildOpenApiTestRuleAction(path, "examples_schema", "", nil)
805+
ctx := buildOpenApiTestContext(model.CastToRuleAction(rule.Then), nil)
806+
807+
ctx.Document = document
808+
ctx.DrDocument = drDocument
809+
ctx.Rule = &rule
810+
811+
// Configure custom timeout and concurrency limits
812+
// These values can be set programmatically to control resource usage
813+
ctx.MaxConcurrentValidations = 5 // Limit to 5 concurrent validations (default: 10)
814+
ctx.ValidationTimeout = 5 * time.Second // 5 second timeout (default: 10 seconds)
815+
816+
def := ExamplesSchema{}
817+
res := def.RunRule(nil, ctx)
818+
819+
// Valid example should pass
820+
assert.Len(t, res, 0)
821+
}

model/rules.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@ type RuleFunctionContext struct {
5959
DrDocument *model.DrDocument `json:"-" yaml:"-"` // A high level, more powerful representation of the document being parsed. Powered by the doctor.
6060
Logger *slog.Logger `json:"-" yaml:"-"` // Custom logger
6161

62+
// MaxConcurrentValidations controls the maximum number of parallel validations for functions that support
63+
// concurrency limiting (e.g., oasExampleSchema). Default is 10 if not set or 0.
64+
MaxConcurrentValidations int `json:"-" yaml:"-"`
65+
66+
// ValidationTimeout controls the maximum time allowed for validation functions that support timeouts
67+
// (e.g., oasExampleSchema). Default is 10 seconds if not set or 0.
68+
ValidationTimeout time.Duration `json:"-" yaml:"-"`
69+
6270
// optionsCache caches the converted options map to avoid repeated interface conversions
6371
optionsCache map[string]string `json:"-" yaml:"-"`
6472
}

0 commit comments

Comments
 (0)