Skip to content

Commit 711a1ea

Browse files
authored
Improve gibberish detection (#822)
1 parent 0777080 commit 711a1ea

File tree

3 files changed

+42
-2
lines changed

3 files changed

+42
-2
lines changed

pkg/internal/transform/route/clusterurl/cluster.go

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -162,8 +162,26 @@ func (csf *ClusterURLClassifier) okWord(w string) bool {
162162
if ok {
163163
return ok
164164
}
165-
if gibberish.IsGibberish(w, csf.classifier) {
166-
return false
165+
166+
start := 0
167+
for i, c := range w {
168+
if c == '-' || c == '_' || c == '.' {
169+
if i == start {
170+
return false
171+
}
172+
173+
if gibberish.IsGibberish(w[start:i], csf.classifier) {
174+
return false
175+
}
176+
177+
start = i + 1
178+
}
179+
}
180+
181+
if start < len(w) {
182+
if gibberish.IsGibberish(w[start:], csf.classifier) {
183+
return false
184+
}
167185
}
168186

169187
csf.cache.Add(w, true)

pkg/internal/transform/route/clusterurl/cluster_test.go

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,9 @@ import (
1313
func TestClusterURL(t *testing.T) {
1414
csf, err := NewClusterURLClassifier(DefaultConfig())
1515
assert.NoError(t, err)
16+
assert.Equal(t, "*", csf.ClusterURL("registry-apjkmyp"))
1617
assert.Empty(t, csf.ClusterURL(""))
18+
assert.Equal(t, "*", csf.ClusterURL("apjkmyp"))
1719
assert.Equal(t, "/users/*/j4elk/*/job/*", csf.ClusterURL("/users/fdklsd/j4elk/23993/job/2"))
1820
assert.Equal(t, "*", csf.ClusterURL("123"))
1921
assert.Equal(t, "/*", csf.ClusterURL("/123"))
@@ -51,6 +53,10 @@ func TestClusterURL(t *testing.T) {
5153
assert.Equal(t, "HTTP GET", csf.ClusterURL("HTTP GET"))
5254
assert.Equal(t, "GET /api/cart", csf.ClusterURL("GET /api/cart?sessionId=55f4e5ea-5d6d-482a-80c4-799e3c72dfb0&currencyCode=USD"))
5355
assert.Equal(t, "/getquote", csf.ClusterURL("/getquote"))
56+
assert.Equal(t, "PUT /bar/test/test/*/files/*/test/*", csf.ClusterURL("PUT /bar/test/test/bar-attach-generic-product-apjkmyp/files/multi-test-version-jwbCm/test/some-file.txt"))
57+
assert.Equal(t, "PUT /test/bar/test/test/*/files/*/test/*", csf.ClusterURL("PUT /test/bar/test/test/bar-attach-generic-registry-apjkmyp/files/push-metrics-test-OYboK/test/README.md"))
58+
assert.Equal(t, "PUT /test/bar/test_plus/test.now/*/files/*/test/*", csf.ClusterURL("PUT /test/bar/test_plus/test.now/bar-attach-generic-registry-apjkmyp/files/push-metrics-test-OYboK/test/README.md"))
59+
assert.Equal(t, "PUT /bar/test/test/*/files/*/test/*", csf.ClusterURL("PUT /bar/test/test/a----/files/-a-a-a--/test/some-file.txt"))
5460
assert.Equal(t, "", csf.ClusterURL("?"))
5561
assert.Equal(t, "*", csf.ClusterURL("attach12?"))
5662
assert.Equal(t, "*", csf.ClusterURL("1?"))

test/integration/red_test_python_aws_s3.go

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/mariomac/guara/pkg/test"
1717
"github.com/stretchr/testify/require"
1818

19+
ti "go.opentelemetry.io/obi/pkg/test/integration"
1920
"go.opentelemetry.io/obi/test/integration/components/jaeger"
2021
)
2122

@@ -35,6 +36,21 @@ func testPythonAWSS3(t *testing.T) {
3536
waitForTestComponentsNoMetrics(t, address+"/health")
3637
waitForTestComponentsNoMetrics(t, localstackAddress)
3738

39+
// Wait for /health to appear in jaeger
40+
test.Eventually(t, testTimeout, func(t require.TestingT) {
41+
ti.DoHTTPGet(t, "http://localhost:8381/health", 200)
42+
resp, err := http.Get(jaegerQueryURL + "?service=python3.12&operation=GET%20%2Fhealth")
43+
require.NoError(t, err)
44+
if resp == nil {
45+
return
46+
}
47+
require.Equal(t, http.StatusOK, resp.StatusCode)
48+
var tq jaeger.TracesQuery
49+
require.NoError(t, json.NewDecoder(resp.Body).Decode(&tq))
50+
traces := tq.FindBySpan(jaeger.Tag{Key: "url.path", Type: "string", Value: "/health"})
51+
require.Len(t, traces, 1)
52+
}, test.Interval(1*time.Second))
53+
3854
s3Req(t, address+"/createbucket")
3955
s3Req(t, address+"/createobject")
4056
s3Req(t, address+"/listobjects")

0 commit comments

Comments
 (0)