Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion internal/test/integration/red_test_python_aws_s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,6 @@ func assertS3Operation(t require.TestingT, op, expectedKey string) {

tag, found = jaeger.FindIn(span.Tags, "cloud.region")
require.True(t, found)
require.Empty(t, tag.Value)
// localstack doesn't have a region, so we should match the default AWS one which is "us-east-1"
require.Equal(t, "us-east-1", tag.Value)
}
3 changes: 2 additions & 1 deletion internal/test/integration/red_test_python_aws_sqs.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ func assertSQSOperation(t require.TestingT, op, expectedQueueURL, expectedMessag

tag, found = jaeger.FindIn(span.Tags, "cloud.region")
require.True(t, found)
require.Empty(t, tag.Value)
// localstack doesn't have a region, so we should match the default AWS one which is "us-east-1"
require.Equal(t, "us-east-1", tag.Value)

tag, found = jaeger.FindIn(span.Tags, "aws.sqs.queue_url")
require.True(t, found)
Expand Down
6 changes: 6 additions & 0 deletions pkg/appolly/app/request/span_getters.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,9 @@ func spanOTELGetters(name attr.Name) (attributes.Getter[*Span, attribute.KeyValu
if s.Type == EventTypeHTTPClient && s.SubType == HTTPSubtypeAWSS3 && s.AWS != nil {
return AWSRequestID(s.AWS.S3.Meta.RequestID)
}
if s.Type == EventTypeHTTPClient && s.SubType == HTTPSubtypeAWSSQS && s.AWS != nil {
return AWSRequestID(s.AWS.SQS.Meta.RequestID)
}
return AWSRequestID("")
}
case attr.AWSExtendedRequestID:
Expand Down Expand Up @@ -253,6 +256,9 @@ func spanOTELGetters(name attr.Name) (attributes.Getter[*Span, attribute.KeyValu
if s.Type == EventTypeHTTPClient && s.SubType == HTTPSubtypeAWSS3 && s.AWS != nil {
return CloudRegion(s.AWS.S3.Meta.Region)
}
if s.Type == EventTypeHTTPClient && s.SubType == HTTPSubtypeAWSSQS && s.AWS != nil {
return CloudRegion(s.AWS.SQS.Meta.Region)
}
return CloudRegion("")
}
case attr.DNSQuestionName:
Expand Down
51 changes: 46 additions & 5 deletions pkg/ebpf/common/http/aws_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ var (
extendedRequestIDHeader = "x-amz-id-2"
)

var awsRegionRgx = regexp.MustCompile(`(?:^|\.)([a-z]{2}-[a-z]+-\d)\.amazonaws\.com$`)
// Regular expressions used to derive an AWS region from a request host.
var (
// awsRegionURLRgx captures, in group 1, the DNS label that sits immediately
// before ".amazonaws.com"; the suffix may carry one extra lowercase label
// such as ".cn". The captured label is only a candidate: it can just as well
// be a service name (e.g. "sts"), so it still has to be validated as a region.
awsRegionURLRgx = regexp.MustCompile(`(?:^|\.)([a-z0-9-]+)\.amazonaws\.com(\.[a-z]+)?$`)
// awsRegionURLRgx2 matches only when the text right before ".amazonaws.com"
// contains a dash. Its first group is greedy, so group 1 holds everything up
// to the last dash of that text and group 2 holds what follows the dash
// (for "s3-us-west-2" that is "s3-us-west" and "2").
awsRegionURLRgx2 = regexp.MustCompile(`([a-z0-9-]+)-([a-z0-9-]+)\.amazonaws\.com(\.[a-z]+)?$`)
// awsRegionRgx matches region names made of two lowercase letters, an
// optional "-gov", a lowercase word and a number, e.g. "us-east-1" or
// "us-gov-west-1".
awsRegionRgx = regexp.MustCompile(`^[a-z]{2}(-gov)?-[a-z]+-\d+$`)
// awsRegionRgx2 matches "cn-" prefixed region names such as "cn-north-1".
// NOTE(review): every string accepted here also satisfies awsRegionRgx, so
// this pattern looks redundant; confirm before relying on it separately.
awsRegionRgx2 = regexp.MustCompile(`^cn-[a-z]+-\d+$`)
)

func parseAWSMeta(req *http.Request, resp *http.Response) (request.AWSMeta, error) {
meta := request.AWSMeta{}
Expand All @@ -47,10 +52,46 @@ func parseAWSMeta(req *http.Request, resp *http.Response) (request.AWSMeta, erro
return meta, nil
}

// parseAWSRegion extracts the AWS region from the Host in a request.
// It supports both virtual-hosted–style and path-style endpoints.
// If no explicit region is found, the default region ("us-east-1") is returned.
//
// Examples:
//
// Host: bucket.s3.eu-west-1.amazonaws.com => "eu-west-1"
// Host: bucket.s3.amazonaws.com => "us-east-1"
// Host: ec2.us-west-2.amazonaws.com => "us-west-2"
// Host: s3.eu-central-1.amazonaws.com => "eu-central-1"
// Host: sns.cn-north-1.amazonaws.com.cn => "cn-north-1"
// Host: sts.amazonaws.com => "us-east-1" (default)
func parseAWSRegion(req *http.Request) string {
match := awsRegionRgx.FindStringSubmatch(req.URL.Host)
if len(match) >= 2 {
return match[1]
// Common AWS endpoint patterns:
// <service>.<region>.amazonaws.com
// <service>.<region>.amazonaws.com.cn
// <service>.amazonaws.com
//
// Examples captured by this regex:
// ec2.us-east-2.amazonaws.com => us-east-2
// monitoring.us-gov-west-1.amazonaws.com => us-gov-west-1
// s3.cn-north-1.amazonaws.com.cn => cn-north-1
if m := awsRegionURLRgx.FindStringSubmatch(req.Host); len(m) >= 2 {
if isAWSRegion(m[1]) {
return m[1]
}
}
return ""

// Fallback pattern for "service.s3.region.amazonaws.com" style:
// bucket.s3.eu-west-1.amazonaws.com => eu-west-1
if m := awsRegionURLRgx2.FindStringSubmatch(req.Host); len(m) >= 2 {
if isAWSRegion(m[1]) {
return m[1]
}
}

// Default AWS region when none is found
return "us-east-1"
}

// isAWSRegion reports whether region has the shape of an AWS region name,
// i.e. it satisfies awsRegionRgx or, failing that, awsRegionRgx2.
func isAWSRegion(region string) bool {
	if awsRegionRgx.MatchString(region) {
		return true
	}
	return awsRegionRgx2.MatchString(region)
}
138 changes: 138 additions & 0 deletions pkg/ebpf/common/http/aws_common_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ebpfcommon

import (
"net/http"
"testing"
)

// TestParseAWSRegion feeds parseAWSRegion a table of request hosts and checks
// the region it reports for each of them, including the "us-east-1" default
// used when the host carries no recognisable region.
func TestParseAWSRegion(t *testing.T) {
	type regionCase struct {
		name string
		host string
		want string
	}

	cases := []regionCase{
		{"ec2 with region", "ec2.us-west-2.amazonaws.com", "us-west-2"},
		{"s3 with region", "s3.eu-central-1.amazonaws.com", "eu-central-1"},
		{"sns with cn region", "sns.cn-north-1.amazonaws.com.cn", "cn-north-1"},
		{"sts default region", "sts.amazonaws.com", "us-east-1"},
		{"bucket s3 eu-west-1", "bucket.s3.eu-west-1.amazonaws.com", "eu-west-1"},
		{"bucket s3 default region", "bucket.s3.amazonaws.com", "us-east-1"},
		{"monitoring us-gov-west-1", "monitoring.us-gov-west-1.amazonaws.com", "us-gov-west-1"},
		{"s3 cn-north-1 with .cn", "s3.cn-north-1.amazonaws.com.cn", "cn-north-1"},
		{"bucket s3 cn-north-1 with .cn", "bucket.s3.cn-north-1.amazonaws.com.cn", "cn-north-1"},
		{"service only", "s3.amazonaws.com", "us-east-1"},
		{"service only .cn", "s3.amazonaws.com.cn", "us-east-1"},
		{"bucket s3 dot region", "bucket.s3.us-west-2.amazonaws.com", "us-west-2"},
		{"empty host", "", "us-east-1"},
		{"random host", "example.com", "us-east-1"},
		{"service.region.amazonaws.com.cn", "ec2.ap-southeast-1.amazonaws.com.cn", "ap-southeast-1"},
		{"bucket s3 ap-southeast-2", "bucket.s3.ap-southeast-2.amazonaws.com", "ap-southeast-2"},
		{"bucket s3 ap-southeast-2 .cn", "bucket.s3.ap-southeast-2.amazonaws.com.cn", "ap-southeast-2"},
		{"service.region.amazonaws.com with numbers", "lambda.us-east-1.amazonaws.com", "us-east-1"},
		{"service.region.amazonaws.com with dash", "dynamodb.us-west-2.amazonaws.com", "us-west-2"},
		{"bucket s3 region with dot", "bucket.s3.us-west-1.amazonaws.com", "us-west-1"},
		{"service.region.amazonaws.com with .cn", "ec2.cn-northwest-1.amazonaws.com.cn", "cn-northwest-1"},
		{"bucket s3 region with .cn", "bucket.s3.cn-northwest-1.amazonaws.com.cn", "cn-northwest-1"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Only Host is populated: it is the one field the parser is given here.
			got := parseAWSRegion(&http.Request{Host: tc.host})
			if got == tc.want {
				return
			}
			t.Errorf("parseAWSRegion(%q) = %q, want %q", tc.host, got, tc.want)
		})
	}
}
119 changes: 70 additions & 49 deletions pkg/ebpf/common/http/aws_s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
package ebpfcommon

import (
"bytes"
"errors"
"net/http"
"strings"
Expand Down Expand Up @@ -38,7 +37,7 @@ func parseAWSS3(req *http.Request, resp *http.Response) (request.AWSS3, error) {
if s3.Meta.ExtendedRequestID == "" {
return s3, errors.New("missing x-amz-id-2 header")
}
s3.Bucket, s3.Key = parseS3bucketKey(req.URL.Path)
s3.Bucket, s3.Key = parseS3bucketKey(req)
s3.Method = inferS3Method(req)
if s3.Method == "" {
return s3, errors.New("unable to parse s3 operation")
Expand All @@ -47,71 +46,93 @@ func parseAWSS3(req *http.Request, resp *http.Response) (request.AWSS3, error) {
return s3, nil
}

// parseS3bucketKey extracts the S3 bucket name and object key from an HTTP request.
// It supports both virtual-hosted-style (bucket in the host name) and
// path-style (bucket in the first path segment) addressing. For
// virtual-hosted requests the S3 label may be plain "s3" or a dashed variant
// such as "s3-us-west-2" or "s3-accelerate".
//
// It returns ("", "") when no bucket can be determined.
//
// Examples:
//
//	Host: my-bucket.s3.eu-west-1.amazonaws.com, Path: /foo/bar.txt
//	  => ("my-bucket", "foo/bar.txt")
//
//	Host: my-bucket.s3-us-west-2.amazonaws.com, Path: /foo/bar.txt
//	  => ("my-bucket", "foo/bar.txt")
//
//	Host: s3.amazonaws.com, Path: /my-bucket/foo/bar.txt
//	  => ("my-bucket", "foo/bar.txt")
//
//	Host: my-bucket.s3.amazonaws.com, Path: /
//	  => ("my-bucket", "")
func parseS3bucketKey(req *http.Request) (string, string) {
	path := strings.TrimPrefix(req.URL.Path, "/")

	// Case 1: virtual-hosted style, the bucket is in the host name and the
	// whole path is the object key.
	if bucket, ok := s3VirtualHostBucket(req.Host); ok {
		return bucket, path
	}

	// Case 2: path style, the bucket is the first path segment and the key
	// is whatever follows the first slash.
	bucket, key := path, ""
	if i := strings.IndexByte(path, '/'); i >= 0 {
		bucket, key = path[:i], path[i+1:]
	}
	if bucket == "" {
		return "", ""
	}
	return bucket, key
}

// s3VirtualHostBucket returns the bucket encoded in a virtual-hosted-style
// host, i.e. everything before the first ".s3." or ".s3-" marker. The second
// result is false when the host has no such marker or nothing precedes it,
// in which case the caller should fall back to path-style parsing.
func s3VirtualHostBucket(host string) (string, bool) {
	idx := -1
	for _, marker := range [...]string{".s3.", ".s3-"} {
		if i := strings.Index(host, marker); i >= 0 && (idx < 0 || i < idx) {
			idx = i
		}
	}
	if idx <= 0 {
		return "", false
	}
	return host[:idx], true
}

// This is a naive inference of S3 operations based on HTTP method and URL path/query
func inferS3Method(req *http.Request) string {
q := req.URL.Query()
path := strings.Trim(strings.TrimPrefix(req.URL.Path, "/"), "/")
parts := strings.Split(path, "/")
path := strings.TrimPrefix(req.URL.Path, "/")

switch req.Method {
case http.MethodGet:
switch {
case path == "":
return "ListBuckets"
case len(parts) == 1:
return "ListObjects"
case q.Has("uploads"):
return "ListMultipartUploads"
case q.Has("uploadId"):
return "ListParts"
default:
return "GetObject"
var bucket, object string
// --- Virtual-hosted–style URL ---
// Example: PUT bucket.s3.eu-west-1.amazonaws.com /hello.txt
if strings.Contains(req.Host, ".s3.") {
bucket = strings.SplitN(req.Host, ".s3.", 2)[0]
object = path // path may be empty or "object-key"
} else {
// --- Path-style URL ---
// Example: PUT s3.amazonaws.com /bucket/hello.txt
parts := strings.SplitN(path, "/", 2)
if len(parts) > 0 {
bucket = parts[0]
}
case http.MethodPut:
if q.Has("uploadId") && q.Has("partNumber") {
return "UploadPart"
}
if q.Has("uploadId") {
return "CompleteMultipartUpload"
if len(parts) > 1 {
object = parts[1]
}
}

switch len(parts) {
case 1:
// PUT /my-bucket -> Create bucket
hasBucket := bucket != ""
hasObject := object != ""

switch req.Method {
case http.MethodPut:
if hasBucket && !hasObject {
return "CreateBucket"
default:
// PUT /my-bucket/object.txt
}
if hasBucket && hasObject {
return "PutObject"
}
case http.MethodPost:
if q.Has("uploads") {
return "CreateMultipartUpload"
case http.MethodDelete:
if hasBucket && !hasObject {
return "DeleteBucket"
}
if q.Has("uploadId") {
return "CompleteMultipartUpload"
if hasBucket && hasObject {
return "DeleteObject"
}
return "PutObject"
case http.MethodDelete:
if q.Has("uploadId") {
return "AbortMultipartUpload"
case http.MethodGet:
if !hasBucket {
return "ListBuckets"
}
if len(parts) == 1 {
return "DeleteBucket"
if hasBucket && !hasObject {
return "ListObjects"
}
return "DeleteObject"
return "GetObject"
}

return ""
Expand Down
Loading
Loading