Skip to content

Commit 9090025

Browse files
authored
[OTEL] Switches OTLP Tests To dd-apm-test-agent Instead of MockTracerAgent (#7656)
## Summary of changes Uses the [dd-apm-test-agent](https://github.com/DataDog/dd-apm-test-agent) container to receive OTLP metrics over both gRPC and HTTP, then retrieves them from the container as JSON for snapshot validation. ## Reason for change To test gRPC and ongoing OTLP support and validation in the .NET tracer more easily and reliably, without needing to build gRPC servers or change our agent for each new signal we plan to support. ## Implementation details Added the container to the integration test runs under the corresponding host name, using either port `4317` (gRPC) or port `4318` (HTTP): 1. Pings the `/test/session/clear` endpoint to clear the session at the start of the tests 2. Pings the `/test/session/metrics` endpoint to retrieve the generated metrics ## Test coverage Updated the snapshot format; the same 4 should still pass, now also for gRPC, which has been added to the OTEL test runs. ## Other details <!-- Fixes #{issue} --> <!-- ⚠️ Note: Where possible, please obtain 2 approvals prior to merging. Unless CODEOWNERS specifies otherwise, for external teams it is typically best to have one review from a team member, and one review from apm-dotnet. Trivial changes do not require 2 reviews. MergeQueue is NOT enabled in this repository. If you have write access to the repo, the PR has 1-2 approvals (see above), and all of the required checks have passed, you can use the Squash and Merge button to merge the PR. If you don't have write access, or you need help, reach out in the #apm-dotnet channel in Slack. -->
1 parent f99490c commit 9090025

15 files changed

+716
-9494
lines changed

.azure-pipelines/ultimate-pipeline.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1683,6 +1683,12 @@ stages:
16831683
displayName: 'Initialize LocalDB'
16841684
workingDirectory: $(Build.Repository.LocalPath)
16851685
1686+
- powershell: |
1687+
mkdir -Force ./artifacts/build_data/snapshots
1688+
docker compose -f docker-compose.windows.yml run --rm start-test-agent.windows
1689+
displayName: Start test agent
1690+
retryCountOnTaskFailure: 3
1691+
16861692
- script: tracer\build.cmd CompileTrimmingSamples BuildWindowsIntegrationTests BuildWindowsRegressionTests -Framework $(framework) --code-coverage-enabled $(CodeCoverageEnabled)
16871693
displayName: Build integration tests
16881694
retryCountOnTaskFailure: 3
@@ -1700,6 +1706,11 @@ stages:
17001706
Filter: $(IntegrationTestFilter)
17011707
SampleName: $(IntegrationTestSampleName)
17021708

1709+
- script: docker compose -f docker-compose.windows.yml down
1710+
displayName: Stop test agent
1711+
condition: succeededOrFailed()
1712+
continueOnError: true
1713+
17031714
- task: PublishTestResults@2
17041715
displayName: publish test results
17051716
inputs:

docker-compose.windows.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ services:
3232
- ./artifacts/build_data/snapshots:c:/debug_snapshots
3333
ports:
3434
- "8126"
35+
- "4317:4317"
36+
- "4318:4318"
3537
environment:
3638
- SNAPSHOT_CI=1
3739
# api-security attrs are unfortunately ignored because gzip compression generates different bytes per platform windows/linux
@@ -306,4 +308,4 @@ services:
306308
- dockerTag=${dockerTag:-unset}
307309
- DD_TRACE_AGENT_URL=http://test-agent.windows:8126
308310
depends_on:
309-
- test-agent.windows
311+
- test-agent.windows

docker-compose.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,7 @@ services:
473473
- DD_LOGGER_SYSTEM_PULLREQUEST_SOURCEBRANCH
474474
- DD_LOGGER_DD_TAGS
475475
- RANDOM_SEED
476+
- TEST_AGENT_HOST=test-agent
476477
hostname: integrationtests
477478
depends_on:
478479
- servicestackredis
@@ -494,6 +495,7 @@ services:
494495
- localstack
495496
- couchbase
496497
- azureservicebus-emulator
498+
- test-agent
497499

498500
IntegrationTests.Debugger:
499501
build:
@@ -675,9 +677,10 @@ services:
675677
- localstack
676678
- couchbase
677679
- azureservicebus-emulator
680+
- test-agent
678681
environment:
679682
- TIMEOUT_LENGTH=120
680-
command: servicestackredis:6379 stackexchangeredis:6379 stackexchangeredis-replica:6379 stackexchangeredis-single:6379 elasticsearch5:9200 elasticsearch6:9200 elasticsearch7:9200 sqlserver:1433 mongo:27017 postgres:5432 mysql:3306 mysql57:3306 rabbitmq:5672 kafka-broker:9092 kafka-zookeeper:2181 localstack:4566 couchbase:11210 azureservicebus-emulator:5672 # oracle:1521
683+
command: servicestackredis:6379 stackexchangeredis:6379 stackexchangeredis-replica:6379 stackexchangeredis-single:6379 elasticsearch5:9200 elasticsearch6:9200 elasticsearch7:9200 sqlserver:1433 mongo:27017 postgres:5432 mysql:3306 mysql57:3306 rabbitmq:5672 kafka-broker:9092 kafka-zookeeper:2181 localstack:4566 couchbase:11210 azureservicebus-emulator:5672 test-agent:8126 # oracle:1521
681684

682685
IntegrationTests.ARM64:
683686
build:
@@ -746,6 +749,7 @@ services:
746749
- DD_LOGGER_SYSTEM_PULLREQUEST_SOURCEBRANCH
747750
- DD_LOGGER_DD_TAGS
748751
- RANDOM_SEED
752+
- TEST_AGENT_HOST=test-agent
749753
depends_on:
750754
- servicestackredis_arm64
751755
- stackexchangeredis_arm64
@@ -758,6 +762,7 @@ services:
758762
- mysql_arm64
759763
- rabbitmq_arm64
760764
- localstack_arm64
765+
- test-agent
761766

762767
StartDependencies.ARM64:
763768
image: andrewlock/wait-for-dependencies
@@ -773,9 +778,10 @@ services:
773778
- mysql_arm64
774779
- rabbitmq_arm64
775780
- localstack_arm64
781+
- test-agent
776782
environment:
777783
- TIMEOUT_LENGTH=120
778-
command: servicestackredis_arm64:6379 stackexchangeredis_arm64:6379 stackexchangeredis_arm64-replica:6379 stackexchangeredis_arm64-single:6379 elasticsearch7_arm64:9200 sqledge_arm64:1433 mongo_arm64:27017 postgres_arm64:5432 mysql_arm64:3306 rabbitmq_arm64:5672 localstack_arm64:4566
784+
command: servicestackredis_arm64:6379 stackexchangeredis_arm64:6379 stackexchangeredis_arm64-replica:6379 stackexchangeredis_arm64-single:6379 elasticsearch7_arm64:9200 sqledge_arm64:1433 mongo_arm64:27017 postgres_arm64:5432 mysql_arm64:3306 rabbitmq_arm64:5672 localstack_arm64:4566 test-agent:8126
779785

780786
IntegrationTests.ARM64.Debugger:
781787
build:
@@ -841,6 +847,8 @@ services:
841847
- ./artifacts/build_data/snapshots:/debug_snapshots
842848
ports:
843849
- "8126:8126"
850+
- "4317:4317"
851+
- "4318:4318"
844852
environment:
845853
- ENABLED_CHECKS=trace_count_header,meta_tracer_version_header,trace_content_length
846854
- SNAPSHOT_CI=1

tracer/build/_build/docker/test-agent.windows.dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
WORKDIR /
44

55
# Pin to older test agent versions to try to avoid breakages in the future
6-
RUN pip install --no-cache-dir "ddapm-test-agent==1.28.0" "ddsketch==3.0.1" "ddsketch[serialization]==3.0.1"
6+
RUN pip install --no-cache-dir "ddapm-test-agent==1.32.0" "ddsketch==3.0.1" "ddsketch[serialization]==3.0.1"
77

88
ENTRYPOINT [ "ddapm-test-agent", "--port=8126" ]

tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/OpenTelemetrySdkTests.cs

Lines changed: 47 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
using System.Threading.Tasks;
1111
using Datadog.Trace.Configuration;
1212
using Datadog.Trace.TestHelpers;
13+
using Datadog.Trace.Vendors.Newtonsoft.Json;
14+
using Datadog.Trace.Vendors.Newtonsoft.Json.Linq;
1315
using FluentAssertions;
1416
using FluentAssertions.Execution;
1517
using VerifyXunit;
@@ -72,7 +74,6 @@ public class OpenTelemetrySdkTests : TracingIntegrationTest
7274

7375
private readonly Regex _versionRegex = new(@"telemetry.sdk.version: (0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)");
7476
private readonly Regex _timeUnixNanoRegex = new(@"time_unix_nano"":([0-9]{10}[0-9]+)");
75-
private readonly Regex _timeUnixNanoRegexMetrics = new(@"TimeUnixNano: ([0-9]{10}[0-9]+)");
7677
private readonly Regex _exceptionStacktraceRegex = new(@"exception.stacktrace"":""System.ArgumentException: Example argument exception.*"",""");
7778

7879
public OpenTelemetrySdkTests(ITestOutputHelper output)
@@ -89,8 +90,9 @@ public static IEnumerable<object[]> GetMetricsTestData()
8990
{
9091
foreach (var packageVersion in PackageVersions.OpenTelemetry)
9192
{
92-
yield return [packageVersion[0], "false", "true"];
93-
yield return [packageVersion[0], "true", "false"];
93+
yield return [packageVersion[0], "false", "true", "grpc"];
94+
yield return [packageVersion[0], "false", "true", "http/protobuf"];
95+
yield return [packageVersion[0], "true", "false", "http/protobuf"];
9496
}
9597
}
9698
#endif
@@ -213,10 +215,11 @@ public async Task IntegrationDisabled(string packageVersion)
213215
[SkippableTheory]
214216
[Trait("Category", "EndToEnd")]
215217
[Trait("RunOnWindows", "True")]
218+
[Trait("RequiresDockerDependency", "true")]
216219
[MemberData(nameof(GetMetricsTestData))]
217-
public async Task SubmitsOtlpMetrics(string packageVersion, string datadogMetricsEnabled, string otelMetricsEnabled)
220+
public async Task SubmitsOtlpMetrics(string packageVersion, string datadogMetricsEnabled, string otelMetricsEnabled, string protocol)
218221
{
219-
var parsedVersion = Version.Parse(!string.IsNullOrEmpty(packageVersion) ? packageVersion : "1.12.0");
222+
var parsedVersion = Version.Parse(!string.IsNullOrEmpty(packageVersion) ? packageVersion : "1.13.1");
220223
var runtimeMajor = Environment.Version.Major;
221224

222225
var snapshotName = runtimeMajor switch
@@ -227,94 +230,71 @@ public async Task SubmitsOtlpMetrics(string packageVersion, string datadogMetric
227230
_ => throw new SkipException($"Skipping test due to irrelevant runtime and OTel versions mix: .NET {runtimeMajor} & Otel v{parsedVersion}")
228231
};
229232

230-
var initialAgentPort = TcpPortProvider.GetOpenPort();
233+
var testAgentHost = Environment.GetEnvironmentVariable("TEST_AGENT_HOST") ?? "localhost";
234+
var otlpPort = protocol == "grpc" ? 4317 : 4318;
235+
236+
using (var httpClient = new System.Net.Http.HttpClient())
237+
{
238+
await httpClient.GetAsync($"http://{testAgentHost}:4318/test/session/clear");
239+
}
231240

232241
SetEnvironmentVariable("DD_ENV", string.Empty);
233242
SetEnvironmentVariable("DD_SERVICE", string.Empty);
234243
SetEnvironmentVariable("DD_METRICS_OTEL_METER_NAMES", "OpenTelemetryMetricsMeter");
235244
SetEnvironmentVariable("DD_METRICS_OTEL_ENABLED", datadogMetricsEnabled);
236245
SetEnvironmentVariable("OTEL_METRICS_EXPORTER_ENABLED", otelMetricsEnabled);
237-
SetEnvironmentVariable("OTEL_EXPORTER_OTLP_PROTOCOL", "http/protobuf");
238-
SetEnvironmentVariable("OTEL_EXPORTER_OTLP_ENDPOINT", $"http://127.0.0.1:{initialAgentPort}");
246+
SetEnvironmentVariable("OTEL_EXPORTER_OTLP_PROTOCOL", protocol);
247+
SetEnvironmentVariable("OTEL_EXPORTER_OTLP_ENDPOINT", $"http://{testAgentHost}:{otlpPort}");
239248
SetEnvironmentVariable("OTEL_METRIC_EXPORT_INTERVAL", "1000");
240249

241250
// Up until Sdk version 1.6.0 Otel didn't support reading from the env var
242251
SetEnvironmentVariable("OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE", runtimeMajor >= 9 ? "delta" : "cumulative");
243252

244-
using var agent = EnvironmentHelper.GetMockAgent(fixedPort: initialAgentPort);
245-
using (await RunSampleAndWaitForExit(agent, packageVersion: packageVersion ?? "1.12.0"))
253+
using var agent = EnvironmentHelper.GetMockAgent();
254+
using (await RunSampleAndWaitForExit(agent, packageVersion: packageVersion ?? "1.13.1"))
246255
{
247-
// Collect requests from both MockTracerAgent and MockOtlpGrpcServer
248-
var metricRequests = agent.OtlpRequests
249-
.Where(r => r.PathAndQuery.StartsWith("/v1/metrics") || r.PathAndQuery.Contains("MetricsService"))
250-
.ToList();
251-
252-
metricRequests.Should().NotBeEmpty("Expected OTLP metric requests were not received.");
253-
254-
// Group the scope metrics by the resource metrics and schema URL (should only be one unique combination)
255-
var resourceMetricByResource = metricRequests
256-
.SelectMany(r => r.MetricsData.ResourceMetrics)
257-
.GroupBy(r => new Tuple<global::OpenTelemetry.Proto.Resource.V1.Resource, string>(r.Resource, r.SchemaUrl))
258-
.Should()
259-
.ContainSingle()
260-
.Subject;
261-
262-
// Group the individual metrics by scope metric and schema URL (should only be one unique combination since we're only using one ActivitySource)
263-
// This may result in multiple entries for metrics that are repeated multiple times before the test exits
264-
var scopeMetricsByResource = resourceMetricByResource
265-
.SelectMany(r => r.ScopeMetrics)
266-
.GroupBy(r => new Tuple<global::OpenTelemetry.Proto.Common.V1.InstrumentationScope, string>(r.Scope, r.SchemaUrl))
267-
.OrderBy(group => group.Key.Item1.Name);
268-
269-
var scopeMetrics = new List<object>();
270-
foreach (var scopeMetricByResource in scopeMetricsByResource)
256+
await Task.Delay(2000);
257+
258+
using var httpClient = new System.Net.Http.HttpClient();
259+
var metricsResponse = await httpClient.GetAsync($"http://{testAgentHost}:4318/test/session/metrics");
260+
metricsResponse.EnsureSuccessStatusCode();
261+
262+
var metricsJson = await metricsResponse.Content.ReadAsStringAsync();
263+
var metricsData = JToken.Parse(metricsJson);
264+
265+
metricsData.Should().NotBeNullOrEmpty();
266+
267+
foreach (var attribute in metricsData.SelectTokens("$..resource.attributes[?(@.key == 'telemetry.sdk.version')]"))
271268
{
272-
var metrics = scopeMetricByResource
273-
.SelectMany(r => r.Metrics)
274-
.GroupBy(r => r.Name)
275-
.OrderBy(group => group.Key)
276-
.Select(group => group.First())
277-
.ToList();
278-
279-
scopeMetrics.Add(new
280-
{
281-
Scope = scopeMetricByResource.Key.Item1,
282-
Metrics = metrics,
283-
SchemaUrl = scopeMetricByResource.Key.Item2
284-
});
269+
attribute["value"]!["string_value"] = "sdk-version";
285270
}
286271

287-
// Filter out the telemetry resource name, if any
288-
foreach (var attribute in resourceMetricByResource.Key.Item1.Attributes)
272+
foreach (var attribute in metricsData.SelectTokens("$..resource.attributes[?(@.key == 'telemetry.sdk.name')]"))
289273
{
290-
if (attribute.Key.Equals("telemetry.sdk.version"))
291-
{
292-
attribute.Value.StringValue = "sdk-version";
293-
}
294-
else if (attribute.Key.Equals("telemetry.sdk.name"))
295-
{
296-
attribute.Value.StringValue = "sdk-name";
297-
}
274+
attribute["value"]!["string_value"] = "sdk-name";
275+
}
276+
277+
foreach (var dataPoint in metricsData.SelectTokens("$..data_points[*]"))
278+
{
279+
dataPoint["start_time_unix_nano"] = "0";
280+
dataPoint["time_unix_nano"] = "0";
298281
}
299282

300-
// Although there's only one resource, let's still emit snapshot data in the expected array format
301-
var resourceMetrics = new object[]
283+
foreach (var scopeMetric in metricsData.SelectTokens("$..scope_metrics[*]"))
302284
{
303-
new
285+
if (scopeMetric["metrics"] is JArray metricsArray)
304286
{
305-
Resource = resourceMetricByResource.Key.Item1,
306-
ScopeMetrics = scopeMetrics,
307-
SchemaUrl = resourceMetricByResource.Key.Item2,
287+
var sorted = new JArray(metricsArray.OrderBy(m => m["name"]?.ToString()));
288+
scopeMetric["metrics"] = sorted;
308289
}
309-
};
290+
}
310291

292+
var formattedJson = metricsData.ToString(Formatting.Indented);
311293
var settings = VerifyHelper.GetSpanVerifierSettings();
312-
settings.AddRegexScrubber(_timeUnixNanoRegexMetrics, @"TimeUnixNano"": <DateTimeOffset.Now>");
313-
314294
var suffix = GetSuffix(packageVersion);
315295
var fileName = $"{nameof(OpenTelemetrySdkTests)}.SubmitsOtlpMetrics{suffix}{snapshotName}";
316296

317-
await Verifier.Verify(resourceMetrics, settings)
297+
await Verifier.Verify(formattedJson, settings)
318298
.UseFileName(fileName)
319299
.DisableRequireUniquePrefix();
320300
}

0 commit comments

Comments
 (0)