@@ -6,6 +6,7 @@ package queryfrontend
66import (
77 "context"
88 "encoding/json"
9+ "fmt"
910 "net/http"
1011 "net/http/httptest"
1112 "net/url"
@@ -532,6 +533,17 @@ func TestRoundTripQueryRangeCacheMiddleware(t *testing.T) {
532533}
533534
534535func TestRoundTripQueryCacheWithShardingMiddleware (t * testing.T ) {
536+ // Run the test a couple of times because the behavior of the
537+ // PromQLShardingMiddleware middleware isn't 100% predictable.
538+ // See testRoundTripQueryCacheWithShardingMiddleware for details.
539+ for i := range 10 {
540+ if ! t .Run (fmt .Sprintf ("run-%d" , i ), testRoundTripQueryCacheWithShardingMiddleware ) {
541+ return
542+ }
543+ }
544+ }
545+
546+ func testRoundTripQueryCacheWithShardingMiddleware (t * testing.T ) {
535547 testRequest := & ThanosQueryRangeRequest {
536548 Path : "/api/v1/query_range" ,
537549 Start : 0 ,
@@ -569,57 +581,69 @@ func TestRoundTripQueryCacheWithShardingMiddleware(t *testing.T) {
569581 rt , err := newFakeRoundTripper ()
570582 testutil .Ok (t , err )
571583 defer rt .Close ()
572- res , handler := promqlResultsWithFailures (3 )
584+ count , handler := promqlResultsWithFailures (3 )
573585 rt .setHandler (handler )
574586
575- for _ , tc := range []struct {
576- name string
577- req queryrange.Request
578- err bool
579- expected int64
580- }{
581- {
582- name : "query with vertical sharding" ,
583- req : testRequest ,
584- err : true ,
585- expected : 2 ,
586- },
587- {
588- name : "same query as before, both requests are executed" ,
589- req : testRequest ,
590- err : true ,
591- expected : 4 ,
592- },
593- {
594- name : "same query as before, one request is executed" ,
595- req : testRequest ,
596- err : false ,
597- expected : 5 ,
598- },
599- {
600- name : "same query as before again, no requests are executed" ,
601- req : testRequest ,
602- err : false ,
603- expected : 5 ,
604- },
605- } {
606- if ! t .Run (tc .name , func (t * testing.T ) {
607- ctx := user .InjectOrgID (context .Background (), "1" )
608- httpReq , err := NewThanosQueryRangeCodec (true ).EncodeRequest (ctx , tc .req )
609- testutil .Ok (t , err )
610-
611- _ , err = tpw (rt ).RoundTrip (httpReq )
612- if tc .err {
613- testutil .NotOk (t , err )
614- } else {
615- testutil .Ok (t , err )
616- }
617-
618- testutil .Equals (t , tc .expected , res .Load ())
619- }) {
587+ var (
588+ rtErr error
589+ res * http.Response
590+ attempts int
591+ )
592+ // Depending on the timing of operations, the PromQLShardingMiddleware
593+ // middleware needs between 3 and 4 calls before returning successfully.
594+ //
595+ // Knowing that the downstream server is configured to fail the first 3
596+ // requests, the timeline can one of the 2 cases below.
597+ //
598+ // "Best" case scenarios (3 attempts):
599+ // 1. the middleware issues 2 concurrent requests that both fail.
600+ // 2. the middleware issues 2 concurrent requests. One fails, the other
601+ // succeeds and is stored in the cache.
602+ // 3. the middleware issues 1 request for the remaining shard that succeeds
603+ // and is stored in the cache.
604+ // Or
605+ // 1. the middleware issues 1 request that fails before it could
606+ // initiate the second request
607+ // 2. the middleware issues 2 concurrent requests that both fail.
608+ // 3. the middleware issues 2 concurrent requests that both succeed and are
609+ // stored in the cache.
610+ // Note that in the last case, 1. and 2. may happen in the reverse order.
611+ //
612+ // "Worst" case scenario (4 attempts):
613+ // 1. the middleware issues 1 request that fails before it could
614+ // initiate the second request
615+ // 2. the middleware issues 1 request that fails before it could
616+ // initiate the second request
617+ // 3. the middleware issues 1 request that fails before it could
618+ // initiate the second request
619+ // 4. the middleware issues 2 concurrent requests that both succeed.
620+ for range 4 {
621+ attempts ++
622+ ctx := user .InjectOrgID (context .Background (), "1" )
623+ httpReq , err := NewThanosQueryRangeCodec (true ).EncodeRequest (ctx , testRequest )
624+ testutil .Ok (t , err )
625+
626+ res , rtErr = tpw (rt ).RoundTrip (httpReq )
627+ if rtErr == nil {
620628 break
621629 }
622630 }
631+
632+ testutil .Ok (t , rtErr )
633+ testutil .Equals (t , http .StatusOK , res .StatusCode )
634+ testutil .Assert (t , attempts == 3 || attempts == 4 )
635+
636+ // Check that a subsequent request is served from the cache instead of
637+ // hitting the server.
638+ n := count .Load ()
639+ ctx := user .InjectOrgID (context .Background (), "1" )
640+ httpReq , err := NewThanosQueryRangeCodec (true ).EncodeRequest (ctx , testRequest )
641+ testutil .Ok (t , err )
642+
643+ _ , rtErr = tpw (rt ).RoundTrip (httpReq )
644+ testutil .Ok (t , rtErr )
645+ testutil .Equals (t , http .StatusOK , res .StatusCode )
646+ testutil .Equals (t , n , count .Load ())
623647}
624648
625649// TestRoundTripLabelsCacheMiddleware tests the cache middleware for labels requests.
@@ -876,10 +900,9 @@ func promqlResults(fail bool) (*int, http.Handler) {
876900}
877901
878902// promqlResultsWithFailures is a mock handler used to test split and cache middleware.
879- // it will return a failed response numFailures times .
903+ // it will return a failed response for the first numFailures requests .
880904func promqlResultsWithFailures (numFailures int ) (* atomic.Int64 , http.Handler ) {
881905 count := & atomic.Int64 {}
882- var lock sync.Mutex
883906 q := queryrange.PrometheusResponse {
884907 Status : "success" ,
885908 Data : queryrange.PrometheusData {
@@ -896,33 +919,16 @@ func promqlResultsWithFailures(numFailures int) (*atomic.Int64, http.Handler) {
896919 },
897920 }
898921
899- cond := sync .NewCond (& sync.Mutex {})
900- cond .L .Lock ()
901922 return count , http .HandlerFunc (func (w http.ResponseWriter , r * http.Request ) {
902- lock .Lock ()
903- defer lock .Unlock ()
904-
905923 // Set fail in the response code to test retry.
906- if numFailures > 0 {
907- numFailures --
908-
909- // Wait for a successful request.
910- // Release the lock to allow other requests to execute.
911- if numFailures == 0 {
912- lock .Unlock ()
913- cond .Wait ()
914- <- time .After (500 * time .Millisecond )
915- lock .Lock ()
916- }
924+ if count .Inc () <= int64 (numFailures ) {
917925 w .WriteHeader (500 )
926+ return
918927 }
928+
919929 if err := json .NewEncoder (w ).Encode (q ); err != nil {
920930 panic (err )
921931 }
922- if numFailures == 0 {
923- cond .Broadcast ()
924- }
925- count .Add (1 )
926932 })
927933}
928934
0 commit comments