Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import com.azure.cosmos.implementation.LeaseNotFoundException;
import com.azure.cosmos.implementation.OperationType;
import com.azure.cosmos.implementation.PartitionIsMigratingException;
import com.azure.cosmos.implementation.PartitionKeyRangeGoneException;
import com.azure.cosmos.implementation.PartitionKeyRangeIsSplittingException;
import com.azure.cosmos.implementation.RequestTimeoutException;
import com.azure.cosmos.implementation.ResourceType;
Expand Down Expand Up @@ -325,6 +326,66 @@ public void shouldRetryWithPartitionKeyRangeIsSplittingException() {

}

/**
 * Verifies that a {@link PartitionKeyRangeGoneException} marked via
 * {@code markRetryWithRoutingMapRefresh()} is retried by {@link GoneAndRetryWithRetryPolicy}, and that the
 * request is prepared for re-resolution: a partition key range refresh is forced and the cached
 * resolved range, quorum-selected LSN and quorum-selected store response are all cleared.
 */
@Test(groups = { "unit" }, timeOut = TIMEOUT)
public void shouldRetryWithAddressResolutionPartitionKeyRangeGoneException() {
    RxDocumentServiceRequest request = RxDocumentServiceRequest.create(
        mockDiagnosticsClientContext(),
        OperationType.Read,
        ResourceType.Document);
    GoneAndRetryWithRetryPolicy goneAndRetryWithRetryPolicy = new GoneAndRetryWithRetryPolicy(request, 30);

    Mono<ShouldRetryResult> singleShouldRetry = goneAndRetryWithRetryPolicy
        .shouldRetry(new PartitionKeyRangeGoneException().markRetryWithRoutingMapRefresh());
    ShouldRetryResult shouldRetryResult = singleShouldRetry.block();

    assertThat(shouldRetryResult.shouldRetry).isTrue();

    // The policy must force a refresh and clear every piece of request state that it resets for
    // partition key range refreshes, not just a subset of it.
    assertThat(request.forcePartitionKeyRangeRefresh).isTrue();
    assertThat(request.requestContext.resolvedPartitionKeyRange).isNull();
    assertThat(request.requestContext.quorumSelectedLSN).isEqualTo(-1);
    assertThat(request.requestContext.quorumSelectedStoreResponse).isNull();

    assertThat(shouldRetryResult.policyArg.getValue0()).isFalse();
}

/**
 * Verifies that a {@link PartitionKeyRangeGoneException} which has not been marked via
 * {@code markRetryWithRoutingMapRefresh()} is not retried by {@link GoneAndRetryWithRetryPolicy}.
 */
@Test(groups = { "unit" }, timeOut = TIMEOUT)
public void shouldNotRetryWithPartitionKeyRangeGoneException() {
    RxDocumentServiceRequest readRequest = RxDocumentServiceRequest.create(
        mockDiagnosticsClientContext(),
        OperationType.Read,
        ResourceType.Document);
    GoneAndRetryWithRetryPolicy policy = new GoneAndRetryWithRetryPolicy(readRequest, 30);

    // Deliberately left unmarked: markRetryWithRoutingMapRefresh() is never called on this instance.
    PartitionKeyRangeGoneException unmarkedException = new PartitionKeyRangeGoneException();
    ShouldRetryResult result = policy.shouldRetry(unmarkedException).block();

    assertThat(result.shouldRetry).isFalse();
}

/**
 * Verifies the exhausted-retry path for a {@link PartitionKeyRangeGoneException} marked for routing-map
 * refresh: with a policy constructed with {@code 0} as its second argument (presumably the retry time
 * budget - confirm against the constructor), the first attempt is still retried, while the second attempt
 * is rejected with a {@link CosmosException} carrying status 503 and the
 * {@code PARTITION_KEY_RANGE_GONE_EXCEEDED_RETRY_LIMIT} sub-status.
 */
@Test(groups = { "unit" }, timeOut = TIMEOUT)
public void shouldWrapAddressResolutionPartitionKeyRangeGoneExceptionWithServiceUnavailableWhenRetryBudgetExhausted() {
    RxDocumentServiceRequest readRequest = RxDocumentServiceRequest.create(
        mockDiagnosticsClientContext(),
        OperationType.Read,
        ResourceType.Document);
    GoneAndRetryWithRetryPolicy policy = new GoneAndRetryWithRetryPolicy(readRequest, 0);

    // First attempt: still expected to be retried.
    ShouldRetryResult firstAttempt = policy
        .shouldRetry(new PartitionKeyRangeGoneException().markRetryWithRoutingMapRefresh())
        .block();
    assertThat(firstAttempt.shouldRetry).isTrue();

    // Second attempt: no retry, and the failure is surfaced as a wrapped CosmosException.
    ShouldRetryResult secondAttempt = policy
        .shouldRetry(new PartitionKeyRangeGoneException().markRetryWithRoutingMapRefresh())
        .block();
    assertThat(secondAttempt.shouldRetry).isFalse();
    assertThat(secondAttempt.exception).isInstanceOf(CosmosException.class);

    CosmosException surfaced = (CosmosException) secondAttempt.exception;
    assertThat(surfaced.getStatusCode())
        .isEqualTo(HttpConstants.StatusCodes.SERVICE_UNAVAILABLE);
    assertThat(surfaced.getSubStatusCode())
        .isEqualTo(HttpConstants.SubStatusCodes.PARTITION_KEY_RANGE_GONE_EXCEEDED_RETRY_LIMIT);
}

/**
* No retry on bad request exception
*/
Expand Down
1 change: 1 addition & 0 deletions sdk/cosmos/azure-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#### Breaking Changes

#### Bugs Fixed
* Fixed direct connectivity retries for `PartitionKeyRangeGoneException` thrown during address resolution, so that stale partition key ranges trigger a routing-map refresh and exhausted retries surface as a 503 with the `PARTITION_KEY_RANGE_GONE_EXCEEDED_RETRY_LIMIT` sub-status. - See [Issue 49381](https://github.com/Azure/azure-sdk-for-java/issues/49381).
* Unified request-level consistency override behavior across transports: invalid attempts to upgrade the request consistency level above the account default are now silently ignored instead of returning `BadRequest` in some gateway paths. - See PR [49606](https://github.com/Azure/azure-sdk-for-java/pull/49606).

#### Other Changes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
* This exception is thrown when DocumentServiceRequest contains x-ms-documentdb-partitionkeyrangeid
* header and such range id doesn't exist.
* <p>
* No retries should be made in this case, as either split or merge might have happened and query/readfeed
* must take appropriate actions.
* No retries should generally be made in this case, as either split or merge might have happened and query/readfeed
* must take appropriate actions. Direct-mode address resolution may opt into retrying with a routing map refresh when
* this exception is caused by stale address or routing state.
*/
public class PartitionKeyRangeGoneException extends CosmosException {
private boolean shouldRetryWithRoutingMapRefresh;

/**
* Instantiates a new Partition key range gone exception.
Expand Down Expand Up @@ -85,4 +87,13 @@ private void setSubstatus() {
this.getResponseHeaders().put(WFConstants.BackendHeaders.SUB_STATUS,
Integer.toString(HttpConstants.SubStatusCodes.PARTITION_KEY_RANGE_GONE));
}

/**
 * Indicates whether this exception has been marked as retriable with a routing map refresh.
 *
 * @return {@code true} if {@code markRetryWithRoutingMapRefresh()} has been called on this instance,
 * {@code false} otherwise.
 */
public boolean shouldRetryWithRoutingMapRefresh() {
    return shouldRetryWithRoutingMapRefresh;
}

public PartitionKeyRangeGoneException markRetryWithRoutingMapRefresh() {
this.shouldRetryWithRoutingMapRefresh = true;
return this;
Comment thread
arnabnandy7 marked this conversation as resolved.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,9 @@ private ResolutionResult handleRangeAddressResolutionFailure(
RMResources.PartitionKeyRangeNotFound,
request.getPartitionKeyRangeIdentity().getPartitionKeyRangeId(),
request.getPartitionKeyRangeIdentity().getCollectionRid());
throw BridgeInternal.setResourceAddress(new PartitionKeyRangeGoneException(errorMessage), request.requestContext.resourcePhysicalAddress);
throw BridgeInternal.setResourceAddress(
new PartitionKeyRangeGoneException(errorMessage).markRetryWithRoutingMapRefresh(),
request.requestContext.resourcePhysicalAddress);
Comment thread
arnabnandy7 marked this conversation as resolved.
}
logger.debug("handleRangeAddressResolutionFailure returns null");
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,8 @@ private Mono<AddressInformation[]> getAddressesForRangeId(
partitionKeyRangeId,
collectionRid);

PartitionKeyRangeGoneException e = new PartitionKeyRangeGoneException(errorMessage);
PartitionKeyRangeGoneException e = new PartitionKeyRangeGoneException(errorMessage)
.markRetryWithRoutingMapRefresh();
BridgeInternal.setResourceAddress(e, collectionRid);

return Mono.error(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ private static ImplementationBridgeHelpers.CosmosExceptionHelper.CosmosException
return ImplementationBridgeHelpers.CosmosExceptionHelper.getCosmosExceptionAccessor();
}

/**
 * Checks whether the given exception is a {@link PartitionKeyRangeGoneException} that has been marked
 * for retry with a routing map refresh.
 *
 * @param exception the exception to inspect; a {@code null} value yields {@code false}.
 * @return {@code true} only for a {@link PartitionKeyRangeGoneException} whose
 * {@code shouldRetryWithRoutingMapRefresh()} returns {@code true}.
 */
private static boolean isPartitionKeyRangeGoneExceptionWithRoutingMapRefresh(Exception exception) {
    if (!(exception instanceof PartitionKeyRangeGoneException)) {
        return false;
    }

    PartitionKeyRangeGoneException rangeGoneException = (PartitionKeyRangeGoneException) exception;
    return rangeGoneException.shouldRetryWithRoutingMapRefresh();
}

private final static Logger logger = LoggerFactory.getLogger(GoneAndRetryWithRetryPolicy.class);
private final GoneRetryPolicy goneRetryPolicy;
private final RetryWithRetryPolicy retryWithRetryPolicy;
Expand Down Expand Up @@ -127,6 +132,7 @@ private boolean isNonRetryableException(Exception exception) {
if (exception instanceof GoneException ||
exception instanceof PartitionIsMigratingException ||
exception instanceof PartitionKeyRangeIsSplittingException ||
isPartitionKeyRangeGoneExceptionWithRoutingMapRefresh(exception) ||
exception instanceof LeaseNotFoundException) {

return false;
Expand Down Expand Up @@ -292,6 +298,8 @@ private Pair<Mono<ShouldRetryResult>, Boolean> handleException(Exception excepti
return handlePartitionIsMigratingException((PartitionIsMigratingException)exception);
} else if (exception instanceof PartitionKeyRangeIsSplittingException) {
return handlePartitionKeyIsSplittingException((PartitionKeyRangeIsSplittingException) exception);
} else if (isPartitionKeyRangeGoneExceptionWithRoutingMapRefresh(exception)) {
return handlePartitionKeyRangeGoneException((PartitionKeyRangeGoneException) exception);
}

throw new IllegalStateException("Invalid exception type", exception);
Expand All @@ -309,13 +317,27 @@ private Pair<Mono<ShouldRetryResult>, Boolean> handlePartitionIsMigratingExcepti
}

/**
 * Prepares the request for a retry after a {@link PartitionKeyRangeIsSplittingException}: the cached
 * routing state on the request context is cleared and a partition key range refresh is forced.
 *
 * @param exception the splitting exception that was received; only used for logging.
 * @return a pair holding a {@code null} retry result and {@code false}.
 */
private Pair<Mono<ShouldRetryResult>, Boolean> handlePartitionKeyIsSplittingException(PartitionKeyRangeIsSplittingException exception) {
    // The shared helper clears resolvedPartitionKeyRange, quorumSelectedLSN and
    // quorumSelectedStoreResponse; repeating those assignments inline here would be redundant.
    resetRequestContextForPartitionKeyRangeRefresh();
    logger.debug("Received partition key range splitting exception, will retry, {}", exception.toString());
    this.request.forcePartitionKeyRangeRefresh = true;
    return Pair.of(null, false);
}

/**
 * Prepares the request for a retry after a {@link PartitionKeyRangeGoneException} that was marked for
 * routing-map refresh: the cached routing state on the request context is cleared and a partition key
 * range refresh is forced.
 *
 * @param exception the range-gone exception that was received; only used for logging.
 * @return a pair holding a {@code null} retry result and {@code false}.
 */
private Pair<Mono<ShouldRetryResult>, Boolean> handlePartitionKeyRangeGoneException(PartitionKeyRangeGoneException exception) {
    // A PartitionKeyRangeGoneException is normally non-retriable. When it is raised while resolving
    // addresses in direct mode it typically points at stale routing/partition state, so the cached
    // target is dropped and a routing-map (partition key range) refresh is forced to let the request
    // be re-routed.
    resetRequestContextForPartitionKeyRangeRefresh();
    this.request.forcePartitionKeyRangeRefresh = true;

    logger.debug("Received partition key range gone exception, will retry, {}", exception.toString());
    return Pair.of(null, false);
}

/**
 * Clears the per-request routing state so that a subsequent attempt starts from a clean slate:
 * the resolved partition key range and the quorum-selected store response are set to {@code null},
 * and the quorum-selected LSN is set to {@code -1}.
 */
private void resetRequestContextForPartitionKeyRangeRefresh() {
    request.requestContext.resolvedPartitionKeyRange = null;
    request.requestContext.quorumSelectedLSN = -1;
    request.requestContext.quorumSelectedStoreResponse = null;
}
}

}
Loading