Skip to content

Commit b1a94a4

Browse files
authored
xds: implement server feature fail_on_data_errors (#12544)
Implements server feature `fail_on_data_errors`, per gRFC A88: https://github.com/grpc/proposal/blob/master/A88-xds-data-error-handling.md
1 parent 55ae1d0 commit b1a94a4

File tree

9 files changed

+553
-34
lines changed

9 files changed

+553
-34
lines changed

xds/src/main/java/io/grpc/xds/client/Bootstrapper.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,18 +65,22 @@ public abstract static class ServerInfo {
6565

6666
public abstract boolean resourceTimerIsTransientError();
6767

68+
public abstract boolean failOnDataErrors();
69+
6870
@VisibleForTesting
6971
public static ServerInfo create(String target, @Nullable Object implSpecificConfig) {
7072
return new AutoValue_Bootstrapper_ServerInfo(target, implSpecificConfig,
71-
false, false, false);
73+
false, false, false, false);
7274
}
7375

7476
@VisibleForTesting
7577
public static ServerInfo create(
76-
String target, Object implSpecificConfig, boolean ignoreResourceDeletion,
77-
boolean isTrustedXdsServer, boolean resourceTimerIsTransientError) {
78+
String target, Object implSpecificConfig,
79+
boolean ignoreResourceDeletion, boolean isTrustedXdsServer,
80+
boolean resourceTimerIsTransientError, boolean failOnDataErrors) {
7881
return new AutoValue_Bootstrapper_ServerInfo(target, implSpecificConfig,
79-
ignoreResourceDeletion, isTrustedXdsServer, resourceTimerIsTransientError);
82+
ignoreResourceDeletion, isTrustedXdsServer,
83+
resourceTimerIsTransientError, failOnDataErrors);
8084
}
8185
}
8286

xds/src/main/java/io/grpc/xds/client/BootstrapperImpl.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public abstract class BootstrapperImpl extends Bootstrapper {
5858
private static final String SERVER_FEATURE_TRUSTED_XDS_SERVER = "trusted_xds_server";
5959
private static final String
6060
SERVER_FEATURE_RESOURCE_TIMER_IS_TRANSIENT_ERROR = "resource_timer_is_transient_error";
61+
private static final String SERVER_FEATURE_FAIL_ON_DATA_ERRORS = "fail_on_data_errors";
6162

6263
@VisibleForTesting
6364
static boolean enableXdsFallback = GrpcUtil.getFlag(GRPC_EXPERIMENTAL_XDS_FALLBACK, true);
@@ -257,6 +258,7 @@ private List<ServerInfo> parseServerInfos(List<?> rawServerConfigs, XdsLogger lo
257258

258259
boolean resourceTimerIsTransientError = false;
259260
boolean ignoreResourceDeletion = false;
261+
boolean failOnDataErrors = false;
260262
// "For forward compatibility reasons, the client will ignore any entry in the list that it
261263
// does not understand, regardless of type."
262264
List<?> serverFeatures = JsonUtil.getList(serverConfig, "server_features");
@@ -267,12 +269,14 @@ private List<ServerInfo> parseServerInfos(List<?> rawServerConfigs, XdsLogger lo
267269
}
268270
resourceTimerIsTransientError = xdsDataErrorHandlingEnabled
269271
&& serverFeatures.contains(SERVER_FEATURE_RESOURCE_TIMER_IS_TRANSIENT_ERROR);
272+
failOnDataErrors = xdsDataErrorHandlingEnabled
273+
&& serverFeatures.contains(SERVER_FEATURE_FAIL_ON_DATA_ERRORS);
270274
}
271275
servers.add(
272276
ServerInfo.create(serverUri, implSpecificConfig, ignoreResourceDeletion,
273277
serverFeatures != null
274278
&& serverFeatures.contains(SERVER_FEATURE_TRUSTED_XDS_SERVER),
275-
resourceTimerIsTransientError));
279+
resourceTimerIsTransientError, failOnDataErrors));
276280
}
277281
return servers.build();
278282
}

xds/src/main/java/io/grpc/xds/client/XdsClientImpl.java

Lines changed: 46 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -603,16 +603,20 @@ private <T extends ResourceUpdate> void handleResourceUpdate(
603603
}
604604

605605
if (invalidResources.contains(resourceName)) {
606-
// The resource update is invalid. Capture the error without notifying the watchers.
606+
// The resource update is invalid (NACK). Handle as a data error.
607607
subscriber.onRejected(args.versionInfo, updateTime, errorDetail);
608-
}
609-
610-
if (invalidResources.contains(resourceName)) {
611-
// The resource is missing. Reuse the cached resource if possible.
612-
if (subscriber.data == null) {
613-
// No cached data. Notify the watchers of an invalid update.
614-
subscriber.onError(Status.UNAVAILABLE.withDescription(errorDetail), processingTracker);
608+
609+
// Handle data errors (NACKs) based on fail_on_data_errors server feature.
610+
// When xdsDataErrorHandlingEnabled is true and fail_on_data_errors is present,
611+
// delete cached data so onError will call onResourceChanged instead of onAmbientError.
612+
// When xdsDataErrorHandlingEnabled is false, use old behavior (always keep cached data).
613+
if (BootstrapperImpl.xdsDataErrorHandlingEnabled && subscriber.data != null
614+
&& args.serverInfo.failOnDataErrors()) {
615+
subscriber.data = null;
615616
}
617+
// Call onError, which will decide whether to call onResourceChanged or onAmbientError
618+
// based on whether data exists after the above deletion.
619+
subscriber.onError(Status.UNAVAILABLE.withDescription(errorDetail), processingTracker);
616620
continue;
617621
}
618622

@@ -866,20 +870,42 @@ void onAbsent(@Nullable ProcessingTracker processingTracker, ServerInfo serverIn
866870
return;
867871
}
868872

869-
// Ignore deletion of State of the World resources when this feature is on,
870-
// and the resource is reusable.
873+
// Handle data errors (resource deletions) based on fail_on_data_errors server feature.
874+
// When xdsDataErrorHandlingEnabled is true and fail_on_data_errors is not present,
875+
// we treat deletions as ambient errors and keep using the cached resource.
876+
// When fail_on_data_errors is present, we delete the cached resource and fail.
877+
// When xdsDataErrorHandlingEnabled is false, use the old behavior (ignore_resource_deletion).
871878
boolean ignoreResourceDeletionEnabled = serverInfo.ignoreResourceDeletion();
872-
if (ignoreResourceDeletionEnabled && type.isFullStateOfTheWorld() && data != null) {
873-
if (!resourceDeletionIgnored) {
874-
logger.log(XdsLogLevel.FORCE_WARNING,
875-
"xds server {0}: ignoring deletion for resource type {1} name {2}}",
876-
serverInfo.target(), type, resource);
877-
resourceDeletionIgnored = true;
879+
boolean failOnDataErrors = serverInfo.failOnDataErrors();
880+
boolean xdsDataErrorHandlingEnabled = BootstrapperImpl.xdsDataErrorHandlingEnabled;
881+
882+
if (type.isFullStateOfTheWorld() && data != null) {
883+
// New behavior (per gRFC A88): Default is to treat deletions as ambient errors
884+
if (xdsDataErrorHandlingEnabled && !failOnDataErrors) {
885+
if (!resourceDeletionIgnored) {
886+
logger.log(XdsLogLevel.FORCE_WARNING,
887+
"xds server {0}: ignoring deletion for resource type {1} name {2}}",
888+
serverInfo.target(), type, resource);
889+
resourceDeletionIgnored = true;
890+
}
891+
Status deletionStatus = Status.NOT_FOUND.withDescription(
892+
"Resource " + resource + " deleted from server");
893+
onAmbientError(deletionStatus, processingTracker);
894+
return;
895+
}
896+
// Old behavior: Use ignore_resource_deletion server feature
897+
if (!xdsDataErrorHandlingEnabled && ignoreResourceDeletionEnabled) {
898+
if (!resourceDeletionIgnored) {
899+
logger.log(XdsLogLevel.FORCE_WARNING,
900+
"xds server {0}: ignoring deletion for resource type {1} name {2}}",
901+
serverInfo.target(), type, resource);
902+
resourceDeletionIgnored = true;
903+
}
904+
Status deletionStatus = Status.NOT_FOUND.withDescription(
905+
"Resource " + resource + " deleted from server");
906+
onAmbientError(deletionStatus, processingTracker);
907+
return;
878908
}
879-
Status deletionStatus = Status.NOT_FOUND.withDescription(
880-
"Resource " + resource + " deleted from server");
881-
onAmbientError(deletionStatus, processingTracker);
882-
return;
883909
}
884910

885911
logger.log(XdsLogLevel.INFO, "Conclude {0} resource {1} not exist", type, resource);

xds/src/test/java/io/grpc/xds/GrpcBootstrapperImplTest.java

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,52 @@ public void serverFeatures_ignoresUnknownValues() throws XdsInitializationExcept
723723
assertThat(serverInfo.isTrustedXdsServer()).isTrue();
724724
}
725725

726+
@Test
727+
public void serverFeature_failOnDataErrors() throws XdsInitializationException {
728+
BootstrapperImpl.xdsDataErrorHandlingEnabled = true;
729+
String rawData = "{\n"
730+
+ " \"xds_servers\": [\n"
731+
+ " {\n"
732+
+ " \"server_uri\": \"" + SERVER_URI + "\",\n"
733+
+ " \"channel_creds\": [\n"
734+
+ " {\"type\": \"insecure\"}\n"
735+
+ " ],\n"
736+
+ " \"server_features\": [\"fail_on_data_errors\"]\n"
737+
+ " }\n"
738+
+ " ]\n"
739+
+ "}";
740+
741+
bootstrapper.setFileReader(createFileReader(BOOTSTRAP_FILE_PATH, rawData));
742+
BootstrapInfo info = bootstrapper.bootstrap();
743+
ServerInfo serverInfo = Iterables.getOnlyElement(info.servers());
744+
assertThat(serverInfo.target()).isEqualTo(SERVER_URI);
745+
assertThat(serverInfo.implSpecificConfig()).isInstanceOf(InsecureChannelCredentials.class);
746+
assertThat(serverInfo.failOnDataErrors()).isTrue();
747+
BootstrapperImpl.xdsDataErrorHandlingEnabled = false;
748+
}
749+
750+
@Test
751+
public void serverFeature_failOnDataErrors_requiresEnvVar() throws XdsInitializationException {
752+
BootstrapperImpl.xdsDataErrorHandlingEnabled = false;
753+
String rawData = "{\n"
754+
+ " \"xds_servers\": [\n"
755+
+ " {\n"
756+
+ " \"server_uri\": \"" + SERVER_URI + "\",\n"
757+
+ " \"channel_creds\": [\n"
758+
+ " {\"type\": \"insecure\"}\n"
759+
+ " ],\n"
760+
+ " \"server_features\": [\"fail_on_data_errors\"]\n"
761+
+ " }\n"
762+
+ " ]\n"
763+
+ "}";
764+
765+
bootstrapper.setFileReader(createFileReader(BOOTSTRAP_FILE_PATH, rawData));
766+
BootstrapInfo info = bootstrapper.bootstrap();
767+
ServerInfo serverInfo = Iterables.getOnlyElement(info.servers());
768+
// Should be false when env var is not enabled
769+
assertThat(serverInfo.failOnDataErrors()).isFalse();
770+
}
771+
726772
@Test
727773
public void notFound() {
728774
bootstrapper.bootstrapPathFromEnvVar = null;

xds/src/test/java/io/grpc/xds/GrpcXdsClientImplDataTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3614,7 +3614,7 @@ private static Filter buildHttpConnectionManagerFilter(HttpFilter... httpFilters
36143614

36153615
private XdsResourceType.Args getXdsResourceTypeArgs(boolean isTrustedServer) {
36163616
return new XdsResourceType.Args(
3617-
ServerInfo.create("http://td", "", false, isTrustedServer, false), "1.0", null, null, null, null
3617+
ServerInfo.create("http://td", "", false, isTrustedServer, false, false), "1.0", null, null, null, null
36183618
);
36193619
}
36203620
}

0 commit comments

Comments
 (0)