Skip to content

Commit 0d58850

Browse files
authored
Merge pull request #2238 from mattcary/wait-err
Check for nil waitOp before examining error code
2 parents d9cbaf1 + 832d894 commit 0d58850

File tree

5 files changed

+19
-9
lines changed

5 files changed

+19
-9
lines changed

pkg/common/utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ func isGoogleAPIError(err error) (codes.Code, error) {
363363
}
364364

365365
func loggedErrorForCode(msg string, code codes.Code, err error) error {
366-
klog.Errorf(msg+"%v", err.Error())
366+
klog.Errorf("%v: %s %v", code, msg, err.Error())
367367
return status.Errorf(code, msg+"%v", err.Error())
368368
}
369369

pkg/gce-cloud-provider/compute/gce-compute.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ func (cloud *CloudProvider) insertConstructedDisk(ctx context.Context, disk *com
671671
}
672672

673673
if filterErr := cloud.processDiskAlreadyExistErr(ctx, err, project, volKey, params, capacityRange, multiWriter, accessMode); filterErr != nil {
674-
return common.NewTemporaryError(codes.Unavailable, fmt.Errorf("unknown error when polling the operation: %w", err))
674+
return common.NewTemporaryError(codes.Unavailable, fmt.Errorf("unknown error when polling the operation: %w; filter error %v", err, filterErr))
675675
}
676676
return nil
677677
}
@@ -1064,7 +1064,7 @@ func (cloud *CloudProvider) waitForZonalOp(ctx context.Context, project, opName
10641064
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
10651065
waitOp, err := cloud.service.ZoneOperations.Wait(project, zone, opName).Context(ctx).Do()
10661066
// In case of service unavailable, do not propagate the error so ExponentialBackoff will retry
1067-
if err != nil && waitOp.HttpErrorStatusCode == 503 {
1067+
if err != nil && waitOp != nil && waitOp.HttpErrorStatusCode == 503 {
10681068
klog.Errorf("WaitForZonalOp(op: %s, zone: %#v, err: %v) failed to poll the operation", opName, zone, err)
10691069
return false, nil
10701070
}
@@ -1081,7 +1081,7 @@ func (cloud *CloudProvider) waitForRegionalOp(ctx context.Context, project, opNa
10811081
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
10821082
waitOp, err := cloud.service.RegionOperations.Wait(project, region, opName).Context(ctx).Do()
10831083
// In case of service unavailable, do not propagate the error so ExponentialBackoff will retry
1084-
if err != nil && waitOp.HttpErrorStatusCode == 503 {
1084+
if err != nil && waitOp != nil && waitOp.HttpErrorStatusCode == 503 {
10851085
klog.Errorf("WaitForRegionalOp(op: %s, region: %#v, err: %v) failed to poll the operation", opName, region, err)
10861086
return false, nil
10871087
}
@@ -1090,6 +1090,10 @@ func (cloud *CloudProvider) waitForRegionalOp(ctx context.Context, project, opNa
10901090
return false, err
10911091
}
10921092
done, err := opIsDone(waitOp)
1093+
if err != nil && status.Code(err) == codes.Unavailable {
1094+
klog.Errorf("retriable error in op: %+v", err)
1095+
return false, nil
1096+
}
10931097
return done, err
10941098
})
10951099
}
@@ -1098,7 +1102,7 @@ func (cloud *CloudProvider) waitForGlobalOp(ctx context.Context, project, opName
10981102
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
10991103
waitOp, err := cloud.service.GlobalOperations.Wait(project, opName).Context(ctx).Do()
11001104
// In case of service unavailable, do not propagate the error so ExponentialBackoff will retry
1101-
if err != nil && waitOp.HttpErrorStatusCode == 503 {
1105+
if err != nil && waitOp != nil && waitOp.HttpErrorStatusCode == 503 {
11021106
klog.Errorf("WaitForGlobalOp(op: %s, err: %v) failed to poll the operation", opName, err)
11031107
return false, nil
11041108
}
@@ -1198,6 +1202,7 @@ func codeForGCEOpError(err computev1.OperationErrorErrors) codes.Code {
11981202
"QUOTA_EXCEEDED": codes.ResourceExhausted,
11991203
"ZONE_RESOURCE_POOL_EXHAUSTED": codes.Unavailable,
12001204
"ZONE_RESOURCE_POOL_EXHAUSTED_WITH_DETAILS": codes.Unavailable,
1205+
"INTERNAL_ERROR": codes.Unavailable,
12011206
"REGION_QUOTA_EXCEEDED": codes.ResourceExhausted,
12021207
"RATE_LIMIT_EXCEEDED": codes.ResourceExhausted,
12031208
"INVALID_USAGE": codes.InvalidArgument,

pkg/gce-pd-csi-driver/controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ func (gceCS *GCEControllerServer) createSingleDeviceDisk(ctx context.Context, re
642642

643643
disk, err := gceCS.createSingleDisk(ctx, req, params, volKey, zones, accessMode)
644644
if err != nil {
645-
return nil, common.LoggedError("CreateVolume failed: %v", err)
645+
return nil, common.LoggedError("CreateVolume failed: ", err)
646646
}
647647

648648
return gceCS.generateCreateVolumeResponseWithVolumeId(disk, zones, params, dataCacheParams, enableDataCache, volumeID), nil

test/e2e/tests/multi_zone_e2e_test.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func checkSkipMultiZoneTests() {
6464
// TODO: Remove this once hyperdisk-ml SKU is supported
6565
// If you want to run these tests, set the env variable: RUN_MULTI_ZONE_TESTS=true
6666
if !runMultiZoneTests() {
67-
Skip("Not running multi-zone tests, as RUN_MULTI_ZONE_TESTS is falsy")
67+
Skip("Not running multi-zone tests without RUN_MULTI_ZONE_TESTS=true")
6868
}
6969
}
7070

@@ -1235,8 +1235,9 @@ var _ = Describe("GCE PD CSI Driver Multi-Zone", func() {
12351235
})
12361236

12371237
It("Should successfully run through entire lifecycle of a HdHA volume on instances in 2 zones", func() {
1238-
// Create new driver and client
1238+
Skip("Flaking on GCP errors. Google internal bug: 463743704")
12391239

1240+
// Create new driver and client
12401241
Expect(hyperdiskTestContexts).NotTo(BeEmpty())
12411242

12421243
zoneToContext := map[string]*remote.TestContext{}
@@ -1255,10 +1256,12 @@ var _ = Describe("GCE PD CSI Driver Multi-Zone", func() {
12551256
}
12561257

12571258
Expect(len(zoneToContext)).To(Equal(2), "Must have instances in 2 zones")
1259+
klog.Infof("Using zones %s and %s", zones[0], zones[1])
12581260

12591261
controllerContext := zoneToContext[zones[0]]
12601262
controllerClient := controllerContext.Client
12611263
controllerInstance := controllerContext.Instance
1264+
klog.Infof("Using controller instance %v", controllerInstance.GetName())
12621265

12631266
p, _, _ := controllerInstance.GetIdentity()
12641267

@@ -1321,6 +1324,8 @@ var _ = Describe("GCE PD CSI Driver Multi-Zone", func() {
13211324
})
13221325

13231326
It("Should create a HdHA instance, write to it, force-attach it to another instance, and read the same data", func() {
1327+
Skip("Flaking on GCP errors. Google internal bug: 463743704")
1328+
13241329
Expect(hyperdiskTestContexts).NotTo(BeEmpty())
13251330

13261331
zoneToContext := map[string]*remote.TestContext{}

test/e2e/tests/setup_e2e_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ var (
6060
// Multi-writer is only supported on M3, C3, and N4
6161
// https://cloud.google.com/compute/docs/disks/sharing-disks-between-vms#hd-multi-writer
6262
hdMachineType = flag.String("hyperdisk-machine-type", "c3-standard-4", "Type of machine to provision instance on, or `none' to skip")
63-
hdMinCpuPlatform = flag.String("hyperdisk-min-cpu-platform", "sapphirerapids", "Minimum CPU architecture")
63+
hdMinCpuPlatform = flag.String("hyperdisk-min-cpu-platform", "Intel Sapphire Rapids", "Minimum CPU architecture")
6464

6565
// Some architectures don't have local ssd. Give way to opt out of tests like datacache.
6666
skipLocalSsdTests = flag.Bool("skip-local-ssd-tests", false, "Skip local ssd tests like datacache")

0 commit comments

Comments
 (0)