Skip to content

Commit 832d894

Browse files
committed
Check for nil waitOp before examining error code
Change-Id: Iab5a2c41528e49c5d8e2e092a22a89c2f8edd232
1 parent 574f433 commit 832d894

File tree

5 files changed

+19
-9
lines changed

5 files changed

+19
-9
lines changed

pkg/common/utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ func isGoogleAPIError(err error) (codes.Code, error) {
363363
}
364364

365365
func loggedErrorForCode(msg string, code codes.Code, err error) error {
366-
klog.Errorf(msg+"%v", err.Error())
366+
klog.Errorf("%v: %s %v", code, msg, err.Error())
367367
return status.Errorf(code, msg+"%v", err.Error())
368368
}
369369

pkg/gce-cloud-provider/compute/gce-compute.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -670,7 +670,7 @@ func (cloud *CloudProvider) insertConstructedDisk(ctx context.Context, disk *com
670670
}
671671

672672
if filterErr := cloud.processDiskAlreadyExistErr(ctx, err, project, volKey, params, capacityRange, multiWriter, accessMode); filterErr != nil {
673-
return common.NewTemporaryError(codes.Unavailable, fmt.Errorf("unknown error when polling the operation: %w", err))
673+
return common.NewTemporaryError(codes.Unavailable, fmt.Errorf("unknown error when polling the operation: %w; filter error %v", err, filterErr))
674674
}
675675
return nil
676676
}
@@ -1063,7 +1063,7 @@ func (cloud *CloudProvider) waitForZonalOp(ctx context.Context, project, opName
10631063
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
10641064
waitOp, err := cloud.service.ZoneOperations.Wait(project, zone, opName).Context(ctx).Do()
10651065
// In case of service unavailable do not propagate the error so ExponentialBackoff will retry
1066-
if err != nil && waitOp.HttpErrorStatusCode == 503 {
1066+
if err != nil && waitOp != nil && waitOp.HttpErrorStatusCode == 503 {
10671067
klog.Errorf("WaitForZonalOp(op: %s, zone: %#v, err: %v) failed to poll the operation", opName, zone, err)
10681068
return false, nil
10691069
}
@@ -1080,7 +1080,7 @@ func (cloud *CloudProvider) waitForRegionalOp(ctx context.Context, project, opNa
10801080
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
10811081
waitOp, err := cloud.service.RegionOperations.Wait(project, region, opName).Context(ctx).Do()
10821082
// In case of service unavailable do not propagate the error so ExponentialBackoff will retry
1083-
if err != nil && waitOp.HttpErrorStatusCode == 503 {
1083+
if err != nil && waitOp != nil && waitOp.HttpErrorStatusCode == 503 {
10841084
klog.Errorf("WaitForRegionalOp(op: %s, region: %#v, err: %v) failed to poll the operation", opName, region, err)
10851085
return false, nil
10861086
}
@@ -1089,6 +1089,10 @@ func (cloud *CloudProvider) waitForRegionalOp(ctx context.Context, project, opNa
10891089
return false, err
10901090
}
10911091
done, err := opIsDone(waitOp)
1092+
if err != nil && status.Code(err) == codes.Unavailable {
1093+
klog.Errorf("retriable error in op: %+v", err)
1094+
return false, nil
1095+
}
10921096
return done, err
10931097
})
10941098
}
@@ -1097,7 +1101,7 @@ func (cloud *CloudProvider) waitForGlobalOp(ctx context.Context, project, opName
10971101
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
10981102
waitOp, err := cloud.service.GlobalOperations.Wait(project, opName).Context(ctx).Do()
10991103
// In case of service unavailable do not propagate the error so ExponentialBackoff will retry
1100-
if err != nil && waitOp.HttpErrorStatusCode == 503 {
1104+
if err != nil && waitOp != nil && waitOp.HttpErrorStatusCode == 503 {
11011105
klog.Errorf("WaitForGlobalOp(op: %s, err: %v) failed to poll the operation", opName, err)
11021106
return false, nil
11031107
}
@@ -1197,6 +1201,7 @@ func codeForGCEOpError(err computev1.OperationErrorErrors) codes.Code {
11971201
"QUOTA_EXCEEDED": codes.ResourceExhausted,
11981202
"ZONE_RESOURCE_POOL_EXHAUSTED": codes.Unavailable,
11991203
"ZONE_RESOURCE_POOL_EXHAUSTED_WITH_DETAILS": codes.Unavailable,
1204+
"INTERNAL_ERROR": codes.Unavailable,
12001205
"REGION_QUOTA_EXCEEDED": codes.ResourceExhausted,
12011206
"RATE_LIMIT_EXCEEDED": codes.ResourceExhausted,
12021207
"INVALID_USAGE": codes.InvalidArgument,

pkg/gce-pd-csi-driver/controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,7 @@ func (gceCS *GCEControllerServer) createSingleDeviceDisk(ctx context.Context, re
632632

633633
disk, err := gceCS.createSingleDisk(ctx, req, params, volKey, zones, accessMode)
634634
if err != nil {
635-
return nil, common.LoggedError("CreateVolume failed: %v", err)
635+
return nil, common.LoggedError("CreateVolume failed: ", err)
636636
}
637637

638638
return gceCS.generateCreateVolumeResponseWithVolumeId(disk, zones, params, dataCacheParams, enableDataCache, volumeID), nil

test/e2e/tests/multi_zone_e2e_test.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func checkSkipMultiZoneTests() {
6464
// TODO: Remove this once hyperdisk-ml SKU is supported
6565
// If you want to run these tests, set the env variable: RUN_MULTI_ZONE_TESTS=true
6666
if !runMultiZoneTests() {
67-
Skip("Not running multi-zone tests, as RUN_MULTI_ZONE_TESTS is falsy")
67+
Skip("Not running multi-zone tests without RUN_MULTI_ZONE_TESTS=true")
6868
}
6969
}
7070

@@ -1235,8 +1235,9 @@ var _ = Describe("GCE PD CSI Driver Multi-Zone", func() {
12351235
})
12361236

12371237
It("Should successfully run through entire lifecycle of a HdHA volume on instances in 2 zones", func() {
1238-
// Create new driver and client
1238+
Skip("Flaking on GCP errors. Google internal bug: 463743704")
12391239

1240+
// Create new driver and client
12401241
Expect(hyperdiskTestContexts).NotTo(BeEmpty())
12411242

12421243
zoneToContext := map[string]*remote.TestContext{}
@@ -1255,10 +1256,12 @@ var _ = Describe("GCE PD CSI Driver Multi-Zone", func() {
12551256
}
12561257

12571258
Expect(len(zoneToContext)).To(Equal(2), "Must have instances in 2 zones")
1259+
klog.Infof("Using zones %s and %s", zones[0], zones[1])
12581260

12591261
controllerContext := zoneToContext[zones[0]]
12601262
controllerClient := controllerContext.Client
12611263
controllerInstance := controllerContext.Instance
1264+
klog.Infof("Using controller instance %v", controllerInstance.GetName())
12621265

12631266
p, _, _ := controllerInstance.GetIdentity()
12641267

@@ -1321,6 +1324,8 @@ var _ = Describe("GCE PD CSI Driver Multi-Zone", func() {
13211324
})
13221325

13231326
It("Should create a HdHA instance, write to it, force-attach it to another instance, and read the same data", func() {
1327+
Skip("Flaking on GCP errors. Google internal bug: 463743704")
1328+
13241329
Expect(hyperdiskTestContexts).NotTo(BeEmpty())
13251330

13261331
zoneToContext := map[string]*remote.TestContext{}

test/e2e/tests/setup_e2e_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ var (
6060
// Multi-writer is only supported on M3, C3, and N4
6161
// https://cloud.google.com/compute/docs/disks/sharing-disks-between-vms#hd-multi-writer
6262
hdMachineType = flag.String("hyperdisk-machine-type", "c3-standard-4", "Type of machine to provision instance on, or `none' to skip")
63-
hdMinCpuPlatform = flag.String("hyperdisk-min-cpu-platform", "sapphirerapids", "Minimum CPU architecture")
63+
hdMinCpuPlatform = flag.String("hyperdisk-min-cpu-platform", "Intel Sapphire Rapids", "Minimum CPU architecture")
6464

6565
// Some architectures don't have local ssd. Give way to opt out of tests like datacache.
6666
skipLocalSsdTests = flag.Bool("skip-local-ssd-tests", false, "Skip local ssd tests like datacache")

0 commit comments

Comments (0)