[GCE] Don't delete VM entity on transient error

On transient error we want to keep checking the same hostname. Only
delete the entity if creation definitively failed.

Bug: 897355
Change-Id: Ied3873961146c669aac6d16aecb5129ae46f566b
Reviewed-on: https://chromium-review.googlesource.com/c/infra/luci/luci-go/+/1572451
Reviewed-by: Andrii Shyshkalov <tandrii@chromium.org>
Commit-Queue: smut <smut@google.com>
diff --git a/gce/appengine/backend/instances.go b/gce/appengine/backend/instances.go
index 00444dd..bb040e1 100644
--- a/gce/appengine/backend/instances.go
+++ b/gce/appengine/backend/instances.go
@@ -128,11 +128,14 @@
 			if gerr.Code == http.StatusConflict {
 				return conflictingInstance(c, vm)
 			}
+			logErrors(c, gerr)
 			metrics.UpdateFailures(c, 1, vm)
+			if gerr.Code == http.StatusTooManyRequests || gerr.Code >= 500 {
+				return errors.Reason("transiently failed to create instance").Err()
+			}
 			if err := deleteVM(c, task.Id, vm.Hostname); err != nil {
 				return errors.Annotate(err, "failed to create instance").Err()
 			}
-			logErrors(c, gerr)
 			return errors.Reason("failed to create instance").Err()
 		}
 		return errors.Annotate(err, "failed to create instance").Err()
diff --git a/gce/appengine/backend/instances_test.go b/gce/appengine/backend/instances_test.go
index 4ee2f79..d848ab9 100644
--- a/gce/appengine/backend/instances_test.go
+++ b/gce/appengine/backend/instances_test.go
@@ -103,22 +103,44 @@
 
 			Convey("error", func() {
 				Convey("http", func() {
-					rt.Handler = func(req interface{}) (int, interface{}) {
-						return http.StatusInternalServerError, nil
-					}
-					rt.Type = reflect.TypeOf(compute.Instance{})
-					datastore.Put(c, &model.VM{
-						ID:       "id",
-						Hostname: "name",
+					Convey("transient", func() {
+						rt.Handler = func(req interface{}) (int, interface{}) {
+							return http.StatusInternalServerError, nil
+						}
+						rt.Type = reflect.TypeOf(compute.Instance{})
+						datastore.Put(c, &model.VM{
+							ID:       "id",
+							Hostname: "name",
+						})
+						err := createInstance(c, &tasks.CreateInstance{
+							Id: "id",
+						})
+						So(err, ShouldErrLike, "transiently failed to create instance")
+						v := &model.VM{
+							ID: "id",
+						}
+						So(datastore.Get(c, v), ShouldBeNil)
+						So(v.Hostname, ShouldEqual, "name")
 					})
-					err := createInstance(c, &tasks.CreateInstance{
-						Id: "id",
+
+					Convey("permanent", func() {
+						rt.Handler = func(req interface{}) (int, interface{}) {
+							return http.StatusBadRequest, nil
+						}
+						rt.Type = reflect.TypeOf(compute.Instance{})
+						datastore.Put(c, &model.VM{
+							ID:       "id",
+							Hostname: "name",
+						})
+						err := createInstance(c, &tasks.CreateInstance{
+							Id: "id",
+						})
+						So(err, ShouldErrLike, "failed to create instance")
+						v := &model.VM{
+							ID: "id",
+						}
+						So(datastore.Get(c, v), ShouldEqual, datastore.ErrNoSuchEntity)
 					})
-					So(err, ShouldErrLike, "failed to create instance")
-					v := &model.VM{
-						ID: "id",
-					}
-					So(datastore.Get(c, v), ShouldEqual, datastore.ErrNoSuchEntity)
 				})
 
 				Convey("operation", func() {