Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/app/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ func (hc *testHostClient) GetReverseProxy() *httputil.ReverseProxy {
return httputil.NewSingleHostReverseProxy(hc.url)
}

func (hc *testHostClient) WaitForHostReady() error {
return nil
}

func TestListZonesSucceeds(t *testing.T) {
controller := NewApp(&testInstanceManager{}, &testAccountManager{}, nil, nil, nil, "", nil, apiv1.InfraConfig{}, &config.Config{})
ts := httptest.NewServer(controller.Handler())
Expand Down
7 changes: 7 additions & 0 deletions pkg/app/instances/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,13 @@ func (m *DockerInstanceManager) waitCreateHostOperation(host string) (*apiv1.Hos
return nil, fmt.Errorf("failed to inspect docker container: %w", err)
}
if res.State.Running {
client, err := m.GetHostClient("local", host)
if err != nil {
return nil, err
}
if err := client.WaitForHostReady(); err != nil {
return nil, err
}
return &apiv1.HostInstance{
Name: host,
}, nil
Expand Down
119 changes: 103 additions & 16 deletions pkg/app/instances/gce.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ import (
"context"
"fmt"
"log"
"net/http"
"net/url"
"path"
"regexp"
"time"

apiv1 "github.com/google/cloud-android-orchestration/api/v1"
"github.com/google/cloud-android-orchestration/pkg/app/accounts"
Expand All @@ -41,6 +43,7 @@ type GCPIMConfig struct {
UseExternalIP bool
// If true, instances created should be compatible with `acloud CLI`.
AcloudCompatible bool
HostReadyTimeout time.Duration
}

const (
Expand Down Expand Up @@ -78,28 +81,40 @@ func (m *GCEInstanceManager) ListZones() (*apiv1.ListZonesResponse, error) {
}, nil
}

func (m *GCEInstanceManager) GetHostAddr(zone string, host string) (string, error) {
instance, err := m.getHostInstance(zone, host)
if err != nil {
return "", err
}
ilen := len(instance.NetworkInterfaces)
func getHostAddrWithIns(ins *compute.Instance) (string, error) {
ilen := len(ins.NetworkInterfaces)
if ilen == 0 {
log.Printf("host instance %s in zone %s is missing a network interface", host, zone)
log.Printf("host instance %s in zone %s is missing a network interface", ins.Name, ins.Zone)
return "", errors.NewInternalError("host instance missing a network interface", nil)
}
if ilen > 1 {
log.Printf("host instance %s in zone %s has %d network interfaces", host, zone, ilen)
log.Printf("host instance %s in zone %s has %d network interfaces", ins.Name, ins.Zone, ilen)
}
return ins.NetworkInterfaces[0].NetworkIP, nil
}

func (m *GCEInstanceManager) GetHostAddr(zone string, host string) (string, error) {
ins, err := m.getHostInstance(zone, host)
if err != nil {
return "", err
}
return instance.NetworkInterfaces[0].NetworkIP, nil
return getHostAddrWithIns(ins)
}

func getHostURLWithIns(ins *compute.Instance, config *Config) (*url.URL, error) {
addr, err := getHostAddrWithIns(ins)
if err != nil {
return nil, err
}
return url.Parse(fmt.Sprintf("%s://%s:%d", config.HostOrchestratorProtocol, addr, config.GCP.HostOrchestratorPort))
}

func (m *GCEInstanceManager) GetHostURL(zone string, host string) (*url.URL, error) {
addr, err := m.GetHostAddr(zone, host)
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
return url.Parse(fmt.Sprintf("%s://%s:%d", m.Config.HostOrchestratorProtocol, addr, m.Config.GCP.HostOrchestratorPort))
return getHostURLWithIns(ins, &m.Config)
}

const operationStatusDone = "DONE"
Expand Down Expand Up @@ -253,16 +268,81 @@ func (m *GCEInstanceManager) WaitOperation(zone string, user accounts.User, name
if op.Status != operationStatusDone {
return nil, errors.NewServiceUnavailableError("Wait for operation timed out", nil)
}
getter := opResultGetter{Service: m.Service, Op: op}
return getter.Get()
getter := opResultGetter{
Service: m.Service,
Op: op,
Config: &m.Config,
}
res, err := getter.Get()
if err != nil {
return nil, err
}
if hostInst, ok := res.(*apiv1.HostInstance); ok && op.OperationType == "insert" {
return m.WaitHostAvailability(zone, user, hostInst.Name)
}
return res, nil
}

func (m *GCEInstanceManager) WaitHostAvailability(zone string, user accounts.User, host string) (*apiv1.HostInstance, error) {
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
hostInstance, err := BuildHostInstance(ins)
if err != nil {
return nil, err
}
client, err := getHostClientWithIns(ins, &m.Config)
if err != nil {
return nil, err
}
if err := m.waitForOrchestrator(zone, hostInstance, client); err != nil {
return nil, err
}
return hostInstance, nil
}

func (m *GCEInstanceManager) waitForOrchestrator(zone string, host *apiv1.HostInstance, client HostClient) error {
timeout := m.Config.GCP.HostReadyTimeout
if timeout == 0 {
timeout = 5 * time.Minute
}
retryDelay := 5 * time.Second
deadline := time.Now().Add(timeout)

for time.Now().Before(deadline) {
_, err := m.Service.Instances.Get(m.Config.GCP.ProjectID, zone, host.Name).Context(context.TODO()).Do()
if err != nil {
if apiErr, ok := err.(*googleapi.Error); ok && apiErr.Code == http.StatusNotFound {
return errors.NewNotFoundError("Host was deleted concurrently", err)
}
return fmt.Errorf("failed to check host existence: %w", err)
}

status, err := client.Get("/", "", nil)
if err == nil && status != http.StatusBadGateway {
return nil
}

time.Sleep(retryDelay)
}
return errors.NewServiceUnavailableError("Wait for host orchestrator timed out", nil)
}

func getHostClientWithIns(ins *compute.Instance, config *Config) (HostClient, error) {
url, err := getHostURLWithIns(ins, config)
if err != nil {
return nil, err
}
return NewNetHostClient(url, config.AllowSelfSignedHostSSLCertificate), nil
}

func (m *GCEInstanceManager) GetHostClient(zone string, host string) (HostClient, error) {
url, err := m.GetHostURL(zone, host)
ins, err := m.getHostInstance(zone, host)
if err != nil {
return nil, err
}
return NewNetHostClient(url, m.Config.AllowSelfSignedHostSSLCertificate), nil
return getHostClientWithIns(ins, &m.Config)
}

func (m *GCEInstanceManager) getHostInstance(zone string, host string) (*compute.Instance, error) {
Expand Down Expand Up @@ -326,6 +406,7 @@ var (
type opResultGetter struct {
Service *compute.Service
Op *compute.Operation
Config *Config
}

func (g *opResultGetter) Get() (any, error) {
Expand Down Expand Up @@ -362,7 +443,13 @@ func (g *opResultGetter) buildCreateInstanceResult() (*apiv1.HostInstance, error
if err != nil {
return nil, toAppError(err)
}
return BuildHostInstance(ins)

host, err := BuildHostInstance(ins)
if err != nil {
return nil, err
}

return host, nil
}

// Converts compute API errors to AppError if relevant, return the same error otherwise
Expand Down
118 changes: 117 additions & 1 deletion pkg/app/instances/gce_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ import (
"net/http/httptest"
"net/url"
"reflect"
"strconv"
"strings"
"testing"
"time"

apiv1 "github.com/google/cloud-android-orchestration/api/v1"
apperr "github.com/google/cloud-android-orchestration/pkg/app/errors"
Expand Down Expand Up @@ -572,9 +574,14 @@ func TestWaitCreateInstanceOperationSucceeds(t *testing.T) {
Name: "foo",
MachineType: "mt",
MinCpuPlatform: "mcp",
NetworkInterfaces: []*compute.NetworkInterface{
{NetworkIP: "127.0.0.1"},
},
}
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch path := r.URL.Path; path {
case "/":
replyJSON(w, map[string]string{})
case "/projects/google.com:test-project/zones/us-central1-a/operations/operation-1/wait":
replyJSON(w, operation)
case "/projects/google.com:test-project/zones/us-central1-a/instances/foo":
Expand All @@ -584,7 +591,18 @@ func TestWaitCreateInstanceOperationSucceeds(t *testing.T) {
}
}))
defer ts.Close()
im := NewGCEInstanceManager(testConfig, buildTestService(t, ts), testNameGenerator)

tsURL, _ := url.Parse(ts.URL)
port, _ := strconv.Atoi(tsURL.Port())

cfg := testConfig
cfg.HostOrchestratorProtocol = "http"

gcpCfg := *cfg.GCP
gcpCfg.HostOrchestratorPort = port
cfg.GCP = &gcpCfg

im := NewGCEInstanceManager(cfg, buildTestService(t, ts), testNameGenerator)

res, _ := im.WaitOperation(zone, &TestUser{}, opName)

Expand All @@ -594,6 +612,104 @@ func TestWaitCreateInstanceOperationSucceeds(t *testing.T) {
}
}

func TestWaitCreateInstanceOperationHOTimeout(t *testing.T) {
zone := "us-central1-a"
opName := "operation-1"
operation := &compute.Operation{
Name: opName,
OperationType: "insert",
TargetLink: "https://xyzzy.com/compute/v1/projects/google.com:test-project/zones/us-central1-a/instances/foo",
Status: "DONE",
}
instance := &compute.Instance{
Disks: []*compute.AttachedDisk{{DiskSizeGb: 10}},
Name: "foo",
MachineType: "mt",
MinCpuPlatform: "mcp",
NetworkInterfaces: []*compute.NetworkInterface{
{NetworkIP: "127.0.0.1"},
},
}
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch path := r.URL.Path; path {
case "/":
w.WriteHeader(http.StatusBadGateway)
case "/projects/google.com:test-project/zones/us-central1-a/operations/operation-1/wait":
replyJSON(w, operation)
case "/projects/google.com:test-project/zones/us-central1-a/instances/foo":
replyJSON(w, instance)
default:
t.Fatalf("unexpected path: %q", path)
}
}))
defer ts.Close()

tsURL, _ := url.Parse(ts.URL)
port, _ := strconv.Atoi(tsURL.Port())

cfg := testConfig
cfg.HostOrchestratorProtocol = "http"

gcpCfg := *cfg.GCP
gcpCfg.HostOrchestratorPort = port
gcpCfg.HostReadyTimeout = 100 * time.Millisecond
cfg.GCP = &gcpCfg

im := NewGCEInstanceManager(cfg, buildTestService(t, ts), testNameGenerator)

_, err := im.WaitHostAvailability(zone, &TestUser{}, "foo")

if err == nil {
t.Error("expected error")
}
var appErr *apperr.AppError
if errors.As(err, &appErr) {
if appErr.StatusCode != http.StatusServiceUnavailable {
t.Errorf("expected status 503, got %d", appErr.StatusCode)
}
} else {
t.Errorf("expected AppError, got %T", err)
}
}

func TestWaitCreateInstanceOperationConcurrentDeletion(t *testing.T) {
zone := "us-central1-a"
opName := "operation-1"
operation := &compute.Operation{
Name: opName,
OperationType: "insert",
TargetLink: "https://xyzzy.com/compute/v1/projects/google.com:test-project/zones/us-central1-a/instances/foo",
Status: "DONE",
}
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch path := r.URL.Path; path {
case "/projects/google.com:test-project/zones/us-central1-a/operations/operation-1/wait":
replyJSON(w, operation)
case "/projects/google.com:test-project/zones/us-central1-a/instances/foo":
w.WriteHeader(http.StatusNotFound)
default:
t.Fatalf("unexpected path: %q", path)
}
}))
defer ts.Close()

im := NewGCEInstanceManager(testConfig, buildTestService(t, ts), testNameGenerator)

_, err := im.WaitHostAvailability(zone, &TestUser{}, "foo")

if err == nil {
t.Error("expected error")
}
var appErr *apperr.AppError
if errors.As(err, &appErr) {
if appErr.StatusCode != http.StatusNotFound {
t.Errorf("expected status 404, got %d", appErr.StatusCode)
}
} else {
t.Errorf("expected AppError, got %T", err)
}
}

func TestWaitDeleteInstanceOperationSucceeds(t *testing.T) {
zone := "us-central1-a"
opName := "operation-1"
Expand Down
16 changes: 16 additions & 0 deletions pkg/app/instances/hostclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"net/http"
"net/http/httputil"
"net/url"
"time"

apiv1 "github.com/google/cloud-android-orchestration/api/v1"
)
Expand Down Expand Up @@ -94,6 +95,21 @@ func (c *NetHostClient) Post(path, query string, bodyJSON any, out *HostResponse
return res.StatusCode, err
}

func (c *NetHostClient) WaitForHostReady() error {
maxWait := 2 * time.Minute
retryDelay := 5 * time.Second
deadline := time.Now().Add(maxWait)

for time.Now().Before(deadline) {
status, err := c.Get("/", "", nil)
if err == nil && status != http.StatusBadGateway {
return nil
}
time.Sleep(retryDelay)
}
return fmt.Errorf("wait for host orchestrator timed out")
}

func (c *NetHostClient) GetReverseProxy() *httputil.ReverseProxy {
devProxy := httputil.NewSingleHostReverseProxy(c.url)
if c.client != http.DefaultClient {
Expand Down
1 change: 1 addition & 0 deletions pkg/app/instances/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type HostClient interface {
Get(URLPath, URLQuery string, res *HostResponse) (int, error)
Post(URLPath, URLQuery string, bodyJSON any, res *HostResponse) (int, error)
GetReverseProxy() *httputil.ReverseProxy
WaitForHostReady() error
}

type HostResponse struct {
Expand Down
Loading