// Package statuspage assembles the public status snapshot: an overall status,
// per-component statuses and a set of summary counters read from the database.
package statuspage

import (
	"context"
	"fmt"
	"time"

	"github.com/jackc/pgx/v5/pgxpool"
)

// Service builds public status snapshots from the state reachable through a
// pgx connection pool.
type Service struct {
	db *pgxpool.Pool
}

// NewService returns a Service that runs its checks and queries against db.
func NewService(db *pgxpool.Pool) *Service {
	return &Service{db: db}
}

// Snapshot runs the status checks in order and returns the resulting response.
//
// The checks are: database ping, summary counters query, gateway runtime
// lookup. The first check that fails marks its component and the overall
// status as "degraded" and ends the snapshot early, so later components are
// absent from the response in that case. Underlying errors are never exposed;
// only fixed messages are returned.
func (s *Service) Snapshot(ctx context.Context) PublicStatusResponse {
	generatedAt := time.Now().UTC()

	// components is the same map that is stored in the response, so writes
	// through either name are visible in the returned value.
	components := map[string]ComponentStatus{
		"api": {
			Status:  "operational",
			Message: "Public API is responding.",
		},
	}
	out := PublicStatusResponse{
		Status:      "operational",
		GeneratedAt: generatedAt.Format(time.RFC3339),
		Components:  components,
	}

	// degraded flags the named component and the overall status, then yields
	// the response as it stands at that moment.
	degraded := func(component, message string) PublicStatusResponse {
		out.Status = "degraded"
		components[component] = ComponentStatus{
			Status:  "degraded",
			Message: message,
		}
		return out
	}

	if err := s.db.Ping(ctx); err != nil {
		return degraded("database", "Database ping failed.")
	}
	components["database"] = ComponentStatus{
		Status:  "operational",
		Message: "Database connectivity is healthy.",
	}

	if err := s.loadSummary(ctx, &out.Summary); err != nil {
		return degraded("database", "Database query failed.")
	}

	lastSeen, err := s.latestGatewayRuntime(ctx)
	if err != nil {
		return degraded("gateway", "Gateway runtime state could not be read.")
	}

	gateway := componentFromGatewayRuntime(generatedAt, lastSeen, out.Summary.ActiveGateways)
	components["gateway"] = gateway
	if gateway.Status != "operational" {
		out.Status = "degraded"
	}
	return out
}

// loadSummary fills summary with seven counters fetched in a single query.
//
// The sub-selects appear in the same order as the Scan destinations: users,
// devices, connected devices, gateways, active gateways, active services and
// active policies. Every count ignores rows whose deleted_at is set.
// "Connected" counts rows of active devices joined to their non-deleted
// WireGuard peers whose latest handshake, passed through to_timestamp, is at
// most three minutes old by the database clock.
//
// NOTE(review): the connected count is over device/peer join rows; if a
// device can own several live peers it would be counted once per peer —
// confirm against the schema.
func (s *Service) loadSummary(ctx context.Context, summary *Summary) error {
	// The statement is one raw string literal; the line break inside it is
	// part of the query text sent to the database.
	const query = ` select (select count(*) from users where deleted_at is null), (select count(*) from devices where deleted_at is null), ( select count(*) from devices d join wireguard_peers wp on wp.device_id = d.id and wp.deleted_at is null where d.deleted_at is null and d.status = 'active' and wp.latest_handshake_at is not null and to_timestamp(wp.latest_handshake_at) >= now() - interval '3 minutes' ), (select count(*) from gateways where deleted_at is null), 
(select count(*) from gateways where deleted_at is null and is_active = true), (select count(*) from services where deleted_at is null and is_active = true), (select count(*) from policies where deleted_at is null and is_active = true) `

	row := s.db.QueryRow(ctx, query)
	return row.Scan(
		&summary.Users,
		&summary.Devices,
		&summary.ConnectedDevices,
		&summary.Gateways,
		&summary.ActiveGateways,
		&summary.Services,
		&summary.Policies,
	)
}

// latestGatewayRuntime returns the newest updated_at among settings rows in
// the 'gateway_runtime' category. The result is nil when the aggregate is
// NULL, i.e. when no such row has a timestamp.
func (s *Service) latestGatewayRuntime(ctx context.Context) (*time.Time, error) {
	const query = ` select max(updated_at) from settings where category = 'gateway_runtime' `

	var latest *time.Time
	if err := s.db.QueryRow(ctx, query).Scan(&latest); err != nil {
		return nil, err
	}
	return latest, nil
}

// componentFromGatewayRuntime derives the gateway component status.
//
// It reports "degraded" when activeGateways is zero, when lastRuntime is nil,
// or when lastRuntime is more than 90 seconds before now; otherwise it
// reports "operational". The zero-gateway check takes precedence over the
// missing-telemetry check.
func componentFromGatewayRuntime(now time.Time, lastRuntime *time.Time, activeGateways int) ComponentStatus {
	switch {
	case activeGateways == 0:
		return ComponentStatus{
			Status:  "degraded",
			Message: "No active gateway is configured.",
		}
	case lastRuntime == nil:
		return ComponentStatus{
			Status:  "degraded",
			Message: "No gateway telemetry has been received yet.",
		}
	}

	elapsed := now.Sub(lastRuntime.UTC())
	if elapsed > 90*time.Second {
		return ComponentStatus{
			Status:  "degraded",
			Message: fmt.Sprintf("Gateway telemetry is stale (%s ago).", humanizeAge(elapsed)),
		}
	}
	return ComponentStatus{
		Status:  "operational",
		Message: fmt.Sprintf("Last gateway telemetry %s ago.", humanizeAge(elapsed)),
	}
}

// humanizeAge renders age as a whole number with a single unit suffix:
// seconds below one minute (never less than "1s", which also covers zero and
// negative durations), minutes below one hour, and hours otherwise.
// Fractions are truncated.
func humanizeAge(age time.Duration) string {
	switch {
	case age < time.Minute:
		whole := int(age.Seconds())
		if whole < 1 {
			whole = 1
		}
		return fmt.Sprintf("%ds", whole)
	case age < time.Hour:
		return fmt.Sprintf("%dm", int(age.Minutes()))
	default:
		return fmt.Sprintf("%dh", int(age.Hours()))
	}
}