From c7df53708c111aee392dfdb879dd09df7513a186 Mon Sep 17 00:00:00 2001 From: erangel1 Date: Tue, 12 May 2026 19:51:59 +0200 Subject: [PATCH] implemented gitops controller + drift detection --- .env.example | 4 + AGENTS.md | 142 ++++----- CHANGELOG.md | 350 ++++++++++++++--------- README.md | 88 +++--- cmd/forgebucket/main.go | 4 + internal/api/handlers/environment.go | 15 +- internal/api/handlers/gitops.go | 252 ++++++++++++++++ internal/api/router.go | 10 + internal/config/config.go | 18 +- internal/domain/git/binary.go | 9 + internal/domain/gitops/controller.go | 95 ++++++ internal/domain/gitops/drift.go | 168 +++++++++++ internal/domain/gitops/reconciler.go | 97 +++++++ internal/events/types.go | 23 ++ internal/models/gitops.go | 32 +++ internal/models/migrations/001_init.go | 5 +- internal/models/migrations/013_gitops.go | 13 + 17 files changed, 1064 insertions(+), 261 deletions(-) create mode 100644 internal/api/handlers/gitops.go create mode 100644 internal/domain/gitops/controller.go create mode 100644 internal/domain/gitops/drift.go create mode 100644 internal/domain/gitops/reconciler.go create mode 100644 internal/models/gitops.go create mode 100644 internal/models/migrations/013_gitops.go diff --git a/.env.example b/.env.example index 64150af..1865402 100644 --- a/.env.example +++ b/.env.example @@ -26,6 +26,10 @@ INSTANCE_NAME=ForgeBucket # OIDC_CLIENT_ID= # OIDC_CLIENT_SECRET= +# ─── GitOps ────────────────────────────────────────────────────────────────── +# Seconds between periodic drift checks (0 disables the ticker; push-triggered checks always run). +GITOPS_RECONCILE_INTERVAL=300 + # ─── Event Bus (NATS) ──────────────────────────────────────────────────────── # Leave empty to disable event publishing (no-op mode). 
# Start NATS with: make docker-up diff --git a/AGENTS.md b/AGENTS.md index 1b87f3f..8d87aae 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,73 +19,66 @@ The full product vision lives in [`ai_agent_master_prompt_for_building_modern_gi ## Architecture Map ``` -cmd/forgebucket/ — binary entry point (main.go) +cmd/forgebucket/ — binary entry point (main.go) internal/ api/ - router.go — Chi router, all route definitions (~26 routes) - middleware/ — auth, CSRF, RBAC, logging - handlers/ — one file per domain (repo, pr, issue, auth, user, ssh...) + router.go — Chi router, all route definitions (60+ routes) + middleware/ — auth.go, csrf.go, rbac.go, audit.go + handlers/ — one file per domain area (see Key Files below) domain/ - git/ — sanitized git binary wrapper (exec.Command only, no shell) - federation/ — ActivityPub / ForgeFed (DATA LAYER ONLY — no handlers yet) - ci/ — CI orchestrator (EMPTY — Phase 2 stub) - models/ — XORM structs + 7 migration files - config/ — ENV-driven config, fails fast on missing secrets -web/ — //go:embed target for the built React SPA + git/ — sanitized git binary wrapper (exec.Command only, no shell) + binary.go — Run, Log, Tree, Diff, BlobCat, RevParse, etc. 
+ agit.go — AGit ref parsing + ci/ — CI/CD execution engine (fully built — Phase 2B) + orchestrator.go — NATS-driven DAG orchestrator + runner_manager.go — job dispatch with Docker executor + executor.go — docker run, log streaming, workspace extraction + dag.go — topological sort, ReadyJobs + parser.go — .forgebucket/workflows/*.yml parser + types.go — WorkflowFile, WorkflowJob, WorkflowStep structs + gitops/ — GitOps controller (fully built — Phase 3D) + controller.go — NATS subscriptions, startup, periodic ticker + drift.go — CheckDrift, handlePush, periodicCheck + reconciler.go — TriggerSync, handleDeploymentSucceeded/Failed + federation/ — ActivityPub / ForgeFed (DATA LAYER ONLY — Phase 3F stub) + models/ — XORM structs + 13 migration files + config/ — ENV-driven config, fails fast on missing secrets + events/ — NATS EventBus interface + NATSBus + NoOpBus +web/ — //go:embed target for the built React SPA frontend/ src/ - pages/ — 20 route-level page components - components/ — shared UI (AppShell, Sidebar, Header, DiffViewer, etc.) + pages/ — route-level page components + components/ — shared UI (AppShell, Sidebar, Header, DiffViewer, etc.) ui/ - tokens.ts — SINGLE SOURCE OF TRUTH for all design tokens - hooks/ — custom React hooks - api/ — typed API client (fetch wrappers) + tokens.ts — SINGLE SOURCE OF TRUTH for all design tokens + hooks/ — custom React hooks + api/ — typed API client (fetch wrappers) ``` **Middleware chain — this order is fixed, do not reorder:** ``` -Logger → RealIP → Recoverer → CORS → CSRF → SessionAuth → RBAC → Handler +Logger → RealIP → Recoverer → CORS → CSRF → SessionAuth → AuditLog → Handler ``` --- ## Current Phase Status -Understand the phases before adding code — don't build Phase 3 infrastructure when Phase 2 is incomplete. 
- | Phase | Scope | Status | |-------|-------|--------| -| 1 | Auth, Git HTTP, repos, PRs, issues, RBAC, webhooks, LFS, design system, 20-page SPA | **Complete** | -| 2A | NATS event bus, WebSocket hub upgrade, audit log | **Complete** | +| 1 | Auth, Git HTTP, repos, PRs, issues, RBAC, webhooks, LFS, design system | **Complete** | +| 2A | NATS event bus, WebSocket hub, audit log | **Complete** | | 2B | CI orchestrator, runner manager, Docker executor, artifact registry | **Complete** | -| 2C | Pipeline DAG visualization, dashboard CI upgrade, command palette wiring | **Complete** | +| 2C | Pipeline DAG visualization, dashboard CI upgrade, command palette | **Complete** | | 3A | Environment model + deployment tracking | **Complete** | | 3B | Unified operational timeline | **Complete** | -| 3C | Workspaces + secret management hierarchy | **Active** | -| 3D | GitOps controller + drift detection | Planned | -| 3E | Observability (Prometheus, health sparklines) | Planned | +| 3C | Workspaces + secret management (Global → Workspace → Repo → Env) | **Complete** | +| 3D | GitOps controller + drift detection + auto-sync | **Complete** | +| 3E | Observability (Prometheus endpoint, health checks, sparklines) | **Next** | | 3F | Federation handlers (ActivityPub inbox/outbox) | Planned | -| 4 | AI diagnostics, signed artifacts, OCI registry, secret/dep scanning | Planned | +| 4 | AI diagnostics, signed artifacts, OCI registry, dep/secret scanning | Planned | -Do not implement Phase 3+ features without explicit discussion. The `domain/federation/` directory is an intentional stub — the data model exists but no HTTP handlers should be wired until Phase 3F. - -### Phase 3A — What to Build - -Backend and frontend are both net-new for Phase 3A. Nothing exists yet. - -**Backend:** -1. 
`internal/models/environment.go` — `Environment` (id, repoId, name, url, protectionRules JSON) + `Deployment` (id, envId, repoId, sha, ref, status, triggeredBy, description, runId, startedAt, finishedAt) -2. `internal/models/migrations/010_environments.go` — `Run010()` syncing both structs; call from `001_init.go` -3. `internal/api/handlers/environment.go` — `ListEnvironments`, `CreateEnvironment`, `GetEnvironment`, `UpdateEnvironment`, `DeleteEnvironment`, `ListDeployments`, `CreateDeployment`, `UpdateDeploymentStatus`; publish `deployment.*` NATS events -4. `internal/api/router.go` — wire routes under `/{owner}/{repo}/environments` and `/{owner}/{repo}/environments/{envName}/deployments` - -**Frontend:** -5. `frontend/src/types/api.ts` — add `Environment`, `Deployment`, `DeployStatus` types -6. `frontend/src/api/queries/environments.ts` — `useEnvironments`, `useEnvironment`, `useCreateEnvironment`, `useUpdateEnvironment`, `useDeleteEnvironment`, `useDeployments`, `useCreateDeployment`, `useUpdateDeploymentStatus` -7. `frontend/src/pages/EnvironmentsPage.tsx` — environment cards each showing latest deployment status, SHA, who deployed, time; "New environment" flow; deployment history per env -8. `frontend/src/components/layout/Sidebar.tsx` — add `Environments` nav item between Pipelines and Settings in `RepoSubNav` -9. `frontend/src/pages/RepoPage.tsx` — surface deployment status badges in the repo header (latest deploy per env at a glance) -10. `frontend/src/App.tsx` — add route `repos/:owner/:repo/environments` +The `domain/federation/` directory is an intentional stub — the data model exists but no HTTP handlers should be wired until Phase 3F. --- @@ -107,13 +100,19 @@ This rule is non-negotiable. It prevents command injection. ### Router / handlers - Chi router. Route definitions in `internal/api/router.go`. - One handler file per domain area. Keep handlers thin — business logic belongs in domain packages. 
-- All POST/PUT/DELETE routes require `X-CSRF-Token` header matching the session cookie. The middleware enforces this, but don't remove it from routes. +- All POST/PUT/DELETE routes require `X-CSRF-Token` header matching the session cookie. The CSRF middleware enforces this, but don't remove it from route definitions. +- There is a shared `resolveRepoID(db, w, r)` function in `internal/api/handlers/repo_lookup.go` — use it instead of duplicating repo resolution logic. ### Database - XORM for all DB access. Structs in `internal/models/`. -- Migrations are numbered files in `internal/models/migrations/`. Always add a new migration file; never edit existing ones. +- Migrations are numbered files in `internal/models/migrations/`. Always add a new file; never edit existing ones. Current highest: **013**. - No raw SQL strings built from user input. +### Events +- Publish to NATS via `bus.Publish(events.SubjectXxx, payload)` where the subject is a constant from `internal/events/subjects.go`. +- Payload types are in `internal/events/types.go` — use them for type-safe unmarshaling in subscribers. +- `NoOpBus` silently drops events when `NATS_URL` is unset — the app must work normally without NATS. + ### Secrets and config - All secrets come from environment variables via `internal/config/`. - Never hardcode secrets, tokens, or credentials anywhere. @@ -121,7 +120,7 @@ This rule is non-negotiable. It prevents command injection. ### Error handling - Return errors up the call stack. Don't swallow them silently. -- HTTP handlers use consistent JSON error responses — follow the pattern in existing handlers. +- HTTP handlers use consistent JSON error responses — follow the pattern in `jsonError` / `jsonOK` in `internal/api/handlers/helpers.go`. --- @@ -143,7 +142,7 @@ All spacing, color, and sizing values must come from `frontend/src/ui/tokens.ts` - Touch targets: 44px minimum height/width on all interactive elements (buttons, links, icon buttons). 
### Dark mode -- Use Tailwind v4 `@variant dark` — not hardcoded dark: classes unless inside a component that explicitly handles both. +- Use Tailwind v4 `@variant dark` — not hardcoded `dark:` classes unless inside a component that explicitly handles both. - Colors must work in both light and dark modes. Test both. ### Component patterns @@ -155,20 +154,21 @@ All spacing, color, and sizing values must come from `frontend/src/ui/tokens.ts` ### API calls - Use the typed API client in `frontend/src/api/` — don't write raw `fetch` calls in components. -- Always include `X-CSRF-Token` header on mutating requests. +- Always include `X-CSRF-Token` header on mutating requests (the client does this automatically via `getCSRFToken()`). --- ## What NOT to Do -- **No shell string injection** — see Go conventions above +- **No shell string injection** — see Go conventions above; always discrete `exec.Command` args - **No hardcoded secrets** — everything via env - **No skipping CSRF** — all mutating routes require it -- **No arbitrary design values** — tokens.ts is the law -- **No Phase 3+ features without discussion** — don't wire up GitOps, federation handlers, or the command palette until Phase 2 is complete -- **No new color tokens** — if the design requires a new color, discuss it; don't invent one -- **No modal-heavy UX** — this platform uses progressive disclosure; avoid deep modal chains -- **No YAML-centric UI** — pipeline and environment config should feel operational, not config-file editing +- **No arbitrary design values** — `tokens.ts` is the law +- **No new color tokens without discussion** — the existing palette covers all cases +- **No modal-heavy UX** — progressive disclosure; avoid deep modal chains +- **No YAML-centric UI** — pipeline and GitOps config should feel operational, not config-file editing +- **No editing existing migration files** — always add a new numbered migration +- **No direct `fmt.Println` for logging** — use `log.Printf` so structured 
logs work correctly --- @@ -189,16 +189,26 @@ make lint # go vet + ESLint | File | Purpose | |------|---------| -| `internal/api/router.go` | All route definitions — start here for backend | -| `internal/models/` | XORM models + migrations — all DB schemas | -| `internal/config/config.go` | Env-driven config, required vars | -| `internal/domain/git/` | Git binary wrapper — safe exec patterns | +| `internal/api/router.go` | All route definitions — start here for backend work | +| `internal/api/handlers/repo_lookup.go` | Shared `resolveRepoID` helper | +| `internal/models/` | All XORM models + 13 migration files | +| `internal/config/config.go` | All env vars, fail-fast validation | +| `internal/events/subjects.go` | All NATS event subject constants | +| `internal/events/types.go` | Typed event payload structs | +| `internal/domain/git/binary.go` | Git binary wrapper — safe exec patterns, `RevParse`, `BlobCat`, etc. | +| `internal/domain/ci/orchestrator.go` | CI DAG orchestrator | +| `internal/domain/ci/executor.go` | Docker job executor + log streaming | +| `internal/domain/gitops/controller.go` | GitOps reconciliation controller | +| `internal/domain/gitops/drift.go` | `CheckDrift`, drift detection logic | +| `internal/api/handlers/environment.go` | Environment + deployment CRUD | +| `internal/api/handlers/gitops.go` | GitOps config + drift HTTP endpoints | +| `internal/api/handlers/secret.go` | Scoped secret management | +| `internal/api/handlers/workspace.go` | Workspace + member management | +| `internal/api/middleware/audit.go` | Audit log middleware | | `frontend/src/ui/tokens.ts` | Design token source of truth | | `frontend/src/components/AppShell.tsx` | Root layout wrapper | -| `frontend/src/components/Sidebar.tsx` | 3-state navigation sidebar | -| `frontend/src/pages/` | All 20 route-level pages | -| `frontend/src/api/` | Typed API client | -| `.env.example` | All required environment variables | +| `frontend/src/api/client.ts` | Typed API client with CSRF 
handling | +| `.env.example` | All environment variables with documentation | | `CLAUDE.md` | Developer guide (rules overlap with this file — CLAUDE.md takes precedence on conflicts) | --- @@ -207,7 +217,9 @@ make lint # go vet + ESLint ```bash cp .env.example .env # fill SESSION_SECRET and CSRF_SECRET -make docker-up # PostgreSQL via Docker Compose -make migrate # run XORM migrations +make docker-up # PostgreSQL + NATS via Docker Compose +make migrate # run XORM migrations (currently 013) make dev # Go :8080 + Vite :5173 ``` + +CI execution requires Docker to be running locally. If unavailable, the runner logs a warning and CI jobs are queued but not executed. diff --git a/CHANGELOG.md b/CHANGELOG.md index e543e0f..8865565 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,63 +9,154 @@ Versions follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] -### In Progress — Phase 3C (Workspaces + Secret management hierarchy) -- `Workspace` model — named collaborative namespace (handle, displayName, description, avatarUrl) -- `WorkspaceMember` model — user membership with owner/admin/member roles -- Repos can be owned by a workspace; URL format stays `/{owner}/{repo}` where owner is a workspace handle or username -- `Secret` model — AES-256-GCM encrypted, scoped to global / workspace / repo / env -- Secret hierarchy resolution in CI executor: Env → Repo → Workspace → Global -- Full CRUD APIs for workspaces, workspace members, secrets at all scope levels -- WorkspacesPage, WorkspacePage, WorkspaceSettingsPage (settings + members) -- Workspace switcher in sidebar header -- Create repo: workspace owner selector -- RepoSecretsPage — write-only secret management per repo and per environment +### Planned — Phase 3E (Observability) +- Prometheus metrics endpoint `GET /metrics` +- Structured internal metrics: pipeline duration, queue depth, deployment frequency, error rates +- Health check endpoint `GET /health` returning DB + NATS status +- 
Environment cards: live health status via HTTP health check polling +- Repo page: error rate and deployment frequency sparklines + +### Planned — Phase 3F (Federation) +- ActivityPub inbox/outbox HTTP handlers +- HTTP signature verification middleware +- WebFinger `/.well-known/webfinger` endpoint +- Cross-instance pull requests via ActivityPub activities + +### Planned — Phase 4 (Intelligence + Artifacts) +- AI failure diagnosis (pipeline failure root-cause analysis via Claude API) +- AI deployment risk scoring +- Signed artifacts (Sigstore/Cosign) +- SBOM generation (CycloneDX/SPDX) +- OCI container registry +- Secret scanning (commit-level pattern detection) +- Dependency vulnerability scanning + +--- + +## [0.7.0] — 2026-05-12 + +Phase 3D complete. Git is now the source of truth for environment deployment state. + +### Added — GitOps Controller (`internal/domain/gitops/`) +- `controller.go` — starts as a background goroutine; subscribes to `push.received`, + `deployment.succeeded`, `deployment.failed`; runs a periodic reconciliation ticker + (interval configurable via `GITOPS_RECONCILE_INTERVAL`); recovers stale `syncing` + configs to `drifted` on startup +- `drift.go` — `CheckDrift` calls `git rev-parse` via the existing git domain wrapper; + `handlePush` queries all GitOpsConfigs matching the pushed branch and evaluates drift; + `periodicCheck` iterates configs whose `SyncInterval` has elapsed; publishes + `environment.drift_detected` when drift is found +- `reconciler.go` — `TriggerSync` creates a `Deployment` record and publishes + `deployment.started` (same lifecycle path as manual deployments, `TriggeredBy="gitops"`); + `handleDeploymentSucceeded` resolves open drift events and marks config `synced` for + both GitOps and manual deployments; `handleDeploymentFailed` reverts to `drifted` + +### Added — GitOps HTTP API (`internal/api/handlers/gitops.go`) +All routes live under `/api/v1/repos/{owner}/{repo}/environments/{envName}/`: +- `GET /gitops` — 
current GitOpsConfig or 404 if not configured +- `PUT /gitops` — idempotent upsert (branch, autoSync, syncInterval) +- `DELETE /gitops` — remove config without deleting deployments +- `POST /gitops/sync` — manual reconciliation trigger; creates deployment record +- `GET /gitops/drift` — current sync status: syncStatus, desiredSha, actualSha, isDrifted +- `GET /gitops/drift/history` — paginated drift event log (newest first) +- `POST /gitops/drift/{driftID}/acknowledge` — acknowledge without syncing + +### Added — Database Models (migration `013_gitops`) +- `GitOpsConfig` — links environment to a branch; tracks `DesiredSHA`, `ActualSHA`, + `SyncStatus` (`unknown/synced/drifted/syncing`), `AutoSync`, `SyncInterval`, + `LastCheckedAt` +- `GitOpsDriftEvent` — append-only drift record: `DesiredSHA`, `ActualSHA`, + `SyncStatus` (`drifted/synced/acknowledged`), `DetectedAt`, `ResolvedAt` + +### Added — Supporting Changes +- `git.RevParse(repoPath, ref)` — new function in `internal/domain/git/binary.go` + used by `CheckDrift` to resolve branch HEAD SHA +- `events.DeploymentEvent` + `events.DriftEvent` types added to `internal/events/types.go` +- `EnvironmentHandler.publishDeployEvent` updated to use shared `events.DeploymentEvent` + so the GitOps controller can unmarshal deployment lifecycle events correctly +- `GITOPS_RECONCILE_INTERVAL` env var (default `300`s); `0` disables the periodic ticker +- `ArtifactRoot` config field + `ARTIFACT_ROOT` env var + +--- + +## [0.6.0] — 2026-05-12 + +Phase 3C complete. Multi-tenant workspaces and a full secret management hierarchy operational. 
+ +### Added — Workspaces +- `Workspace` model (migration `011`): globally unique handle, display name, description, avatarUrl +- `WorkspaceMember` model: owner/admin/member roles per workspace +- Repository `workspace_id` column (optional; null = personal repo) +- Full workspace CRUD API: `GET/POST /api/v1/workspaces`, `GET/PATCH/DELETE /api/v1/workspaces/{handle}` +- Workspace member management: list, add, update role, remove +- `GET /api/v1/workspaces/{handle}/repos` — repos in workspace +- Workspace frontend: WorkspacesPage, WorkspacePage, workspace switcher in sidebar header +- Workspace owner selector in repo create flow + +### Added — Secret Management (`internal/api/handlers/secret.go`) +- `Secret` model (migration `012`): `Scope` (global/workspace/repo/env), `ScopeID`, `Name`, + `EncryptedValue` (AES-256-GCM, never returned by API) +- Unique constraint on (scope, scope_id, name) +- CRUD at all scope levels: + - `GET/POST/DELETE /api/v1/admin/secrets` (global, admin-only) + - `GET/POST/DELETE /api/v1/workspaces/{handle}/secrets` (workspace-scoped) + - `GET/POST/DELETE /api/v1/repos/{owner}/{repo}/secrets` (repo-scoped) + - `GET/POST/DELETE /api/v1/repos/{owner}/{repo}/environments/{envName}/secrets` (env-scoped) +- `ResolveSecretsForRun(db, repoID, workspaceID, envID, sessionSecret)` — hierarchy + resolution for CI executor: Env > Repo > Workspace > Global +- CI executor updated to inject resolved secrets as Docker `--env` flags +- RepoSecretsPage — write-only UI, values never displayed after creation - Sidebar "Secrets" nav item in repo context -### Completed — Phase 3B (Unified Operational Timeline) -- `GET /api/v1/repos/:owner/:repo/timeline` — merges commits, pipeline runs, and deployments into a single chronological feed -- `RepoTimelinePage` at `/repos/:owner/:repo/timeline` — vertical event feed with type filter tabs +--- + +## [0.5.0] — 2026-05-11 + +Phases 3A and 3B complete. Environments, deployments, and the operational timeline are operational. 
+ +### Added — Environments + Deployments (Phase 3A) +- `Environment` model (migration `010`): repoId, name, URL, protectionRules (JSON) +- `Deployment` model: envId, repoId, sha, ref, status lifecycle + (`pending → in_progress → success/failure/cancelled`), triggeredBy, description, runId link +- CRUD API for environments: `GET/POST /environments`, `GET/PATCH/DELETE /environments/{envName}` +- Deployment API: `GET/POST /environments/{envName}/deployments`, + `PATCH /environments/{envName}/deployments/{id}/status` +- NATS events published on status transitions: `deployment.started`, `deployment.succeeded`, + `deployment.failed` +- `EnvironmentsPage` — environment cards each showing latest deployment status, SHA, actor, + and time since deploy; deployment history per env +- Sidebar "Environments" nav item in repo context +- Repo page deployment status badges (latest deploy per env at a glance) + +### Added — Unified Operational Timeline (Phase 3B) +- `GET /api/v1/repos/{owner}/{repo}/timeline` — merged chronological feed of commits, + pipeline runs, and deployments; default 60 events, max 200 +- `RepoTimelinePage` at `/repos/:owner/:repo/timeline` — vertical event feed with type + filter tabs (all / commits / runs / deployments) - Sidebar "Timeline" nav item between Environments and Settings -- Event types: commit (SHA, message, author), run (status, ref, duration), deployment (env, status, SHA) +- Answers "what changed before things broke?" 
without navigating between separate pages -### Completed — Phase 3A (Environment model + deployment tracking) -- `Environment` model per repo (name, URL, protection rules) -- `Deployment` model (sha, ref, status, triggered_by, run_id link) -- Full CRUD API for environments -- Deployment trigger + status update API -- NATS event publishing for `deployment.*` subjects -- `EnvironmentsPage` per repo — environment cards with live deployment status -- Deployment history per environment -- Sidebar "Environments" nav item -- Repo page deployment status badges +--- -### Completed — Phase 2C (CI Legibility) -- `PipelinesPage` — real cross-repo runs feed with status filter tabs +## [0.4.0] — 2026-05-11 + +Phase 2C complete. CI results are legible in the UI; the dashboard is an operational command center. + +### Added — Pipeline Visualization +- `PipelinesPage` — cross-repo pipeline runs feed with status filter tabs (all / running / failed / succeeded) - `RepoPipelinesPage` — repo-scoped runs list at `/repos/:owner/:repo/pipelines` -- `PipelineRunPage` — run detail with topological DAG visualization + step log viewer -- `PipelineWaterfall` — rewritten to accept real `PipelineJob[]` data with `needs` graph -- Dashboard CI widget — live recent runs replacing "coming soon" placeholder -- Command palette — pipeline run results + Pipelines quick-nav -- `GET /api/v1/pipelines/runs` — cross-repo recent runs endpoint -- Dashboard `recentRuns[]` field added +- `PipelineRunPage` — run detail with topological DAG visualization using real `PipelineJob[]` + + `needs` graph; step log viewer (collapsible per step, ANSI color, auto-scroll with lock toggle) +- `PipelineWaterfall` — rewritten to accept live job data instead of static mock stages +- `GET /api/v1/pipelines/runs` — cross-repo recent runs for the dashboard -### Planned — Phase 3 (GitOps + Observability + Federation) -- GitOps controller with reconciliation loops -- Environment model + deployment tracking -- Unified operational 
timeline (commits + deployments + CI failures merged) -- Drift detection and sync status -- Deployment promotion workflows (dev → staging → production) -- Rollback visualization and one-click rollbacks -- Canary and blue/green deployment support -- ActivityPub / ForgeFed federation handlers (inbox, outbox, cross-instance PRs) -- Secret management hierarchy (Global → Org → Repo → Env) -- Observability (Prometheus endpoint, health sparklines) +### Added — Dashboard CI Command Center +- Dashboard CI widget replaced "coming soon" with live recent pipeline runs +- Dashboard `recentRuns[]` field added to the `/api/v1/dashboard` response -### Planned — Phase 4 -- AI diagnostics (pipeline failure root-cause analysis) -- Signed artifacts (Sigstore/Cosign) -- OCI package registry -- Secret and dependency vulnerability scanning +### Added — Command Palette Wiring +- Pipeline run results surfaced in command palette results +- "Pipelines" quick-nav action --- @@ -75,38 +166,35 @@ Phase 2B complete. Full CI/CD execution backend operational. 
### Added — CI Orchestrator (`internal/domain/ci/`) - DAG-based pipeline orchestrator (`orchestrator.go`): subscribes to NATS `push.received`, - parses `.forgebucket/workflows/*.yml`, creates `PipelineRun`/`PipelineJob`/`PipelineStep` - records, advances DAG on `job.completed`/`job.failed`, recovers stale runs on startup -- Docker executor (`executor.go`): runs steps in isolated containers (`docker run --rm`), - streams logs to DB and NATS via `pipeline.log` subject, handles `git archive` workspace extraction -- Runner manager (`runner_manager.go`): semaphore-limited concurrent job dispatch (default 4), - subscribes to `job.queued`, calls executor when Docker is available -- DAG engine (`dag.go`): full topological sort (`TopoSort`) and `ReadyJobs` for dependency resolution -- Workflow parser (`parser.go`): reads `.forgebucket/workflows/*.yml` from git ref, - `MatchesPushTrigger` with glob pattern support -- CI types (`types.go`): `WorkflowFile`, `WorkflowJob`, `WorkflowStep`, YAML `StringOrSlice` unmarshaler + parses `.forgebucket/workflows/*.yml`, creates `PipelineRun/Job/Step` records, advances + DAG on `job.completed/failed`, recovers stale runs on startup +- Docker executor (`executor.go`): steps run in isolated containers (`docker run --rm`), + logs stream to DB and NATS via `pipeline.log`, workspace extracted via `git archive` +- Runner manager (`runner_manager.go`): semaphore-limited (default 4 concurrent), + subscribes to `job.queued`, skips gracefully if Docker is unavailable +- DAG engine (`dag.go`): `TopoSort`, `ReadyJobs` +- Workflow parser (`parser.go`): `.forgebucket/workflows/*.yml` from git ref, + `MatchesPushTrigger` with glob branch patterns; `StringOrSlice` YAML unmarshaler ### Added — CI API Handlers -- `GET /api/v1/repos/:owner/:repo/pipelines` — list pipeline definitions -- `GET /api/v1/repos/:owner/:repo/runs` — list pipeline runs (most recent first, limit 30) -- `GET /api/v1/repos/:owner/:repo/runs/:runID` — run detail with full job + step 
tree -- `POST /api/v1/repos/:owner/:repo/runs/:runID/cancel` — cancel queued or running run -- `POST /api/v1/repos/:owner/:repo/runs/:runID/jobs/:jobID/retry` — re-queue failed/cancelled job -- `GET /api/v1/repos/:owner/:repo/runs/:runID/jobs/:jobID/logs` — step-level log chunks -- `GET /api/v1/repos/:owner/:repo/runs/:runID/artifacts` — list artifacts for a run -- `POST /api/v1/repos/:owner/:repo/runs/:runID/artifacts` — upload artifact (multipart, 512 MB max) -- `GET /api/v1/repos/:owner/:repo/artifacts/:artifactID/download` — artifact download with path traversal guard -- `GET /api/v1/admin/runners` — list registered runners (admin-only) -- `POST /api/v1/admin/runners/register` — register a new runner with bcrypt token hashing (admin-only) +- `GET /api/v1/repos/:owner/:repo/pipelines` — pipeline definitions +- `GET /api/v1/repos/:owner/:repo/runs` — pipeline runs (newest first) +- `GET /api/v1/repos/:owner/:repo/runs/:runID` — run detail with job + step tree +- `POST /api/v1/repos/:owner/:repo/runs/:runID/cancel` +- `POST /api/v1/repos/:owner/:repo/runs/:runID/jobs/:jobID/retry` +- `GET /api/v1/repos/:owner/:repo/runs/:runID/jobs/:jobID/logs` — step log chunks +- `GET/POST /api/v1/repos/:owner/:repo/runs/:runID/artifacts` +- `GET /api/v1/repos/:owner/:repo/artifacts/:artifactID/download` — path-traversal guarded +- `GET/POST /api/v1/admin/runners` — runner list + registration (admin-only, bcrypt token) ### Added — Database Models (migration `009_ci`) -- `Pipeline` — workflow definition record (name, filePath, repoId) -- `PipelineRun` — execution record (triggerRef, triggerSha, triggeredBy, status, startedAt, finishedAt) -- `PipelineJob` — single DAG node (name, image, needs JSON, status, timing) -- `PipelineStep` — single command within a job (seq, runCmd, usesAction, exitCode, timing) -- `PipelineStepLog` — append-only log chunk storage (stepId, chunkIndex, content) -- `Runner` — registered execution backend (name, labels, status, tokenHash, lastSeenAt) -- 
`Artifact` — build artifact (runId, repoId, name, storagePath, size, contentType) +- `Pipeline`, `PipelineRun`, `PipelineJob`, `PipelineStep`, `PipelineStepLog` +- `Runner` (name, labels, status, tokenHash, lastSeenAt) +- `Artifact` (runId, repoId, name, storagePath, size, contentType) + +### Changed — Git HTTP handler +- `parseAndCheckBody` replaces `checkProtectionsFromBody` — now also returns parsed + `refUpdate` structs for publishing `push.received` after each successful receive-pack --- @@ -116,105 +204,81 @@ Phase 2A complete. Real-time event infrastructure and audit log operational. ### Added — NATS Event Bus (`internal/events/`) - `EventBus` interface: `Publish`, `Subscribe`, `Close` -- `NATSBus`: NATS-backed implementation with auto-reconnect, max-reconnect disabled -- `NoOpBus`: silent fallback when `NATS_URL` is not configured (app fully functional without NATS) -- `New(url)` factory: returns `NATSBus` if URL is set, `NoOpBus` otherwise -- Event subjects defined in `subjects.go`: - - `repo.*` (created, deleted, pushed) - - `push.received` - - `pr.*` (opened, merged, closed) - - `issue.*` (opened, closed) - - `pipeline.*` (queued, started, succeeded, failed, cancelled) - - `job.*` (queued, started, completed, failed), `pipeline.log` - - `deployment.*`, `environment.*` (Phase 3 stubs) - - `audit.event` +- `NATSBus`: NATS-backed with auto-reconnect; `NoOpBus` fallback when `NATS_URL` unset +- `New(url)` factory: returns `NATSBus` or `NoOpBus` +- 40+ event subjects in `subjects.go` covering repo, push, PR, issue, pipeline, job, + deployment, environment, and audit namespaces -### Added — WebSocket Hub (`internal/api/handlers/ws.go`) -- `GET /ws` — upgrades HTTP to WebSocket (nhooyr.io/websocket) -- Subscribes to all NATS subjects on connect, fans events to the client as JSON -- Optional session auth (`auth.Optional` middleware) — works for guests too -- Phase 2B note: per-user event filtering is a planned upgrade +### Added — WebSocket Hub +- `GET /ws` — 
NATS wildcard subscription (`>`) fans all events to connected clients as JSON +- `{ subject, payload }` envelope format +- Goroutine per client with buffered send channel (64 events); slow clients drop events -### Added — Audit Log -- `AuditLog` model (migration `008_audit_log`): actor, method, path, statusCode, requestBody, ipAddr, timestamp -- `AuditLog` middleware: records every authenticated request to the DB and publishes `audit.event` -- `GET /api/v1/audit` — paginated audit log query (admin-only, filterable by actor/method/time range) - -### Fixed — Local development environment -- `DATABASE_URL` was using Docker-internal hostname `postgres`; corrected to `localhost` for `make dev` -- Added `NATS_URL=nats://localhost:4222` to `.env` (was missing; CI orchestrator requires it) -- `REPO_ROOT` corrected to `/tmp/forgebucket/repos` (Docker path `/var/lib/forgebucket/repos` requires sudo on macOS) +### Added — Audit Log (migration `008_audit_log`) +- `AuditLog` model: actorId, actorName, method, path, statusCode, ipAddress, userAgent +- Middleware records every POST/PUT/PATCH/DELETE in the protected route group +- Writes DB row + publishes `audit.event` asynchronously (never blocks the response) +- `GET /api/v1/audit` — paginated, filterable by actor/method/since (admin-only) --- ## [0.1.0] — 2026-05-11 -Initial development milestone. Core Git hosting, collaboration, and frontend SPA are functional. +Initial development milestone. Core Git hosting, collaboration, and frontend SPA functional. 
### Added — Authentication & Security - User registration and login with secure session cookies -- CSRF protection on all mutating routes via `X-CSRF-Token` header -- Middleware chain: Logger → RealIP → Recoverer → CORS → CSRF → SessionAuth → RBAC → Handler +- CSRF protection via double-submit cookie pattern (`X-CSRF-Token`) - SSH key management per user -- OIDC / OAuth2 optional integration (configurable via env) -- Scoped access tokens with optional expiration dates -- Repository deploy keys (read-only or read-write HTTP tokens) -- ENV-driven config with fail-fast validation on missing secrets +- OIDC / OAuth2 optional integration +- Scoped access tokens with optional expiration +- Repository deploy keys (read-only or read-write) +- ENV-driven config with fail-fast on missing secrets ### Added — Git Hosting -- Smart HTTP transport (git clone, push, pull over HTTP) -- AGit protocol support (`refs/for/` push for instant PR creation without branch switching) -- Branch management (list, create, delete, default branch configuration) -- Commit log and diff viewing -- Git LFS per-repository (configurable file size limits, locking) -- Branch protection rules (force-push blocking, required reviews) +- Smart HTTP transport (clone, push, pull over HTTP) +- AGit protocol (`refs/for/` push for instant PR creation) +- Branch management, commit log, diff viewing +- Git LFS per-repository (configurable file size limits) +- Branch protection rules (force-push blocking) - Repository visibility (public / private) ### Added — Collaboration -- Pull requests (open / merged / closed states) with author tracking +- Pull requests (open / merged / closed) with author tracking - Issues (open / closed) -- Reviewer assignment (default reviewer per repo, per-PR reviewer assignment) -- Merge strategy selection per repository (merge commit / squash / rebase) +- Reviewer assignment (default reviewer per repo, per-PR overrides) +- Merge strategy selection per repository (merge / squash / rebase) 
- Branching model configuration (feature / bugfix / release / hotfix prefixes) -- PR default description templates (per-repo) -- Excluded files from diffs (glob pattern configuration) +- PR default description templates + excluded-files configuration - Webhook system with event filtering (push, pull_request, issue) -- Repository member RBAC (read / write / admin roles) +- Repository member RBAC (read / write / admin) ### Added — Frontend SPA - React 18 + TypeScript + Vite, embedded into Go binary via `//go:embed` -- 20 route-level pages: Login, Register, Dashboard, Repos, CreateRepo, ImportRepo, Repo, - RepoSettings, Blob, Commits, Branches, RepoIssues, RepoPRs, CreatePR, PRDetail, Starred, - PRs (cross-repo), Pipelines (placeholder), Explore, Profile, Settings -- AppShell layout wrapper for all authenticated pages +- 20 route-level pages covering auth, dashboard, repos, code, PRs, issues, and settings - Triple-state sidebar: expanded (320px) / collapsed (56px) / mobile bottom bar - Mobile-first responsive design (375px → 1440px) -- DiffViewer: side-by-side and unified views with syntax highlighting -- MobileComment: bottom-sheet overlay for inline code review on mobile -- TreeBrowser: repository file tree navigation -- PipelineWaterfall: placeholder pipeline visualization component -- Skeleton loading states for perceived performance +- DiffViewer (side-by-side + unified), MobileComment (bottom-sheet), TreeBrowser ### Added — Design System - Custom semantic token palette in `frontend/src/ui/tokens.ts` -- Full dark/light mode support via Tailwind CSS v4 `@variant dark` -- Brand colors: `#0052CC` (light) / `#3B82F6` (dark) -- 8px grid system (xs: 4px, sm: 8px, md: 16px, lg: 24px, xl: 32px, xxl: 48px) -- 44px minimum touch targets on all interactive elements (WCAG 2.5.5) -- Consistent border radius scale (subtle 3–8px, full 9999px) +- Full dark/light mode via Tailwind CSS v4 `@variant dark` +- 8px grid system; 44px minimum touch targets (WCAG 2.5.5) - System font 
stack (Segoe UI, Roboto, sans-serif) ### Added — Infrastructure -- PostgreSQL + XORM with 7 migration files covering: users, repositories, issues, SSH keys, - access tokens, deploy keys, workflows, and LFS settings -- ActivityPub actor data model (FederationActor with inbox/outbox URLs and RSA key pairs) — data layer only -- Docker Compose setup for local PostgreSQL + NATS -- Makefile targets: dev, build, migrate, test, lint, docker-up -- WebSockets foundation for live logs and notifications +- PostgreSQL + XORM with migrations 001–007 +- ActivityPub actor data model (FederationActor) — data layer only +- Docker Compose for local PostgreSQL + NATS +- Makefile: dev, build, migrate, test, lint, docker-up --- -[Unreleased]: https://github.com/forgeo/forgebucket/compare/v0.3.0...HEAD +[Unreleased]: https://github.com/forgeo/forgebucket/compare/v0.7.0...HEAD +[0.7.0]: https://github.com/forgeo/forgebucket/compare/v0.6.0...v0.7.0 +[0.6.0]: https://github.com/forgeo/forgebucket/compare/v0.5.0...v0.6.0 +[0.5.0]: https://github.com/forgeo/forgebucket/compare/v0.4.0...v0.5.0 +[0.4.0]: https://github.com/forgeo/forgebucket/compare/v0.3.0...v0.4.0 [0.3.0]: https://github.com/forgeo/forgebucket/compare/v0.2.0...v0.3.0 [0.2.0]: https://github.com/forgeo/forgebucket/compare/v0.1.0...v0.2.0 [0.1.0]: https://github.com/forgeo/forgebucket/releases/tag/v0.1.0 diff --git a/README.md b/README.md index 0da3947..7b85b4c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ForgeBucket is a self-hosted, federated developer operations platform. Where other Git platforms show you a list of files, ForgeBucket surfaces deployments, pipeline health, environment drift, and operational context directly alongside your code. Repositories are runtime systems. The dashboard is a command center. -**Status:** Phase 2C in progress. CI/CD execution backend is fully operational. Pipeline visualization and dashboard integration are being wired up now. +**Status:** Active development. 
Phase 3D (GitOps controller + drift detection) complete. Phase 3E (observability) is next. --- @@ -32,7 +32,8 @@ ForgeBucket is a self-hosted, federated developer operations platform. Where oth | OIDC / OAuth2 (optional) | Done | | Access tokens (scoped, expiring) | Done | | Deploy keys | Done | -| Audit log | Done | +| Audit log (admin-only, filterable) | Done | +| Workspaces (multi-tenant namespaces) | Done | ### Git Hosting | Feature | Status | @@ -59,34 +60,36 @@ ForgeBucket is a self-hosted, federated developer operations platform. Where oth ### CI/CD | Feature | Status | |---------|--------| -| CI orchestrator (DAG pipeline execution) | Done (Phase 2B) | -| Runner manager (Docker backend) | Done (Phase 2B) | -| Build artifact storage | Done (Phase 2B) | -| Pipeline cancellation + job retry | Done (Phase 2B) | -| NATS event bus + WebSocket live push | Done (Phase 2A) | -| Pipeline DAG visualization (frontend) | Done (Phase 2C) | -| Dashboard CI command center | Done (Phase 2C) | -| Pipeline log viewer (per-step, collapsible) | Done (Phase 2C) | +| NATS event bus + WebSocket live push | Done | +| CI orchestrator (DAG pipeline execution) | Done | +| Runner manager (Docker backend) | Done | +| Build artifact storage + download | Done | +| Pipeline cancellation + job retry | Done | +| Pipeline log streaming (per-step, NATS) | Done | +| Pipeline DAG visualization (frontend) | Done | +| Dashboard CI command center | Done | +| Pipeline log viewer (collapsible, per-step) | Done | | Kubernetes / Firecracker runner backends | Planned (Phase 2D) | -| Forgejo Actions gRPC integration | Planned | | Matrix builds + reusable workflow templates | Planned | | Flaky test detection | Planned | -### GitOps + Environments +### Environments + GitOps | Feature | Status | |---------|--------| -| Environment model + deployment tracking | **In progress (Phase 3A)** | -| Unified operational timeline | Planned (Phase 3B) | -| Secret management hierarchy | Planned (Phase 3C) | -| GitOps 
controller + drift detection | Planned (Phase 3D) | -| Deployment promotion workflows | Planned (Phase 3D) | -| Rollback visualization | Planned (Phase 3D) | -| Canary / blue-green support | Planned (Phase 3D) | +| Environment model + deployment tracking | Done | +| Deployment status lifecycle API | Done | +| Unified operational timeline | Done | +| Secret management (Global → Workspace → Repo → Env) | Done | +| GitOps controller (drift detection + auto-sync) | Done | +| Deployment promotion workflows | Planned (Phase 4) | +| Rollback visualization | Planned (Phase 4) | +| Canary / blue-green support | Planned (Phase 4) | ### Observability + Security | Feature | Status | |---------|--------| -| Prometheus endpoint + health sparklines | Planned (Phase 3E) | +| Prometheus endpoint + health checks | Planned (Phase 3E) | +| Health sparklines in repo/env pages | Planned (Phase 3E) | | Secret scanning | Planned (Phase 4) | | Dependency scanning | Planned (Phase 4) | | Signed artifacts (Sigstore/Cosign) | Planned (Phase 4) | @@ -120,7 +123,7 @@ make dev The Go API runs at `http://localhost:8080`. The Vite dev server runs at `http://localhost:5173` and proxies API requests. -> **Local dev note:** `DATABASE_URL` must use `localhost` (not `postgres`) and `NATS_URL` must be set to `nats://localhost:4222`. The `.env` file ships with correct defaults for local development. See `.env.example` for all variables. +> **Docker note:** CI execution requires the Docker daemon to be running. If Docker is unavailable, the runner manager logs a warning and disables CI; the rest of the platform works normally. --- @@ -128,22 +131,26 @@ The Go API runs at `http://localhost:8080`. 
The Vite dev server runs at `http:// ``` ForgeBucket -├── API Gateway (Chi router, internal/api/) +├── API Gateway (Chi router — internal/api/router.go) ├── Auth Service (sessions, CSRF, OIDC — internal/api/handlers/) ├── Repository Service (git HTTP, branches, LFS — internal/domain/git/) ├── Pull Request Service (PRs, reviews, merge — internal/api/handlers/) -├── Issue Service (issues, labels — internal/api/handlers/) -├── CI Orchestrator (DAG execution, Docker runner — internal/domain/ci/) ← Phase 2B done -├── Event Bus (NATS core, NoOp fallback — internal/events/) ← Phase 2A done -├── Federation Layer (ActivityPub actors — internal/domain/federation/) ← Phase 3F stub -├── Secret Manager (env-based, scoped tokens — internal/config/) +├── Issue Service (issues — internal/api/handlers/) +├── CI Orchestrator (DAG execution, Docker runner — internal/domain/ci/) +├── GitOps Controller (drift detection, auto-sync — internal/domain/gitops/) +├── Environment Service (environments, deployments — internal/api/handlers/environment.go) +├── Secret Manager (scoped AES-256-GCM — internal/api/handlers/secret.go) +├── Workspace Service (multi-tenant namespaces — internal/api/handlers/workspace.go) +├── Event Bus (NATS core, NoOp fallback — internal/events/) +├── Audit Log (every mutating request — internal/api/middleware/audit.go) +├── Federation Layer (ActivityPub actors — internal/domain/federation/) ← Phase 3F stub ├── Database (PostgreSQL + XORM — internal/models/) -└── Web Frontend (React 18 + TypeScript, embedded via //go:embed — web/) +└── Web Frontend (React 18 + TypeScript, //go:embed — web/) ``` -**Middleware chain (every request):** +**Middleware chain (every authenticated request):** ``` -Logger → RealIP → Recoverer → CORS → CSRF → SessionAuth → RBAC → AuditLog → Handler +Logger → RealIP → Recoverer → CORS → CSRF → SessionAuth → AuditLog → Handler ``` --- @@ -155,12 +162,13 @@ Logger → RealIP → Recoverer → CORS → CSRF → SessionAuth → RBAC → A | Language | Go 1.21+ | 
| Router | Chi | | ORM / Migrations | XORM + PostgreSQL | -| Event bus | NATS (core; JetStream planned for Phase 2B durability) | -| Real-time | WebSockets (nhooyr.io/websocket) | -| CI execution | Docker (`docker run --rm`) | +| Event bus | NATS core (`github.com/nats-io/nats.go`) | +| Real-time | WebSockets (`nhooyr.io/websocket`) | +| CI execution | Docker (`docker run --rm` via `exec.Command`) | | Frontend framework | React 18 + TypeScript | | Build tool | Vite | | Styling | Tailwind CSS v4 | +| YAML parsing | `gopkg.in/yaml.v3` (workflow definitions) | | Code editing | CodeMirror | | Container | Docker Compose (dev) | | Federation | ActivityPub / ForgeFed (data layer only) | @@ -186,12 +194,14 @@ ForgeBucket has its own design language — intentionally distinct from GitHub a | Variable | Required | Description | |----------|----------|-------------| -| `DATABASE_URL` | Yes | PostgreSQL connection string — use `localhost` for local dev | +| `DATABASE_URL` | Yes | PostgreSQL connection string | | `SESSION_SECRET` | Yes | Session signing key, ≥ 32 chars (`openssl rand -hex 32`) | | `CSRF_SECRET` | Yes | CSRF key, exactly 32 chars (`openssl rand -hex 16`) | | `PORT` | No | HTTP port, default `8080` | | `REPO_ROOT` | Yes | Absolute path for bare git repository storage | -| `NATS_URL` | No | NATS connection URL (e.g. `nats://localhost:4222`). If unset, CI runs in no-op mode | +| `ARTIFACT_ROOT` | No | Artifact storage path, defaults to `../artifacts` relative to `REPO_ROOT` | +| `NATS_URL` | No | NATS connection URL (e.g. `nats://localhost:4222`). If unset, event bus is no-op | +| `GITOPS_RECONCILE_INTERVAL` | No | Seconds between periodic drift checks, default `300`. 
`0` disables the ticker | | `INSTANCE_URL` | Yes | Public URL of this instance (no trailing slash) | | `INSTANCE_NAME` | No | Display name, default `ForgeBucket` | | `OIDC_ISSUER` | No | OIDC provider URL | @@ -224,9 +234,11 @@ ForgeBucket has its own design language — intentionally distinct from GitHub a | Phase 2C | Pipeline DAG visualization, dashboard CI upgrade, command palette | Done | | Phase 3A | Environment model + deployment tracking | Done | | Phase 3B | Unified operational timeline | Done | -| Phase 3C | Workspaces + secret management hierarchy | **In progress** | -| Phase 3D–F | GitOps/drift, federation, observability | Planned | -| Phase 4 | AI diagnostics, signed artifacts, OCI registry, dep scanning | Planned | +| Phase 3C | Workspaces + secret management hierarchy (Global → Workspace → Repo → Env) | Done | +| Phase 3D | GitOps controller + drift detection + auto-sync | Done | +| Phase 3E | Observability (Prometheus endpoint, health checks, sparklines) | Next | +| Phase 3F | Federation handlers (ActivityPub inbox/outbox, cross-instance PRs) | Planned | +| Phase 4 | AI diagnostics, signed artifacts, OCI registry, secret/dep scanning | Planned | --- diff --git a/cmd/forgebucket/main.go b/cmd/forgebucket/main.go index 1c2a21b..9169728 100644 --- a/cmd/forgebucket/main.go +++ b/cmd/forgebucket/main.go @@ -19,6 +19,7 @@ import ( "github.com/forgeo/forgebucket/internal/db" "github.com/forgeo/forgebucket/internal/domain/ci" gitdomain "github.com/forgeo/forgebucket/internal/domain/git" + "github.com/forgeo/forgebucket/internal/domain/gitops" "github.com/forgeo/forgebucket/internal/events" "github.com/forgeo/forgebucket/internal/models/migrations" "github.com/forgeo/forgebucket/web" @@ -73,6 +74,9 @@ func main() { runnerMgr := ci.NewRunnerManager(engine, bus, cfg, 4) go runnerMgr.Start(ciCtx) + gitopsCtrl := gitops.NewController(engine, bus, cfg) + go gitopsCtrl.Start(ciCtx) + handler := api.New(cfg, engine, store, bus, cfg.ArtifactRoot, web.FS()) srv := 
&http.Server{ diff --git a/internal/api/handlers/environment.go b/internal/api/handlers/environment.go index 38276b7..55d3279 100644 --- a/internal/api/handlers/environment.go +++ b/internal/api/handlers/environment.go @@ -340,26 +340,15 @@ func (h *EnvironmentHandler) resolveEnv(w http.ResponseWriter, r *http.Request) return &env, true } -type deployEventPayload struct { - DeploymentID int64 `json:"deploymentId"` - EnvID int64 `json:"envId"` - EnvName string `json:"envName"` - RepoID int64 `json:"repoId"` - SHA string `json:"sha"` - Ref string `json:"ref"` - Status models.DeployStatus `json:"status"` - TriggeredBy string `json:"triggeredBy"` -} - func (h *EnvironmentHandler) publishDeployEvent(subject string, env *models.Environment, d *models.Deployment) { - h.bus.Publish(subject, deployEventPayload{ //nolint:errcheck + h.bus.Publish(subject, events.DeploymentEvent{ //nolint:errcheck DeploymentID: d.ID, EnvID: env.ID, EnvName: env.Name, RepoID: d.RepoID, SHA: d.SHA, Ref: d.Ref, - Status: d.Status, + Status: string(d.Status), TriggeredBy: d.TriggeredBy, }) } diff --git a/internal/api/handlers/gitops.go b/internal/api/handlers/gitops.go new file mode 100644 index 0000000..a3cad46 --- /dev/null +++ b/internal/api/handlers/gitops.go @@ -0,0 +1,252 @@ +package handlers + +import ( + "encoding/json" + "net/http" + "strconv" + "time" + + "github.com/go-chi/chi/v5" + "xorm.io/xorm" + + "github.com/forgeo/forgebucket/internal/events" + "github.com/forgeo/forgebucket/internal/models" +) + +type GitOpsHandler struct { + db *xorm.Engine + bus events.EventBus +} + +func NewGitOpsHandler(db *xorm.Engine, bus events.EventBus) *GitOpsHandler { + return &GitOpsHandler{db: db, bus: bus} +} + +// GetConfig returns the GitOpsConfig for an environment, or 404 if not configured. 
+func (h *GitOpsHandler) GetConfig(w http.ResponseWriter, r *http.Request) { + env, ok := h.resolveGitOpsEnv(w, r) + if !ok { + return + } + var cfg models.GitOpsConfig + if found, _ := h.db.Where("env_id = ?", env.ID).Get(&cfg); !found { + jsonError(w, "gitops not configured for this environment", http.StatusNotFound) + return + } + jsonOK(w, cfg) +} + +// UpsertConfig creates or replaces the GitOpsConfig for an environment. +func (h *GitOpsHandler) UpsertConfig(w http.ResponseWriter, r *http.Request) { + env, ok := h.resolveGitOpsEnv(w, r) + if !ok { + return + } + + var body struct { + Branch string `json:"branch"` + AutoSync bool `json:"autoSync"` + SyncInterval int `json:"syncInterval"` + } + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + jsonError(w, "invalid request body", http.StatusBadRequest) + return + } + if body.Branch == "" { + jsonError(w, "branch is required", http.StatusBadRequest) + return + } + + var cfg models.GitOpsConfig + exists, _ := h.db.Where("env_id = ?", env.ID).Get(&cfg) + + cfg.EnvID = env.ID + cfg.RepoID = env.RepoID + cfg.Branch = body.Branch + cfg.AutoSync = body.AutoSync + cfg.SyncInterval = body.SyncInterval + if cfg.SyncStatus == "" { + cfg.SyncStatus = "unknown" + } + + var err error + if exists { + _, err = h.db.ID(cfg.ID).Cols("branch", "auto_sync", "sync_interval").Update(&cfg) + } else { + _, err = h.db.Insert(&cfg) + } + if err != nil { + jsonError(w, "could not save gitops config", http.StatusInternalServerError) + return + } + jsonOK(w, cfg) +} + +// DeleteConfig removes the GitOpsConfig for an environment without deleting deployments. 
+func (h *GitOpsHandler) DeleteConfig(w http.ResponseWriter, r *http.Request) { + env, ok := h.resolveGitOpsEnv(w, r) + if !ok { + return + } + if _, err := h.db.Where("env_id = ?", env.ID).Delete(&models.GitOpsConfig{}); err != nil { + jsonError(w, "could not delete gitops config", http.StatusInternalServerError) + return + } + w.WriteHeader(http.StatusNoContent) +} + +// TriggerSync manually initiates a reconciliation for the environment. +func (h *GitOpsHandler) TriggerSync(w http.ResponseWriter, r *http.Request) { + env, ok := h.resolveGitOpsEnv(w, r) + if !ok { + return + } + + var cfg models.GitOpsConfig + if found, _ := h.db.Where("env_id = ?", env.ID).Get(&cfg); !found { + jsonError(w, "gitops not configured for this environment", http.StatusNotFound) + return + } + if cfg.DesiredSHA == "" { + jsonError(w, "no desired SHA known yet — push to the configured branch first", http.StatusConflict) + return + } + if cfg.SyncStatus == "syncing" { + jsonError(w, "a sync is already in progress", http.StatusConflict) + return + } + + now := time.Now().UTC() + deploy := &models.Deployment{ + EnvID: env.ID, + RepoID: env.RepoID, + SHA: cfg.DesiredSHA, + Ref: "refs/heads/" + cfg.Branch, + Status: models.DeployStatusPending, + TriggeredBy: "gitops-manual", + Description: "Manual GitOps sync", + StartedAt: &now, + } + if _, err := h.db.Insert(deploy); err != nil { + jsonError(w, "could not create deployment", http.StatusInternalServerError) + return + } + + cfg.SyncStatus = "syncing" + h.db.ID(cfg.ID).Cols("sync_status").Update(&cfg) //nolint:errcheck + + h.bus.Publish(events.SubjectDeploymentStarted, events.DeploymentEvent{ //nolint:errcheck + DeploymentID: deploy.ID, + EnvID: env.ID, + EnvName: env.Name, + RepoID: deploy.RepoID, + SHA: deploy.SHA, + Ref: deploy.Ref, + Status: string(deploy.Status), + TriggeredBy: deploy.TriggeredBy, + }) + + w.WriteHeader(http.StatusCreated) + jsonOK(w, deploy) +} + +// GetDriftStatus returns the current sync status and SHA comparison 
for an environment. +func (h *GitOpsHandler) GetDriftStatus(w http.ResponseWriter, r *http.Request) { + env, ok := h.resolveGitOpsEnv(w, r) + if !ok { + return + } + + var cfg models.GitOpsConfig + if found, _ := h.db.Where("env_id = ?", env.ID).Get(&cfg); !found { + jsonError(w, "gitops not configured for this environment", http.StatusNotFound) + return + } + + type driftStatus struct { + SyncStatus string `json:"syncStatus"` + DesiredSHA string `json:"desiredSha"` + ActualSHA string `json:"actualSha"` + Branch string `json:"branch"` + IsDrifted bool `json:"isDrifted"` + } + jsonOK(w, driftStatus{ + SyncStatus: cfg.SyncStatus, + DesiredSHA: cfg.DesiredSHA, + ActualSHA: cfg.ActualSHA, + Branch: cfg.Branch, + IsDrifted: cfg.DesiredSHA != cfg.ActualSHA && cfg.DesiredSHA != "", + }) +} + +// ListDriftEvents returns the drift history for an environment, newest first. +func (h *GitOpsHandler) ListDriftEvents(w http.ResponseWriter, r *http.Request) { + env, ok := h.resolveGitOpsEnv(w, r) + if !ok { + return + } + + limit := 50 + if l, err := strconv.Atoi(r.URL.Query().Get("limit")); err == nil && l > 0 && l <= 200 { + limit = l + } + + var drifts []models.GitOpsDriftEvent + if err := h.db.Where("env_id = ?", env.ID).Desc("id").Limit(limit).Find(&drifts); err != nil { + jsonError(w, "could not list drift events", http.StatusInternalServerError) + return + } + if drifts == nil { + drifts = []models.GitOpsDriftEvent{} + } + jsonOK(w, drifts) +} + +// AcknowledgeDrift marks a drift event as acknowledged without triggering a sync. +func (h *GitOpsHandler) AcknowledgeDrift(w http.ResponseWriter, r *http.Request) { + env, ok := h.resolveGitOpsEnv(w, r) + if !ok { + return + } + + driftID, err := strconv.ParseInt(chi.URLParam(r, "driftID"), 10, 64) + if err != nil { + jsonError(w, "invalid drift event ID", http.StatusBadRequest) + return + } + + var drift models.GitOpsDriftEvent + if found, _ := h.db.Where("id = ? 
AND env_id = ?", driftID, env.ID).Get(&drift); !found { + jsonError(w, "drift event not found", http.StatusNotFound) + return + } + if drift.ResolvedAt != nil { + jsonError(w, "drift event is already resolved", http.StatusConflict) + return + } + + now := time.Now().UTC() + drift.SyncStatus = "acknowledged" + drift.ResolvedAt = &now + if _, err := h.db.ID(drift.ID).Cols("sync_status", "resolved_at").Update(&drift); err != nil { + jsonError(w, "could not acknowledge drift", http.StatusInternalServerError) + return + } + jsonOK(w, drift) +} + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +func (h *GitOpsHandler) resolveGitOpsEnv(w http.ResponseWriter, r *http.Request) (*models.Environment, bool) { + repoID, ok := resolveRepoID(h.db, w, r) + if !ok { + return nil, false + } + envName := chi.URLParam(r, "envName") + var env models.Environment + if found, _ := h.db.Where("repo_id = ? AND name = ?", repoID, envName).Get(&env); !found { + jsonError(w, "environment not found", http.StatusNotFound) + return nil, false + } + return &env, true +} diff --git a/internal/api/router.go b/internal/api/router.go index 4183d21..c582754 100644 --- a/internal/api/router.go +++ b/internal/api/router.go @@ -58,6 +58,7 @@ func New(cfg *config.Config, engine *xorm.Engine, store sessions.Store, bus even auditH := handlers.NewAuditHandler(engine) artifactH := handlers.NewArtifactHandler(engine, artifactRoot) runnerH := handlers.NewRunnerHandler(engine) + gitopsH := handlers.NewGitOpsHandler(engine, bus) envH := handlers.NewEnvironmentHandler(engine, bus) timelineH := handlers.NewTimelineHandler(engine, cfg.RepoRoot) workspaceH := handlers.NewWorkspaceHandler(engine, cfg) @@ -254,6 +255,15 @@ func New(cfg *config.Config, engine *xorm.Engine, store sessions.Store, bus even r.Get("/secrets", secretH.ListEnvSecrets) r.With(csrf).Post("/secrets", secretH.UpsertEnvSecret) r.With(csrf).Delete("/secrets/{name}", secretH.DeleteEnvSecret) + r.Route("/gitops", 
func(r chi.Router) { + r.Get("/", gitopsH.GetConfig) + r.With(csrf).Put("/", gitopsH.UpsertConfig) + r.With(csrf).Delete("/", gitopsH.DeleteConfig) + r.With(csrf).Post("/sync", gitopsH.TriggerSync) + r.Get("/drift", gitopsH.GetDriftStatus) + r.Get("/drift/history", gitopsH.ListDriftEvents) + r.With(csrf).Post("/drift/{driftID}/acknowledge", gitopsH.AcknowledgeDrift) + }) }) }) }) diff --git a/internal/config/config.go b/internal/config/config.go index 313b9a5..48fe72b 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -30,6 +30,9 @@ type Config struct { // Event bus NATSUrl string + // GitOps + GitOpsReconcileInterval int // seconds between periodic drift checks; 0 disables + // Federation InstanceURL string InstanceName string @@ -46,7 +49,8 @@ func Load() (*Config, error) { ArtifactRoot: getEnv("ARTIFACT_ROOT", filepath.Join(filepath.Dir(repoRoot), "artifacts")), Debug: getEnvBool("DEBUG", false), - NATSUrl: getEnv("NATS_URL", ""), + NATSUrl: getEnv("NATS_URL", ""), + GitOpsReconcileInterval: getEnvInt("GITOPS_RECONCILE_INTERVAL", 300), InstanceURL: getEnv("INSTANCE_URL", ""), InstanceName: getEnv("INSTANCE_NAME", "ForgeBucket"), } @@ -91,6 +95,18 @@ func getEnv(key, fallback string) string { return fallback } +func getEnvInt(key string, fallback int) int { + v := os.Getenv(key) + if v == "" { + return fallback + } + n, err := strconv.Atoi(v) + if err != nil { + return fallback + } + return n +} + func getEnvBool(key string, fallback bool) bool { v := os.Getenv(key) if v == "" { diff --git a/internal/domain/git/binary.go b/internal/domain/git/binary.go index efb1d7b..9b5dfd8 100644 --- a/internal/domain/git/binary.go +++ b/internal/domain/git/binary.go @@ -283,6 +283,15 @@ func RepoSize(repoPath string) int64 { return total } +// RevParse resolves a ref (branch name, tag, or SHA) to its full commit SHA. 
+func RevParse(repoPath, ref string) (string, error) { + out, err := run(repoPath, "rev-parse", "--verify", ref) + if err != nil { + return "", err + } + return strings.TrimSpace(string(out)), nil +} + // SetDefaultBranch updates HEAD to point at the given branch name. func SetDefaultBranch(repoPath, branch string) error { _, err := run(repoPath, "symbolic-ref", "HEAD", "refs/heads/"+branch) diff --git a/internal/domain/gitops/controller.go b/internal/domain/gitops/controller.go new file mode 100644 index 0000000..b7a609a --- /dev/null +++ b/internal/domain/gitops/controller.go @@ -0,0 +1,95 @@ +package gitops + +import ( + "context" + "encoding/json" + "log" + "time" + + "xorm.io/xorm" + + "github.com/forgeo/forgebucket/internal/config" + "github.com/forgeo/forgebucket/internal/events" + "github.com/forgeo/forgebucket/internal/models" +) + +// Controller is the GitOps reconciliation engine. It subscribes to NATS events +// and drives drift detection + auto-sync for every configured environment. +type Controller struct { + db *xorm.Engine + bus events.EventBus + cfg *config.Config +} + +func NewController(db *xorm.Engine, bus events.EventBus, cfg *config.Config) *Controller { + return &Controller{db: db, bus: bus, cfg: cfg} +} + +// Start subscribes to relevant events and blocks until ctx is cancelled. 
+func (c *Controller) Start(ctx context.Context) { + c.recoverSyncingState() + + unsub1, err := c.bus.Subscribe(events.SubjectPushReceived, func(_ string, data []byte) { + var evt events.PushEvent + if err := json.Unmarshal(data, &evt); err != nil { + log.Printf("gitops: bad push.received payload: %v", err) + return + } + go c.handlePush(evt) + }) + if err != nil { + log.Printf("gitops: subscribe push.received: %v", err) + } else { + defer unsub1() + } + + unsub2, err := c.bus.Subscribe(events.SubjectDeploymentSucceeded, func(_ string, data []byte) { + go c.handleDeploymentSucceeded(data) + }) + if err != nil { + log.Printf("gitops: subscribe deployment.succeeded: %v", err) + } else { + defer unsub2() + } + + unsub3, err := c.bus.Subscribe(events.SubjectDeploymentFailed, func(_ string, data []byte) { + go c.handleDeploymentFailed(data) + }) + if err != nil { + log.Printf("gitops: subscribe deployment.failed: %v", err) + } else { + defer unsub3() + } + + if c.cfg.GitOpsReconcileInterval > 0 { + go c.runTicker(ctx) + } + + log.Printf("gitops: controller started (reconcile interval: %ds)", c.cfg.GitOpsReconcileInterval) + <-ctx.Done() +} + +func (c *Controller) runTicker(ctx context.Context) { + interval := time.Duration(c.cfg.GitOpsReconcileInterval) * time.Second + ticker := time.NewTicker(interval) + defer ticker.Stop() + for { + select { + case <-ticker.C: + c.periodicCheck() + case <-ctx.Done(): + return + } + } +} + +// recoverSyncingState marks any configs left in "syncing" as "drifted" on startup +// (they were in-flight when the server last stopped). +func (c *Controller) recoverSyncingState() { + affected, _ := c.db.Where("sync_status = 'syncing'"). + Cols("sync_status"). 
+ Update(&models.GitOpsConfig{SyncStatus: "drifted"}) + if affected > 0 { + log.Printf("gitops: recovered %d stale syncing configs → drifted", affected) + } +} diff --git a/internal/domain/gitops/drift.go b/internal/domain/gitops/drift.go new file mode 100644 index 0000000..e78f2f4 --- /dev/null +++ b/internal/domain/gitops/drift.go @@ -0,0 +1,172 @@ +package gitops + +import ( + "log" + "strings" + "time" + + "xorm.io/xorm" + + gitdomain "github.com/forgeo/forgebucket/internal/domain/git" + "github.com/forgeo/forgebucket/internal/events" + "github.com/forgeo/forgebucket/internal/models" +) + +// CheckDrift resolves the HEAD SHA of branch in the repo at repoPath and +// compares it against actualSHA. Returns the resolved HEAD SHA, whether drift +// exists, and any error. +func CheckDrift(repoPath, branch, actualSHA string) (desiredSHA string, drifted bool, err error) { + sha, err := gitdomain.RevParse(repoPath, branch) + if err != nil { + return "", false, err + } + return sha, sha != actualSHA, nil +} + +// refToBranch strips the refs/heads/ prefix from a full git ref. +// Returns "" for non-branch refs (tags, etc.). +func refToBranch(ref string) string { + const prefix = "refs/heads/" + if !strings.HasPrefix(ref, prefix) { + return "" // not a branch ref; TrimPrefix alone would hand it back unchanged + } + return strings.TrimPrefix(ref, prefix) +} + +// handlePush is called on every push.received event. For each GitOpsConfig +// on the pushed repo whose branch matches, it runs a drift check. +func (c *Controller) handlePush(evt events.PushEvent) { + pushedBranch := refToBranch(evt.Ref) + if pushedBranch == "" { + return // tag push or other non-branch ref — ignore + } + + var cfgs []models.GitOpsConfig + if err := c.db.Where("repo_id = ?", evt.RepoID).Find(&cfgs); err != nil { + return + } + + for _, cfg := range cfgs { + if cfg.Branch != pushedBranch { + continue + } + c.evaluateDrift(cfg, evt.After) + } +} + +// evaluateDrift compares desiredSHA against the config's ActualSHA and takes +// the appropriate action: record drift and optionally auto-sync.
+func (c *Controller) evaluateDrift(cfg models.GitOpsConfig, desiredSHA string) {
+	// cfg is a copy: the fields set here reach the database only through the
+	// column-restricted updates below.
+	now := time.Now().UTC()
+	cfg.LastCheckedAt = &now
+	cfg.DesiredSHA = desiredSHA
+
+	if desiredSHA == cfg.ActualSHA {
+		// Already in sync.
+		cfg.SyncStatus = "synced"
+		if _, err := c.db.ID(cfg.ID).Cols("sync_status", "desired_sha", "last_checked_at").Update(&cfg); err != nil {
+			log.Printf("gitops: update config %d: %v", cfg.ID, err)
+		}
+		return
+	}
+
+	// Drift detected — record and publish. Both SHAs go through sha7 because
+	// desiredSHA comes straight from the caller and slicing a value shorter
+	// than seven bytes would panic.
+	log.Printf("gitops: drift on env %d: desired=%s actual=%s", cfg.EnvID, sha7(desiredSHA), sha7(cfg.ActualSHA))
+
+	drift := &models.GitOpsDriftEvent{
+		EnvID:      cfg.EnvID,
+		RepoID:     cfg.RepoID,
+		DesiredSHA: desiredSHA,
+		ActualSHA:  cfg.ActualSHA,
+		SyncStatus: "drifted",
+		DetectedAt: now,
+	}
+	// Persistence stays best-effort: a failure is logged and the remaining
+	// steps still run, exactly as before.
+	if _, err := c.db.Insert(drift); err != nil {
+		log.Printf("gitops: record drift event for env %d: %v", cfg.EnvID, err)
+	}
+
+	cfg.SyncStatus = "drifted"
+	if _, err := c.db.ID(cfg.ID).Cols("sync_status", "desired_sha", "last_checked_at").Update(&cfg); err != nil {
+		log.Printf("gitops: update config %d: %v", cfg.ID, err)
+	}
+
+	// Look up env name for the event payload; on failure the name is left empty.
+	var env models.Environment
+	if _, err := c.db.ID(cfg.EnvID).Get(&env); err != nil {
+		log.Printf("gitops: load env %d: %v", cfg.EnvID, err)
+	}
+
+	c.bus.Publish(events.SubjectEnvironmentDriftDetected, events.DriftEvent{ //nolint:errcheck
+		EnvID:      cfg.EnvID,
+		EnvName:    env.Name,
+		RepoID:     cfg.RepoID,
+		DesiredSHA: desiredSHA,
+		ActualSHA:  cfg.ActualSHA,
+		At:         now,
+	})
+
+	if cfg.AutoSync {
+		c.TriggerSync(cfg, desiredSHA)
+	}
+}
+
+// periodicCheck runs on a ticker and re-evaluates drift for every GitOpsConfig
+// whose SyncInterval has elapsed.
+func (c *Controller) periodicCheck() {
+	now := time.Now().UTC()
+
+	var cfgs []models.GitOpsConfig
+	if err := c.db.Where("sync_interval > 0").Find(&cfgs); err != nil {
+		log.Printf("gitops: periodic check: load configs: %v", err)
+		return
+	}
+
+	for _, cfg := range cfgs {
+		// Compare as int64 seconds so the elapsed time is never narrowed
+		// before the comparison.
+		elapsed := now.Unix() - lastChecked(cfg).Unix()
+		if elapsed < int64(cfg.SyncInterval) {
+			continue
+		}
+
+		var repo models.Repository
+		found, err := c.db.ID(cfg.RepoID).Get(&repo)
+		if err != nil {
+			log.Printf("gitops: periodic check env %d: load repo %d: %v", cfg.EnvID, cfg.RepoID, err)
+			continue
+		}
+		if !found {
+			continue
+		}
+
+		desiredSHA, drifted, err := CheckDrift(repo.DiskPath, cfg.Branch, cfg.ActualSHA)
+		if err != nil {
+			log.Printf("gitops: periodic check env %d: %v", cfg.EnvID, err)
+			c.touchLastChecked(cfg)
+			continue
+		}
+		if !drifted {
+			c.touchLastChecked(cfg)
+			continue
+		}
+
+		// NOTE(review): evaluateDrift inserts a drift event and, with AutoSync,
+		// triggers a deployment on every call with a differing SHA, so a config
+		// that stays drifted is re-reported each elapsed interval — confirm
+		// this is intended.
+		c.evaluateDrift(cfg, desiredSHA)
+	}
+}
+
+// touchLastChecked stamps cfg's last_checked_at column with the current UTC
+// time and leaves every other column untouched. Failures are logged only.
+func (c *Controller) touchLastChecked(cfg models.GitOpsConfig) {
+	checkedAt := time.Now().UTC()
+	cfg.LastCheckedAt = &checkedAt
+	if _, err := c.db.ID(cfg.ID).Cols("last_checked_at").Update(&cfg); err != nil {
+		log.Printf("gitops: update last_checked_at for config %d: %v", cfg.ID, err)
+	}
+}
+
+// markSynced resolves any open drift events for envID and updates the config:
+// status "synced", actual_sha = sha, last_checked_at = now. Both updates are
+// best-effort; a failure is logged and does not stop the other.
+//
+// NOTE(review): the config is marked "synced" without comparing sha to its
+// desired_sha — confirm that a deploy of an older SHA should clear drift.
+func markSynced(db *xorm.Engine, envID int64, sha string) {
+	now := time.Now().UTC()
+	if _, err := db.Where("env_id = ? AND resolved_at IS NULL", envID).
+		Cols("sync_status", "resolved_at").
+		Update(&models.GitOpsDriftEvent{SyncStatus: "synced", ResolvedAt: &now}); err != nil {
+		log.Printf("gitops: resolve drift events for env %d: %v", envID, err)
+	}
+
+	if _, err := db.Where("env_id = ?", envID).
+		Cols("sync_status", "actual_sha", "last_checked_at").
+		Update(&models.GitOpsConfig{SyncStatus: "synced", ActualSHA: sha, LastCheckedAt: &now}); err != nil {
+		log.Printf("gitops: mark config for env %d synced: %v", envID, err)
+	}
+}
+
+// lastChecked returns the time of cfg's most recent drift check, falling back
+// to its creation time when it has never been checked.
+func lastChecked(cfg models.GitOpsConfig) time.Time {
+	if cfg.LastCheckedAt != nil {
+		return *cfg.LastCheckedAt
+	}
+	return cfg.CreatedAt
+}
+
+// sha7 abbreviates s to its first seven bytes for log output. Shorter values
+// are returned whole, and the empty string is rendered as "(none)".
+func sha7(s string) string {
+	if len(s) >= 7 {
+		return s[:7]
+	}
+	if s == "" {
+		return "(none)"
+	}
+	return s
+}
diff --git a/internal/domain/gitops/reconciler.go b/internal/domain/gitops/reconciler.go
new file mode 100644
index 0000000..8db2ec8
--- /dev/null
+++ b/internal/domain/gitops/reconciler.go
@@ -0,0 +1,97 @@
+package gitops
+
+import (
+	"encoding/json"
+	"log"
+	"time"
+
+	"github.com/forgeo/forgebucket/internal/events"
+	"github.com/forgeo/forgebucket/internal/models"
+)
+
+// TriggerSync creates a Deployment record in "pending" state and fires
+// deployment.started — the same path as a manually-triggered deployment.
+// GitOps is just the trigger; actual execution is handled externally (or via CI).
+// It returns without publishing when the environment cannot be loaded or the
+// deployment cannot be inserted.
+func (c *Controller) TriggerSync(cfg models.GitOpsConfig, desiredSHA string) {
+	var env models.Environment
+	found, err := c.db.ID(cfg.EnvID).Get(&env)
+	if err != nil {
+		log.Printf("gitops: sync env %d: load environment: %v", cfg.EnvID, err)
+		return
+	}
+	if !found {
+		log.Printf("gitops: sync env %d not found", cfg.EnvID)
+		return
+	}
+
+	now := time.Now().UTC()
+	deploy := &models.Deployment{
+		EnvID:       cfg.EnvID,
+		RepoID:      cfg.RepoID,
+		SHA:         desiredSHA,
+		Ref:         "refs/heads/" + cfg.Branch,
+		Status:      models.DeployStatusPending,
+		TriggeredBy: "gitops",
+		Description: "GitOps auto-sync",
+		StartedAt:   &now,
+	}
+	if _, err := c.db.Insert(deploy); err != nil {
+		log.Printf("gitops: create deployment: %v", err)
+		return
+	}
+
+	cfg.SyncStatus = "syncing"
+	if _, err := c.db.ID(cfg.ID).Cols("sync_status").Update(&cfg); err != nil {
+		log.Printf("gitops: mark config %d syncing: %v", cfg.ID, err)
+	}
+
+	c.bus.Publish(events.SubjectDeploymentStarted, events.DeploymentEvent{ //nolint:errcheck
+		DeploymentID: deploy.ID,
+		EnvID:        env.ID,
+		EnvName:      env.Name,
+		RepoID:       deploy.RepoID,
+		SHA:          deploy.SHA,
+		Ref:          deploy.Ref,
+		Status:       string(deploy.Status),
+		TriggeredBy:  deploy.TriggeredBy,
+	})
+
+	// sha7 instead of desiredSHA[:7]: a short SHA must not panic after the
+	// deployment has already been created and announced.
+	log.Printf("gitops: triggered sync deploy %d for env %d (%s)", deploy.ID, cfg.EnvID, sha7(desiredSHA))
+}
+
+// handleDeploymentSucceeded is called when any deployment.succeeded event fires.
+// A GitOps-triggered deployment always marks its environment as synced. Any
+// other deployment does so only when the environment has a GitOpsConfig.
+func (c *Controller) handleDeploymentSucceeded(data []byte) {
+	var evt events.DeploymentEvent
+	if err := json.Unmarshal(data, &evt); err != nil {
+		log.Printf("gitops: bad deployment.succeeded payload: %v", err)
+		return
+	}
+
+	if evt.TriggeredBy != "gitops" {
+		// Manual deployments also advance the state, so update ActualSHA and
+		// resolve drift if this env has a GitOps config.
+		var cfg models.GitOpsConfig
+		found, err := c.db.Where("env_id = ?", evt.EnvID).Get(&cfg)
+		if err != nil {
+			log.Printf("gitops: load config for env %d: %v", evt.EnvID, err)
+			return
+		}
+		if found {
+			markSynced(c.db, evt.EnvID, evt.SHA)
+			log.Printf("gitops: env %d synced via manual deploy (%s)", evt.EnvID, sha7(evt.SHA))
+		}
+		return
+	}
+
+	markSynced(c.db, evt.EnvID, evt.SHA)
+	log.Printf("gitops: env %d synced (%s)", evt.EnvID, sha7(evt.SHA))
+}
+
+// handleDeploymentFailed is called when deployment.failed fires.
+// If the deployment was GitOps-triggered, it reverts SyncStatus back to drifted.
+// Events from other triggers are ignored.
+func (c *Controller) handleDeploymentFailed(data []byte) {
+	var evt events.DeploymentEvent
+	if err := json.Unmarshal(data, &evt); err != nil {
+		log.Printf("gitops: bad deployment.failed payload: %v", err)
+		return
+	}
+	if evt.TriggeredBy != "gitops" {
+		return
+	}
+
+	var cfg models.GitOpsConfig
+	found, err := c.db.Where("env_id = ?", evt.EnvID).Get(&cfg)
+	if err != nil {
+		log.Printf("gitops: load config for env %d: %v", evt.EnvID, err)
+		return
+	}
+	if !found {
+		return
+	}
+	cfg.SyncStatus = "drifted"
+	if _, err := c.db.ID(cfg.ID).Cols("sync_status").Update(&cfg); err != nil {
+		log.Printf("gitops: revert config %d to drifted: %v", cfg.ID, err)
+	}
+	log.Printf("gitops: env %d sync failed — reverting to drifted", evt.EnvID)
+}
diff --git a/internal/events/types.go b/internal/events/types.go
index 77260ad..235d5ff 100644
--- a/internal/events/types.go
+++ b/internal/events/types.go
@@ -79,6 +79,29 @@ type LogChunkEvent struct {
 	Content string `json:"content"`
 }
 
+// DeploymentEvent is published on deployment lifecycle transitions.
+// It matches the payload shape used by EnvironmentHandler.publishDeployEvent.
+type DeploymentEvent struct {
+	DeploymentID int64  `json:"deploymentId"` // ID of the Deployment record
+	EnvID        int64  `json:"envId"`        // target environment
+	EnvName      string `json:"envName"`      // name of that environment
+	RepoID       int64  `json:"repoId"`       // repository being deployed
+	SHA          string `json:"sha"`          // commit being deployed
+	Ref          string `json:"ref"`          // TriggerSync sends "refs/heads/<branch>"
+	Status       string `json:"status"`       // deployment status as a string
+	TriggeredBy  string `json:"triggeredBy"`  // "gitops" when created by the GitOps controller
+}
+
+// DriftEvent is published when an environment's actual state diverges from desired.
+type DriftEvent struct {
+	EnvID      int64     `json:"envId"`      // environment that drifted
+	EnvName    string    `json:"envName"`    // name of that environment
+	RepoID     int64     `json:"repoId"`     // repository of the tracked branch
+	DesiredSHA string    `json:"desiredSha"` // SHA the environment should be at
+	ActualSHA  string    `json:"actualSha"`  // SHA recorded as deployed at detection time
+	At         time.Time `json:"at"`         // when the drift was detected
+}
+
 // WSEnvelope wraps any event for delivery over the WebSocket connection.
 type WSEnvelope struct {
 	Subject string `json:"subject"`
diff --git a/internal/models/gitops.go b/internal/models/gitops.go
new file mode 100644
index 0000000..ef14894
--- /dev/null
+++ b/internal/models/gitops.go
@@ -0,0 +1,32 @@
+package models
+
+import "time"
+
+// GitOpsConfig links an Environment to a branch that serves as its desired state.
+// When the HEAD SHA of Branch diverges from ActualSHA, the environment is "drifted".
+type GitOpsConfig struct {
+	ID            int64      `xorm:"'id' pk autoincr" json:"id"`                                    // primary key
+	EnvID         int64      `xorm:"'env_id' unique notnull index" json:"envId"`                    // one config per env
+	RepoID        int64      `xorm:"'repo_id' notnull index" json:"repoId"`                         // repository whose branch is tracked
+	Branch        string     `xorm:"'branch' varchar(255) notnull" json:"branch"`                   // source-of-truth branch
+	AutoSync      bool       `xorm:"'auto_sync' default false" json:"autoSync"`                     // create deployment on drift
+	SyncInterval  int        `xorm:"'sync_interval' default 0" json:"syncInterval"`                 // seconds; 0 = push-only
+	SyncStatus    string     `xorm:"'sync_status' varchar(20) default 'unknown'" json:"syncStatus"` // unknown/synced/drifted/syncing
+	DesiredSHA    string     `xorm:"'desired_sha' varchar(40)" json:"desiredSha"`                   // last known branch HEAD
+	ActualSHA     string     `xorm:"'actual_sha' varchar(40)" json:"actualSha"`                     // SHA of last successful deploy
+	LastCheckedAt *time.Time `xorm:"'last_checked_at'" json:"lastCheckedAt"`                        // last drift check; nil until the first one
+	CreatedAt     time.Time  `xorm:"'created_at' created" json:"createdAt"`                         // filled by xorm on insert ("created" tag)
+	UpdatedAt     time.Time  `xorm:"'updated_at' updated" json:"updatedAt"`                         // filled by xorm on update ("updated" tag)
+}
+
+// GitOpsDriftEvent records one drift detection. The row is inserted when drift
+// is detected and later updated in place (status and resolved_at) once the
+// environment is synced again.
+type GitOpsDriftEvent struct {
+	ID         int64      `xorm:"'id' pk autoincr" json:"id"`                    // primary key
+	EnvID      int64      `xorm:"'env_id' notnull index" json:"envId"`           // environment that drifted
+	RepoID     int64      `xorm:"'repo_id' notnull index" json:"repoId"`         // repository of the tracked branch
+	DesiredSHA string     `xorm:"'desired_sha' varchar(40)" json:"desiredSha"`   // SHA that should be deployed
+	ActualSHA  string     `xorm:"'actual_sha' varchar(40)" json:"actualSha"`     // SHA actually deployed (empty = never)
+	SyncStatus string     `xorm:"'sync_status' varchar(20)" json:"syncStatus"`   // drifted/synced/acknowledged
+	DetectedAt time.Time  `xorm:"'detected_at' notnull index" json:"detectedAt"` // when the drift was detected
+	ResolvedAt *time.Time `xorm:"'resolved_at'" json:"resolvedAt"`               // nil while the drift is still open
+}
diff --git a/internal/models/migrations/001_init.go b/internal/models/migrations/001_init.go
index b8bda03..72215da 100644
--- a/internal/models/migrations/001_init.go
+++ b/internal/models/migrations/001_init.go
@@ -46,5 +46,8 @@ func Run(engine *xorm.Engine) error {
 	if err := Run011(engine); err != nil {
 		return err
 	}
-	return Run012(engine)
+	if err := Run012(engine); err != nil {
+		return err
+	}
+	return Run013(engine)
 }
diff --git a/internal/models/migrations/013_gitops.go b/internal/models/migrations/013_gitops.go
new file mode 100644
index 0000000..eecefe1
--- /dev/null
+++ b/internal/models/migrations/013_gitops.go
@@ -0,0 +1,13 @@
+package migrations
+
+import (
+	"github.com/forgeo/forgebucket/internal/models"
+	"xorm.io/xorm"
+)
+
+// Run013 is the GitOps migration step: it passes the GitOpsConfig and
+// GitOpsDriftEvent models to engine.Sync2 and returns its error. Run calls it
+// after Run012 has succeeded.
+func Run013(engine *xorm.Engine) error {
+	return engine.Sync2(
+		&models.GitOpsConfig{},
+		&models.GitOpsDriftEvent{},
+	)
+}