completed phase 2b

2026-05-11 20:10:45 +02:00
parent 83d96d0a1e
commit 4002a3b84d
20 changed files with 1566 additions and 50 deletions
@@ -0,0 +1,78 @@
+package ci
+
+import (
+	"fmt"
+	"sort"
+)
+
+// dagNode holds a job name and its resolved dependencies.
+// TopoSort builds one node per entry of the workflow's job map.
+type dagNode struct {
+	name  string   // job name, i.e. the key of the job in the job map
+	needs []string // names of the jobs this job depends on
+}
+
+// TopoSort returns the job names in a valid topological execution order:
+// every job appears after all of the jobs listed in its Needs.
+//
+// Jobs are visited in sorted-name order, so the same workflow always produces
+// the same ordering and, for an invalid graph, the same error.
+// Returns an error if the dependency graph has cycles or references unknown jobs.
+func TopoSort(jobs map[string]WorkflowJob) ([]string, error) {
+	nodes := make(map[string]*dagNode, len(jobs))
+	names := make([]string, 0, len(jobs))
+	for name, job := range jobs {
+		nodes[name] = &dagNode{name: name, needs: []string(job.Needs)}
+		names = append(names, name)
+	}
+	// Map iteration order is random; sorting makes the result reproducible.
+	sort.Strings(names)
+
+	// Validate all dependencies exist.
+	for _, name := range names {
+		for _, dep := range nodes[name].needs {
+			if _, ok := nodes[dep]; !ok {
+				return nil, fmt.Errorf("job %q depends on unknown job %q", name, dep)
+			}
+		}
+	}
+
+	var order []string
+	visited := make(map[string]bool, len(nodes)) // fully processed jobs
+	inStack := make(map[string]bool, len(nodes)) // jobs on the current DFS path
+
+	// visit performs a depth-first post-order walk: a job is appended to order
+	// only after every job it needs has been appended.
+	var visit func(name string) error
+	visit = func(name string) error {
+		if inStack[name] {
+			// Reaching a job that is still on the current path means a cycle.
+			return fmt.Errorf("cycle detected at job %q", name)
+		}
+		if visited[name] {
+			return nil
+		}
+		inStack[name] = true
+		for _, dep := range nodes[name].needs {
+			if err := visit(dep); err != nil {
+				return err
+			}
+		}
+		inStack[name] = false
+		visited[name] = true
+		order = append(order, name)
+		return nil
+	}
+
+	for _, name := range names {
+		if err := visit(name); err != nil {
+			return nil, err
+		}
+	}
+	return order, nil
+}
+
+// ReadyJobs returns the names of jobs that are not in enqueuedJobs and whose
+// dependencies are all in completedJobs. A job without dependencies is ready
+// as soon as it is not enqueued.
+//
+// The result is sorted by name so that callers enqueue jobs in a reproducible
+// order; it is nil when no job is ready.
+func ReadyJobs(jobs map[string]WorkflowJob, completedJobs map[string]bool, enqueuedJobs map[string]bool) []string {
+	var ready []string
+	for name, job := range jobs {
+		if enqueuedJobs[name] {
+			continue
+		}
+		allDone := true
+		for _, dep := range job.Needs {
+			if !completedJobs[dep] {
+				allDone = false
+				break
+			}
+		}
+		if allDone {
+			ready = append(ready, name)
+		}
+	}
+	// Map iteration order is random; sort for a deterministic result.
+	sort.Strings(ready)
+	return ready
+}
@@ -0,0 +1,271 @@
+package ci
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"xorm.io/xorm"
+
+	"github.com/forgeo/forgebucket/internal/events"
+	"github.com/forgeo/forgebucket/internal/models"
+)
+
+// JobContext holds everything needed to execute a single pipeline job.
+// buildJobContext assembles it from DB rows and ExecuteJob consumes it.
+type JobContext struct {
+	Run   models.PipelineRun    // run the job belongs to; supplies the run ID and trigger SHA
+	Job   models.PipelineJob    // the job being executed; its Image selects the container image
+	Steps []models.PipelineStep // the job's steps; ExecuteJob runs them in slice order
+	Repo  models.Repository     // repository whose DiskPath is the source of the workspace snapshot
+}
+
+// ExecuteJob runs all steps of a job inside isolated Docker containers,
+// streams log output to NATS and the DB, then publishes job.completed or job.failed.
+//
+// The repository snapshot is extracted below a directory private to this job
+// (workspaceRoot/job-<jobID>), so jobs of the same run that execute
+// concurrently neither share files nor delete each other's workspace when they
+// finish. The first failing step fails the whole job; later steps are not run.
+// DB updates and event publishing are best-effort: their errors are ignored.
+func ExecuteJob(ctx context.Context, db *xorm.Engine, bus events.EventBus, jc JobContext, workspaceRoot string) {
+	now := time.Now().UTC()
+	jc.Job.Status = "running"
+	jc.Job.StartedAt = &now
+	db.ID(jc.Job.ID).Cols("status", "started_at").Update(&jc.Job) //nolint:errcheck
+
+	// extractWorkspace names its directory after the run ID only. Give every
+	// job its own parent directory and remove that parent when the job ends.
+	jobRoot := filepath.Join(workspaceRoot, fmt.Sprintf("job-%d", jc.Job.ID))
+	defer os.RemoveAll(jobRoot)
+
+	// Extract repo snapshot into a workspace directory.
+	workDir, err := extractWorkspace(jc.Repo.DiskPath, jc.Run.TriggerSHA, jobRoot, jc.Run.ID)
+	if err != nil {
+		failJob(db, bus, jc, fmt.Sprintf("workspace setup failed: %v", err))
+		return
+	}
+
+	image := jc.Job.Image
+	if image == "" {
+		image = "ubuntu:22.04"
+	}
+
+	// Pull image once per job (non-fatal if pull fails and image exists locally).
+	pullCmd := exec.CommandContext(ctx, "docker", "pull", image)
+	pullCmd.Run() //nolint:errcheck
+
+	for i := range jc.Steps {
+		step := &jc.Steps[i]
+		if step.UsesAction == "checkout" {
+			// Built-in checkout: workspace is already set up by extractWorkspace.
+			markStep(db, step, "succeeded", 0)
+			continue
+		}
+		if step.RunCmd == "" {
+			markStep(db, step, "skipped", 0)
+			continue
+		}
+		exitCode, err := runStep(ctx, db, bus, jc.Run.ID, jc.Job.ID, step, image, workDir)
+		if err != nil || exitCode != 0 {
+			if exitCode == 0 {
+				exitCode = 1
+			}
+			reason := fmt.Sprintf("step %q exited %d", step.Name, exitCode)
+			if err != nil {
+				// The container never ran; report why instead of a bare exit code.
+				reason = fmt.Sprintf("step %q exited %d: %v", step.Name, exitCode, err)
+			}
+			markStep(db, step, "failed", exitCode)
+			failJob(db, bus, jc, reason)
+			return
+		}
+		markStep(db, step, "succeeded", 0)
+	}
+
+	fin := time.Now().UTC()
+	jc.Job.Status = "succeeded"
+	jc.Job.FinishedAt = &fin
+	db.ID(jc.Job.ID).Cols("status", "finished_at").Update(&jc.Job) //nolint:errcheck
+	bus.Publish(events.SubjectJobCompleted, events.JobEvent{ //nolint:errcheck
+		RunID: jc.Run.ID, JobID: jc.Job.ID, Status: "succeeded", At: fin,
+	})
+}
+
+// runStep runs a single shell-command step inside a Docker container.
+//
+// The workspace is mounted at /workspace, networking is disabled, and the
+// step's command is executed with "/bin/sh -ec". Combined stdout/stderr is
+// forwarded line by line through writeLogChunk.
+//
+// It returns the container's exit code. The error is non-nil only when the
+// container could not be started; in that case the exit code is 1.
+func runStep(ctx context.Context, db *xorm.Engine, bus events.EventBus,
+	runID, jobID int64, step *models.PipelineStep, image, workDir string) (int, error) {
+
+	now := time.Now().UTC()
+	step.Status = "running"
+	step.StartedAt = &now
+	db.ID(step.ID).Cols("status", "started_at").Update(step) //nolint:errcheck
+
+	cmd := exec.CommandContext(ctx, "docker", "run", "--rm",
+		"-v", workDir+":/workspace",
+		"-w", "/workspace",
+		"--network=none", // no network by default; Phase 2C will add network scopes
+		image,
+		"/bin/sh", "-ec", step.RunCmd,
+	)
+
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return 1, err
+	}
+	cmd.Stderr = cmd.Stdout // merge stderr into stdout
+
+	if err := cmd.Start(); err != nil {
+		return 1, fmt.Errorf("docker run: %w", err)
+	}
+
+	// Read with ReadLine rather than bufio.Scanner: a Scanner gives up on a
+	// line longer than its token limit, after which nobody drains the pipe and
+	// the container (and therefore cmd.Wait) can block forever. ReadLine
+	// returns an over-long line in buffer-sized pieces, so memory stays bounded
+	// and the pipe is always read to the end.
+	chunk := 0
+	reader := bufio.NewReaderSize(stdout, 64*1024)
+	for {
+		raw, isPrefix, readErr := reader.ReadLine()
+		if readErr != nil {
+			break // io.EOF once the container closes its output
+		}
+		content := string(raw)
+		if !isPrefix {
+			// ReadLine strips the line terminator; restore a uniform "\n".
+			content += "\n"
+		}
+		writeLogChunk(db, bus, runID, jobID, step.ID, chunk, content)
+		chunk++
+	}
+
+	exitCode := 0
+	if err := cmd.Wait(); err != nil {
+		if exitErr, ok := err.(*exec.ExitError); ok {
+			exitCode = exitErr.ExitCode()
+		} else {
+			exitCode = 1
+		}
+	}
+	return exitCode, nil
+}
+
+// extractWorkspace uses git-archive to export the repo at a given SHA into the
+// directory workspaceRoot/run-<runID>, piping the archive into tar.
+//
+// It returns the directory path on success. On any failure the directory is
+// removed again and an error is returned. An empty sha, or one that starts
+// with "-", is rejected before anything is created because it would be parsed
+// by git as a command-line option.
+func extractWorkspace(repoPath, sha, workspaceRoot string, runID int64) (string, error) {
+	if sha == "" || strings.HasPrefix(sha, "-") {
+		return "", fmt.Errorf("invalid commit %q", sha)
+	}
+
+	dir := filepath.Join(workspaceRoot, fmt.Sprintf("run-%d", runID))
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		return "", err
+	}
+
+	archive := exec.Command("git", "archive", "--format=tar", sha)
+	archive.Dir = repoPath
+	archive.Env = []string{"GIT_TERMINAL_PROMPT=0", "HOME=/tmp"}
+
+	archiveOut, err := archive.StdoutPipe()
+	if err != nil {
+		os.RemoveAll(dir)
+		return "", fmt.Errorf("git archive pipe: %w", err)
+	}
+
+	tar := exec.Command("tar", "-x", "-C", dir)
+	tar.Stdin = archiveOut
+
+	if err := archive.Start(); err != nil {
+		os.RemoveAll(dir)
+		return "", fmt.Errorf("git archive: %w", err)
+	}
+	if err := tar.Start(); err != nil {
+		archive.Process.Kill() //nolint:errcheck
+		archive.Wait()         //nolint:errcheck // reap the killed process
+		os.RemoveAll(dir)
+		return "", fmt.Errorf("tar: %w", err)
+	}
+
+	// Both processes are running; wait for both before judging the outcome.
+	archiveErr := archive.Wait()
+	tarErr := tar.Wait()
+	if archiveErr != nil {
+		os.RemoveAll(dir)
+		return "", fmt.Errorf("git archive wait: %w", archiveErr)
+	}
+	if tarErr != nil {
+		os.RemoveAll(dir)
+		return "", fmt.Errorf("tar wait: %w", tarErr)
+	}
+	return dir, nil
+}
+
+// writeLogChunk stores one chunk of step output in the DB and publishes the
+// same chunk on the pipeline-log subject. Both operations are best-effort:
+// their errors are ignored.
+func writeLogChunk(db *xorm.Engine, bus events.EventBus, runID, jobID, stepID int64, idx int, content string) {
+	row := models.PipelineStepLog{StepID: stepID, ChunkIndex: idx, Content: content}
+	db.Insert(&row) //nolint:errcheck
+
+	evt := events.LogChunkEvent{
+		RunID:      runID,
+		JobID:      jobID,
+		StepID:     stepID,
+		ChunkIndex: idx,
+		Content:    content,
+	}
+	bus.Publish(events.SubjectPipelineLog, evt) //nolint:errcheck
+}
+
+// markStep records a step's final status, exit code and finish time (now, UTC)
+// on the in-memory step and in its DB row. The DB error is ignored.
+func markStep(db *xorm.Engine, step *models.PipelineStep, status string, exitCode int) {
+	finishedAt := time.Now().UTC()
+	step.Status, step.ExitCode, step.FinishedAt = status, exitCode, &finishedAt
+
+	row := db.ID(step.ID).Cols("status", "exit_code", "finished_at")
+	row.Update(step) //nolint:errcheck
+}
+
+// failJob marks the job as failed, attaches reason as a synthetic log line to
+// the job's last step (highest seq), and publishes job.failed.
+//
+// jc is passed by value, so the status change is written to the DB but not
+// reflected in the caller's copy. All DB and bus errors are ignored.
+func failJob(db *xorm.Engine, bus events.EventBus, jc JobContext, reason string) {
+	now := time.Now().UTC()
+	jc.Job.Status = "failed"
+	jc.Job.FinishedAt = &now
+	db.ID(jc.Job.ID).Cols("status", "finished_at").Update(&jc.Job) //nolint:errcheck
+
+	// Write the failure reason as a synthetic log line.
+	var lastStep models.PipelineStep
+	if found, _ := db.Where("job_id = ?", jc.Job.ID).Desc("seq").Get(&lastStep); found {
+		// Append after the chunks the step already wrote. Reusing index 0
+		// would collide with the step's first real output chunk. If the count
+		// fails, existing is 0 and the line falls back to index 0.
+		existing, _ := db.Count(&models.PipelineStepLog{StepID: lastStep.ID})
+		writeLogChunk(db, bus, jc.Run.ID, jc.Job.ID, lastStep.ID, int(existing),
+			"\n[ForgeBucket] Job failed: "+reason+"\n")
+	}
+
+	bus.Publish(events.SubjectJobFailed, events.JobEvent{ //nolint:errcheck
+		RunID: jc.Run.ID, JobID: jc.Job.ID, Status: "failed", At: now,
+	})
+}
+
+// workspaceDir returns the scratch directory root for CI job workspaces:
+// a "ci-workspaces" directory that is a sibling of artifactRoot.
+func workspaceDir(artifactRoot string) string {
+	parent := filepath.Dir(artifactRoot)
+	return filepath.Join(parent, "ci-workspaces")
+}
+
+// IsDockerAvailable checks whether the docker CLI is reachable by running
+// "docker info".
+//
+// The probe inherits the process environment (with HOME forced to /tmp), so it
+// honours the same settings, such as DOCKER_HOST, as the "docker run"
+// invocations that execute steps. It gives up after ten seconds so that an
+// unresponsive daemon cannot block startup indefinitely.
+func IsDockerAvailable() bool {
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	cmd := exec.CommandContext(ctx, "docker", "info")
+	// For duplicate keys os/exec uses the last value, so HOME=/tmp wins.
+	cmd.Env = append(os.Environ(), "HOME=/tmp")
+	return cmd.Run() == nil
+}
+
+// stepsForJob loads the PipelineStep rows belonging to jobID in ascending seq
+// order. It returns whatever rows were loaded together with the query error.
+func stepsForJob(db *xorm.Engine, jobID int64) ([]models.PipelineStep, error) {
+	var rows []models.PipelineStep
+	query := db.Where("job_id = ?", jobID).Asc("seq")
+	err := query.Find(&rows)
+	return rows, err
+}
+
+// repoForRun loads the run with the given ID and the repository it refers to.
+// The boolean is false when either row is missing; lookup errors are ignored
+// and treated as "not found". On failure the returned Repository is the zero value.
+func repoForRun(db *xorm.Engine, runID int64) (models.Repository, models.PipelineRun, bool) {
+	var (
+		run  models.PipelineRun
+		repo models.Repository
+	)
+
+	runFound, _ := db.ID(runID).Get(&run)
+	if !runFound {
+		return models.Repository{}, run, false
+	}
+
+	repoFound, _ := db.ID(run.RepoID).Get(&repo)
+	if !repoFound {
+		return models.Repository{}, run, false
+	}
+
+	return repo, run, true
+}
+
+// buildJobContext assembles a JobContext from DB rows: the job itself, its run
+// and repository (repoForRun) and its ordered steps (stepsForJob).
+// It returns false when any of them cannot be loaded.
+func buildJobContext(db *xorm.Engine, jobID int64) (JobContext, bool) {
+	var (
+		job  models.PipelineJob
+		none JobContext
+	)
+
+	found, _ := db.ID(jobID).Get(&job)
+	if !found {
+		return none, false
+	}
+
+	repo, run, ok := repoForRun(db, job.RunID)
+	if !ok {
+		return none, false
+	}
+
+	steps, err := stepsForJob(db, jobID)
+	if err != nil {
+		return none, false
+	}
+
+	jc := JobContext{Run: run, Job: job, Steps: steps, Repo: repo}
+	return jc, true
+}
+
+// pipeForRun is reserved for returning the longest-matching step label for an
+// image. Phase 2B: unused placeholder for future label matching; it ignores
+// its argument and always returns the empty string.
+func pipeForRun(_ string) string {
+	return ""
+}
+
+// sanitizeImage prevents injection in docker image names.
+func sanitizeImage(image string) string {
+	// Allow only characters valid in Docker image references.
+	var b strings.Builder
+	for _, c := range image {
+		if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+			(c >= '0' && c <= '9') || c == '.' || c == '-' || c == '_' ||
+			c == '/' || c == ':' || c == '@' {
+			b.WriteRune(c)
+		}
+	}
+	return b.String()
+}
@@ -0,0 +1,292 @@
+package ci
+
+import (
+	"context"
+	"encoding/json"
+	"log"
+	"strings"
+	"time"
+
+	"xorm.io/xorm"
+
+	"github.com/forgeo/forgebucket/internal/events"
+	"github.com/forgeo/forgebucket/internal/models"
+)
+
+// Orchestrator listens for push events, creates pipeline run records, and
+// advances the DAG as jobs complete. It does NOT execute jobs directly —
+// that is the RunnerManager's responsibility.
+type Orchestrator struct {
+	db  *xorm.Engine    // store for pipeline, run, job and step records
+	bus events.EventBus // bus the orchestrator subscribes to and publishes on
+}
+
+// NewOrchestrator returns an Orchestrator that uses db for persistence and bus
+// for events. It does not subscribe to anything until Start is called.
+func NewOrchestrator(db *xorm.Engine, bus events.EventBus) *Orchestrator {
+	o := new(Orchestrator)
+	o.db = db
+	o.bus = bus
+	return o
+}
+
+// Start subscribes to relevant NATS subjects and blocks until ctx is cancelled.
+//
+// Before subscribing it fails any work a previous process left in "running"
+// state (recoverStaleRuns). Every event is handled on its own goroutine so the
+// bus callback returns immediately. A failed subscription is logged and Start
+// carries on with the remaining ones; successful subscriptions are released
+// when Start returns. Malformed payloads are logged and dropped.
+func (o *Orchestrator) Start(ctx context.Context) {
+	o.recoverStaleRuns()
+
+	unsub1, err := o.bus.Subscribe(events.SubjectPushReceived, func(_ string, data []byte) {
+		var evt events.PushEvent
+		if err := json.Unmarshal(data, &evt); err != nil {
+			log.Printf("orchestrator: bad push event: %v", err)
+			return
+		}
+		go o.handlePush(evt)
+	})
+	if err != nil {
+		log.Printf("orchestrator: subscribe push.received: %v", err)
+	} else {
+		defer unsub1()
+	}
+
+	// jobHandler builds the callback shared by job.completed and job.failed:
+	// decode the event and advance the run's DAG with the given result.
+	jobHandler := func(result string) func(string, []byte) {
+		return func(_ string, data []byte) {
+			var evt events.JobEvent
+			if err := json.Unmarshal(data, &evt); err != nil {
+				log.Printf("orchestrator: bad job event (%s): %v", result, err)
+				return
+			}
+			go o.advanceDAG(evt.RunID, evt.JobID, result)
+		}
+	}
+
+	unsub2, err := o.bus.Subscribe(events.SubjectJobCompleted, jobHandler("succeeded"))
+	if err != nil {
+		log.Printf("orchestrator: subscribe job.completed: %v", err)
+	} else {
+		defer unsub2()
+	}
+
+	unsub3, err := o.bus.Subscribe(events.SubjectJobFailed, jobHandler("failed"))
+	if err != nil {
+		log.Printf("orchestrator: subscribe job.failed: %v", err)
+	} else {
+		defer unsub3()
+	}
+
+	<-ctx.Done()
+}
+
+// handlePush is called for every successful git push. It lists the workflow
+// files at the pushed commit, parses each one, and creates a run (which also
+// enqueues the first wave of jobs) for every workflow whose push trigger
+// matches the pushed ref. Unknown repositories and listing errors end the
+// call silently; parse and run-creation errors are logged per workflow.
+func (o *Orchestrator) handlePush(evt events.PushEvent) {
+	// Ignore branch deletions (new SHA = all zeros) and events without a SHA.
+	if strings.Trim(evt.After, "0") == "" {
+		return
+	}
+
+	var repo models.Repository
+	found, _ := o.db.ID(evt.RepoID).Get(&repo)
+	if !found {
+		return
+	}
+
+	paths, err := ListWorkflows(repo.DiskPath, evt.After)
+	if err != nil {
+		return
+	}
+
+	for _, wfPath := range paths {
+		wf, parseErr := ParseWorkflow(repo.DiskPath, evt.After, wfPath)
+		switch {
+		case parseErr != nil:
+			log.Printf("orchestrator: parse workflow %s: %v", wfPath, parseErr)
+		case !MatchesPushTrigger(wf, evt.Ref):
+			// Workflow is not triggered by this ref.
+		default:
+			if runErr := o.createRun(repo, evt, wf, wfPath); runErr != nil {
+				log.Printf("orchestrator: create run for %s: %v", wfPath, runErr)
+			}
+		}
+	}
+}
+
+// createRun records one execution of a workflow for a push: it validates the
+// job graph, upserts the Pipeline definition, inserts the run together with
+// one row per job and step, enqueues the jobs that have no dependencies and
+// publishes pipeline.triggered.
+//
+// The graph is validated before anything is written, so a workflow with a
+// cycle or an unknown dependency leaves no rows behind. The inserts are not
+// wrapped in a transaction: an error part-way through returns immediately and
+// leaves the rows written so far.
+func (o *Orchestrator) createRun(repo models.Repository, evt events.PushEvent, wf *WorkflowFile, filePath string) error {
+	// Validate DAG before writing anything.
+	if _, err := TopoSort(wf.Jobs); err != nil {
+		return err
+	}
+
+	// Upsert the Pipeline definition record.
+	pipeline := &models.Pipeline{RepoID: repo.ID, FilePath: filePath}
+	has, err := o.db.Where("repo_id = ? AND file_path = ?", repo.ID, filePath).Get(pipeline)
+	if err != nil {
+		// Treating a failed lookup as "not found" would insert a duplicate.
+		return err
+	}
+	pipeline.Name = wf.Name
+	if pipeline.Name == "" {
+		pipeline.Name = filePath
+	}
+	if has {
+		o.db.ID(pipeline.ID).Cols("name").Update(pipeline) //nolint:errcheck
+	} else if _, err := o.db.Insert(pipeline); err != nil {
+		return err
+	}
+
+	now := time.Now().UTC()
+	run := &models.PipelineRun{
+		PipelineID:  pipeline.ID,
+		RepoID:      repo.ID,
+		TriggerRef:  evt.Ref,
+		TriggerSHA:  evt.After,
+		TriggeredBy: evt.Pusher,
+		Status:      "queued",
+		StartedAt:   &now,
+	}
+	if _, err := o.db.Insert(run); err != nil {
+		return err
+	}
+
+	// Create job + step records for every job in the workflow.
+	for jobName, wfJob := range wf.Jobs {
+		needsJSON, _ := json.Marshal([]string(wfJob.Needs))
+		job := &models.PipelineJob{
+			RunID:  run.ID,
+			Name:   jobName,
+			Image:  wfJob.RunsOn,
+			Needs:  string(needsJSON),
+			Status: "queued",
+		}
+		if _, err := o.db.Insert(job); err != nil {
+			return err
+		}
+		for seq, step := range wfJob.Steps {
+			s := &models.PipelineStep{
+				JobID:      job.ID,
+				Seq:        seq,
+				Name:       step.Name,
+				RunCmd:     step.Run,
+				UsesAction: step.Uses,
+				Status:     "queued",
+			}
+			if _, err := o.db.Insert(s); err != nil {
+				return err
+			}
+		}
+	}
+
+	// Enqueue jobs with no dependencies.
+	o.enqueueReadyJobs(run.ID, wf.Jobs)
+
+	o.bus.Publish(events.SubjectPipelineTriggered, events.PipelineEvent{ //nolint:errcheck
+		RunID:  run.ID,
+		RepoID: repo.ID,
+		Status: "queued",
+		At:     now,
+	})
+
+	// Abbreviate the SHA for the log line without assuming it is 7+ characters.
+	shortSHA := evt.After
+	if len(shortSHA) > 7 {
+		shortSHA = shortSHA[:7]
+	}
+	log.Printf("orchestrator: created run %d for %s/%s (%s)", run.ID, repo.Name, filePath, shortSHA)
+	return nil
+}
+
+// advanceDAG is called when a job finishes. It records the job's result, then
+// either fails the run, completes the run, or enqueues the next wave of jobs.
+//
+//   - result "failed": every still-queued job of the run is marked "skipped",
+//     the run is marked "failed" and pipeline.failed is published.
+//   - a run that is already "failed" stays failed: a job that was still
+//     running when the run failed and later succeeds must not flip the run
+//     back to "succeeded" or enqueue more work.
+//   - all jobs in a terminal state: the run is finished and the matching
+//     pipeline event is published.
+//   - otherwise the workflow is re-read at the run's trigger SHA and the jobs
+//     whose dependencies are now satisfied are enqueued.
+//
+// NOTE(review): calls for different jobs of one run execute concurrently (one
+// goroutine per event) and are not serialised here — confirm that enqueueing
+// the same job twice is harmless or guard against it.
+func (o *Orchestrator) advanceDAG(runID, jobID int64, result string) {
+	var job models.PipelineJob
+	if found, _ := o.db.ID(jobID).Get(&job); !found {
+		return
+	}
+	now := time.Now().UTC()
+	job.Status = result
+	job.FinishedAt = &now
+	o.db.ID(job.ID).Cols("status", "finished_at").Update(&job) //nolint:errcheck
+
+	var run models.PipelineRun
+	if found, _ := o.db.ID(runID).Get(&run); !found {
+		return
+	}
+
+	// Load all jobs for this run to check completion. An unchecked failure
+	// here would leave the list empty and make the run look complete.
+	var allJobs []models.PipelineJob
+	if err := o.db.Where("run_id = ?", runID).Find(&allJobs); err != nil {
+		log.Printf("orchestrator: load jobs for run %d: %v", runID, err)
+		return
+	}
+
+	// If any job failed, cancel remaining queued jobs and fail the run.
+	if result == "failed" {
+		for _, j := range allJobs {
+			if j.Status == "queued" {
+				j.Status = "skipped"
+				o.db.ID(j.ID).Cols("status").Update(&j) //nolint:errcheck
+			}
+		}
+		run.Status = "failed"
+		run.FinishedAt = &now
+		o.db.ID(run.ID).Cols("status", "finished_at").Update(&run) //nolint:errcheck
+		o.bus.Publish(events.SubjectPipelineFailed, events.PipelineEvent{RunID: run.ID, RepoID: run.RepoID, Status: "failed", At: now}) //nolint:errcheck
+		return
+	}
+
+	// The run was already failed by an earlier job; its outcome is final.
+	if run.Status == "failed" {
+		return
+	}
+
+	// Check if all jobs are done, and whether any of them failed.
+	allDone, anyFailed := true, false
+	for _, j := range allJobs {
+		switch j.Status {
+		case "failed":
+			anyFailed = true
+		case "succeeded", "skipped", "cancelled":
+			// terminal, nothing to record
+		default:
+			allDone = false
+		}
+	}
+	if allDone {
+		status, subject := "succeeded", events.SubjectPipelineCompleted
+		if anyFailed {
+			// A failed job whose run row was never marked failed.
+			status, subject = "failed", events.SubjectPipelineFailed
+		}
+		run.Status = status
+		run.FinishedAt = &now
+		o.db.ID(run.ID).Cols("status", "finished_at").Update(&run) //nolint:errcheck
+		o.bus.Publish(subject, events.PipelineEvent{RunID: run.ID, RepoID: run.RepoID, Status: status, At: now}) //nolint:errcheck
+		return
+	}
+
+	// Reload the workflow to get the job dependency graph, then enqueue next wave.
+	var pipeline models.Pipeline
+	if found, _ := o.db.ID(run.PipelineID).Get(&pipeline); !found {
+		return
+	}
+	var repo models.Repository
+	if found, _ := o.db.ID(run.RepoID).Get(&repo); !found {
+		return
+	}
+	wf, err := ParseWorkflow(repo.DiskPath, run.TriggerSHA, pipeline.FilePath)
+	if err != nil {
+		return
+	}
+	o.enqueueReadyJobs(runID, wf.Jobs)
+}
+
+// enqueueReadyJobs publishes job.queued for every job of the run that is still
+// "queued" in the DB and whose dependencies have all succeeded.
+//
+// Jobs that are "running" or "succeeded" count as already enqueued; only
+// "succeeded" jobs satisfy a dependency. The DB query error is ignored, in
+// which case no job rows are seen and nothing is published.
+func (o *Orchestrator) enqueueReadyJobs(runID int64, wfJobs map[string]WorkflowJob) {
+	var dbJobs []models.PipelineJob
+	o.db.Where("run_id = ?", runID).Find(&dbJobs)
+
+	completedNames := make(map[string]bool)
+	enqueuedNames := make(map[string]bool)
+	queuedIDs := make(map[string]int64) // job name -> ID of its first queued row
+	for _, j := range dbJobs {
+		switch j.Status {
+		case "succeeded":
+			completedNames[j.Name] = true
+			enqueuedNames[j.Name] = true
+		case "running":
+			enqueuedNames[j.Name] = true
+		case "queued":
+			if _, seen := queuedIDs[j.Name]; !seen {
+				queuedIDs[j.Name] = j.ID
+			}
+		}
+	}
+
+	for _, name := range ReadyJobs(wfJobs, completedNames, enqueuedNames) {
+		id, queued := queuedIDs[name]
+		if !queued {
+			continue
+		}
+		o.bus.Publish(events.SubjectJobQueued, events.JobEvent{ //nolint:errcheck
+			RunID: runID,
+			JobID: id,
+		})
+	}
+}
+
+// recoverStaleRuns marks every run, job and step that is still in "running"
+// state as failed, stamped with the current time. Such rows were interrupted
+// by a previous server crash. Update errors are ignored.
+func (o *Orchestrator) recoverStaleRuns() {
+	now := time.Now().UTC()
+
+	// One template per table; the same condition and columns apply to each.
+	stale := []interface{}{
+		&models.PipelineRun{Status: "failed", FinishedAt: &now},
+		&models.PipelineJob{Status: "failed", FinishedAt: &now},
+		&models.PipelineStep{Status: "failed", FinishedAt: &now},
+	}
+	for _, bean := range stale {
+		o.db.Where("status = 'running'").Cols("status", "finished_at").Update(bean) //nolint:errcheck
+	}
+}
@@ -0,0 +1,79 @@
+package ci
+
+import (
+	"fmt"
+	"strings"
+
+	gitdomain "github.com/forgeo/forgebucket/internal/domain/git"
+	"gopkg.in/yaml.v3"
+)
+
+// workflowDir is the repository-relative directory that ListWorkflows scans
+// for workflow YAML files.
+const workflowDir = ".forgebucket/workflows"
+
+// ListWorkflows returns the repository-relative paths of all workflow YAML
+// files (blobs named *.yml or *.yaml directly inside workflowDir) at the given
+// ref. Any error from listing the tree is taken to mean that the directory
+// does not exist, so the function returns nil, nil in that case and never
+// reports an error.
+func ListWorkflows(repoPath, ref string) ([]string, error) {
+	entries, err := gitdomain.TreeLS(repoPath, ref, workflowDir)
+	if err != nil {
+		// Directory does not exist at this ref — no workflows, not an error.
+		return nil, nil
+	}
+
+	var paths []string
+	for _, entry := range entries {
+		if entry.Type != "blob" {
+			continue
+		}
+		isYAML := strings.HasSuffix(entry.Name, ".yml") || strings.HasSuffix(entry.Name, ".yaml")
+		if isYAML {
+			paths = append(paths, workflowDir+"/"+entry.Name)
+		}
+	}
+	return paths, nil
+}
+
+// ParseWorkflow reads and parses a single workflow YAML file from the repo at ref.
+func ParseWorkflow(repoPath, ref, filePath string) (*WorkflowFile, error) {
+	data, err := gitdomain.BlobCat(repoPath, ref, filePath)
+	if err != nil {
+		return nil, fmt.Errorf("read %s: %w", filePath, err)
+	}
+	var wf WorkflowFile
+	if err := yaml.Unmarshal(data, &wf); err != nil {
+		return nil, fmt.Errorf("parse %s: %w", filePath, err)
+	}
+	return &wf, nil
+}
+
+// MatchesPushTrigger reports whether a workflow should run for a push to ref.
+// ref is the full ref name, e.g. "refs/heads/main".
+func MatchesPushTrigger(wf *WorkflowFile, ref string) bool {
+	if wf.On.Push == nil {
+		return false
+	}
+	trigger := wf.On.Push
+	// No branch filter means "all branches".
+	if len(trigger.Branches) == 0 && len(trigger.Tags) == 0 {
+		return true
+	}
+	branch := strings.TrimPrefix(ref, "refs/heads/")
+	for _, pattern := range trigger.Branches {
+		if matchGlob(pattern, branch) {
+			return true
+		}
+	}
+	tag := strings.TrimPrefix(ref, "refs/tags/")
+	for _, pattern := range trigger.Tags {
+		if matchGlob(pattern, tag) {
+			return true
+		}
+	}
+	return false
+}
+
+// matchGlob reports whether s matches pattern, where each "*" in pattern
+// matches any run of characters (including none and including "/"). All other
+// characters match literally; a pattern without "*" must equal s.
+func matchGlob(pattern, s string) bool {
+	parts := strings.Split(pattern, "*")
+	if len(parts) == 1 {
+		return pattern == s
+	}
+
+	first, last := parts[0], parts[len(parts)-1]
+	// The literal prefix and suffix must both fit without overlapping,
+	// otherwise "ab*ba" would match "aba".
+	if len(s) < len(first)+len(last) ||
+		!strings.HasPrefix(s, first) || !strings.HasSuffix(s, last) {
+		return false
+	}
+
+	// The remaining literals must occur in order between prefix and suffix;
+	// taking the leftmost occurrence each time is sufficient for "*"-only globs.
+	rest := s[len(first) : len(s)-len(last)]
+	for _, mid := range parts[1 : len(parts)-1] {
+		at := strings.Index(rest, mid)
+		if at < 0 {
+			return false
+		}
+		rest = rest[at+len(mid):]
+	}
+	return true
+}
@@ -0,0 +1,86 @@
+package ci
+
+import (
+	"context"
+	"encoding/json"
+	"log"
+
+	"xorm.io/xorm"
+
+	"github.com/forgeo/forgebucket/internal/config"
+	"github.com/forgeo/forgebucket/internal/events"
+)
+
+// RunnerManager subscribes to job.queued events and dispatches them to the
+// local Docker executor. A semaphore limits concurrent executions.
+type RunnerManager struct {
+	db  *xorm.Engine    // store the job context is loaded from
+	bus events.EventBus // bus that delivers job.queued and receives job results
+	cfg *config.Config  // configuration; ArtifactRoot determines the workspace root
+	sem chan struct{}   // counting semaphore: one buffered slot per running job
+}
+
+// NewRunnerManager returns a RunnerManager that executes at most maxConcurrent
+// jobs at a time. A maxConcurrent of zero or less selects the default of 4.
+func NewRunnerManager(db *xorm.Engine, bus events.EventBus, cfg *config.Config, maxConcurrent int) *RunnerManager {
+	slots := maxConcurrent
+	if slots <= 0 {
+		slots = 4
+	}
+
+	m := &RunnerManager{db: db, bus: bus, cfg: cfg}
+	m.sem = make(chan struct{}, slots)
+	return m
+}
+
+// Start subscribes to job.queued and dispatches executions until ctx is cancelled.
+//
+// When Docker is unavailable or the subscription fails, Start logs the problem
+// and simply blocks until ctx is cancelled. Otherwise each job.queued event is
+// turned into a JobContext and executed on its own goroutine once a semaphore
+// slot is free; the bus callback blocks while all slots are taken.
+//
+// On cancellation Start first unsubscribes, so no new job is picked up during
+// shutdown, and then waits for the running jobs to finish before returning.
+func (m *RunnerManager) Start(ctx context.Context) {
+	if !IsDockerAvailable() {
+		log.Printf("runner: Docker not available — CI execution disabled")
+		<-ctx.Done()
+		return
+	}
+	log.Printf("runner: started (max concurrent jobs: %d)", cap(m.sem))
+
+	wsDir := workspaceDir(m.cfg.ArtifactRoot)
+
+	unsub, err := m.bus.Subscribe(events.SubjectJobQueued, func(_ string, data []byte) {
+		// Refuse new work once shutdown has begun.
+		if ctx.Err() != nil {
+			return
+		}
+
+		var evt events.JobEvent
+		if err := json.Unmarshal(data, &evt); err != nil {
+			log.Printf("runner: bad job.queued payload: %v", err)
+			return
+		}
+
+		jc, ok := buildJobContext(m.db, evt.JobID)
+		if !ok {
+			log.Printf("runner: could not build job context for job %d", evt.JobID)
+			return
+		}
+
+		// Acquire semaphore slot — blocks if at capacity.
+		select {
+		case m.sem <- struct{}{}:
+		case <-ctx.Done():
+			return
+		}
+
+		go func() {
+			defer func() { <-m.sem }()
+			// Sanitize the Docker image name before execution.
+			jc.Job.Image = sanitizeImage(jc.Job.Image)
+			ExecuteJob(ctx, m.db, m.bus, jc, wsDir)
+		}()
+	})
+	if err != nil {
+		log.Printf("runner: subscribe job.queued: %v", err)
+		<-ctx.Done()
+		return
+	}
+
+	<-ctx.Done()
+	// Stop deliveries before draining so nothing new starts while we wait.
+	unsub()
+	log.Printf("runner: stopping — draining %d active jobs", len(m.sem))
+	// Wait for all running jobs to finish by filling the semaphore.
+	for i := 0; i < cap(m.sem); i++ {
+		m.sem <- struct{}{}
+	}
+}
@@ -0,0 +1,58 @@
+package ci
+
+import "gopkg.in/yaml.v3"
+
+// WorkflowFile is the parsed representation of a .forgebucket/workflows/*.yml file.
+type WorkflowFile struct {
+	Name string                 `yaml:"name"` // display name; may be empty
+	On   WorkflowTrigger        `yaml:"on"`   // events that trigger the workflow
+	Jobs map[string]WorkflowJob `yaml:"jobs"` // jobs keyed by job name; "needs" entries refer to these keys
+}
+
+// WorkflowTrigger is the "on:" section of a workflow. A nil field means the
+// corresponding trigger is not configured.
+type WorkflowTrigger struct {
+	Push        *PushTrigger `yaml:"push"`         // push trigger, evaluated by MatchesPushTrigger
+	PullRequest *PRTrigger   `yaml:"pull_request"` // parsed; no consumer is visible in this package excerpt
+}
+
+// PushTrigger filters push events by ref name. When both lists are empty,
+// MatchesPushTrigger accepts every push.
+type PushTrigger struct {
+	Branches []string `yaml:"branches"` // branch name patterns; "*" is a wildcard
+	Tags     []string `yaml:"tags"`     // tag name patterns; "*" is a wildcard
+}
+
+// PRTrigger is the "pull_request:" trigger section of a workflow.
+type PRTrigger struct {
+	Branches []string `yaml:"branches"` // branch filter — presumably target-branch patterns; TODO confirm
+}
+
+// WorkflowJob is one entry of the workflow's "jobs:" map.
+type WorkflowJob struct {
+	Name   string         `yaml:"name"`    // optional display name from the YAML
+	RunsOn string         `yaml:"runs-on"` // stored by the orchestrator as the job's container image
+	Needs  StringOrSlice  `yaml:"needs"`   // names of jobs that must succeed before this one is enqueued
+	Steps  []WorkflowStep `yaml:"steps"`   // steps in execution order
+}
+
+// WorkflowStep is a single step of a job: either a built-in action (Uses) or a
+// shell command (Run).
+type WorkflowStep struct {
+	Name string            `yaml:"name"` // step name, used in records and failure messages
+	Uses string            `yaml:"uses"` // built-in action name; the executor handles "checkout"
+	Run  string            `yaml:"run"`  // shell command run inside the job's container
+	Env  map[string]string `yaml:"env"`  // parsed; NOTE(review): not persisted when the orchestrator creates step records — confirm
+}
+
+// StringOrSlice unmarshals a YAML value that may be either a single string
+// ("needs: test") or a list ("needs: [test, build]").
+type StringOrSlice []string
+
+// UnmarshalYAML implements yaml.Unmarshaler.
+//
+// A non-empty scalar becomes a one-element slice; an empty scalar leaves the
+// receiver untouched. Every other node is decoded as a list of strings, so a
+// value of the wrong shape (for example a mapping) is reported as a decoding
+// error instead of being silently treated as "no dependencies".
+func (s *StringOrSlice) UnmarshalYAML(value *yaml.Node) error {
+	if value.Kind == yaml.ScalarNode {
+		if value.Value != "" {
+			*s = []string{value.Value}
+		}
+		return nil
+	}
+
+	var items []string
+	if err := value.Decode(&items); err != nil {
+		return err
+	}
+	*s = items
+	return nil
+}