completed phase 2b
This commit is contained in:
@@ -0,0 +1,78 @@
|
||||
package ci
|
||||
|
||||
import (
	"fmt"
	"sort"
)
|
||||
|
||||
// dagNode is one vertex of the job dependency graph: a job plus the names of
// the jobs it has to wait for.
type dagNode struct {
	// name is the job's key in the workflow's jobs map.
	name string
	// needs lists the names of the jobs this job depends on.
	needs []string
}
|
||||
|
||||
// TopoSort returns the job names in a valid topological execution order.
|
||||
// Returns an error if the dependency graph has cycles or references unknown jobs.
|
||||
func TopoSort(jobs map[string]WorkflowJob) ([]string, error) {
|
||||
nodes := make(map[string]*dagNode, len(jobs))
|
||||
for name, job := range jobs {
|
||||
nodes[name] = &dagNode{name: name, needs: []string(job.Needs)}
|
||||
}
|
||||
// Validate all dependencies exist.
|
||||
for _, node := range nodes {
|
||||
for _, dep := range node.needs {
|
||||
if _, ok := nodes[dep]; !ok {
|
||||
return nil, fmt.Errorf("job %q depends on unknown job %q", node.name, dep)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var order []string
|
||||
visited := make(map[string]bool, len(nodes))
|
||||
inStack := make(map[string]bool, len(nodes))
|
||||
|
||||
var visit func(name string) error
|
||||
visit = func(name string) error {
|
||||
if inStack[name] {
|
||||
return fmt.Errorf("cycle detected at job %q", name)
|
||||
}
|
||||
if visited[name] {
|
||||
return nil
|
||||
}
|
||||
inStack[name] = true
|
||||
for _, dep := range nodes[name].needs {
|
||||
if err := visit(dep); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
inStack[name] = false
|
||||
visited[name] = true
|
||||
order = append(order, name)
|
||||
return nil
|
||||
}
|
||||
|
||||
for name := range nodes {
|
||||
if err := visit(name); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return order, nil
|
||||
}
|
||||
|
||||
// ReadyJobs returns the names of jobs whose dependencies are all in completedJobs.
|
||||
func ReadyJobs(jobs map[string]WorkflowJob, completedJobs map[string]bool, enqueuedJobs map[string]bool) []string {
|
||||
var ready []string
|
||||
for name, job := range jobs {
|
||||
if enqueuedJobs[name] {
|
||||
continue
|
||||
}
|
||||
allDone := true
|
||||
for _, dep := range job.Needs {
|
||||
if !completedJobs[dep] {
|
||||
allDone = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if allDone {
|
||||
ready = append(ready, name)
|
||||
}
|
||||
}
|
||||
return ready
|
||||
}
|
||||
@@ -0,0 +1,271 @@
|
||||
package ci
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"xorm.io/xorm"
|
||||
|
||||
"github.com/forgeo/forgebucket/internal/events"
|
||||
"github.com/forgeo/forgebucket/internal/models"
|
||||
)
|
||||
|
||||
// JobContext holds everything needed to execute a single pipeline job.
|
||||
type JobContext struct {
|
||||
Run models.PipelineRun
|
||||
Job models.PipelineJob
|
||||
Steps []models.PipelineStep
|
||||
Repo models.Repository
|
||||
}
|
||||
|
||||
// ExecuteJob runs all steps of a job inside isolated Docker containers,
|
||||
// streams log output to NATS and the DB, then publishes job.completed or job.failed.
|
||||
func ExecuteJob(ctx context.Context, db *xorm.Engine, bus events.EventBus, jc JobContext, workspaceRoot string) {
|
||||
now := time.Now().UTC()
|
||||
jc.Job.Status = "running"
|
||||
jc.Job.StartedAt = &now
|
||||
db.ID(jc.Job.ID).Cols("status", "started_at").Update(&jc.Job) //nolint:errcheck
|
||||
|
||||
// Extract repo snapshot into a workspace directory.
|
||||
workDir, err := extractWorkspace(jc.Repo.DiskPath, jc.Run.TriggerSHA, workspaceRoot, jc.Run.ID)
|
||||
if err != nil {
|
||||
failJob(db, bus, jc, fmt.Sprintf("workspace setup failed: %v", err))
|
||||
return
|
||||
}
|
||||
defer os.RemoveAll(workDir)
|
||||
|
||||
image := jc.Job.Image
|
||||
if image == "" {
|
||||
image = "ubuntu:22.04"
|
||||
}
|
||||
|
||||
// Pull image once per job (non-fatal if pull fails and image exists locally).
|
||||
pullCmd := exec.CommandContext(ctx, "docker", "pull", image)
|
||||
pullCmd.Run() //nolint:errcheck
|
||||
|
||||
for i := range jc.Steps {
|
||||
step := &jc.Steps[i]
|
||||
if step.UsesAction == "checkout" {
|
||||
// Built-in checkout: workspace is already set up by extractWorkspace.
|
||||
markStep(db, step, "succeeded", 0)
|
||||
continue
|
||||
}
|
||||
if step.RunCmd == "" {
|
||||
markStep(db, step, "skipped", 0)
|
||||
continue
|
||||
}
|
||||
exitCode, err := runStep(ctx, db, bus, jc.Run.ID, jc.Job.ID, step, image, workDir)
|
||||
if err != nil || exitCode != 0 {
|
||||
if exitCode == 0 {
|
||||
exitCode = 1
|
||||
}
|
||||
markStep(db, step, "failed", exitCode)
|
||||
failJob(db, bus, jc, fmt.Sprintf("step %q exited %d", step.Name, exitCode))
|
||||
return
|
||||
}
|
||||
markStep(db, step, "succeeded", 0)
|
||||
}
|
||||
|
||||
fin := time.Now().UTC()
|
||||
jc.Job.Status = "succeeded"
|
||||
jc.Job.FinishedAt = &fin
|
||||
db.ID(jc.Job.ID).Cols("status", "finished_at").Update(&jc.Job) //nolint:errcheck
|
||||
bus.Publish(events.SubjectJobCompleted, events.JobEvent{ //nolint:errcheck
|
||||
RunID: jc.Run.ID, JobID: jc.Job.ID, Status: "succeeded", At: fin,
|
||||
})
|
||||
}
|
||||
|
||||
// runStep runs a single shell-command step inside a Docker container.
|
||||
func runStep(ctx context.Context, db *xorm.Engine, bus events.EventBus,
|
||||
runID, jobID int64, step *models.PipelineStep, image, workDir string) (int, error) {
|
||||
|
||||
now := time.Now().UTC()
|
||||
step.Status = "running"
|
||||
step.StartedAt = &now
|
||||
db.ID(step.ID).Cols("status", "started_at").Update(step) //nolint:errcheck
|
||||
|
||||
cmd := exec.CommandContext(ctx, "docker", "run", "--rm",
|
||||
"-v", workDir+":/workspace",
|
||||
"-w", "/workspace",
|
||||
"--network=none", // no network by default; Phase 2C will add network scopes
|
||||
image,
|
||||
"/bin/sh", "-ec", step.RunCmd,
|
||||
)
|
||||
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return 1, err
|
||||
}
|
||||
cmd.Stderr = cmd.Stdout // merge stderr into stdout
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return 1, fmt.Errorf("docker run: %w", err)
|
||||
}
|
||||
|
||||
chunk := 0
|
||||
scanner := bufio.NewScanner(stdout)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text() + "\n"
|
||||
writeLogChunk(db, bus, runID, jobID, step.ID, chunk, line)
|
||||
chunk++
|
||||
}
|
||||
|
||||
exitCode := 0
|
||||
if err := cmd.Wait(); err != nil {
|
||||
if exitErr, ok := err.(*exec.ExitError); ok {
|
||||
exitCode = exitErr.ExitCode()
|
||||
} else {
|
||||
exitCode = 1
|
||||
}
|
||||
}
|
||||
return exitCode, nil
|
||||
}
|
||||
|
||||
// extractWorkspace exports the repository at repoPath, as of sha, into the
// directory "<workspaceRoot>/run-<runID>" by piping "git archive" into "tar".
//
// It returns the directory path on success. On any failure after the directory
// has been created, the directory is removed again and an error describing the
// failing stage is returned.
func extractWorkspace(repoPath, sha, workspaceRoot string, runID int64) (string, error) {
	dir := filepath.Join(workspaceRoot, fmt.Sprintf("run-%d", runID))
	if err := os.MkdirAll(dir, 0755); err != nil {
		return "", err
	}

	// From here on, every error path must not leave a half-populated
	// directory behind.
	populated := false
	defer func() {
		if !populated {
			os.RemoveAll(dir) //nolint:errcheck
		}
	}()

	archive := exec.Command("git", "archive", "--format=tar", sha)
	archive.Dir = repoPath
	archive.Env = []string{"GIT_TERMINAL_PROMPT=0", "HOME=/tmp"}

	pipe, err := archive.StdoutPipe()
	if err != nil {
		return "", fmt.Errorf("git archive pipe: %w", err)
	}

	tar := exec.Command("tar", "-x", "-C", dir)
	tar.Stdin = pipe

	if err := archive.Start(); err != nil {
		return "", fmt.Errorf("git archive: %w", err)
	}
	if err := tar.Start(); err != nil {
		archive.Process.Kill() //nolint:errcheck
		// Reap the killed process so it does not linger as a zombie.
		archive.Wait() //nolint:errcheck
		return "", fmt.Errorf("tar: %w", err)
	}

	// Wait for both ends of the pipeline before judging the result, so
	// neither process is left unreaped.
	archiveErr := archive.Wait()
	tarErr := tar.Wait()
	if archiveErr != nil {
		return "", fmt.Errorf("git archive wait: %w", archiveErr)
	}
	if tarErr != nil {
		return "", fmt.Errorf("tar wait: %w", tarErr)
	}

	populated = true
	return dir, nil
}
|
||||
|
||||
func writeLogChunk(db *xorm.Engine, bus events.EventBus, runID, jobID, stepID int64, idx int, content string) {
|
||||
entry := &models.PipelineStepLog{
|
||||
StepID: stepID,
|
||||
ChunkIndex: idx,
|
||||
Content: content,
|
||||
}
|
||||
db.Insert(entry) //nolint:errcheck
|
||||
bus.Publish(events.SubjectPipelineLog, events.LogChunkEvent{ //nolint:errcheck
|
||||
RunID: runID, JobID: jobID, StepID: stepID, ChunkIndex: idx, Content: content,
|
||||
})
|
||||
}
|
||||
|
||||
func markStep(db *xorm.Engine, step *models.PipelineStep, status string, exitCode int) {
|
||||
now := time.Now().UTC()
|
||||
step.Status = status
|
||||
step.ExitCode = exitCode
|
||||
step.FinishedAt = &now
|
||||
db.ID(step.ID).Cols("status", "exit_code", "finished_at").Update(step) //nolint:errcheck
|
||||
}
|
||||
|
||||
func failJob(db *xorm.Engine, bus events.EventBus, jc JobContext, reason string) {
|
||||
now := time.Now().UTC()
|
||||
jc.Job.Status = "failed"
|
||||
jc.Job.FinishedAt = &now
|
||||
db.ID(jc.Job.ID).Cols("status", "finished_at").Update(&jc.Job) //nolint:errcheck
|
||||
|
||||
// Write the failure reason as a synthetic log line.
|
||||
var lastStep models.PipelineStep
|
||||
if found, _ := db.Where("job_id = ?", jc.Job.ID).Desc("seq").Get(&lastStep); found {
|
||||
writeLogChunk(db, bus, jc.Run.ID, jc.Job.ID, lastStep.ID, 0,
|
||||
"\n[ForgeBucket] Job failed: "+reason+"\n")
|
||||
}
|
||||
|
||||
bus.Publish(events.SubjectJobFailed, events.JobEvent{ //nolint:errcheck
|
||||
RunID: jc.Run.ID, JobID: jc.Job.ID, Status: "failed", At: now,
|
||||
})
|
||||
}
|
||||
|
||||
// workspaceDir returns the scratch directory root for CI job workspaces: a
// "ci-workspaces" directory placed next to artifactRoot (that is, inside
// artifactRoot's parent directory).
func workspaceDir(artifactRoot string) string {
	parent := filepath.Dir(artifactRoot)
	return filepath.Join(parent, "ci-workspaces")
}
|
||||
|
||||
// IsDockerAvailable reports whether "docker info" runs successfully, i.e. the
// docker CLI is installed and can reach a daemon.
//
// The probe inherits the process environment (so settings such as DOCKER_HOST
// apply exactly as they do to the docker commands that later run jobs) and
// only overrides HOME with /tmp.
func IsDockerAvailable() bool {
	cmd := exec.Command("docker", "info")
	// When a key appears more than once os/exec uses the last value, so the
	// appended HOME wins over any inherited one.
	cmd.Env = append(os.Environ(), "HOME=/tmp")
	return cmd.Run() == nil
}
|
||||
|
||||
// stepsForJob loads PipelineStep rows for a job ordered by seq.
|
||||
func stepsForJob(db *xorm.Engine, jobID int64) ([]models.PipelineStep, error) {
|
||||
var steps []models.PipelineStep
|
||||
err := db.Where("job_id = ?", jobID).Asc("seq").Find(&steps)
|
||||
return steps, err
|
||||
}
|
||||
|
||||
// repoForRun loads the Repository for a given run.
|
||||
func repoForRun(db *xorm.Engine, runID int64) (models.Repository, models.PipelineRun, bool) {
|
||||
var run models.PipelineRun
|
||||
if found, _ := db.ID(runID).Get(&run); !found {
|
||||
return models.Repository{}, run, false
|
||||
}
|
||||
var repo models.Repository
|
||||
if found, _ := db.ID(run.RepoID).Get(&repo); !found {
|
||||
return models.Repository{}, run, false
|
||||
}
|
||||
return repo, run, true
|
||||
}
|
||||
|
||||
// buildJobContext assembles a JobContext from DB rows.
|
||||
func buildJobContext(db *xorm.Engine, jobID int64) (JobContext, bool) {
|
||||
var job models.PipelineJob
|
||||
if found, _ := db.ID(jobID).Get(&job); !found {
|
||||
return JobContext{}, false
|
||||
}
|
||||
repo, run, ok := repoForRun(db, job.RunID)
|
||||
if !ok {
|
||||
return JobContext{}, false
|
||||
}
|
||||
steps, err := stepsForJob(db, jobID)
|
||||
if err != nil {
|
||||
return JobContext{}, false
|
||||
}
|
||||
return JobContext{Run: run, Job: job, Steps: steps, Repo: repo}, true
|
||||
}
|
||||
|
||||
// pipeForRun is an unused Phase 2B placeholder reserved for future label
// matching. It ignores its argument and always returns the empty string.
func pipeForRun(_ string) string {
	return ""
}
|
||||
|
||||
// sanitizeImage prevents injection through Docker image names.
//
// It keeps only ASCII letters, digits and the characters . - _ / : @ and drops
// everything else. Leading '-' characters are then removed as well: the image
// is passed to the docker CLI as a positional argument, and a value starting
// with '-' would be parsed as an option instead of an image reference.
//
// The result may be empty if nothing usable remains.
func sanitizeImage(image string) string {
	var b strings.Builder
	b.Grow(len(image))
	for _, c := range image {
		switch {
		case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
			b.WriteRune(c)
		case strings.ContainsRune(".-_/:@", c):
			b.WriteRune(c)
		}
	}
	return strings.TrimLeft(b.String(), "-")
}
|
||||
@@ -0,0 +1,292 @@
|
||||
package ci
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"xorm.io/xorm"
|
||||
|
||||
"github.com/forgeo/forgebucket/internal/events"
|
||||
"github.com/forgeo/forgebucket/internal/models"
|
||||
)
|
||||
|
||||
// Orchestrator listens for push events, creates pipeline run records, and
|
||||
// advances the DAG as jobs complete. It does NOT execute jobs directly —
|
||||
// that is the RunnerManager's responsibility.
|
||||
type Orchestrator struct {
|
||||
db *xorm.Engine
|
||||
bus events.EventBus
|
||||
}
|
||||
|
||||
func NewOrchestrator(db *xorm.Engine, bus events.EventBus) *Orchestrator {
|
||||
return &Orchestrator{db: db, bus: bus}
|
||||
}
|
||||
|
||||
// Start subscribes to relevant NATS subjects and blocks until ctx is cancelled.
|
||||
func (o *Orchestrator) Start(ctx context.Context) {
|
||||
o.recoverStaleRuns()
|
||||
|
||||
unsub1, err := o.bus.Subscribe(events.SubjectPushReceived, func(_ string, data []byte) {
|
||||
var evt events.PushEvent
|
||||
if err := json.Unmarshal(data, &evt); err != nil {
|
||||
log.Printf("orchestrator: bad push event: %v", err)
|
||||
return
|
||||
}
|
||||
go o.handlePush(evt)
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf("orchestrator: subscribe push.received: %v", err)
|
||||
} else {
|
||||
defer unsub1()
|
||||
}
|
||||
|
||||
unsub2, err := o.bus.Subscribe(events.SubjectJobCompleted, func(_ string, data []byte) {
|
||||
var evt events.JobEvent
|
||||
if err := json.Unmarshal(data, &evt); err != nil {
|
||||
return
|
||||
}
|
||||
go o.advanceDAG(evt.RunID, evt.JobID, "succeeded")
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf("orchestrator: subscribe job.completed: %v", err)
|
||||
} else {
|
||||
defer unsub2()
|
||||
}
|
||||
|
||||
unsub3, err := o.bus.Subscribe(events.SubjectJobFailed, func(_ string, data []byte) {
|
||||
var evt events.JobEvent
|
||||
if err := json.Unmarshal(data, &evt); err != nil {
|
||||
return
|
||||
}
|
||||
go o.advanceDAG(evt.RunID, evt.JobID, "failed")
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf("orchestrator: subscribe job.failed: %v", err)
|
||||
} else {
|
||||
defer unsub3()
|
||||
}
|
||||
|
||||
<-ctx.Done()
|
||||
}
|
||||
|
||||
// handlePush is called for every successful git push. It finds matching workflow
|
||||
// files, creates run records, and enqueues the first wave of jobs.
|
||||
func (o *Orchestrator) handlePush(evt events.PushEvent) {
|
||||
// Ignore branch deletions (new SHA = all zeros).
|
||||
if evt.After == "" || strings.Repeat("0", len(evt.After)) == evt.After {
|
||||
return
|
||||
}
|
||||
|
||||
var repo models.Repository
|
||||
if found, _ := o.db.ID(evt.RepoID).Get(&repo); !found {
|
||||
return
|
||||
}
|
||||
|
||||
workflowPaths, err := ListWorkflows(repo.DiskPath, evt.After)
|
||||
if err != nil || len(workflowPaths) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
for _, path := range workflowPaths {
|
||||
wf, err := ParseWorkflow(repo.DiskPath, evt.After, path)
|
||||
if err != nil {
|
||||
log.Printf("orchestrator: parse workflow %s: %v", path, err)
|
||||
continue
|
||||
}
|
||||
if !MatchesPushTrigger(wf, evt.Ref) {
|
||||
continue
|
||||
}
|
||||
if err := o.createRun(repo, evt, wf, path); err != nil {
|
||||
log.Printf("orchestrator: create run for %s: %v", path, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (o *Orchestrator) createRun(repo models.Repository, evt events.PushEvent, wf *WorkflowFile, filePath string) error {
|
||||
// Upsert the Pipeline definition record.
|
||||
pipeline := &models.Pipeline{RepoID: repo.ID, FilePath: filePath}
|
||||
has, _ := o.db.Where("repo_id = ? AND file_path = ?", repo.ID, filePath).Get(pipeline)
|
||||
pipeline.Name = wf.Name
|
||||
if pipeline.Name == "" {
|
||||
pipeline.Name = filePath
|
||||
}
|
||||
if has {
|
||||
o.db.ID(pipeline.ID).Cols("name").Update(pipeline) //nolint:errcheck
|
||||
} else {
|
||||
if _, err := o.db.Insert(pipeline); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Validate DAG before writing anything.
|
||||
if _, err := TopoSort(wf.Jobs); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
now := time.Now().UTC()
|
||||
run := &models.PipelineRun{
|
||||
PipelineID: pipeline.ID,
|
||||
RepoID: repo.ID,
|
||||
TriggerRef: evt.Ref,
|
||||
TriggerSHA: evt.After,
|
||||
TriggeredBy: evt.Pusher,
|
||||
Status: "queued",
|
||||
StartedAt: &now,
|
||||
}
|
||||
if _, err := o.db.Insert(run); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create job + step records for every job in the workflow.
|
||||
for jobName, wfJob := range wf.Jobs {
|
||||
needsJSON, _ := json.Marshal([]string(wfJob.Needs))
|
||||
job := &models.PipelineJob{
|
||||
RunID: run.ID,
|
||||
Name: jobName,
|
||||
Image: wfJob.RunsOn,
|
||||
Needs: string(needsJSON),
|
||||
Status: "queued",
|
||||
}
|
||||
if _, err := o.db.Insert(job); err != nil {
|
||||
return err
|
||||
}
|
||||
for seq, step := range wfJob.Steps {
|
||||
s := &models.PipelineStep{
|
||||
JobID: job.ID,
|
||||
Seq: seq,
|
||||
Name: step.Name,
|
||||
RunCmd: step.Run,
|
||||
UsesAction: step.Uses,
|
||||
Status: "queued",
|
||||
}
|
||||
if _, err := o.db.Insert(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Enqueue jobs with no dependencies.
|
||||
o.enqueueReadyJobs(run.ID, wf.Jobs)
|
||||
|
||||
o.bus.Publish(events.SubjectPipelineTriggered, events.PipelineEvent{ //nolint:errcheck
|
||||
RunID: run.ID,
|
||||
RepoID: repo.ID,
|
||||
Status: "queued",
|
||||
At: now,
|
||||
})
|
||||
|
||||
log.Printf("orchestrator: created run %d for %s/%s (%s)", run.ID, repo.Name, filePath, evt.After[:7])
|
||||
return nil
|
||||
}
|
||||
|
||||
// advanceDAG is called when a job finishes. It marks the job, checks whether
|
||||
// all jobs are done (completing the run) or enqueues the next wave.
|
||||
func (o *Orchestrator) advanceDAG(runID, jobID int64, result string) {
|
||||
var job models.PipelineJob
|
||||
if found, _ := o.db.ID(jobID).Get(&job); !found {
|
||||
return
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
job.Status = result
|
||||
job.FinishedAt = &now
|
||||
o.db.ID(job.ID).Cols("status", "finished_at").Update(&job) //nolint:errcheck
|
||||
|
||||
var run models.PipelineRun
|
||||
if found, _ := o.db.ID(runID).Get(&run); !found {
|
||||
return
|
||||
}
|
||||
|
||||
// Load all jobs for this run to check completion.
|
||||
var allJobs []models.PipelineJob
|
||||
o.db.Where("run_id = ?", runID).Find(&allJobs)
|
||||
|
||||
// If any job failed, cancel remaining queued jobs and fail the run.
|
||||
if result == "failed" {
|
||||
for _, j := range allJobs {
|
||||
if j.Status == "queued" {
|
||||
j.Status = "skipped"
|
||||
o.db.ID(j.ID).Cols("status").Update(&j) //nolint:errcheck
|
||||
}
|
||||
}
|
||||
run.Status = "failed"
|
||||
run.FinishedAt = &now
|
||||
o.db.ID(run.ID).Cols("status", "finished_at").Update(&run) //nolint:errcheck
|
||||
o.bus.Publish(events.SubjectPipelineFailed, events.PipelineEvent{RunID: run.ID, RepoID: run.RepoID, Status: "failed", At: now}) //nolint:errcheck
|
||||
return
|
||||
}
|
||||
|
||||
// Check if all jobs are done.
|
||||
allDone := true
|
||||
for _, j := range allJobs {
|
||||
if j.Status != "succeeded" && j.Status != "failed" && j.Status != "skipped" && j.Status != "cancelled" {
|
||||
allDone = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if allDone {
|
||||
run.Status = "succeeded"
|
||||
run.FinishedAt = &now
|
||||
o.db.ID(run.ID).Cols("status", "finished_at").Update(&run) //nolint:errcheck
|
||||
o.bus.Publish(events.SubjectPipelineCompleted, events.PipelineEvent{RunID: run.ID, RepoID: run.RepoID, Status: "succeeded", At: now}) //nolint:errcheck
|
||||
return
|
||||
}
|
||||
|
||||
// Reload the workflow to get the job dependency graph, then enqueue next wave.
|
||||
var pipeline models.Pipeline
|
||||
if found, _ := o.db.ID(run.PipelineID).Get(&pipeline); !found {
|
||||
return
|
||||
}
|
||||
var repo models.Repository
|
||||
if found, _ := o.db.ID(run.RepoID).Get(&repo); !found {
|
||||
return
|
||||
}
|
||||
wf, err := ParseWorkflow(repo.DiskPath, run.TriggerSHA, pipeline.FilePath)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
o.enqueueReadyJobs(runID, wf.Jobs)
|
||||
}
|
||||
|
||||
func (o *Orchestrator) enqueueReadyJobs(runID int64, wfJobs map[string]WorkflowJob) {
|
||||
var dbJobs []models.PipelineJob
|
||||
o.db.Where("run_id = ?", runID).Find(&dbJobs)
|
||||
|
||||
completedNames := make(map[string]bool)
|
||||
enqueuedNames := make(map[string]bool)
|
||||
for _, j := range dbJobs {
|
||||
if j.Status == "succeeded" {
|
||||
completedNames[j.Name] = true
|
||||
}
|
||||
if j.Status == "running" || j.Status == "succeeded" {
|
||||
enqueuedNames[j.Name] = true
|
||||
}
|
||||
}
|
||||
|
||||
readyNames := ReadyJobs(wfJobs, completedNames, enqueuedNames)
|
||||
for _, name := range readyNames {
|
||||
for _, j := range dbJobs {
|
||||
if j.Name == name && j.Status == "queued" {
|
||||
o.bus.Publish(events.SubjectJobQueued, events.JobEvent{ //nolint:errcheck
|
||||
RunID: runID,
|
||||
JobID: j.ID,
|
||||
})
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// recoverStaleRuns marks any jobs/runs left in "running" state as failed
|
||||
// (they were interrupted by a previous server crash).
|
||||
func (o *Orchestrator) recoverStaleRuns() {
|
||||
now := time.Now().UTC()
|
||||
o.db.Where("status = 'running'").Cols("status", "finished_at").
|
||||
Update(&models.PipelineRun{Status: "failed", FinishedAt: &now}) //nolint:errcheck
|
||||
o.db.Where("status = 'running'").Cols("status", "finished_at").
|
||||
Update(&models.PipelineJob{Status: "failed", FinishedAt: &now}) //nolint:errcheck
|
||||
o.db.Where("status = 'running'").Cols("status", "finished_at").
|
||||
Update(&models.PipelineStep{Status: "failed", FinishedAt: &now}) //nolint:errcheck
|
||||
}
|
||||
@@ -0,0 +1,79 @@
|
||||
package ci
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
gitdomain "github.com/forgeo/forgebucket/internal/domain/git"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
const workflowDir = ".forgebucket/workflows"
|
||||
|
||||
// ListWorkflows returns the file paths of all workflow YAML files in a repo at a
|
||||
// given ref. Returns nil (no error) when the workflows directory doesn't exist.
|
||||
func ListWorkflows(repoPath, ref string) ([]string, error) {
|
||||
entries, err := gitdomain.TreeLS(repoPath, ref, workflowDir)
|
||||
if err != nil {
|
||||
// Directory does not exist at this ref — no workflows, not an error.
|
||||
return nil, nil
|
||||
}
|
||||
var paths []string
|
||||
for _, e := range entries {
|
||||
if e.Type == "blob" && (strings.HasSuffix(e.Name, ".yml") || strings.HasSuffix(e.Name, ".yaml")) {
|
||||
paths = append(paths, workflowDir+"/"+e.Name)
|
||||
}
|
||||
}
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
// ParseWorkflow reads and parses a single workflow YAML file from the repo at ref.
|
||||
func ParseWorkflow(repoPath, ref, filePath string) (*WorkflowFile, error) {
|
||||
data, err := gitdomain.BlobCat(repoPath, ref, filePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read %s: %w", filePath, err)
|
||||
}
|
||||
var wf WorkflowFile
|
||||
if err := yaml.Unmarshal(data, &wf); err != nil {
|
||||
return nil, fmt.Errorf("parse %s: %w", filePath, err)
|
||||
}
|
||||
return &wf, nil
|
||||
}
|
||||
|
||||
// MatchesPushTrigger reports whether a workflow should run for a push to ref.
|
||||
// ref is the full ref name, e.g. "refs/heads/main".
|
||||
func MatchesPushTrigger(wf *WorkflowFile, ref string) bool {
|
||||
if wf.On.Push == nil {
|
||||
return false
|
||||
}
|
||||
trigger := wf.On.Push
|
||||
// No branch filter means "all branches".
|
||||
if len(trigger.Branches) == 0 && len(trigger.Tags) == 0 {
|
||||
return true
|
||||
}
|
||||
branch := strings.TrimPrefix(ref, "refs/heads/")
|
||||
for _, pattern := range trigger.Branches {
|
||||
if matchGlob(pattern, branch) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
tag := strings.TrimPrefix(ref, "refs/tags/")
|
||||
for _, pattern := range trigger.Tags {
|
||||
if matchGlob(pattern, tag) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// matchGlob reports whether s matches pattern, where each "*" in pattern
// matches any run of characters (including none, and including "/"). Every
// other character matches itself; this is not a full glob (no "?", no
// character classes). A pattern without "*" must equal s exactly.
func matchGlob(pattern, s string) bool {
	parts := strings.Split(pattern, "*")
	if len(parts) == 1 {
		return pattern == s
	}

	first, last := parts[0], parts[len(parts)-1]
	// The literal head and tail must both fit without overlapping, otherwise
	// "a*a" would wrongly match "a".
	if len(s) < len(first)+len(last) || !strings.HasPrefix(s, first) || !strings.HasSuffix(s, last) {
		return false
	}

	// The literals between the stars must occur, in order, in what is left.
	// Taking the leftmost occurrence each time leaves the most room for the
	// remaining literals, so this greedy scan is exact.
	rest := s[len(first) : len(s)-len(last)]
	for _, mid := range parts[1 : len(parts)-1] {
		at := strings.Index(rest, mid)
		if at < 0 {
			return false
		}
		rest = rest[at+len(mid):]
	}
	return true
}
|
||||
@@ -0,0 +1,86 @@
|
||||
package ci
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log"
|
||||
|
||||
"xorm.io/xorm"
|
||||
|
||||
"github.com/forgeo/forgebucket/internal/config"
|
||||
"github.com/forgeo/forgebucket/internal/events"
|
||||
)
|
||||
|
||||
// RunnerManager subscribes to job.queued events and dispatches them to the
|
||||
// local Docker executor. A semaphore limits concurrent executions.
|
||||
type RunnerManager struct {
|
||||
db *xorm.Engine
|
||||
bus events.EventBus
|
||||
cfg *config.Config
|
||||
sem chan struct{}
|
||||
}
|
||||
|
||||
func NewRunnerManager(db *xorm.Engine, bus events.EventBus, cfg *config.Config, maxConcurrent int) *RunnerManager {
|
||||
if maxConcurrent <= 0 {
|
||||
maxConcurrent = 4
|
||||
}
|
||||
return &RunnerManager{
|
||||
db: db,
|
||||
bus: bus,
|
||||
cfg: cfg,
|
||||
sem: make(chan struct{}, maxConcurrent),
|
||||
}
|
||||
}
|
||||
|
||||
// Start subscribes to job.queued and dispatches executions until ctx is cancelled.
|
||||
func (m *RunnerManager) Start(ctx context.Context) {
|
||||
if !IsDockerAvailable() {
|
||||
log.Printf("runner: Docker not available — CI execution disabled")
|
||||
<-ctx.Done()
|
||||
return
|
||||
}
|
||||
log.Printf("runner: started (max concurrent jobs: %d)", cap(m.sem))
|
||||
|
||||
wsDir := workspaceDir(m.cfg.ArtifactRoot)
|
||||
|
||||
unsub, err := m.bus.Subscribe(events.SubjectJobQueued, func(_ string, data []byte) {
|
||||
var evt events.JobEvent
|
||||
if err := json.Unmarshal(data, &evt); err != nil {
|
||||
log.Printf("runner: bad job.queued payload: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
jc, ok := buildJobContext(m.db, evt.JobID)
|
||||
if !ok {
|
||||
log.Printf("runner: could not build job context for job %d", evt.JobID)
|
||||
return
|
||||
}
|
||||
|
||||
// Acquire semaphore slot — blocks if at capacity.
|
||||
select {
|
||||
case m.sem <- struct{}{}:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
|
||||
go func() {
|
||||
defer func() { <-m.sem }()
|
||||
// Sanitize the Docker image name before execution.
|
||||
jc.Job.Image = sanitizeImage(jc.Job.Image)
|
||||
ExecuteJob(ctx, m.db, m.bus, jc, wsDir)
|
||||
}()
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf("runner: subscribe job.queued: %v", err)
|
||||
<-ctx.Done()
|
||||
return
|
||||
}
|
||||
defer unsub()
|
||||
|
||||
<-ctx.Done()
|
||||
log.Printf("runner: stopping — draining %d active jobs", len(m.sem))
|
||||
// Wait for all running jobs to finish by filling the semaphore.
|
||||
for i := 0; i < cap(m.sem); i++ {
|
||||
m.sem <- struct{}{}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
package ci
|
||||
|
||||
import "gopkg.in/yaml.v3"
|
||||
|
||||
// WorkflowFile is the parsed representation of a .forgebucket/workflows/*.yml file.
|
||||
type WorkflowFile struct {
|
||||
Name string `yaml:"name"`
|
||||
On WorkflowTrigger `yaml:"on"`
|
||||
Jobs map[string]WorkflowJob `yaml:"jobs"`
|
||||
}
|
||||
|
||||
type WorkflowTrigger struct {
|
||||
Push *PushTrigger `yaml:"push"`
|
||||
PullRequest *PRTrigger `yaml:"pull_request"`
|
||||
}
|
||||
|
||||
type PushTrigger struct {
|
||||
Branches []string `yaml:"branches"`
|
||||
Tags []string `yaml:"tags"`
|
||||
}
|
||||
|
||||
type PRTrigger struct {
|
||||
Branches []string `yaml:"branches"`
|
||||
}
|
||||
|
||||
type WorkflowJob struct {
|
||||
Name string `yaml:"name"`
|
||||
RunsOn string `yaml:"runs-on"`
|
||||
Needs StringOrSlice `yaml:"needs"`
|
||||
Steps []WorkflowStep `yaml:"steps"`
|
||||
}
|
||||
|
||||
type WorkflowStep struct {
|
||||
Name string `yaml:"name"`
|
||||
Uses string `yaml:"uses"`
|
||||
Run string `yaml:"run"`
|
||||
Env map[string]string `yaml:"env"`
|
||||
}
|
||||
|
||||
// StringOrSlice unmarshals a YAML value that may be either a single string
|
||||
// ("needs: test") or a list ("needs: [test, build]").
|
||||
type StringOrSlice []string
|
||||
|
||||
func (s *StringOrSlice) UnmarshalYAML(value *yaml.Node) error {
|
||||
switch value.Kind {
|
||||
case yaml.ScalarNode:
|
||||
if value.Value != "" {
|
||||
*s = []string{value.Value}
|
||||
}
|
||||
case yaml.SequenceNode:
|
||||
var items []string
|
||||
if err := value.Decode(&items); err != nil {
|
||||
return err
|
||||
}
|
||||
*s = items
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user