package sbom

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"time"

	"xorm.io/xorm"

	gitdomain "github.com/forgeo/forgebucket/internal/domain/git"
	"github.com/forgeo/forgebucket/internal/events"
	"github.com/forgeo/forgebucket/internal/models"
)

// manifestEntry pairs a manifest file path with the function that parses
// that file's raw bytes into components.
type manifestEntry struct {
	path   string
	parser func([]byte) []Component
}

// knownManifests lists, in probe order, the manifest files the generator
// looks for. Every entry that can be read at the requested SHA is parsed.
var knownManifests = []manifestEntry{
	{path: "go.mod", parser: ParseGoMod},
	{path: "package.json", parser: ParsePackageJSON},
	{path: "requirements.txt", parser: ParseRequirementsTxt},
	{path: "Cargo.toml", parser: ParseCargoToml},
	{path: "Gemfile.lock", parser: ParseGemfileLock},
	{path: "pom.xml", parser: ParsePomXML},
}

// Generator listens for pipeline.completed events and produces SBOM reports,
// storing them through its database engine.
type Generator struct {
	db  *xorm.Engine
	bus events.EventBus
}

// NewGenerator returns a Generator that uses db for storage and bus for
// event subscriptions.
func NewGenerator(db *xorm.Engine, bus events.EventBus) *Generator {
	gen := new(Generator)
	gen.db = db
	gen.bus = bus
	return gen
}

// Start subscribes to pipeline.completed and then blocks until ctx is
// cancelled. A subscription failure is only logged; Start still waits for
// cancellation in that case.
func (g *Generator) Start(ctx context.Context) {
	// onCompleted decodes one event and, for succeeded runs only, kicks off
	// generation on a separate goroutine so the handler returns immediately.
	onCompleted := func(_ string, data []byte) {
		var ev events.PipelineEvent
		if decodeErr := json.Unmarshal(data, &ev); decodeErr != nil {
			log.Printf("sbom: bad pipeline.completed event: %v", decodeErr)
			return
		}
		if ev.Status == "succeeded" {
			go g.generateForRun(ev.RunID, ev.RepoID)
		}
	}

	unsub, err := g.bus.Subscribe(events.SubjectPipelineCompleted, onCompleted)
	if err == nil {
		// Deferred calls are function-scoped, so this runs when Start returns.
		defer unsub()
	} else {
		log.Printf("sbom: subscribe pipeline.completed: %v", err)
	}

	<-ctx.Done()
}

// generateForRun generates an SBOM for the pipeline run identified by runID.
func (g *Generator) generateForRun(runID, repoID int64) { var run models.PipelineRun if found, _ := g.db.ID(runID).Get(&run); !found { return } var repo models.Repository if found, _ := g.db.ID(repoID).Get(&repo); !found { return } doc, err := Generate(repo.DiskPath, repo.Name, run.TriggerSHA) if err != nil { log.Printf("sbom: generate for run %d: %v", runID, err) return } if err := g.persist(repoID, runID, run.TriggerSHA, doc); err != nil { log.Printf("sbom: persist for run %d: %v", runID, err) } } // GenerateOnDemand generates an SBOM for a specific repo + SHA and stores it // (or returns the cached one if the SHA was already processed). func (g *Generator) GenerateOnDemand(repoID, runID int64, ref string) (*models.SBOMReport, error) { var repo models.Repository if found, _ := g.db.ID(repoID).Get(&repo); !found { return nil, fmt.Errorf("repo %d not found", repoID) } // Resolve the ref to a full commit SHA — ref can be a branch name, tag, etc. sha, err := gitdomain.RevParse(repo.DiskPath, ref) if err != nil { return nil, fmt.Errorf("rev-parse %s: %w", ref, err) } // Return cached report for this exact SHA + runID if one already exists. // Without runID in the cache key, a prior on-demand generation (runID=0) // would shadow subsequent per-run generation requests. var existing models.SBOMReport if found, _ := g.db.Where("repo_id = ? AND sha = ? AND run_id = ?", repoID, sha, runID).Get(&existing); found { return &existing, nil } doc, err := Generate(repo.DiskPath, repo.Name, sha) if err != nil { return nil, err } report, err := g.persistAndReturn(repoID, runID, sha, doc) if err != nil { return nil, err } return report, nil } // GetLatest returns the most recent SBOM report for a repo. func (g *Generator) GetLatest(repoID int64) (*models.SBOMReport, error) { var report models.SBOMReport found, err := g.db.Where("repo_id = ?", repoID). OrderBy("generated_at DESC"). 
Get(&report) if err != nil { return nil, err } if !found { return nil, nil } return &report, nil } // GetForRun returns the SBOM report associated with a pipeline run. func (g *Generator) GetForRun(runID int64) (*models.SBOMReport, error) { var report models.SBOMReport found, err := g.db.Where("run_id = ?", runID).Get(&report) if err != nil { return nil, err } if !found { return nil, nil } return &report, nil } // ─── core generation logic ──────────────────────────────────────────────────── // Generate reads known manifest files from the git repo at sha and builds // a CycloneDX 1.4 document. It is safe to call even if no manifests exist // (the document will have an empty components list). func Generate(repoPath, repoName, sha string) (*Document, error) { doc := NewDocument(repoName, sha) for _, m := range knownManifests { content, err := gitdomain.BlobCat(repoPath, sha, m.path) if err != nil { // File simply doesn't exist at this SHA — skip silently. continue } comps := m.parser(content) doc.Components = append(doc.Components, comps...) } return doc, nil } // ─── persistence helpers ────────────────────────────────────────────────────── func (g *Generator) persist(repoID, runID int64, sha string, doc *Document) error { _, err := g.persistAndReturn(repoID, runID, sha, doc) return err } func (g *Generator) persistAndReturn(repoID, runID int64, sha string, doc *Document) (*models.SBOMReport, error) { bomJSON, err := json.Marshal(doc) if err != nil { return nil, fmt.Errorf("marshal BOM: %w", err) } report := &models.SBOMReport{ RepoID: repoID, RunID: runID, SHA: sha, Format: FormatCycloneDX, ComponentCount: len(doc.Components), BOMDocument: string(bomJSON), GeneratedAt: time.Now().UTC(), } if _, err := g.db.Insert(report); err != nil { return nil, fmt.Errorf("insert sbom_report: %w", err) } log.Printf("sbom: generated report %d for repo %d @ %s (%d components)", report.ID, repoID, sha[:7], report.ComponentCount) return report, nil }