Files
ForgeBucket/internal/domain/sbom/generator.go
T

205 lines
6.0 KiB
Go

package sbom
import (
"context"
"encoding/json"
"fmt"
"log"
"time"
"xorm.io/xorm"
"github.com/forgeo/forgebucket/internal/events"
gitdomain "github.com/forgeo/forgebucket/internal/domain/git"
"github.com/forgeo/forgebucket/internal/models"
)
// manifestEntry pairs a manifest file path with the parser that turns that
// file's raw contents into SBOM components.
type manifestEntry struct {
// path is the manifest's path inside the repository, as handed to
// gitdomain.BlobCat by Generate.
path string
// parser converts the manifest's bytes into the components it declares.
parser func([]byte) []Component
}
// knownManifests lists, in probe order, every manifest file the generator
// looks for. Generate walks the whole list: each manifest that can be read at
// the requested SHA is parsed, so several ecosystems may contribute components
// to a single document.
var knownManifests = []manifestEntry{
	{path: "go.mod", parser: ParseGoMod},
	{path: "package.json", parser: ParsePackageJSON},
	{path: "requirements.txt", parser: ParseRequirementsTxt},
	{path: "Cargo.toml", parser: ParseCargoToml},
	{path: "Gemfile.lock", parser: ParseGemfileLock},
	{path: "pom.xml", parser: ParsePomXML},
}
// Generator produces SBOM reports, either in reaction to pipeline.completed
// events (see Start) or on request (see GenerateOnDemand).
type Generator struct {
// db is the xorm engine used to look up pipeline runs and repositories and
// to read and store SBOM reports.
db *xorm.Engine
// bus is the event bus on which Start subscribes to pipeline.completed.
bus events.EventBus
}
// NewGenerator returns a Generator that uses db for persistence and bus for
// event subscription. It does not subscribe to anything; call Start for that.
func NewGenerator(db *xorm.Engine, bus events.EventBus) *Generator {
	gen := new(Generator)
	gen.db = db
	gen.bus = bus
	return gen
}
// Start registers a handler on events.SubjectPipelineCompleted and then blocks
// until ctx is cancelled, at which point the subscription is released.
//
// For every event whose status is "succeeded" the handler launches
// generateForRun in its own goroutine; events with any other status, and
// payloads that fail to decode, are dropped (the latter with a log line).
// If subscribing fails the error is logged and Start still blocks until ctx
// is cancelled.
func (g *Generator) Start(ctx context.Context) {
	onCompleted := func(_ string, payload []byte) {
		var evt events.PipelineEvent
		if decodeErr := json.Unmarshal(payload, &evt); decodeErr != nil {
			log.Printf("sbom: bad pipeline.completed event: %v", decodeErr)
			return
		}
		if evt.Status == "succeeded" {
			// Generation runs off the handler so the bus callback returns promptly.
			go g.generateForRun(evt.RunID, evt.RepoID)
		}
	}

	unsub, err := g.bus.Subscribe(events.SubjectPipelineCompleted, onCompleted)
	if err != nil {
		log.Printf("sbom: subscribe pipeline.completed: %v", err)
		// Nothing to unsubscribe from; just honour the blocking contract.
		<-ctx.Done()
		return
	}
	defer unsub()

	<-ctx.Done()
}
// generateForRun builds and stores an SBOM for pipeline run runID of
// repository repoID, using the run's TriggerSHA as the commit to inspect.
//
// It returns nothing: lookup, generation and persistence failures are logged,
// and a missing run or repository row ends the call silently.
func (g *Generator) generateForRun(runID, repoID int64) {
	var run models.PipelineRun
	runFound, err := g.db.ID(runID).Get(&run)
	if err != nil {
		log.Printf("sbom: look up run %d: %v", runID, err)
		return
	}
	if !runFound {
		return
	}

	var repo models.Repository
	repoFound, err := g.db.ID(repoID).Get(&repo)
	if err != nil {
		log.Printf("sbom: look up repo %d: %v", repoID, err)
		return
	}
	if !repoFound {
		return
	}

	doc, err := Generate(repo.DiskPath, repo.Name, run.TriggerSHA)
	if err != nil {
		log.Printf("sbom: generate for run %d: %v", runID, err)
		return
	}

	err = g.persist(repoID, runID, run.TriggerSHA, doc)
	if err != nil {
		log.Printf("sbom: persist for run %d: %v", runID, err)
	}
}
// GenerateOnDemand returns the SBOM report for repository repoID at ref,
// generating and storing it if no matching report exists yet.
//
// ref may be anything gitdomain.RevParse can resolve (branch name, tag, etc.);
// it is resolved to a commit SHA before the cache is consulted. runID is part
// of the cache key, so a report stored for one run (or for runID 0) does not
// satisfy a request for a different run at the same SHA.
//
// An error is returned when the repository lookup or the cache lookup fails,
// when the repository does not exist, when ref cannot be resolved, or when
// generation or persistence fails.
func (g *Generator) GenerateOnDemand(repoID, runID int64, ref string) (*models.SBOMReport, error) {
	var repo models.Repository
	found, err := g.db.ID(repoID).Get(&repo)
	if err != nil {
		// Keep database failures distinct from a genuinely missing repository.
		return nil, fmt.Errorf("look up repo %d: %w", repoID, err)
	}
	if !found {
		return nil, fmt.Errorf("repo %d not found", repoID)
	}

	// Resolve the ref to a full commit SHA — ref can be a branch name, tag, etc.
	sha, err := gitdomain.RevParse(repo.DiskPath, ref)
	if err != nil {
		return nil, fmt.Errorf("rev-parse %s: %w", ref, err)
	}

	// Return cached report for this exact SHA + runID if one already exists.
	// Without runID in the cache key, a prior on-demand generation (runID=0)
	// would shadow subsequent per-run generation requests.
	var existing models.SBOMReport
	found, err = g.db.Where("repo_id = ? AND sha = ? AND run_id = ?", repoID, sha, runID).Get(&existing)
	if err != nil {
		// Treating a failed lookup as a cache miss could insert a duplicate
		// report, so surface the failure instead.
		return nil, fmt.Errorf("look up cached sbom_report: %w", err)
	}
	if found {
		return &existing, nil
	}

	doc, err := Generate(repo.DiskPath, repo.Name, sha)
	if err != nil {
		return nil, err
	}
	return g.persistAndReturn(repoID, runID, sha, doc)
}
// GetLatest fetches the SBOM report for repoID with the newest generated_at
// timestamp. It returns (nil, nil) when the repository has no reports, and
// (nil, err) when the query fails.
func (g *Generator) GetLatest(repoID int64) (*models.SBOMReport, error) {
	latest := new(models.SBOMReport)
	newestFirst := g.db.Where("repo_id = ?", repoID).OrderBy("generated_at DESC")

	ok, err := newestFirst.Get(latest)
	switch {
	case err != nil:
		return nil, err
	case !ok:
		return nil, nil
	}
	return latest, nil
}
// GetForRun fetches an SBOM report whose run_id equals runID. It returns
// (nil, nil) when no such report exists, and (nil, err) when the query fails.
func (g *Generator) GetForRun(runID int64) (*models.SBOMReport, error) {
	byRun := new(models.SBOMReport)

	ok, err := g.db.Where("run_id = ?", runID).Get(byRun)
	if err != nil {
		return nil, err
	}
	if ok {
		return byRun, nil
	}
	return nil, nil
}
// ─── core generation logic ────────────────────────────────────────────────────
// Generate builds a CycloneDX 1.4 document for the repository stored at
// repoPath, as of commit sha. Each entry in knownManifests is read from git at
// that commit and, when readable, parsed; the resulting components are
// appended to the document in manifest order.
//
// A manifest that cannot be read is skipped, so a repository with no known
// manifests yields a document to which no components were added. The returned
// error is currently always nil.
func Generate(repoPath, repoName, sha string) (*Document, error) {
	doc := NewDocument(repoName, sha)

	for i := range knownManifests {
		entry := knownManifests[i]

		raw, err := gitdomain.BlobCat(repoPath, sha, entry.path)
		if err == nil {
			doc.Components = append(doc.Components, entry.parser(raw)...)
		}
		// A read error is taken to mean the file is absent at this SHA and
		// is skipped without logging.
	}

	return doc, nil
}
// ─── persistence helpers ──────────────────────────────────────────────────────
// persist stores doc as an SBOM report via persistAndReturn and reports only
// whether that succeeded; the stored row itself is discarded.
func (g *Generator) persist(repoID, runID int64, sha string, doc *Document) error {
	if _, err := g.persistAndReturn(repoID, runID, sha, doc); err != nil {
		return err
	}
	return nil
}
// persistAndReturn serialises doc to JSON, inserts it as a new SBOMReport row
// for (repoID, runID, sha) stamped with the current UTC time, logs a one-line
// summary, and returns the inserted report.
//
// It returns an error if doc cannot be marshalled or the insert fails; in both
// cases the returned report is nil.
func (g *Generator) persistAndReturn(repoID, runID int64, sha string, doc *Document) (*models.SBOMReport, error) {
	bomJSON, err := json.Marshal(doc)
	if err != nil {
		return nil, fmt.Errorf("marshal BOM: %w", err)
	}

	report := &models.SBOMReport{
		RepoID:         repoID,
		RunID:          runID,
		SHA:            sha,
		Format:         FormatCycloneDX,
		ComponentCount: len(doc.Components),
		BOMDocument:    string(bomJSON),
		GeneratedAt:    time.Now().UTC(),
	}
	if _, err := g.db.Insert(report); err != nil {
		return nil, fmt.Errorf("insert sbom_report: %w", err)
	}

	// Abbreviate the SHA for the log line only. Slicing unconditionally would
	// panic on a SHA shorter than seven bytes (e.g. an empty trigger SHA),
	// after the row has already been written.
	shortSHA := sha
	if len(shortSHA) > 7 {
		shortSHA = shortSHA[:7]
	}
	log.Printf("sbom: generated report %d for repo %d @ %s (%d components)",
		report.ID, repoID, shortSHA, report.ComponentCount)
	return report, nil
}