package scanning

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"regexp"
	"time"

	"xorm.io/xorm"

	gitdomain "github.com/forgeo/forgebucket/internal/domain/git"
	"github.com/forgeo/forgebucket/internal/events"
	"github.com/forgeo/forgebucket/internal/models"
)

// compiledPattern pairs a pattern definition with its compiled regular
// expression, so the expression is compiled once in New rather than per scan.
type compiledPattern struct {
	pattern
	re *regexp.Regexp
}

// Scanner subscribes to push.received and scans commit content for secrets.
type Scanner struct {
	db       *xorm.Engine
	bus      events.EventBus
	patterns []compiledPattern
}

// New creates a Scanner with all patterns pre-compiled.
//
// Every entry of Patterns is compiled up front; if any expression is invalid
// New returns an error naming that pattern and no Scanner.
func New(db *xorm.Engine, bus events.EventBus) (*Scanner, error) {
	compiled := make([]compiledPattern, 0, len(Patterns))
	for _, p := range Patterns {
		re, err := regexp.Compile(p.Raw)
		if err != nil {
			return nil, fmt.Errorf("scanning: compile pattern %q: %w", p.Name, err)
		}
		compiled = append(compiled, compiledPattern{pattern: p, re: re})
	}
	return &Scanner{db: db, bus: bus, patterns: compiled}, nil
}

// Start subscribes to push.received and blocks until ctx is cancelled.
//
// Each event that decodes successfully is handed to scanPush on a new
// goroutine; undecodable events are logged and dropped. If the subscription
// itself fails the error is logged and Start still blocks until ctx is
// cancelled, without scanning anything.
func (s *Scanner) Start(ctx context.Context) {
	unsub, err := s.bus.Subscribe(events.SubjectPushReceived, func(_ string, data []byte) {
		var evt events.PushEvent
		if err := json.Unmarshal(data, &evt); err != nil {
			log.Printf("scanning: bad push event: %v", err)
			return
		}
		go s.scanPush(evt)
	})
	if err != nil {
		log.Printf("scanning: subscribe: %v", err)
	} else {
		// Only a successful subscription has something to tear down.
		defer unsub()
	}
	<-ctx.Done()
}

// scanPush scans the diff between before and after for all patterns.
//
// Every match of at least minMatchLen bytes is stored as a models.SecretLeak
// row carrying a truncated sample of the match. Failures (repo lookup, diff,
// insert) are logged rather than returned because scanPush runs on its own
// goroutine with no caller to report to.
func (s *Scanner) scanPush(evt events.PushEvent) {
	const (
		zeroOID     = "0000000000000000000000000000000000000000"
		shortSHALen = 12 // length of the abbreviated commit SHA stored per finding
		minMatchLen = 6  // shorter matches are likely false positives
		sampleLen   = 40 // maximum length of the stored match sample
	)

	// Branch deletion — nothing to scan.
	if evt.After == zeroOID {
		return
	}

	// Resolve repo.
	var repo models.Repository
	found, err := s.db.ID(evt.RepoID).Get(&repo)
	if err != nil {
		log.Printf("scanning: look up repo %d: %v", evt.RepoID, err)
		return
	}
	if !found {
		return
	}

	// Get the diff content between before and after.
	diffContent, err := s.getDiff(repo.DiskPath, evt.Before, evt.After)
	if err != nil {
		log.Printf("scanning: get diff for repo %s: %v", repo.Name, err)
		return
	}

	// Abbreviate the commit SHA for the findings. The length guard matters:
	// the value comes straight from the event payload, and an unguarded slice
	// on a short string would panic on this goroutine and crash the process.
	commitSHA := evt.After
	if len(commitSHA) > shortSHALen {
		commitSHA = commitSHA[:shortSHALen]
	}

	// Convert once; doing it inside the loop would copy the diff per pattern.
	content := string(diffContent)
	now := time.Now().UTC()

	for _, p := range s.patterns {
		for _, match := range p.re.FindAllString(content, -1) {
			// Skip very short matches (likely false positives).
			if len(match) < minMatchLen {
				continue
			}
			leak := &models.SecretLeak{
				RepoID:      evt.RepoID,
				CommitSHA:   commitSHA,
				Ref:         evt.Ref,
				PatternName: p.Name,
				Description: p.Description,
				Severity:    p.Severity,
				MatchSample: truncate(match, sampleLen),
				DetectedAt:  now,
			}
			if _, err := s.db.Insert(leak); err != nil {
				log.Printf("scanning: insert leak for %s: %v", repo.Name, err)
			}
		}
	}
}

// getDiff returns the patch text for the changes introduced by a push.
//
// When oldRef is the zero OID (a newly created branch) there is no previous
// tip, so the patch of newRef itself is returned via diff-tree. Otherwise the
// unified diff between oldRef and newRef is returned.
func (s *Scanner) getDiff(repoPath, oldRef, newRef string) ([]byte, error) {
	const zeroOID = "0000000000000000000000000000000000000000"

	if oldRef == zeroOID {
		// --root makes diff-tree show a parentless commit as one big creation
		// event; without it the first push to an empty repository yields an
		// empty diff and is never scanned.
		// NOTE(review): diff-tree with a single commit compares it with its
		// parent(s), so a multi-commit push to a new branch only has its tip
		// commit scanned — confirm whether that is acceptable.
		out, err := gitdomain.Run(repoPath, "diff-tree", "--root", "--no-commit-id", "-r", "-p", newRef)
		if err != nil {
			return nil, err
		}
		return out, nil
	}

	out, err := gitdomain.Run(repoPath, "diff", "--no-color", "--unified=3", oldRef, newRef)
	if err != nil {
		return nil, err
	}
	return out, nil
}

// ListFindings returns all active secret leaks for a repo, newest first.
//
// Dismissed leaks are excluded. The returned slice is never nil when err is
// nil, so it encodes as an empty JSON array rather than null.
func (s *Scanner) ListFindings(repoID int64) ([]models.SecretLeak, error) {
	var leaks []models.SecretLeak
	err := s.db.Where("repo_id = ? AND dismissed = ?", repoID, false).
		OrderBy("detected_at DESC").
		Find(&leaks)
	if err != nil {
		return nil, err
	}
	if leaks == nil {
		leaks = []models.SecretLeak{}
	}
	return leaks, nil
}

// DismissFindings acknowledges a leak so it no longer appears in active lists.
func (s *Scanner) DismissFindings(leakID int64, dismissedBy string) error { now := time.Now().UTC() affected, err := s.db.ID(leakID).Cols("dismissed", "dismissed_by", "dismissed_at"). Update(&models.SecretLeak{ Dismissed: true, DismissedBy: dismissedBy, DismissedAt: &now, }) if err != nil { return err } if affected == 0 { return fmt.Errorf("leak %d not found", leakID) } return nil } // truncate shortens a string to maxLen characters for safe display. func truncate(s string, maxLen int) string { if len(s) <= maxLen { return s } return s[:maxLen] + "..." }