Files

174 lines
4.5 KiB
Go

package scanning
import (
"context"
"encoding/json"
"fmt"
"log"
"regexp"
"time"
"xorm.io/xorm"
"github.com/forgeo/forgebucket/internal/events"
gitdomain "github.com/forgeo/forgebucket/internal/domain/git"
"github.com/forgeo/forgebucket/internal/models"
)
// compiledPattern pairs a pattern definition with its compiled regular
// expression, so the expression is built once in New rather than on every scan.
type compiledPattern struct {
// pattern is embedded; its Raw, Name, Description and Severity fields are
// read through this struct by New and scanPush.
pattern
// re is the result of regexp.Compile applied to the embedded pattern's Raw.
re *regexp.Regexp
}
// Scanner subscribes to push.received and scans commit content for secrets.
// Construct it with New so that patterns is populated.
type Scanner struct {
// db is used to look up repositories and to insert, list and dismiss
// SecretLeak rows.
db *xorm.Engine
// bus is the event bus that Start subscribes on.
bus events.EventBus
// patterns holds the pre-compiled patterns applied to every pushed diff.
patterns []compiledPattern
}
// New builds a Scanner backed by db and bus. Every entry in Patterns is
// compiled up front; the first expression that fails to compile aborts
// construction and is reported together with the offending pattern's name.
func New(db *xorm.Engine, bus events.EventBus) (*Scanner, error) {
	scanner := &Scanner{
		db:       db,
		bus:      bus,
		patterns: make([]compiledPattern, 0, len(Patterns)),
	}
	for _, def := range Patterns {
		compiled, compileErr := regexp.Compile(def.Raw)
		if compileErr != nil {
			return nil, fmt.Errorf("scanning: compile pattern %q: %w", def.Name, compileErr)
		}
		scanner.patterns = append(scanner.patterns, compiledPattern{pattern: def, re: compiled})
	}
	return scanner, nil
}
// Start registers a handler for events.SubjectPushReceived and then blocks
// until ctx is cancelled. Each well-formed push event is scanned in its own
// goroutine; malformed payloads are logged and dropped. A subscription failure
// is logged, after which Start still waits for ctx before returning.
func (s *Scanner) Start(ctx context.Context) {
	onPush := func(_ string, payload []byte) {
		var push events.PushEvent
		if decodeErr := json.Unmarshal(payload, &push); decodeErr != nil {
			log.Printf("scanning: bad push event: %v", decodeErr)
			return
		}
		go s.scanPush(push)
	}

	unsubscribe, subErr := s.bus.Subscribe(events.SubjectPushReceived, onPush)
	if subErr == nil {
		// Drop the subscription once the context ends and Start returns.
		defer unsubscribe()
	} else {
		log.Printf("scanning: subscribe: %v", subErr)
	}

	<-ctx.Done()
}
// scanPush scans the diff between evt.Before and evt.After against every
// compiled pattern and inserts one SecretLeak row per match.
//
// Branch deletions (After is the zero OID) and pushes whose repository cannot
// be found are skipped. Errors are logged rather than returned because the
// method is launched in its own goroutine by Start and has no caller to
// report to.
func (s *Scanner) scanPush(evt events.PushEvent) {
	const (
		zeroOID     = "0000000000000000000000000000000000000000"
		shortSHALen = 12 // length of the abbreviated commit SHA stored with a finding
		minMatchLen = 6  // shorter matches are treated as likely false positives
		sampleLen   = 40 // maximum length of the stored match sample
	)

	// Branch deletion — nothing to scan.
	if evt.After == zeroOID {
		return
	}

	// Resolve repo.
	var repo models.Repository
	if found, err := s.db.ID(evt.RepoID).Get(&repo); err != nil {
		log.Printf("scanning: look up repo %d: %v", evt.RepoID, err)
		return
	} else if !found {
		return
	}

	// Get the diff content between before and after.
	diffContent, err := s.getDiff(repo.DiskPath, evt.Before, evt.After)
	if err != nil {
		log.Printf("scanning: get diff for repo %s: %v", repo.Name, err)
		return
	}

	// Abbreviate the head commit for the findings. The length is checked first:
	// slicing a short or malformed After value would panic, and a panic in this
	// goroutine would bring down the whole process.
	commitSHA := evt.After
	if len(commitSHA) > shortSHALen {
		commitSHA = commitSHA[:shortSHALen]
	}

	// Convert the diff once instead of once per pattern.
	diffText := string(diffContent)
	now := time.Now().UTC()

	for _, p := range s.patterns {
		for _, match := range p.re.FindAllString(diffText, -1) {
			if len(match) < minMatchLen {
				continue
			}
			leak := &models.SecretLeak{
				RepoID:      evt.RepoID,
				CommitSHA:   commitSHA,
				Ref:         evt.Ref,
				PatternName: p.Name,
				Description: p.Description,
				Severity:    p.Severity,
				MatchSample: truncate(match, sampleLen),
				DetectedAt:  now,
			}
			// Keep going after a failed insert so one bad row does not hide
			// the remaining findings.
			if _, err := s.db.Insert(leak); err != nil {
				log.Printf("scanning: insert leak for %s: %v", repo.Name, err)
			}
		}
	}
}
// getDiff returns the patch-format diff of all changes between oldRef and
// newRef in the repository at repoPath.
//
// When oldRef is the zero OID (a newly created branch) there is no previous
// commit to compare with, so newRef is diffed against git's empty tree and
// every file reachable from newRef appears as an addition. Any error from the
// git invocation is returned unchanged with a nil diff.
func (s *Scanner) getDiff(repoPath, oldRef, newRef string) ([]byte, error) {
	const (
		zeroOID = "0000000000000000000000000000000000000000"
		// emptyTreeOID is the well-known SHA-1 object ID of git's empty tree.
		emptyTreeOID = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
	)

	if oldRef == zeroOID {
		// Single-commit diff-tree only compares newRef with its parent and
		// prints nothing for a root commit, which would leave most of a new
		// branch unscanned. Naming the empty tree as the left side yields the
		// full file contents instead.
		out, err := gitdomain.Run(repoPath, "diff-tree", "--no-commit-id", "-r", "-p", emptyTreeOID, newRef)
		if err != nil {
			return nil, err
		}
		return out, nil
	}

	out, err := gitdomain.Run(repoPath, "diff", "--no-color", "--unified=3", oldRef, newRef)
	if err != nil {
		return nil, err
	}
	return out, nil
}
// ListFindings fetches the non-dismissed secret leaks recorded for repoID,
// ordered by detection time with the most recent first. On success the
// returned slice is never nil; a repository without findings yields an empty
// slice.
func (s *Scanner) ListFindings(repoID int64) ([]models.SecretLeak, error) {
	query := s.db.Where("repo_id = ? AND dismissed = ?", repoID, false).OrderBy("detected_at DESC")

	var found []models.SecretLeak
	if err := query.Find(&found); err != nil {
		return nil, err
	}
	if found != nil {
		return found, nil
	}
	// Nothing was loaded: substitute an empty, non-nil slice.
	return []models.SecretLeak{}, nil
}
// DismissFindings marks the leak identified by leakID as dismissed, recording
// who dismissed it and the current UTC time, so it is excluded from the active
// list returned by ListFindings. It returns the database error if the update
// fails, or a "not found" error when no row was updated.
func (s *Scanner) DismissFindings(leakID int64, dismissedBy string) error {
	dismissedAt := time.Now().UTC()
	update := &models.SecretLeak{
		Dismissed:   true,
		DismissedBy: dismissedBy,
		DismissedAt: &dismissedAt,
	}

	// Restrict the write to the three dismissal columns.
	rows, err := s.db.ID(leakID).Cols("dismissed", "dismissed_by", "dismissed_at").Update(update)
	switch {
	case err != nil:
		return err
	case rows == 0:
		return fmt.Errorf("leak %d not found", leakID)
	}
	return nil
}
// truncate shortens s to at most maxLen characters (runes) for safe display,
// appending "..." when anything was cut off. Strings that already fit are
// returned unchanged. The cut always falls on a rune boundary, so a multi-byte
// UTF-8 character is never split into an invalid sequence. A negative maxLen
// is treated as zero.
func truncate(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	// Fast path: a string of at most maxLen bytes has at most maxLen runes.
	if len(s) <= maxLen {
		return s
	}
	seen := 0
	for i := range s {
		// i is the byte offset at which rune number seen+1 starts.
		if seen == maxLen {
			return s[:i] + "..."
		}
		seen++
	}
	// More than maxLen bytes but no more than maxLen runes: nothing to cut.
	return s
}