174 lines
4.5 KiB
Go
174 lines
4.5 KiB
Go
package scanning
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"regexp"
|
|
"time"
|
|
|
|
"xorm.io/xorm"
|
|
|
|
"github.com/forgeo/forgebucket/internal/events"
|
|
gitdomain "github.com/forgeo/forgebucket/internal/domain/git"
|
|
"github.com/forgeo/forgebucket/internal/models"
|
|
)
|
|
|
|
// compiledPattern is a pre-compiled regex pattern.
|
|
type compiledPattern struct {
|
|
pattern
|
|
re *regexp.Regexp
|
|
}
|
|
|
|
// Scanner subscribes to push.received and scans commit content for secrets.
|
|
type Scanner struct {
|
|
db *xorm.Engine
|
|
bus events.EventBus
|
|
patterns []compiledPattern
|
|
}
|
|
|
|
// New creates a Scanner with all patterns pre-compiled.
|
|
func New(db *xorm.Engine, bus events.EventBus) (*Scanner, error) {
|
|
cp := make([]compiledPattern, 0, len(Patterns))
|
|
for _, p := range Patterns {
|
|
re, err := regexp.Compile(p.Raw)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("scanning: compile pattern %q: %w", p.Name, err)
|
|
}
|
|
cp = append(cp, compiledPattern{pattern: p, re: re})
|
|
}
|
|
return &Scanner{db: db, bus: bus, patterns: cp}, nil
|
|
}
|
|
|
|
// Start subscribes to push.received and blocks until ctx is cancelled.
|
|
func (s *Scanner) Start(ctx context.Context) {
|
|
unsub, err := s.bus.Subscribe(events.SubjectPushReceived, func(_ string, data []byte) {
|
|
var evt events.PushEvent
|
|
if err := json.Unmarshal(data, &evt); err != nil {
|
|
log.Printf("scanning: bad push event: %v", err)
|
|
return
|
|
}
|
|
go s.scanPush(evt)
|
|
})
|
|
if err != nil {
|
|
log.Printf("scanning: subscribe: %v", err)
|
|
} else {
|
|
defer unsub()
|
|
}
|
|
<-ctx.Done()
|
|
}
|
|
|
|
// scanPush scans the diff between before and after for all patterns.
|
|
func (s *Scanner) scanPush(evt events.PushEvent) {
|
|
// Branch deletion — nothing to scan.
|
|
zeroOID := "0000000000000000000000000000000000000000"
|
|
if evt.After == zeroOID {
|
|
return
|
|
}
|
|
|
|
// Resolve repo.
|
|
var repo models.Repository
|
|
if found, err := s.db.ID(evt.RepoID).Get(&repo); err != nil {
|
|
log.Printf("scanning: look up repo %d: %v", evt.RepoID, err)
|
|
return
|
|
} else if !found {
|
|
return
|
|
}
|
|
|
|
// Get the diff content between before and after.
|
|
diffContent, err := s.getDiff(repo.DiskPath, evt.Before, evt.After)
|
|
if err != nil {
|
|
log.Printf("scanning: get diff for repo %s: %v", repo.Name, err)
|
|
return
|
|
}
|
|
|
|
// Determine the commit SHA for the findings.
|
|
headSHA := evt.After
|
|
|
|
now := time.Now().UTC()
|
|
|
|
for _, p := range s.patterns {
|
|
matches := p.re.FindAllString(string(diffContent), -1)
|
|
for _, match := range matches {
|
|
// Skip very short matches (likely false positives).
|
|
if len(match) < 6 {
|
|
continue
|
|
}
|
|
|
|
leak := &models.SecretLeak{
|
|
RepoID: evt.RepoID,
|
|
CommitSHA: headSHA[:12],
|
|
Ref: evt.Ref,
|
|
PatternName: p.Name,
|
|
Description: p.Description,
|
|
Severity: p.Severity,
|
|
MatchSample: truncate(match, 40),
|
|
DetectedAt: now,
|
|
}
|
|
if _, err := s.db.Insert(leak); err != nil {
|
|
log.Printf("scanning: insert leak for %s: %v", repo.Name, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// getDiff returns the unified diff of all changes between two refs.
|
|
func (s *Scanner) getDiff(repoPath, oldRef, newRef string) ([]byte, error) {
|
|
// If oldRef is the zero OID (new branch), diff-tree against the empty tree so
|
|
// we get actual file contents rather than ls-tree metadata.
|
|
zeroOID := "0000000000000000000000000000000000000000"
|
|
if oldRef == zeroOID {
|
|
out, err := gitdomain.Run(repoPath, "diff-tree", "--no-commit-id", "-r", "-p", newRef)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
out, err := gitdomain.Run(repoPath, "diff", "--no-color", "--unified=3", oldRef, newRef)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// ListFindings returns all active secret leaks for a repo, newest first.
|
|
func (s *Scanner) ListFindings(repoID int64) ([]models.SecretLeak, error) {
|
|
var leaks []models.SecretLeak
|
|
if err := s.db.Where("repo_id = ? AND dismissed = ?", repoID, false).
|
|
OrderBy("detected_at DESC").Find(&leaks); err != nil {
|
|
return nil, err
|
|
}
|
|
if leaks == nil {
|
|
leaks = []models.SecretLeak{}
|
|
}
|
|
return leaks, nil
|
|
}
|
|
|
|
// DismissFindings acknowledges a leak so it no longer appears in active lists.
|
|
func (s *Scanner) DismissFindings(leakID int64, dismissedBy string) error {
|
|
now := time.Now().UTC()
|
|
affected, err := s.db.ID(leakID).Cols("dismissed", "dismissed_by", "dismissed_at").
|
|
Update(&models.SecretLeak{
|
|
Dismissed: true,
|
|
DismissedBy: dismissedBy,
|
|
DismissedAt: &now,
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if affected == 0 {
|
|
return fmt.Errorf("leak %d not found", leakID)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// truncate shortens s to at most maxLen bytes for safe display, appending
// "..." when anything was cut off. Strings that already fit are returned
// unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte maxLen is a
// continuation byte the cut moves back to the start of that rune, so the
// result stays valid UTF-8 whenever s is. A negative maxLen is treated as 0.
func truncate(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}

	// cut < len(s) here, so s[cut] is always in range. UTF-8 continuation
	// bytes have the bit pattern 10xxxxxx.
	cut := maxLen
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}
|