added artifacts

This commit is contained in:
2026-05-12 22:34:26 +02:00
parent 822f723ff1
commit 91462500f0
30 changed files with 2769 additions and 4 deletions
+170
View File
@@ -0,0 +1,170 @@
package scanning
import (
"context"
"encoding/json"
"fmt"
"log"
"regexp"
"time"
"xorm.io/xorm"
"github.com/forgeo/forgebucket/internal/events"
gitdomain "github.com/forgeo/forgebucket/internal/domain/git"
"github.com/forgeo/forgebucket/internal/models"
)
// compiledPattern pairs a pattern's metadata with the regular expression
// compiled from its Raw field. New builds one entry per element of Patterns.
type compiledPattern struct {
// Embedded so Name, Description, Raw and Severity are promoted onto the
// compiled entry.
pattern
// re is the compiled form of the embedded pattern's Raw expression.
re *regexp.Regexp
}
// Scanner subscribes to push.received and scans commit content for secrets.
// Construct it with New and run it with Start.
type Scanner struct {
// db is the xorm engine used to look up repositories and to store and
// query secret-leak findings.
db *xorm.Engine
// bus is the event bus Start subscribes on.
bus events.EventBus
// patterns holds the expressions compiled by New, in the same order as
// Patterns.
patterns []compiledPattern
}
// New builds a Scanner around db and bus, compiling every entry of Patterns
// up front so no regular expression is compiled while scanning.
//
// It returns an error naming the offending pattern if any Raw expression
// fails to compile; in that case no Scanner is returned.
func New(db *xorm.Engine, bus events.EventBus) (*Scanner, error) {
	compiled := make([]compiledPattern, len(Patterns))
	for i := range Patterns {
		expr, err := regexp.Compile(Patterns[i].Raw)
		if err != nil {
			return nil, fmt.Errorf("scanning: compile pattern %q: %w", Patterns[i].Name, err)
		}
		compiled[i] = compiledPattern{pattern: Patterns[i], re: expr}
	}

	scanner := &Scanner{
		db:       db,
		bus:      bus,
		patterns: compiled,
	}
	return scanner, nil
}
// Start subscribes to push.received and then blocks until ctx is cancelled.
//
// Each message is decoded as an events.PushEvent; undecodable messages are
// logged and dropped, and every decoded event is scanned in its own goroutine.
// A subscription failure is logged and Start still waits for ctx. When the
// subscription succeeded it is released as Start returns.
func (s *Scanner) Start(ctx context.Context) {
	onPush := func(_ string, payload []byte) {
		var push events.PushEvent
		if decodeErr := json.Unmarshal(payload, &push); decodeErr != nil {
			log.Printf("scanning: bad push event: %v", decodeErr)
			return
		}
		go s.scanPush(push)
	}

	release, subErr := s.bus.Subscribe(events.SubjectPushReceived, onPush)
	if subErr == nil {
		defer release()
	} else {
		log.Printf("scanning: subscribe: %v", subErr)
	}

	<-ctx.Done()
}
// scanPush scans the content introduced by one push against every compiled
// pattern and stores each hit as a models.SecretLeak row.
//
// Pushes whose After is the all-zero object ID (ref deletion) are skipped. A
// repository that cannot be loaded, or a diff that cannot be produced, ends
// the scan; both failures are logged. Insert failures are logged and the scan
// continues with the next match.
func (s *Scanner) scanPush(evt events.PushEvent) {
	const (
		zeroOID     = "0000000000000000000000000000000000000000"
		shortSHALen = 12 // length of the abbreviated commit ID stored with a finding
		minMatchLen = 6  // shorter matches are treated as likely false positives
		sampleLen   = 40 // maximum stored length of the matched text
	)

	// Branch deletion — nothing to scan.
	if evt.After == zeroOID {
		return
	}

	// Resolve repo. A lookup error is reported rather than being mistaken
	// for "repository does not exist".
	var repo models.Repository
	found, err := s.db.ID(evt.RepoID).Get(&repo)
	if err != nil {
		log.Printf("scanning: load repo %v: %v", evt.RepoID, err)
		return
	}
	if !found {
		return
	}

	// Get the diff content between before and after.
	diffContent, err := s.getDiff(repo.DiskPath, evt.Before, evt.After)
	if err != nil {
		log.Printf("scanning: get diff for repo %s: %v", repo.Name, err)
		return
	}

	// Abbreviate the commit ID for the findings, tolerating IDs that are
	// already shorter than the abbreviation instead of panicking on the slice.
	headSHA := evt.After
	if len(headSHA) > shortSHALen {
		headSHA = headSHA[:shortSHALen]
	}
	now := time.Now().UTC()

	for _, p := range s.patterns {
		// Match on the raw bytes so the whole diff is not copied into a new
		// string once per pattern; only the individual matches are converted.
		for _, raw := range p.re.FindAll(diffContent, -1) {
			if len(raw) < minMatchLen {
				continue
			}
			// NOTE(review): MatchSample keeps up to sampleLen bytes of the
			// matched secret; confirm that storing it unmasked is intended.
			leak := &models.SecretLeak{
				RepoID:      evt.RepoID,
				CommitSHA:   headSHA,
				Ref:         evt.Ref,
				PatternName: p.Name,
				Description: p.Description,
				Severity:    p.Severity,
				MatchSample: truncate(string(raw), sampleLen),
				DetectedAt:  now,
			}
			if _, err := s.db.Insert(leak); err != nil {
				log.Printf("scanning: insert leak for %s: %v", repo.Name, err)
			}
		}
	}
}
// getDiff returns the unified diff of all changes between oldRef and newRef
// in the repository at repoPath.
//
// When oldRef is the all-zero object ID (the ref is being created) there is
// no previous state, so newRef is diffed against git's empty tree. That
// yields the full content reachable at newRef as added lines. Listing the
// tree with ls-tree would only return modes, object IDs and paths, leaving
// file content unscanned.
//
// On failure it returns a nil slice and the error from the git invocation.
func (s *Scanner) getDiff(repoPath, oldRef, newRef string) ([]byte, error) {
	const (
		zeroOID = "0000000000000000000000000000000000000000"
		// emptyTreeOID is the well-known SHA-1 object ID of git's empty tree.
		emptyTreeOID = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
	)

	base := oldRef
	if base == zeroOID {
		base = emptyTreeOID
	}

	// The trailing "--" makes git treat both arguments as revisions, never
	// as paths.
	out, err := gitdomain.Run(repoPath, "diff", "--no-color", "--unified=3", base, newRef, "--")
	if err != nil {
		return nil, err
	}
	return out, nil
}
// ListFindings returns the secret leaks of repository repoID that have not
// been dismissed, ordered by detection time with the newest first.
//
// The returned slice is never nil on success: when nothing matches, an empty
// slice is returned. On a query error the slice is nil and the error is
// passed through.
func (s *Scanner) ListFindings(repoID int64) ([]models.SecretLeak, error) {
	var active []models.SecretLeak

	query := s.db.
		Where("repo_id = ? AND dismissed = ?", repoID, false).
		OrderBy("detected_at DESC")
	err := query.Find(&active)

	switch {
	case err != nil:
		return nil, err
	case active == nil:
		return []models.SecretLeak{}, nil
	}
	return active, nil
}
// DismissFindings marks the leak with ID leakID as dismissed so it no longer
// shows up in ListFindings, recording who dismissed it and the current UTC
// time.
//
// It returns the database error if the update fails, or a "not found" error
// when the update touched no row.
func (s *Scanner) DismissFindings(leakID int64, dismissedBy string) error {
	dismissedAt := time.Now().UTC()
	patch := models.SecretLeak{
		Dismissed:   true,
		DismissedBy: dismissedBy,
		DismissedAt: &dismissedAt,
	}

	// Restrict the update to the dismissal columns.
	rows, err := s.db.ID(leakID).
		Cols("dismissed", "dismissed_by", "dismissed_at").
		Update(&patch)

	switch {
	case err != nil:
		return err
	case rows == 0:
		return fmt.Errorf("leak %d not found", leakID)
	default:
		return nil
	}
}
// truncate shortens s to at most maxLen bytes for safe display and appends
// "..." when anything was cut off. Strings that already fit are returned
// unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: it is moved back to
// the previous rune boundary, so valid input always yields valid output. A
// negative maxLen is treated as 0.
func truncate(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}

	cut := maxLen
	// UTF-8 continuation bytes have the bit pattern 10xxxxxx; step back until
	// the byte at the cut position starts a rune.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}
+106
View File
@@ -0,0 +1,106 @@
package scanning
// pattern describes one secret-detection rule: the source text of a regular
// expression together with the metadata recorded for each match.
// Nothing is compiled here; New compiles every Raw expression once when the
// Scanner is constructed.
type pattern struct {
// Name is the rule's identifier; it is used in compile errors and stored
// with each finding.
Name string
// Description is the human-readable label stored with each finding.
Description string
Raw string // the regex source text (compiled by New)
Severity string // "high", "medium", "low"
}
// Patterns is the list of secret patterns checked against every pushed commit.
// Entries are ordered by severity — all "high" rules first, then "medium".
// Keep new entries in that order.
var Patterns = []pattern{
	// --- high severity ---
	{
		Name:        "aws-access-key-id",
		Description: "AWS Access Key ID",
		Raw:         `AKIA[0-9A-Z]{16}`,
		Severity:    "high",
	},
	{
		Name:        "aws-secret-key",
		Description: "AWS Secret Access Key",
		Raw:         `(?i)aws[_-]?(secret|private)[_-]?(access[_-]?)?key['"]?\s*[:=]\s*['"]?[A-Za-z0-9\/+=]{40}`,
		Severity:    "high",
	},
	{
		Name:        "github-token",
		Description: "GitHub Personal Access Token",
		Raw:         `gh[pousr]_[A-Za-z0-9_]{36,}`,
		Severity:    "high",
	},
	{
		Name:        "gitlab-token",
		Description: "GitLab Personal Access Token",
		Raw:         `glpat-[A-Za-z0-9\-_]{20,}`,
		Severity:    "high",
	},
	{
		Name:        "generic-api-key",
		Description: "Generic API key assignment (high entropy)",
		Raw:         `(?i)(api[_-]?key|apikey|api[_-]?secret|api[_-]?token)['"]?\s*[:=]\s*['"][A-Za-z0-9_\-\.]{20,64}`,
		Severity:    "high",
	},
	{
		Name:        "bearer-token",
		Description: "Bearer token in HTTP header",
		Raw:         `(?i)authorization:\s*bearer\s+[A-Za-z0-9_\-\.]{20,}`,
		Severity:    "high",
	},
	{
		Name:        "slack-token",
		Description: "Slack Bot / Webhook token",
		Raw:         `xox[baprs]-[A-Za-z0-9\-]{10,}`,
		Severity:    "high",
	},
	{
		Name:        "google-api-key",
		Description: "Google API Key",
		Raw:         `AIza[0-9A-Za-z\-_]{35}`,
		Severity:    "high",
	},
	{
		// NOTE(review): this expression matches the
		// "<digits>-<32 chars>.apps.googleusercontent.com" form, which looks
		// like an OAuth client ID rather than a service account — confirm
		// the name and description.
		Name:        "google-service-account",
		Description: "Google Service Account",
		Raw:         `[0-9]+-[0-9a-z]{32}\.apps\.googleusercontent\.com`,
		Severity:    "high",
	},
	{
		Name:        "ssh-private-key",
		Description: "SSH / TLS private key embed",
		Raw:         `-----BEGIN\s+(RSA|EC|OPENSSH|DSA|PRIVATE)(\s+PRIVATE)?\s+KEY-----`,
		Severity:    "high",
	},
	{
		Name:        "npm-token",
		Description: "npm access token",
		Raw:         `npm_[A-Za-z0-9]{36,}`,
		Severity:    "high",
	},
	{
		Name:        "pg-connection-string",
		Description: "PostgreSQL connection string",
		Raw:         `postgres(ql)?://[A-Za-z0-9_]+:[^@\s]+@`,
		Severity:    "high",
	},
	{
		Name:        "redis-connection-string",
		Description: "Redis connection string with password",
		Raw:         `redis://[^:@\s]+:[^@\s]+@`,
		Severity:    "high",
	},

	// --- medium severity ---
	{
		Name:        "jwt-token",
		Description: "JSON Web Token (JWT)",
		Raw:         `eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}`,
		Severity:    "medium",
	},
	{
		Name:        "generic-password",
		Description: "Generic password/secret field assignment",
		Raw:         `(?i)(password|passwd|pwd|secret)['"]?\s*[:=]\s*['"][A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?]{8,}`,
		Severity:    "medium",
	},
}
+118
View File
@@ -0,0 +1,118 @@
package scanning
import (
"regexp"
"testing"
)
// TestPatternsCompile verifies that the Raw expression of every entry in
// Patterns is a valid regular expression.
func TestPatternsCompile(t *testing.T) {
	for i := range Patterns {
		entry := Patterns[i]
		if _, compileErr := regexp.Compile(entry.Raw); compileErr != nil {
			t.Errorf("pattern %q failed to compile: %v", entry.Name, compileErr)
		}
	}
}
// TestPatternsHaveNames verifies the metadata of every entry in Patterns:
// a non-empty name, a non-empty description, and a severity that is one of
// "high", "medium" or "low".
func TestPatternsHaveNames(t *testing.T) {
	for i := range Patterns {
		entry := Patterns[i]

		if len(entry.Name) == 0 {
			t.Error("pattern with empty name")
		}
		if len(entry.Description) == 0 {
			t.Errorf("pattern %q has empty description", entry.Name)
		}

		switch entry.Severity {
		case "high", "medium", "low":
			// valid severity
		default:
			t.Errorf("pattern %q has invalid severity %q", entry.Name, entry.Severity)
		}
	}
}
// TestAWSAccessKey checks the AWS access key ID expression against samples
// that must and must not match. The expression is a local copy of the one
// used by the aws-access-key-id pattern.
func TestAWSAccessKey(t *testing.T) {
	awsKeyRE := regexp.MustCompile(`AKIA[0-9A-Z]{16}`)

	samples := []struct {
		input string
		match bool
	}{
		{input: "AKIAIOSFODNN7EXAMPLE", match: true},
		{input: "AKIA1234567890123456", match: true},
		{input: "not-a-key", match: false},
		{input: "SKIA1234567890123456", match: false},
	}

	for i := range samples {
		in, want := samples[i].input, samples[i].match
		if got := awsKeyRE.MatchString(in); got != want {
			t.Errorf("input %q: got %v, want %v", in, got, want)
		}
	}
}
// TestGitHubToken checks the GitHub token expression against one sample per
// accepted prefix (ghp_, gho_, ghu_, ghs_, ghr_) plus inputs that must not
// match. The expression is a local copy of the one used by the github-token
// pattern.
func TestGitHubToken(t *testing.T) {
	tokenRE := regexp.MustCompile(`gh[pousr]_[A-Za-z0-9_]{36,}`)

	samples := []struct {
		input string
		match bool
	}{
		{input: "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", match: true},
		{input: "gho_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", match: true},
		{input: "ghu_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", match: true},
		{input: "ghs_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", match: true},
		{input: "ghr_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", match: true},
		{input: "not-a-token", match: false},
		{input: "ghp_short", match: false},
	}

	for i := range samples {
		in, want := samples[i].input, samples[i].match
		if got := tokenRE.MatchString(in); got != want {
			t.Errorf("input %q: got %v, want %v", in, got, want)
		}
	}
}
// TestPrivateKey checks the private-key header expression against PEM
// "BEGIN ... KEY" lines that must match and inputs (a certificate header,
// plain text) that must not. The expression is a local copy of the one used
// by the ssh-private-key pattern.
func TestPrivateKey(t *testing.T) {
	headerRE := regexp.MustCompile(`-----BEGIN\s+(RSA|EC|OPENSSH|DSA|PRIVATE)(\s+PRIVATE)?\s+KEY-----`)

	samples := []struct {
		input string
		match bool
	}{
		{input: "-----BEGIN RSA PRIVATE KEY-----", match: true},
		{input: "-----BEGIN EC PRIVATE KEY-----", match: true},
		{input: "-----BEGIN OPENSSH PRIVATE KEY-----", match: true},
		{input: "-----BEGIN DSA PRIVATE KEY-----", match: true},
		{input: "-----BEGIN PRIVATE KEY-----", match: true},
		{input: "-----BEGIN CERTIFICATE-----", match: false},
		{input: "public key is here", match: false},
	}

	for i := range samples {
		in, want := samples[i].input, samples[i].match
		if got := headerRE.MatchString(in); got != want {
			t.Errorf("input %q: got %v, want %v", in, got, want)
		}
	}
}
// TestJWT checks the JSON Web Token expression against a three-segment token
// that must match and a plain string that must not. The expression is a
// local copy of the one used by the jwt-token pattern.
func TestJWT(t *testing.T) {
	jwtRE := regexp.MustCompile(`eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}`)

	samples := []struct {
		input string
		match bool
	}{
		{input: "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNnZctV9XjvP_oGZQZxGdAqVxQ", match: true},
		{input: "not-a-jwt", match: false},
	}

	for i := range samples {
		in, want := samples[i].input, samples[i].match
		if got := jwtRE.MatchString(in); got != want {
			t.Errorf("input %q: got %v, want %v", in, got, want)
		}
	}
}
// TestTruncate checks that truncate leaves a string shorter than the limit
// untouched and cuts a longer one at the limit with a "..." suffix.
func TestTruncate(t *testing.T) {
	if got := truncate("hello", 10); got != "hello" {
		t.Error("should not truncate short strings")
	}

	const long = "hello world this is long"
	if got := truncate(long, 10); got != "hello worl..." {
		t.Errorf("got %q", got)
	}
}