added artifacts
This commit is contained in:
@@ -0,0 +1,170 @@
|
||||
package scanning
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"regexp"
	"time"
	"unicode/utf8"

	"xorm.io/xorm"

	gitdomain "github.com/forgeo/forgebucket/internal/domain/git"
	"github.com/forgeo/forgebucket/internal/events"
	"github.com/forgeo/forgebucket/internal/models"
)
|
||||
|
||||
// compiledPattern is a pre-compiled regex pattern.
|
||||
type compiledPattern struct {
|
||||
pattern
|
||||
re *regexp.Regexp
|
||||
}
|
||||
|
||||
// Scanner subscribes to push.received and scans commit content for secrets.
|
||||
type Scanner struct {
|
||||
db *xorm.Engine
|
||||
bus events.EventBus
|
||||
patterns []compiledPattern
|
||||
}
|
||||
|
||||
// New creates a Scanner with all patterns pre-compiled.
|
||||
func New(db *xorm.Engine, bus events.EventBus) (*Scanner, error) {
|
||||
cp := make([]compiledPattern, 0, len(Patterns))
|
||||
for _, p := range Patterns {
|
||||
re, err := regexp.Compile(p.Raw)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning: compile pattern %q: %w", p.Name, err)
|
||||
}
|
||||
cp = append(cp, compiledPattern{pattern: p, re: re})
|
||||
}
|
||||
return &Scanner{db: db, bus: bus, patterns: cp}, nil
|
||||
}
|
||||
|
||||
// Start subscribes to push.received and blocks until ctx is cancelled.
|
||||
func (s *Scanner) Start(ctx context.Context) {
|
||||
unsub, err := s.bus.Subscribe(events.SubjectPushReceived, func(_ string, data []byte) {
|
||||
var evt events.PushEvent
|
||||
if err := json.Unmarshal(data, &evt); err != nil {
|
||||
log.Printf("scanning: bad push event: %v", err)
|
||||
return
|
||||
}
|
||||
go s.scanPush(evt)
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf("scanning: subscribe: %v", err)
|
||||
} else {
|
||||
defer unsub()
|
||||
}
|
||||
<-ctx.Done()
|
||||
}
|
||||
|
||||
// scanPush scans the diff between before and after for all patterns.
|
||||
func (s *Scanner) scanPush(evt events.PushEvent) {
|
||||
// Branch deletion — nothing to scan.
|
||||
zeroOID := "0000000000000000000000000000000000000000"
|
||||
if evt.After == zeroOID {
|
||||
return
|
||||
}
|
||||
|
||||
// Resolve repo.
|
||||
var repo models.Repository
|
||||
if found, _ := s.db.ID(evt.RepoID).Get(&repo); !found {
|
||||
return
|
||||
}
|
||||
|
||||
// Get the diff content between before and after.
|
||||
diffContent, err := s.getDiff(repo.DiskPath, evt.Before, evt.After)
|
||||
if err != nil {
|
||||
log.Printf("scanning: get diff for repo %s: %v", repo.Name, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Determine the commit SHA for the findings.
|
||||
headSHA := evt.After
|
||||
|
||||
now := time.Now().UTC()
|
||||
|
||||
for _, p := range s.patterns {
|
||||
matches := p.re.FindAllString(string(diffContent), -1)
|
||||
for _, match := range matches {
|
||||
// Skip very short matches (likely false positives).
|
||||
if len(match) < 6 {
|
||||
continue
|
||||
}
|
||||
|
||||
leak := &models.SecretLeak{
|
||||
RepoID: evt.RepoID,
|
||||
CommitSHA: headSHA[:12],
|
||||
Ref: evt.Ref,
|
||||
PatternName: p.Name,
|
||||
Description: p.Description,
|
||||
Severity: p.Severity,
|
||||
MatchSample: truncate(match, 40),
|
||||
DetectedAt: now,
|
||||
}
|
||||
if _, err := s.db.Insert(leak); err != nil {
|
||||
log.Printf("scanning: insert leak for %s: %v", repo.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// getDiff returns the unified diff of all changes between two refs.
|
||||
func (s *Scanner) getDiff(repoPath, oldRef, newRef string) ([]byte, error) {
|
||||
// If oldRef is the zero OID (new branch), just get the initial commit content.
|
||||
zeroOID := "0000000000000000000000000000000000000000"
|
||||
if oldRef == zeroOID {
|
||||
// Show the entire tree at the new ref.
|
||||
out, err := gitdomain.Run(repoPath, "ls-tree", "-r", newRef)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
out, err := gitdomain.Run(repoPath, "diff", "--no-color", "--unified=3", oldRef, newRef)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// ListFindings returns all active secret leaks for a repo, newest first.
|
||||
func (s *Scanner) ListFindings(repoID int64) ([]models.SecretLeak, error) {
|
||||
var leaks []models.SecretLeak
|
||||
if err := s.db.Where("repo_id = ? AND dismissed = ?", repoID, false).
|
||||
OrderBy("detected_at DESC").Find(&leaks); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if leaks == nil {
|
||||
leaks = []models.SecretLeak{}
|
||||
}
|
||||
return leaks, nil
|
||||
}
|
||||
|
||||
// DismissFindings acknowledges a leak so it no longer appears in active lists.
|
||||
func (s *Scanner) DismissFindings(leakID int64, dismissedBy string) error {
|
||||
now := time.Now().UTC()
|
||||
affected, err := s.db.ID(leakID).Cols("dismissed", "dismissed_by", "dismissed_at").
|
||||
Update(&models.SecretLeak{
|
||||
Dismissed: true,
|
||||
DismissedBy: dismissedBy,
|
||||
DismissedAt: &now,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if affected == 0 {
|
||||
return fmt.Errorf("leak %d not found", leakID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// truncate shortens s to at most maxLen bytes for safe display, appending
// "..." when anything was cut off. Strings that already fit are returned
// unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte maxLen is a
// continuation byte, the cut moves back to the start of that rune, so the
// result stays valid UTF-8 whenever s is. A negative maxLen is treated as 0.
func truncate(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}

	cut := maxLen
	// A rune occupies at most utf8.UTFMax bytes, so at most UTFMax-1 steps
	// back are needed to reach its first byte. The bound also keeps invalid
	// input from collapsing the whole sample.
	for back := 0; back < utf8.UTFMax-1 && cut > 0 && !utf8.RuneStart(s[cut]); back++ {
		cut--
	}
	return s[:cut] + "..."
}
|
||||
@@ -0,0 +1,106 @@
|
||||
package scanning
|
||||
|
||||
// pattern describes one secret-detection rule: an uncompiled regular
// expression plus the metadata reported with each finding. The expressions
// are kept as raw strings here; the Scanner compiles them all once in New.
type pattern struct {
	Name        string // stable identifier, e.g. "aws-access-key-id"
	Description string // human-readable label for the finding
	Raw         string // regular expression source (compiled by New)
	Severity    string // "high", "medium", "low"
}

// Patterns is the list of secret patterns checked against every pushed commit.
// Entries are ordered by severity — all "high" rules first, then "medium".
var Patterns = []pattern{
	{
		Name:        "aws-access-key-id",
		Description: "AWS Access Key ID",
		Raw:         `AKIA[0-9A-Z]{16}`,
		Severity:    "high",
	},
	{
		Name:        "aws-secret-key",
		Description: "AWS Secret Access Key",
		Raw:         `(?i)aws[_-]?(secret|private)[_-]?(access[_-]?)?key['"]?\s*[:=]\s*['"]?[A-Za-z0-9\/+=]{40}`,
		Severity:    "high",
	},
	{
		Name:        "github-token",
		Description: "GitHub Personal Access Token",
		Raw:         `gh[pousr]_[A-Za-z0-9_]{36,}`,
		Severity:    "high",
	},
	{
		Name:        "gitlab-token",
		Description: "GitLab Personal Access Token",
		Raw:         `glpat-[A-Za-z0-9\-_]{20,}`,
		Severity:    "high",
	},
	{
		Name:        "generic-api-key",
		Description: "Generic API key assignment (high entropy)",
		Raw:         `(?i)(api[_-]?key|apikey|api[_-]?secret|api[_-]?token)['"]?\s*[:=]\s*['"][A-Za-z0-9_\-\.]{20,64}`,
		Severity:    "high",
	},
	{
		Name:        "bearer-token",
		Description: "Bearer token in HTTP header",
		Raw:         `(?i)authorization:\s*bearer\s+[A-Za-z0-9_\-\.]{20,}`,
		Severity:    "high",
	},
	{
		Name:        "slack-token",
		Description: "Slack Bot / Webhook token",
		Raw:         `xox[baprs]-[A-Za-z0-9\-]{10,}`,
		Severity:    "high",
	},
	{
		Name:        "google-api-key",
		Description: "Google API Key",
		Raw:         `AIza[0-9A-Za-z\-_]{35}`,
		Severity:    "high",
	},
	{
		Name:        "google-service-account",
		Description: "Google Service Account",
		Raw:         `[0-9]+-[0-9a-z]{32}\.apps\.googleusercontent\.com`,
		Severity:    "high",
	},
	{
		Name:        "ssh-private-key",
		Description: "SSH / TLS private key embed",
		Raw:         `-----BEGIN\s+(RSA|EC|OPENSSH|DSA|PRIVATE)(\s+PRIVATE)?\s+KEY-----`,
		Severity:    "high",
	},
	{
		Name:        "npm-token",
		Description: "npm access token",
		Raw:         `npm_[A-Za-z0-9]{36,}`,
		Severity:    "high",
	},
	{
		Name:        "pg-connection-string",
		Description: "PostgreSQL connection string",
		Raw:         `postgres(ql)?://[A-Za-z0-9_]+:[^@\s]+@`,
		Severity:    "high",
	},
	{
		Name:        "redis-connection-string",
		Description: "Redis connection string with password",
		Raw:         `redis://[^:@\s]+:[^@\s]+@`,
		Severity:    "high",
	},
	{
		Name:        "jwt-token",
		Description: "JSON Web Token (JWT)",
		Raw:         `eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}`,
		Severity:    "medium",
	},
	{
		Name:        "generic-password",
		Description: "Generic password/secret field assignment",
		Raw:         `(?i)(password|passwd|pwd|secret)['"]?\s*[:=]\s*['"][A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?]{8,}`,
		Severity:    "medium",
	},
}
|
||||
@@ -0,0 +1,118 @@
|
||||
package scanning
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPatternsCompile(t *testing.T) {
|
||||
for _, p := range Patterns {
|
||||
_, err := regexp.Compile(p.Raw)
|
||||
if err != nil {
|
||||
t.Errorf("pattern %q failed to compile: %v", p.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPatternsHaveNames(t *testing.T) {
|
||||
for _, p := range Patterns {
|
||||
if p.Name == "" {
|
||||
t.Error("pattern with empty name")
|
||||
}
|
||||
if p.Description == "" {
|
||||
t.Errorf("pattern %q has empty description", p.Name)
|
||||
}
|
||||
if p.Severity != "high" && p.Severity != "medium" && p.Severity != "low" {
|
||||
t.Errorf("pattern %q has invalid severity %q", p.Name, p.Severity)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAWSAccessKey(t *testing.T) {
|
||||
re := regexp.MustCompile(`AKIA[0-9A-Z]{16}`)
|
||||
cases := []struct {
|
||||
input string
|
||||
match bool
|
||||
}{
|
||||
{"AKIAIOSFODNN7EXAMPLE", true},
|
||||
{"AKIA1234567890123456", true},
|
||||
{"not-a-key", false},
|
||||
{"SKIA1234567890123456", false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
got := re.MatchString(tc.input)
|
||||
if got != tc.match {
|
||||
t.Errorf("input %q: got %v, want %v", tc.input, got, tc.match)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGitHubToken(t *testing.T) {
|
||||
re := regexp.MustCompile(`gh[pousr]_[A-Za-z0-9_]{36,}`)
|
||||
cases := []struct {
|
||||
input string
|
||||
match bool
|
||||
}{
|
||||
{"ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true},
|
||||
{"gho_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true},
|
||||
{"ghu_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true},
|
||||
{"ghs_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true},
|
||||
{"ghr_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true},
|
||||
{"not-a-token", false},
|
||||
{"ghp_short", false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
got := re.MatchString(tc.input)
|
||||
if got != tc.match {
|
||||
t.Errorf("input %q: got %v, want %v", tc.input, got, tc.match)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrivateKey(t *testing.T) {
|
||||
re := regexp.MustCompile(`-----BEGIN\s+(RSA|EC|OPENSSH|DSA|PRIVATE)(\s+PRIVATE)?\s+KEY-----`)
|
||||
cases := []struct {
|
||||
input string
|
||||
match bool
|
||||
}{
|
||||
{"-----BEGIN RSA PRIVATE KEY-----", true},
|
||||
{"-----BEGIN EC PRIVATE KEY-----", true},
|
||||
{"-----BEGIN OPENSSH PRIVATE KEY-----", true},
|
||||
{"-----BEGIN DSA PRIVATE KEY-----", true},
|
||||
{"-----BEGIN PRIVATE KEY-----", true},
|
||||
{"-----BEGIN CERTIFICATE-----", false},
|
||||
{"public key is here", false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
got := re.MatchString(tc.input)
|
||||
if got != tc.match {
|
||||
t.Errorf("input %q: got %v, want %v", tc.input, got, tc.match)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestJWT(t *testing.T) {
|
||||
re := regexp.MustCompile(`eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}`)
|
||||
cases := []struct {
|
||||
input string
|
||||
match bool
|
||||
}{
|
||||
{"eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNnZctV9XjvP_oGZQZxGdAqVxQ", true},
|
||||
{"not-a-jwt", false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
got := re.MatchString(tc.input)
|
||||
if got != tc.match {
|
||||
t.Errorf("input %q: got %v, want %v", tc.input, got, tc.match)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncate(t *testing.T) {
|
||||
if truncate("hello", 10) != "hello" {
|
||||
t.Error("should not truncate short strings")
|
||||
}
|
||||
if truncate("hello world this is long", 10) != "hello worl..." {
|
||||
t.Errorf("got %q", truncate("hello world this is long", 10))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user