added artifacts

This commit is contained in:
2026-05-12 22:34:26 +02:00
parent 822f723ff1
commit 91462500f0
30 changed files with 2769 additions and 4 deletions
+224
View File
@@ -0,0 +1,224 @@
package federation
import (
"encoding/json"
"fmt"
"log"
"strings"
"time"
"xorm.io/xorm"
"github.com/forgeo/forgebucket/internal/models"
)
// RepoAPID returns the ActivityPub actor ID for a repository.
// Format: {instanceURL}/repos/{owner}/{name}
func RepoAPID(instanceURL, owner, name string) string {
return strings.TrimRight(instanceURL, "/") + "/repos/" + owner + "/" + name
}
// RepoActorJSON builds the JSON-LD actor document for a ForgeFed Repository actor.
func RepoActorJSON(owner, name, description, instanceURL string) map[string]any {
apid := RepoAPID(instanceURL, owner, name)
return map[string]any{
"@context": []any{
"https://www.w3.org/ns/activitystreams",
"https://w3id.org/security/v1",
map[string]string{
"Repository": "https://www.w3.org/ns/activitystreams#Repository",
},
},
"id": apid,
"type": "Repository",
"preferredUsername": name,
"name": owner + "/" + name,
"summary": description,
"inbox": apid + "/inbox",
"outbox": apid + "/outbox",
"followers": apid + "/followers",
"following": apid + "/following",
}
}
// HandleCreatePullRequest processes an incoming Create activity whose object
// is a PullRequest (per the ForgeFed vocabulary). It creates a local PR record
// in the target repository for the cross-instance proposal.
func HandleCreatePullRequest(db *xorm.Engine, body []byte, instanceURL string) error {
var activity struct {
Actor string `json:"actor"`
Object struct {
Type string `json:"type"`
Summary string `json:"summary"`
Content string `json:"content"`
Source *struct {
ID string `json:"id"`
Name string `json:"name"`
} `json:"source"`
Target *struct {
ID string `json:"id"`
Name string `json:"name"`
} `json:"target"`
} `json:"object"`
}
if err := json.Unmarshal(body, &activity); err != nil {
return fmt.Errorf("parse activity: %w", err)
}
if activity.Object.Type != "PullRequest" {
return nil
}
// Extract target repository info from the object's target.
targetID := activity.Object.Target.ID
targetParts := strings.Split(strings.TrimRight(targetID, "/"), "/")
if len(targetParts) < 2 {
return fmt.Errorf("cannot parse target repo APID: %s", targetID)
}
// Last two segments should be owner/repo-name.
repoOwner := targetParts[len(targetParts)-2]
repoName := targetParts[len(targetParts)-1]
// Resolve the target repository.
var repo models.Repository
found, err := db.Where("name = ?", repoName).
Join("INNER", "user", "repository.owner_id = user.id AND user.username = ?", repoOwner).
Get(&repo)
if err != nil {
return fmt.Errorf("database error: %w", err)
}
if !found {
return fmt.Errorf("target repo %s/%s not found on this instance", repoOwner, repoName)
}
// Resolve or create a FederationActor for the repo owner (needed for key ops).
var ownerUser models.User
if found, _ := db.Where("username = ?", repoOwner).Get(&ownerUser); !found {
return fmt.Errorf("owner user %s not found", repoOwner)
}
localActor, err := GetOrCreate(db, ownerUser.ID, repoOwner, instanceURL)
if err != nil {
return fmt.Errorf("get actor: %w", err)
}
// Determine the PR title and body.
title := activity.Object.Summary
if title == "" {
title = fmt.Sprintf("Cross-instance PR from %s", activity.Actor)
}
bodyContent := activity.Object.Content
if bodyContent == "" {
bodyContent = fmt.Sprintf("Pull request proposed via ActivityPub from %s", activity.Actor)
}
// Create the PR. For cross-instance PRs, authorID is set to the target
// repo owner (we can't create a user for the remote actor automatically).
// The RemoteSource field records the source repository APID.
pr := &models.PullRequest{
RepoID: repo.ID,
AuthorID: ownerUser.ID,
Title: title,
Body: bodyContent,
SourceBranch: "refs/for/main",
TargetBranch: "main",
Status: models.PRStatusOpen,
RemoteSource: activity.Actor,
}
// Try to extract source branch from the source repo.
if activity.Object.Source != nil {
sourceID := activity.Object.Source.ID
if sourceID != "" {
pr.RemoteSource = sourceID
}
if activity.Object.Source.Name != "" {
pr.SourceBranch = activity.Object.Source.Name
}
}
if _, err := db.Insert(pr); err != nil {
return fmt.Errorf("insert PR: %w", err)
}
// Persist the outbound Accept for the PR activity so the remote knows
// we received it (we auto-accept all incoming PRs).
accept := map[string]any{
"@context": "https://www.w3.org/ns/activitystreams",
"id": localActor.APID + "/activities/accept-pr-" + fmt.Sprint(time.Now().UnixNano()),
"type": "Accept",
"actor": localActor.APID,
}
acceptJSON, _ := json.Marshal(accept)
db.Insert(&models.FederationActivity{ //nolint:errcheck
ActorAPID: localActor.APID,
Type: "Accept",
ObjectJSON: string(acceptJSON),
Direction: "outbound",
RemoteActor: activity.Actor,
Published: time.Now().UTC(),
})
log.Printf("forgefed: created PR %d from cross-instance actor %s", pr.ID, activity.Actor)
return nil
}
// SendCreatePullRequest delivers a Create(PullRequest) activity to a remote
// instance's inbox. The remote inbox URL is derived from the forked-from repo's
// APID by appending /inbox.
func SendCreatePullRequest(db *xorm.Engine, localActor *models.FederationActor, pr *models.PullRequest, remoteAPID, instanceURL string) error {
// Build the Create(PullRequest) activity.
activity := map[string]any{
"@context": "https://www.w3.org/ns/activitystreams",
"id": localActor.APID + "/activities/create-pr-" + fmt.Sprint(time.Now().UnixNano()),
"type": "Create",
"actor": localActor.APID,
"object": map[string]any{
"type": "PullRequest",
"id": localActor.APID + "/pull-requests/" + fmt.Sprint(pr.ID),
"summary": pr.Title,
"content": pr.Body,
"source": map[string]any{
"type": "Repository",
"id": localActor.APID,
},
"target": map[string]any{
"type": "Repository",
"id": remoteAPID,
},
},
"to": []string{remoteAPID + "/inbox", "https://www.w3.org/ns/activitystreams#Public"},
}
remoteInbox := strings.TrimSuffix(remoteAPID, "/") + "/inbox"
if err := DeliverActivity(localActor, activity, remoteInbox); err != nil {
return fmt.Errorf("deliver PR to %s: %w", remoteInbox, err)
}
actJSON, _ := json.Marshal(activity)
db.Insert(&models.FederationActivity{ //nolint:errcheck
ActorAPID: localActor.APID,
Type: "Create",
ObjectJSON: string(actJSON),
Direction: "outbound",
RemoteActor: remoteAPID,
Published: time.Now().UTC(),
})
log.Printf("forgefed: sent Create(PullRequest) for PR %d to %s", pr.ID, remoteInbox)
return nil
}
// IsCreatePullRequest checks whether the given body is a Create(PullRequest) activity.
func IsCreatePullRequest(body []byte) bool {
var check struct {
Type string `json:"type"`
Object struct {
Type string `json:"type"`
} `json:"object"`
}
if err := json.Unmarshal(body, &check); err != nil {
return false
}
return check.Type == "Create" && check.Object.Type == "PullRequest"
}
+100
View File
@@ -0,0 +1,100 @@
package federation
import (
"testing"
)
func TestRepoAPID(t *testing.T) {
apid := RepoAPID("https://example.com", "alice", "myrepo")
expected := "https://example.com/repos/alice/myrepo"
if apid != expected {
t.Errorf("got %q, want %q", apid, expected)
}
}
func TestRepoAPID_TrailingSlash(t *testing.T) {
apid := RepoAPID("https://example.com/", "bob", "app")
expected := "https://example.com/repos/bob/app"
if apid != expected {
t.Errorf("got %q, want %q", apid, expected)
}
}
func TestRepoActorJSON(t *testing.T) {
doc := RepoActorJSON("alice", "myrepo", "A cool repo", "https://example.com")
if doc["type"] != "Repository" {
t.Errorf("type = %v, want Repository", doc["type"])
}
if doc["preferredUsername"] != "myrepo" {
t.Errorf("preferredUsername = %v", doc["preferredUsername"])
}
if doc["name"] != "alice/myrepo" {
t.Errorf("name = %v", doc["name"])
}
if doc["summary"] != "A cool repo" {
t.Errorf("summary = %v", doc["summary"])
}
inbox, ok := doc["inbox"].(string)
if !ok || inbox != "https://example.com/repos/alice/myrepo/inbox" {
t.Errorf("inbox = %v", inbox)
}
outbox, ok := doc["outbox"].(string)
if !ok || outbox != "https://example.com/repos/alice/myrepo/outbox" {
t.Errorf("outbox = %v", outbox)
}
}
func TestIsCreatePullRequest(t *testing.T) {
tests := []struct {
name string
body []byte
want bool
}{
{
name: "valid Create(PullRequest)",
body: []byte(`{"type":"Create","object":{"type":"PullRequest","summary":"fix bug"}}`),
want: true,
},
{
name: "Create with non-PR object",
body: []byte(`{"type":"Create","object":{"type":"Note"}}`),
want: false,
},
{
name: "Follow activity",
body: []byte(`{"type":"Follow","object":"https://example.com/users/alice"}`),
want: false,
},
{
name: "invalid JSON",
body: []byte(`not json`),
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := IsCreatePullRequest(tt.body); got != tt.want {
t.Errorf("IsCreatePullRequest() = %v, want %v", got, tt.want)
}
})
}
}
func TestExtractInstanceURL(t *testing.T) {
tests := []struct {
apid string
want string
}{
{"https://example.com/users/alice", "https://example.com"},
{"http://localhost:8080/users/bob", "http://localhost:8080"},
{"https://forge.example.org/users/charlie", "https://forge.example.org"},
}
for _, tt := range tests {
t.Run(tt.apid, func(t *testing.T) {
if got := extractInstanceURL(tt.apid); got != tt.want {
t.Errorf("extractInstanceURL() = %q, want %q", got, tt.want)
}
})
}
}
+21
View File
@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"log"
"strings"
"time"
"xorm.io/xorm"
@@ -39,6 +40,16 @@ func Receive(db *xorm.Engine, localActor *models.FederationActor, body []byte) e
handleAccept(db, localActor, activity)
case "Undo":
handleUndo(db, localActor, activity)
case "Create":
if IsCreatePullRequest(body) {
// Derive instanceURL from the local actor's APID.
instanceURL := extractInstanceURL(localActor.APID)
if err := HandleCreatePullRequest(db, body, instanceURL); err != nil {
log.Printf("federation: handle Create(PullRequest): %v", err)
}
} else {
log.Printf("federation: received Create activity from %s (non-PR, skipped)", actorAPID)
}
default:
log.Printf("federation: received unhandled activity type %q from %s", actType, actorAPID)
}
@@ -111,3 +122,13 @@ func mustJSON(v any) string {
b, _ := json.Marshal(v)
return string(b)
}
func extractInstanceURL(apid string) string {
// apid is like "https://example.com/users/alice"
// Return "https://example.com"
parts := strings.SplitN(apid, "/", 4)
if len(parts) >= 3 {
return parts[0] + "//" + parts[2]
}
return apid
}
+8
View File
@@ -283,6 +283,14 @@ func RepoSize(repoPath string) int64 {
return total
}
// Run executes a git command in repoPath with discrete arguments and returns
// the raw stdout. WARNING: args must be constant literals or strictly validated
// — no user-controlled values belong here. This is the public equivalent of the
// internal run() helper and carries the same safety guarantees.
func Run(repoPath string, args ...string) ([]byte, error) {
return run(repoPath, args...)
}
// RevParse resolves a ref (branch name, tag, or SHA) to its full commit SHA.
func RevParse(repoPath, ref string) (string, error) {
out, err := run(repoPath, "rev-parse", "--verify", ref)
+374
View File
@@ -0,0 +1,374 @@
// Package oci implements an OCI Distribution Specification v1.1 registry.
// https://github.com/opencontainers/distribution-spec/blob/main/spec.md
//
// Storage layout under ociRoot:
//
// blobs/sha256/<hex64> — content-addressable layer/config blobs
// uploads/<uuid> — temporary files for in-progress chunked uploads
package oci
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
// Registry manages the on-disk blob store and is used by the HTTP handler.
type Registry struct {
root string // absolute path to the OCI storage root
}
// New creates a Registry rooted at ociRoot, creating the directory tree if needed.
func New(ociRoot string) (*Registry, error) {
for _, sub := range []string{"blobs/sha256", "uploads"} {
if err := os.MkdirAll(filepath.Join(ociRoot, sub), 0755); err != nil {
return nil, fmt.Errorf("oci: init storage %s: %w", sub, err)
}
}
return &Registry{root: ociRoot}, nil
}
// Root returns the storage root path.
func (r *Registry) Root() string { return r.root }
// ─── Blob paths ───────────────────────────────────────────────────────────────
// BlobPath returns the filesystem path for a blob identified by its digest.
// digest must be in the form "sha256:<hex>".
func (r *Registry) BlobPath(digest string) (string, error) {
hex, err := digestHex(digest)
if err != nil {
return "", err
}
return filepath.Join(r.root, "blobs", "sha256", hex), nil
}
// UploadPath returns the filesystem path for a chunked upload session.
func (r *Registry) UploadPath(uploadID string) string {
return filepath.Join(r.root, "uploads", sanitiseID(uploadID))
}
// BlobExists reports whether a blob with the given digest exists on disk.
func (r *Registry) BlobExists(digest string) bool {
p, err := r.BlobPath(digest)
if err != nil {
return false
}
_, err = os.Stat(p)
return err == nil
}
// BlobSize returns the size of the blob in bytes, or -1 if it doesn't exist.
func (r *Registry) BlobSize(digest string) int64 {
p, err := r.BlobPath(digest)
if err != nil {
return -1
}
info, err := os.Stat(p)
if err != nil {
return -1
}
return info.Size()
}
// ReadBlob opens a blob for streaming. Caller must close the returned file.
func (r *Registry) ReadBlob(digest string) (*os.File, error) {
p, err := r.BlobPath(digest)
if err != nil {
return nil, err
}
return os.Open(p)
}
// WriteBlob writes src into the blob store, verifies the digest, and returns
// the computed digest string ("sha256:<hex>") and size.
// If a blob with the same digest already exists it is not overwritten.
func (r *Registry) WriteBlob(src io.Reader) (digest string, size int64, err error) {
tmp, err := os.CreateTemp(filepath.Join(r.root, "uploads"), "blob-*")
if err != nil {
return "", 0, fmt.Errorf("oci: create tmp blob: %w", err)
}
tmpPath := tmp.Name()
defer func() {
tmp.Close()
if err != nil {
os.Remove(tmpPath)
}
}()
h := sha256.New()
mw := io.MultiWriter(tmp, h)
size, err = io.Copy(mw, src)
if err != nil {
return "", 0, fmt.Errorf("oci: write blob: %w", err)
}
tmp.Close()
digest = "sha256:" + hex.EncodeToString(h.Sum(nil))
dest, err2 := r.BlobPath(digest)
if err2 != nil {
os.Remove(tmpPath)
return "", 0, err2
}
if _, statErr := os.Stat(dest); statErr == nil {
// Already exists — deduplication.
os.Remove(tmpPath)
return digest, size, nil
}
if err = os.Rename(tmpPath, dest); err != nil {
return "", 0, fmt.Errorf("oci: commit blob: %w", err)
}
return digest, size, nil
}
// FinishUpload finalises a chunked upload: reads the temp file, verifies
// clientDigest (if non-empty), atomically moves it to the blob store, and
// returns the canonical digest and size.
func (r *Registry) FinishUpload(uploadID, clientDigest string) (digest string, size int64, err error) {
src := r.UploadPath(uploadID)
f, err := os.Open(src)
if err != nil {
return "", 0, fmt.Errorf("oci: open upload: %w", err)
}
h := sha256.New()
size, err = io.Copy(h, f)
f.Close()
if err != nil {
return "", 0, fmt.Errorf("oci: hash upload: %w", err)
}
digest = "sha256:" + hex.EncodeToString(h.Sum(nil))
if clientDigest != "" && clientDigest != digest {
os.Remove(src)
return "", 0, &DigestMismatch{Expected: clientDigest, Actual: digest}
}
dest, err := r.BlobPath(digest)
if err != nil {
return "", 0, err
}
if _, statErr := os.Stat(dest); statErr == nil {
// Blob already exists — dedup.
os.Remove(src)
return digest, size, nil
}
if err = os.Rename(src, dest); err != nil {
return "", 0, fmt.Errorf("oci: commit upload: %w", err)
}
return digest, size, nil
}
// AppendUpload appends src to an existing upload session file and returns the
// new total offset.
func (r *Registry) AppendUpload(uploadID string, src io.Reader) (newOffset int64, err error) {
path := r.UploadPath(uploadID)
f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
if err != nil {
return 0, fmt.Errorf("oci: open upload for append: %w", err)
}
defer f.Close()
n, err := io.Copy(f, src)
if err != nil {
return 0, fmt.Errorf("oci: append upload: %w", err)
}
info, err := f.Stat()
if err != nil {
return n, nil
}
return info.Size(), nil
}
// UploadOffset returns the number of bytes written to an upload session so far.
func (r *Registry) UploadOffset(uploadID string) int64 {
info, err := os.Stat(r.UploadPath(uploadID))
if err != nil {
return 0
}
return info.Size()
}
// CancelUpload removes the temporary upload file.
func (r *Registry) CancelUpload(uploadID string) {
os.Remove(r.UploadPath(uploadID))
}
// DeleteBlob removes a blob from disk.
func (r *Registry) DeleteBlob(digest string) error {
p, err := r.BlobPath(digest)
if err != nil {
return err
}
return os.Remove(p)
}
// ─── Manifest helpers ─────────────────────────────────────────────────────────
// ManifestDescriptor extracts the digest and size from a raw manifest body.
func ManifestDescriptor(body []byte) (digest string, size int64) {
h := sha256.Sum256(body)
return "sha256:" + hex.EncodeToString(h[:]), int64(len(body))
}
// IsDigestRef returns true when ref looks like a digest ("sha256:<hex>").
func IsDigestRef(ref string) bool {
return strings.HasPrefix(ref, "sha256:")
}
// ─── OCI error types ─────────────────────────────────────────────────────────
// ErrorCode is an OCI Distribution API error code.
type ErrorCode string
const (
ErrBlobUnknown ErrorCode = "BLOB_UNKNOWN"
ErrBlobUploadInvalid ErrorCode = "BLOB_UPLOAD_INVALID"
ErrBlobUploadUnknown ErrorCode = "BLOB_UPLOAD_UNKNOWN"
ErrDigestInvalid ErrorCode = "DIGEST_INVALID"
ErrManifestBlobUnknown ErrorCode = "MANIFEST_BLOB_UNKNOWN"
ErrManifestInvalid ErrorCode = "MANIFEST_INVALID"
ErrManifestUnknown ErrorCode = "MANIFEST_UNKNOWN"
ErrNameInvalid ErrorCode = "NAME_INVALID"
ErrNameUnknown ErrorCode = "NAME_UNKNOWN"
ErrTagInvalid ErrorCode = "TAG_INVALID"
ErrUnauthorized ErrorCode = "UNAUTHORIZED"
ErrDenied ErrorCode = "DENIED"
ErrUnsupported ErrorCode = "UNSUPPORTED"
)
// APIError is a single OCI error entry.
type APIError struct {
Code ErrorCode `json:"code"`
Message string `json:"message"`
Detail interface{} `json:"detail,omitempty"`
}
// ErrorResponse is the top-level OCI error response body.
type ErrorResponse struct {
Errors []APIError `json:"errors"`
}
// NewError builds an ErrorResponse JSON body.
func NewError(code ErrorCode, msg string) []byte {
b, _ := json.Marshal(ErrorResponse{Errors: []APIError{{Code: code, Message: msg}}})
return b
}
// DigestMismatch is returned when a client-provided digest doesn't match the computed one.
type DigestMismatch struct {
Expected string
Actual string
}
func (e *DigestMismatch) Error() string {
return fmt.Sprintf("digest mismatch: expected %s, got %s", e.Expected, e.Actual)
}
// ─── path helpers ─────────────────────────────────────────────────────────────
// digestHex validates a "sha256:<hex>" digest string and returns the hex part.
func digestHex(digest string) (string, error) {
if !strings.HasPrefix(digest, "sha256:") {
return "", fmt.Errorf("oci: only sha256 digests are supported, got %q", digest)
}
h := strings.TrimPrefix(digest, "sha256:")
if len(h) != 64 {
return "", fmt.Errorf("oci: invalid sha256 digest length: %d", len(h))
}
return h, nil
}
// sanitiseID strips any path separators from an upload ID.
func sanitiseID(id string) string {
return strings.NewReplacer("/", "", "\\", "", "..", "").Replace(id)
}
// ParseOCIPath extracts the image name and the operation kind from a path
// under /v2/. name may contain slashes (e.g. "alice/myapp").
//
// Returns: name, kind, ref where kind is one of:
//
// "tags" → ref = ""
// "manifest" → ref = tag or digest
// "blob" → ref = digest
// "upload" → ref = uploadID (empty for new upload)
// "" → unrecognised path
func ParseOCIPath(rawPath string) (name, kind, ref string) {
// Strip leading /v2/
p := strings.TrimPrefix(rawPath, "/v2/")
if p == "" || p == "/" {
return "", "", ""
}
// Try known suffixes from most to least specific.
type suffix struct {
needle string
kind string
}
suffixes := []suffix{
{"/blobs/uploads/", "upload"},
{"/blobs/sha256:", "blob"},
{"/blobs/", "blob"},
{"/manifests/", "manifest"},
{"/tags/list", "tags"},
}
for _, s := range suffixes {
idx := strings.Index(p, s.needle)
if idx < 0 {
continue
}
name = p[:idx]
rest := p[idx+len(s.needle):]
kind = s.kind
switch s.kind {
case "blob":
// ref is digest: re-attach the sha256: prefix if needed
if strings.HasSuffix(s.needle, ":") {
ref = "sha256:" + rest
} else {
ref = rest
}
case "upload":
ref = rest // upload UUID or empty for new session
default:
ref = rest
}
return name, kind, ref
}
return "", "", ""
}
// ValidateName returns an error if the image name is empty or contains
// invalid characters.
func ValidateName(name string) error {
if name == "" {
return errors.New("empty image name")
}
for _, c := range name {
if !isNameChar(c) {
return fmt.Errorf("invalid character %q in image name", c)
}
}
return nil
}
func isNameChar(c rune) bool {
return (c >= 'a' && c <= 'z') ||
(c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9') ||
c == '.' || c == '-' || c == '_' || c == '/'
}
+254
View File
@@ -0,0 +1,254 @@
package oci_test
import (
"bytes"
"os"
"path/filepath"
"strings"
"testing"
"github.com/forgeo/forgebucket/internal/domain/oci"
)
func TestParseOCIPath(t *testing.T) {
tests := []struct {
path string
wantName string
wantKind string
wantRef string
}{
{"/v2/", "", "", ""},
{"/v2", "", "", ""},
{"/v2/alice/myapp/tags/list", "alice/myapp", "tags", ""},
{"/v2/alice/myapp/manifests/latest", "alice/myapp", "manifest", "latest"},
{"/v2/alice/myapp/manifests/sha256:abc123", "alice/myapp", "manifest", "sha256:abc123"},
{"/v2/alice/myapp/blobs/sha256:def456", "alice/myapp", "blob", "sha256:def456"},
{"/v2/alice/myapp/blobs/uploads/", "alice/myapp", "upload", ""},
{"/v2/alice/myapp/blobs/uploads/uuid123", "alice/myapp", "upload", "uuid123"},
}
for _, tt := range tests {
t.Run(tt.path, func(t *testing.T) {
name, kind, ref := oci.ParseOCIPath(tt.path)
if name != tt.wantName {
t.Errorf("name = %q, want %q", name, tt.wantName)
}
if kind != tt.wantKind {
t.Errorf("kind = %q, want %q", kind, tt.wantKind)
}
if ref != tt.wantRef {
t.Errorf("ref = %q, want %q", ref, tt.wantRef)
}
})
}
}
func TestValidateName(t *testing.T) {
if err := oci.ValidateName("alice/myapp"); err != nil {
t.Errorf("valid name got error: %v", err)
}
if err := oci.ValidateName(""); err == nil {
t.Error("empty name should error")
}
if err := oci.ValidateName("alice/my app"); err == nil {
t.Error("name with spaces should error")
}
}
func TestBlobPath(t *testing.T) {
dir := t.TempDir()
reg, err := oci.New(dir)
if err != nil {
t.Fatal(err)
}
p, err := reg.BlobPath("sha256:" + strings.Repeat("a", 64))
if err != nil {
t.Fatal(err)
}
expectedSuffix := filepath.Join("blobs", "sha256", strings.Repeat("a", 64))
if !strings.HasSuffix(p, expectedSuffix) {
t.Errorf("path %q does not end with %q", p, expectedSuffix)
}
if _, err := reg.BlobPath("sha256:bad"); err == nil {
t.Error("expected error for short hex")
}
if _, err := reg.BlobPath("md5:abc"); err == nil {
t.Error("expected error for non-sha256 algorithm")
}
}
func TestWriteAndReadBlob(t *testing.T) {
dir := t.TempDir()
reg, err := oci.New(dir)
if err != nil {
t.Fatal(err)
}
content := []byte("hello oci blob")
digest, size, err := reg.WriteBlob(bytes.NewReader(content))
if err != nil {
t.Fatalf("WriteBlob: %v", err)
}
if !strings.HasPrefix(digest, "sha256:") {
t.Errorf("digest should start with sha256:, got %s", digest)
}
if size != int64(len(content)) {
t.Errorf("size = %d, want %d", size, len(content))
}
if !reg.BlobExists(digest) {
t.Error("blob should exist after write")
}
// Deduplication test: writing same content again should succeed without error.
d2, s2, err := reg.WriteBlob(bytes.NewReader(content))
if err != nil {
t.Fatalf("WriteBlob duplicate: %v", err)
}
if d2 != digest {
t.Errorf("digest mismatch: %s vs %s", d2, digest)
}
if s2 != size {
t.Errorf("size mismatch: %d vs %d", s2, size)
}
f, err := reg.ReadBlob(digest)
if err != nil {
t.Fatalf("ReadBlob: %v", err)
}
defer f.Close()
buf := new(bytes.Buffer)
buf.ReadFrom(f)
if buf.String() != string(content) {
t.Errorf("content mismatch: got %s", buf.String())
}
}
func TestUploadSession(t *testing.T) {
dir := t.TempDir()
reg, _ := oci.New(dir)
uploadID := "test-upload-001"
// Append content in chunks.
off, err := reg.AppendUpload(uploadID, strings.NewReader("chunk1"))
if err != nil {
t.Fatalf("AppendUpload: %v", err)
}
if off != 6 {
t.Errorf("expected offset 6, got %d", off)
}
off, err = reg.AppendUpload(uploadID, strings.NewReader("-chunk2"))
if err != nil {
t.Fatalf("AppendUpload second: %v", err)
}
if off != 13 {
t.Errorf("expected offset 13 after chunk2, got %d", off)
}
if reg.UploadOffset(uploadID) != 13 {
t.Errorf("UploadOffset = %d, want 13", reg.UploadOffset(uploadID))
}
// Finish upload with digest.
digest, size, err := reg.FinishUpload(uploadID, "")
if err != nil {
t.Fatalf("FinishUpload: %v", err)
}
if !strings.HasPrefix(digest, "sha256:") {
t.Errorf("expected sha256 digest, got %s", digest)
}
if size != 13 {
t.Errorf("expected size 13, got %d", size)
}
if !reg.BlobExists(digest) {
t.Error("blob should exist after finish upload")
}
// Verify content.
f, _ := reg.ReadBlob(digest)
buf := new(bytes.Buffer)
buf.ReadFrom(f)
f.Close()
if buf.String() != "chunk1-chunk2" {
t.Errorf("content = %q, want %q", buf.String(), "chunk1-chunk2")
}
}
func TestFinishUploadDigestMismatch(t *testing.T) {
dir := t.TempDir()
reg, _ := oci.New(dir)
uploadID := "mismatch-upload"
reg.AppendUpload(uploadID, strings.NewReader("some data"))
_, _, err := reg.FinishUpload(uploadID, "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
if err == nil {
t.Fatal("expected digest mismatch error")
}
if !strings.Contains(err.Error(), "digest mismatch") {
t.Errorf("expected 'digest mismatch', got: %v", err)
}
}
func TestManifestDescriptor(t *testing.T) {
body := []byte(`{"schemaVersion":2}`)
digest, size := oci.ManifestDescriptor(body)
if !strings.HasPrefix(digest, "sha256:") {
t.Errorf("digest should be sha256, got %s", digest)
}
if size != int64(len(body)) {
t.Errorf("size = %d, want %d", size, len(body))
}
}
func TestIsDigestRef(t *testing.T) {
if !oci.IsDigestRef("sha256:abc") {
t.Error("sha256:abc should be a digest ref")
}
if oci.IsDigestRef("latest") {
t.Error("latest should NOT be a digest ref")
}
}
func TestDeleteBlob(t *testing.T) {
dir := t.TempDir()
reg, _ := oci.New(dir)
content := []byte("delete me")
digest, _, _ := reg.WriteBlob(bytes.NewReader(content))
if !reg.BlobExists(digest) {
t.Fatal("blob should exist after write")
}
if err := reg.DeleteBlob(digest); err != nil {
t.Fatalf("DeleteBlob: %v", err)
}
if reg.BlobExists(digest) {
t.Error("blob should not exist after delete")
}
}
func TestNewCreatesDirectories(t *testing.T) {
dir := filepath.Join(t.TempDir(), "oci-storage")
reg, err := oci.New(dir)
if err != nil {
t.Fatal(err)
}
for _, sub := range []string{"blobs/sha256", "uploads"} {
p := filepath.Join(dir, sub)
if _, err := os.Stat(p); os.IsNotExist(err) {
t.Errorf("directory not created: %s", p)
}
}
_ = reg
}
+170
View File
@@ -0,0 +1,170 @@
package scanning
import (
"context"
"encoding/json"
"fmt"
"log"
"regexp"
"time"
"xorm.io/xorm"
"github.com/forgeo/forgebucket/internal/events"
gitdomain "github.com/forgeo/forgebucket/internal/domain/git"
"github.com/forgeo/forgebucket/internal/models"
)
// compiledPattern is a pre-compiled regex pattern.
type compiledPattern struct {
pattern
re *regexp.Regexp
}
// Scanner subscribes to push.received and scans commit content for secrets.
type Scanner struct {
db *xorm.Engine
bus events.EventBus
patterns []compiledPattern
}
// New creates a Scanner with all patterns pre-compiled.
func New(db *xorm.Engine, bus events.EventBus) (*Scanner, error) {
cp := make([]compiledPattern, 0, len(Patterns))
for _, p := range Patterns {
re, err := regexp.Compile(p.Raw)
if err != nil {
return nil, fmt.Errorf("scanning: compile pattern %q: %w", p.Name, err)
}
cp = append(cp, compiledPattern{pattern: p, re: re})
}
return &Scanner{db: db, bus: bus, patterns: cp}, nil
}
// Start subscribes to push.received and blocks until ctx is cancelled.
func (s *Scanner) Start(ctx context.Context) {
unsub, err := s.bus.Subscribe(events.SubjectPushReceived, func(_ string, data []byte) {
var evt events.PushEvent
if err := json.Unmarshal(data, &evt); err != nil {
log.Printf("scanning: bad push event: %v", err)
return
}
go s.scanPush(evt)
})
if err != nil {
log.Printf("scanning: subscribe: %v", err)
} else {
defer unsub()
}
<-ctx.Done()
}
// scanPush scans the diff between before and after for all patterns.
func (s *Scanner) scanPush(evt events.PushEvent) {
// Branch deletion — nothing to scan.
zeroOID := "0000000000000000000000000000000000000000"
if evt.After == zeroOID {
return
}
// Resolve repo.
var repo models.Repository
if found, _ := s.db.ID(evt.RepoID).Get(&repo); !found {
return
}
// Get the diff content between before and after.
diffContent, err := s.getDiff(repo.DiskPath, evt.Before, evt.After)
if err != nil {
log.Printf("scanning: get diff for repo %s: %v", repo.Name, err)
return
}
// Determine the commit SHA for the findings.
headSHA := evt.After
now := time.Now().UTC()
for _, p := range s.patterns {
matches := p.re.FindAllString(string(diffContent), -1)
for _, match := range matches {
// Skip very short matches (likely false positives).
if len(match) < 6 {
continue
}
leak := &models.SecretLeak{
RepoID: evt.RepoID,
CommitSHA: headSHA[:12],
Ref: evt.Ref,
PatternName: p.Name,
Description: p.Description,
Severity: p.Severity,
MatchSample: truncate(match, 40),
DetectedAt: now,
}
if _, err := s.db.Insert(leak); err != nil {
log.Printf("scanning: insert leak for %s: %v", repo.Name, err)
}
}
}
}
// getDiff returns the unified diff of all changes between two refs.
func (s *Scanner) getDiff(repoPath, oldRef, newRef string) ([]byte, error) {
// If oldRef is the zero OID (new branch), just get the initial commit content.
zeroOID := "0000000000000000000000000000000000000000"
if oldRef == zeroOID {
// Show the entire tree at the new ref.
out, err := gitdomain.Run(repoPath, "ls-tree", "-r", newRef)
if err != nil {
return nil, err
}
return out, nil
}
out, err := gitdomain.Run(repoPath, "diff", "--no-color", "--unified=3", oldRef, newRef)
if err != nil {
return nil, err
}
return out, nil
}
// ListFindings returns all active secret leaks for a repo, newest first.
func (s *Scanner) ListFindings(repoID int64) ([]models.SecretLeak, error) {
var leaks []models.SecretLeak
if err := s.db.Where("repo_id = ? AND dismissed = ?", repoID, false).
OrderBy("detected_at DESC").Find(&leaks); err != nil {
return nil, err
}
if leaks == nil {
leaks = []models.SecretLeak{}
}
return leaks, nil
}
// DismissFindings acknowledges a leak so it no longer appears in active lists.
func (s *Scanner) DismissFindings(leakID int64, dismissedBy string) error {
now := time.Now().UTC()
affected, err := s.db.ID(leakID).Cols("dismissed", "dismissed_by", "dismissed_at").
Update(&models.SecretLeak{
Dismissed: true,
DismissedBy: dismissedBy,
DismissedAt: &now,
})
if err != nil {
return err
}
if affected == 0 {
return fmt.Errorf("leak %d not found", leakID)
}
return nil
}
// truncate shortens a string to maxLen characters for safe display.
func truncate(s string, maxLen int) string {
if len(s) <= maxLen {
return s
}
return s[:maxLen] + "..."
}
+106
View File
@@ -0,0 +1,106 @@
package scanning
// pattern holds a compiled regex-like pattern string and its metadata.
// We use raw string patterns rather than importing regexp for each check;
// the Scanner compiles all patterns once at startup.
type pattern struct {
Name string
Description string
Raw string // the regex pattern (compiled at init)
Severity string // "high", "medium", "low"
}
// Patterns is the list of secret patterns checked against every pushed commit.
// Patterns are ordered by severity — high first.
var Patterns = []pattern{
{
Name: "aws-access-key-id",
Description: "AWS Access Key ID",
Raw: `AKIA[0-9A-Z]{16}`,
Severity: "high",
},
{
Name: "aws-secret-key",
Description: "AWS Secret Access Key",
Raw: `(?i)aws[_-]?(secret|private)[_-]?(access[_-]?)?key['"]?\s*[:=]\s*['"]?[A-Za-z0-9\/+=]{40}`,
Severity: "high",
},
{
Name: "github-token",
Description: "GitHub Personal Access Token",
Raw: `gh[pousr]_[A-Za-z0-9_]{36,}`,
Severity: "high",
},
{
Name: "gitlab-token",
Description: "GitLab Personal Access Token",
Raw: `glpat-[A-Za-z0-9\-_]{20,}`,
Severity: "high",
},
{
Name: "generic-api-key",
Description: "Generic API key assignment (high entropy)",
Raw: `(?i)(api[_-]?key|apikey|api[_-]?secret|api[_-]?token)['"]?\s*[:=]\s*['"][A-Za-z0-9_\-\.]{20,64}`,
Severity: "high",
},
{
Name: "bearer-token",
Description: "Bearer token in HTTP header",
Raw: `(?i)authorization:\s*bearer\s+[A-Za-z0-9_\-\.]{20,}`,
Severity: "high",
},
{
Name: "slack-token",
Description: "Slack Bot / Webhook token",
Raw: `xox[baprs]-[A-Za-z0-9\-]{10,}`,
Severity: "high",
},
{
Name: "google-api-key",
Description: "Google API Key",
Raw: `AIza[0-9A-Za-z\-_]{35}`,
Severity: "high",
},
{
Name: "google-service-account",
Description: "Google Service Account",
Raw: `[0-9]+-[0-9a-z]{32}\.apps\.googleusercontent\.com`,
Severity: "high",
},
{
Name: "ssh-private-key",
Description: "SSH / TLS private key embed",
Raw: `-----BEGIN\s+(RSA|EC|OPENSSH|DSA|PRIVATE)(\s+PRIVATE)?\s+KEY-----`,
Severity: "high",
},
{
Name: "jwt-token",
Description: "JSON Web Token (JWT)",
Raw: `eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}`,
Severity: "medium",
},
{
Name: "generic-password",
Description: "Generic password/secret field assignment",
Raw: `(?i)(password|passwd|pwd|secret)['"]?\s*[:=]\s*['"][A-Za-z0-9!@#$%^&*()_+\-=\[\]{}|;:,.<>?]{8,}`,
Severity: "medium",
},
{
Name: "npm-token",
Description: "npm access token",
Raw: `npm_[A-Za-z0-9]{36,}`,
Severity: "high",
},
{
Name: "pg-connection-string",
Description: "PostgreSQL connection string",
Raw: `postgres(ql)?://[A-Za-z0-9_]+:[^@\s]+@`,
Severity: "high",
},
{
Name: "redis-connection-string",
Description: "Redis connection string with password",
Raw: `redis://[^:@\s]+:[^@\s]+@`,
Severity: "high",
},
}
+118
View File
@@ -0,0 +1,118 @@
package scanning
import (
"regexp"
"testing"
)
func TestPatternsCompile(t *testing.T) {
for _, p := range Patterns {
_, err := regexp.Compile(p.Raw)
if err != nil {
t.Errorf("pattern %q failed to compile: %v", p.Name, err)
}
}
}
func TestPatternsHaveNames(t *testing.T) {
for _, p := range Patterns {
if p.Name == "" {
t.Error("pattern with empty name")
}
if p.Description == "" {
t.Errorf("pattern %q has empty description", p.Name)
}
if p.Severity != "high" && p.Severity != "medium" && p.Severity != "low" {
t.Errorf("pattern %q has invalid severity %q", p.Name, p.Severity)
}
}
}
func TestAWSAccessKey(t *testing.T) {
re := regexp.MustCompile(`AKIA[0-9A-Z]{16}`)
cases := []struct {
input string
match bool
}{
{"AKIAIOSFODNN7EXAMPLE", true},
{"AKIA1234567890123456", true},
{"not-a-key", false},
{"SKIA1234567890123456", false},
}
for _, tc := range cases {
got := re.MatchString(tc.input)
if got != tc.match {
t.Errorf("input %q: got %v, want %v", tc.input, got, tc.match)
}
}
}
func TestGitHubToken(t *testing.T) {
re := regexp.MustCompile(`gh[pousr]_[A-Za-z0-9_]{36,}`)
cases := []struct {
input string
match bool
}{
{"ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true},
{"gho_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true},
{"ghu_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true},
{"ghs_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true},
{"ghr_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", true},
{"not-a-token", false},
{"ghp_short", false},
}
for _, tc := range cases {
got := re.MatchString(tc.input)
if got != tc.match {
t.Errorf("input %q: got %v, want %v", tc.input, got, tc.match)
}
}
}
func TestPrivateKey(t *testing.T) {
re := regexp.MustCompile(`-----BEGIN\s+(RSA|EC|OPENSSH|DSA|PRIVATE)(\s+PRIVATE)?\s+KEY-----`)
cases := []struct {
input string
match bool
}{
{"-----BEGIN RSA PRIVATE KEY-----", true},
{"-----BEGIN EC PRIVATE KEY-----", true},
{"-----BEGIN OPENSSH PRIVATE KEY-----", true},
{"-----BEGIN DSA PRIVATE KEY-----", true},
{"-----BEGIN PRIVATE KEY-----", true},
{"-----BEGIN CERTIFICATE-----", false},
{"public key is here", false},
}
for _, tc := range cases {
got := re.MatchString(tc.input)
if got != tc.match {
t.Errorf("input %q: got %v, want %v", tc.input, got, tc.match)
}
}
}
func TestJWT(t *testing.T) {
re := regexp.MustCompile(`eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}`)
cases := []struct {
input string
match bool
}{
{"eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNnZctV9XjvP_oGZQZxGdAqVxQ", true},
{"not-a-jwt", false},
}
for _, tc := range cases {
got := re.MatchString(tc.input)
if got != tc.match {
t.Errorf("input %q: got %v, want %v", tc.input, got, tc.match)
}
}
}
func TestTruncate(t *testing.T) {
if truncate("hello", 10) != "hello" {
t.Error("should not truncate short strings")
}
if truncate("hello world this is long", 10) != "hello worl..." {
t.Errorf("got %q", truncate("hello world this is long", 10))
}
}
+140
View File
@@ -0,0 +1,140 @@
package vulnscan
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
)
const defaultOSVAPI = "https://api.osv.dev/v1"
// Client queries the OSV (Open Source Vulnerabilities) API.
// https://osv.dev/docs/
type Client struct {
baseURL string
http *http.Client
}
// NewClient creates a client that queries the public OSV API.
func NewClient() *Client {
return &Client{
baseURL: defaultOSVAPI,
http: &http.Client{
Timeout: 30 * time.Second,
},
}
}
// QueryRequest is sent to POST /v1/query.
type QueryRequest struct {
Package PackageID `json:"package"`
Version string `json:"version"`
}
// PackageID identifies a package in a specific ecosystem.
type PackageID struct {
PURL string `json:"purl,omitempty"`
Name string `json:"name,omitempty"`
Ecosystem string `json:"ecosystem,omitempty"`
}
// QueryResponse is the response from POST /v1/query.
type QueryResponse struct {
Vulns []OSVVuln `json:"vulns"`
}
// OSVVuln is a vulnerability returned by the OSV API.
type OSVVuln struct {
ID string `json:"id"`
Summary string `json:"summary"`
Details string `json:"details"`
Aliases []string `json:"aliases"`
Fixed string `json:"fixed,omitempty"`
Severity []Severity `json:"severity,omitempty"`
Affected []Affected `json:"affected,omitempty"`
Published string `json:"published,omitempty"`
Modified string `json:"modified,omitempty"`
}
// Severity holds a CVSS score from the OSV response.
type Severity struct {
Type string `json:"type"`
Score string `json:"score"`
}
// Affected describes a package version range.
type Affected struct {
Package PackageID `json:"package"`
Ranges []AffectedRange `json:"ranges"`
Versions []string `json:"versions"`
}
type AffectedRange struct {
Type string `json:"type"`
Events []RangeEvent `json:"events"`
}
type RangeEvent struct {
Introduced string `json:"introduced"`
Fixed string `json:"fixed"`
Limit string `json:"limit"`
}
// QueryByPURL queries OSV for vulnerabilities affecting a given PURL + version.
func (c *Client) QueryByPURL(purl, version string) ([]OSVVuln, error) {
body := QueryRequest{
Package: PackageID{PURL: purl},
Version: version,
}
return c.doQuery(body)
}
// QueryByEcosystem queries OSV for vulnerabilities affecting a package in a
// specific ecosystem (e.g. "npm", "Go", "PyPI", "cargo", "Maven", "RubyGems").
func (c *Client) QueryByEcosystem(ecosystem, name, version string) ([]OSVVuln, error) {
body := QueryRequest{
Package: PackageID{
Name: name,
Ecosystem: ecosystem,
},
Version: version,
}
return c.doQuery(body)
}
func (c *Client) doQuery(body interface{}) ([]OSVVuln, error) {
payload, err := json.Marshal(body)
if err != nil {
return nil, fmt.Errorf("vulnscan: marshal body: %w", err)
}
req, err := http.NewRequest(http.MethodPost, c.baseURL+"/query", bytes.NewReader(payload))
if err != nil {
return nil, fmt.Errorf("vulnscan: create request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Accept", "application/json")
resp, err := c.http.Do(req)
if err != nil {
return nil, fmt.Errorf("vulnscan: query: %w", err)
}
defer resp.Body.Close()
respBody, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("vulnscan: read response: %w", err)
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("vulnscan: OSV returned %d: %s", resp.StatusCode, string(respBody))
}
var qr QueryResponse
if err := json.Unmarshal(respBody, &qr); err != nil {
return nil, fmt.Errorf("vulnscan: parse response: %w", err)
}
return qr.Vulns, nil
}
+89
View File
@@ -0,0 +1,89 @@
package vulnscan
import (
"encoding/json"
"testing"
)
func TestParseCVSS(t *testing.T) {
v := OSVVuln{
ID: "CVE-2024-0001",
Severity: []Severity{
{Type: "CVSS_V3", Score: "7.5"},
},
}
score := parseCVSS(v)
if score != 7.5 {
t.Errorf("expected 7.5, got %f", score)
}
}
func TestParseCVSS_NoScore(t *testing.T) {
v := OSVVuln{
ID: "GHSA-xxxx",
}
score := parseCVSS(v)
if score != 0 {
t.Errorf("expected 0 for no severity, got %f", score)
}
}
func TestExtractFixedVersion(t *testing.T) {
v := OSVVuln{
Affected: []Affected{
{
Ranges: []AffectedRange{
{
Events: []RangeEvent{
{Introduced: "0"},
{Fixed: "1.2.3"},
},
},
},
},
},
}
fixed := extractFixedVersion(v)
if fixed != "1.2.3" {
t.Errorf("expected 1.2.3, got %s", fixed)
}
}
func TestExtractFixedVersion_None(t *testing.T) {
v := OSVVuln{}
fixed := extractFixedVersion(v)
if fixed != "" {
t.Errorf("expected empty, got %s", fixed)
}
}
func TestTruncateStr(t *testing.T) {
if truncateStr("short", 10) != "short" {
t.Error("should not truncate short strings")
}
if truncateStr("this is a long string", 10) != "this is a ..." {
t.Errorf("got %q", truncateStr("this is a long string", 10))
}
}
func TestNewClient(t *testing.T) {
c := NewClient()
if c.baseURL != defaultOSVAPI {
t.Errorf("baseURL = %s, want %s", c.baseURL, defaultOSVAPI)
}
}
func TestQueryRequest_Marshal(t *testing.T) {
body := QueryRequest{
Package: PackageID{PURL: "pkg:golang/github.com/foo/bar@v1.0.0"},
Version: "v1.0.0",
}
data, err := json.Marshal(body)
if err != nil {
t.Fatalf("marshal: %v", err)
}
// Ensure it produces valid JSON.
if len(data) == 0 {
t.Error("empty JSON")
}
}
+179
View File
@@ -0,0 +1,179 @@
package vulnscan
import (
"context"
"encoding/json"
"fmt"
"log"
"time"
"xorm.io/xorm"
"github.com/forgeo/forgebucket/internal/events"
"github.com/forgeo/forgebucket/internal/models"
)
// Scanner watches for SBOM generation events and queries OSV for vulns.
type Scanner struct {
db *xorm.Engine
bus events.EventBus
client *Client
}
func NewScanner(db *xorm.Engine, bus events.EventBus) *Scanner {
return &Scanner{
db: db,
bus: bus,
client: NewClient(),
}
}
// Start subscribes to SBOM-related events and scans for vulnerabilities.
func (s *Scanner) Start(ctx context.Context) {
// Listen for SBOM Report created events (sync trigger).
// In practice this is called on-demand via the API, so Start is minimal.
<-ctx.Done()
}
// ScanByPURL queries OSV for a single package and stores findings.
func (s *Scanner) ScanByPURL(repoID int64, purl, version string) ([]models.VulnerabilityFinding, error) {
vulns, err := s.client.QueryByPURL(purl, version)
if err != nil {
return nil, err
}
return s.persistFindings(repoID, purl, version, vulns), nil
}
// ScanSBOM reads the latest SBOM report for a repo, queries OSV for every
// component, and stores the findings. Returns the new findings.
func (s *Scanner) ScanSBOM(repoID int64) ([]models.VulnerabilityFinding, error) {
var report models.SBOMReport
found, err := s.db.Where("repo_id = ?", repoID).
OrderBy("generated_at DESC").Get(&report)
if err != nil {
return nil, err
}
if !found {
return nil, fmt.Errorf("no SBOM found for repo %d", repoID)
}
var doc struct {
Components []struct {
Name string `json:"name"`
Version string `json:"version"`
PURL string `json:"purl"`
} `json:"components"`
}
if err := json.Unmarshal([]byte(report.BOMDocument), &doc); err != nil {
return nil, fmt.Errorf("parse SBOM: %w", err)
}
var allFindings []models.VulnerabilityFinding
for _, comp := range doc.Components {
if comp.PURL == "" || comp.Version == "" {
continue
}
vulns, err := s.client.QueryByPURL(comp.PURL, comp.Version)
if err != nil {
log.Printf("vulnscan: query %s@%s: %v", comp.PURL, comp.Version, err)
continue
}
findings := s.persistFindings(repoID, comp.PURL, comp.Version, vulns)
allFindings = append(allFindings, findings...)
}
return allFindings, nil
}
// ListFindings returns unfixed vulnerability findings for a repo.
func (s *Scanner) ListFindings(repoID int64) ([]models.VulnerabilityFinding, error) {
var findings []models.VulnerabilityFinding
if err := s.db.Where("repo_id = ? AND dismissed = ?", repoID, false).
OrderBy("cvss_score DESC, detected_at DESC").Find(&findings); err != nil {
return nil, err
}
if findings == nil {
findings = []models.VulnerabilityFinding{}
}
return findings, nil
}
// DismissFindings acknowledges a vulnerability finding.
func (s *Scanner) DismissFindings(findingID int64, dismissedBy string) error {
now := time.Now().UTC()
affected, err := s.db.ID(findingID).Cols("dismissed", "dismissed_by", "dismissed_at").
Update(&models.VulnerabilityFinding{
Dismissed: true,
DismissedBy: dismissedBy,
DismissedAt: &now,
})
if err != nil {
return err
}
if affected == 0 {
return fmt.Errorf("finding %d not found", findingID)
}
return nil
}
func (s *Scanner) persistFindings(repoID int64, purl, version string, vulns []OSVVuln) []models.VulnerabilityFinding {var findings []models.VulnerabilityFinding
for _, v := range vulns {
// Check for duplicate before inserting.
existing := &models.VulnerabilityFinding{}
if has, _ := s.db.Where("vuln_id = ? AND purl = ? AND repo_id = ?", v.ID, purl, repoID).Get(existing); has {
continue
}
cvssScore := parseCVSS(v)
finding := &models.VulnerabilityFinding{
RepoID: repoID,
VulnID: v.ID,
PURL: purl,
Version: version,
Summary: truncateStr(v.Summary, 300),
Details: v.Details,
CVSSScore: cvssScore,
FixedVersion: extractFixedVersion(v),
DetectedAt: time.Now().UTC(),
}
if _, err := s.db.Insert(finding); err != nil {
log.Printf("vulnscan: insert finding %s for %s: %v", v.ID, purl, err)
continue
}
findings = append(findings, *finding)
}
return findings
}
// parseCVSS extracts the CVSS score from OSV severity info.
func parseCVSS(v OSVVuln) float64 {
for _, sev := range v.Severity {
if sev.Type == "CVSS_V3" || sev.Type == "CVSS_V2" {
var score float64
fmt.Sscanf(sev.Score, "%f", &score)
return score
}
}
return 0
}
// extractFixedVersion tries to extract the fixed version from affected ranges.
func extractFixedVersion(v OSVVuln) string {
for _, a := range v.Affected {
for _, r := range a.Ranges {
for _, e := range r.Events {
if e.Fixed != "" {
return e.Fixed
}
}
}
}
return ""
}
func truncateStr(s string, n int) string {
if len(s) <= n {
return s
}
return s[:n] + "..."
}