// Source: ForgeBucket/internal/domain/sbom/parsers.go (Go, 354 lines, 9.3 KiB)

package sbom
import (
	"bufio"
	"bytes"
	"encoding/json"
	"sort"
	"strings"
)
// ParseResult holds components extracted from a single manifest file.
// It pairs the list of extracted components with an ecosystem label.
type ParseResult struct {
// Ecosystem labels the package ecosystem the manifest belongs to.
// NOTE(review): the set of valid values is not defined in this part of the
// file — presumably names such as "npm" or "golang"; confirm against callers.
Ecosystem string
// Components lists the dependencies found in the manifest.
Components []Component
}
// ─── go.mod ──────────────────────────────────────────────────────────────────
// ParseGoMod extracts the modules named by the require directives of a go.mod
// file. Both the one-line form (`require x v1`) and the parenthesised block
// form (`require ( ... )`) are understood; every other directive is ignored.
// Indirect requirements are reported as well, because they are still part of
// the supply chain.
func ParseGoMod(content []byte) []Component {
	var (
		deps      []Component
		inRequire bool
	)
	sc := bufio.NewScanner(bytes.NewReader(content))
	for sc.Scan() {
		text := strings.TrimSpace(sc.Text())
		// Everything from "//" onwards is a comment (this also removes the
		// "// indirect" marker).
		if at := strings.Index(text, "//"); at >= 0 {
			text = strings.TrimSpace(text[:at])
		}

		var fields []string
		switch {
		case text == "":
			continue
		case text == "require (":
			inRequire = true
			continue
		case inRequire && text == ")":
			inRequire = false
			continue
		case inRequire:
			fields = strings.Fields(text)
		case strings.HasPrefix(text, "require "):
			fields = strings.Fields(strings.TrimPrefix(text, "require "))
		}
		// A usable entry is a module path followed by a version.
		if len(fields) < 2 {
			continue
		}

		path, ver := fields[0], fields[1]
		deps = append(deps, Component{
			Type:    "library",
			Name:    path,
			Version: ver,
			PURL:    golangPURL(path, ver),
		})
	}
	return deps
}
// ─── package.json ─────────────────────────────────────────────────────────────
// packageJSON is the decoding target ParsePackageJSON uses for a package.json
// document. Only the three dependency sections below are captured; each maps a
// package name to the version string written in the manifest.
type packageJSON struct {
// Dependencies holds the "dependencies" section.
Dependencies map[string]string `json:"dependencies"`
// DevDependencies holds the "devDependencies" section.
DevDependencies map[string]string `json:"devDependencies"`
// PeerDependencies holds the "peerDependencies" section.
PeerDependencies map[string]string `json:"peerDependencies"`
}
// ParsePackageJSON parses a package.json and returns npm components.
//
// Sections are processed in the order dependencies, devDependencies,
// peerDependencies. A package that appears in more than one section is
// reported once, with the scope of the first section that lists it
// ("required" for dependencies, "optional" for the other two). Within a
// section, packages are emitted in ascending name order so that the result is
// identical from run to run (Go map iteration order is randomised).
//
// Leading semver range operators (^, ~, >=, >, <=, <, =) and surrounding
// whitespace are stripped from the version. Content that is not valid JSON
// yields nil.
func ParsePackageJSON(content []byte) []Component {
	var pkg packageJSON
	if err := json.Unmarshal(content, &pkg); err != nil {
		return nil
	}

	seen := make(map[string]bool)
	var components []Component

	addAll := func(deps map[string]string, scope string) {
		names := make([]string, 0, len(deps))
		for name := range deps {
			names = append(names, name)
		}
		sort.Strings(names)

		for _, name := range names {
			if seen[name] {
				continue
			}
			seen[name] = true
			// Whitespace is part of the cutset so that ">= 1.2.3" becomes
			// "1.2.3" rather than " 1.2.3".
			clean := strings.TrimSpace(strings.TrimLeft(deps[name], "^~>=< \t"))
			components = append(components, Component{
				Type:    "library",
				Name:    name,
				Version: clean,
				PURL:    npmPURL(name, clean),
				Scope:   scope,
			})
		}
	}

	addAll(pkg.Dependencies, "required")
	addAll(pkg.DevDependencies, "optional")
	addAll(pkg.PeerDependencies, "optional")
	return components
}
// ─── requirements.txt ────────────────────────────────────────────────────────
// ParseRequirementsTxt parses a pip requirements.txt.
//
// Handled forms: pkg==1.0, pkg===1.0, pkg>=1.0, pkg~=1.0, pkg (no version),
// extras (pkg[extra]==1.0), full-line and inline comments, environment markers
// (pkg==1.0; python_version < "3.8") and backslash line continuations as
// written by hash-pinning tools. Lines starting with "-" (options such as -r,
// -e, --hash) are skipped.
//
// When a requirement carries several version specifiers, the version of the
// specifier that appears first on the line is reported. Names are lower-cased
// and stripped of extras.
func ParseRequirementsTxt(content []byte) []Component {
	// "===" precedes "==" and the two-character operators precede ">" and "<"
	// so that, at the same offset, the longest operator wins.
	operators := []string{"===", "==", ">=", "<=", "~=", "!=", ">", "<"}

	var components []Component
	scanner := bufio.NewScanner(bytes.NewReader(content))
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "-") {
			continue
		}
		// Strip inline comments.
		if idx := strings.Index(line, " #"); idx >= 0 {
			line = line[:idx]
		}
		// Strip environment markers; they may contain comparison operators
		// that must not be mistaken for version specifiers.
		if idx := strings.Index(line, ";"); idx >= 0 {
			line = line[:idx]
		}
		// Strip a trailing line-continuation backslash.
		line = strings.TrimSpace(strings.TrimSuffix(strings.TrimSpace(line), "\\"))

		// Locate the operator that occurs earliest in the line, not the one
		// that happens to come first in the operator list.
		opIdx, opLen := -1, 0
		for _, op := range operators {
			if idx := strings.Index(line, op); idx >= 0 && (opIdx < 0 || idx < opIdx) {
				opIdx, opLen = idx, len(op)
			}
		}

		name, version := line, ""
		if opIdx >= 0 {
			name = line[:opIdx]
			version = line[opIdx+opLen:]
			// Take only the first version specifier.
			if commaIdx := strings.Index(version, ","); commaIdx >= 0 {
				version = version[:commaIdx]
			}
			// A version never contains whitespace; anything after the first
			// token (e.g. a same-line --hash option) is not part of it.
			if fields := strings.Fields(version); len(fields) > 0 {
				version = fields[0]
			} else {
				version = ""
			}
		}

		// Strip extras [extra1,extra2] from name.
		if bIdx := strings.Index(name, "["); bIdx >= 0 {
			name = name[:bIdx]
		}
		name = strings.ToLower(strings.TrimSpace(name))
		if name == "" {
			continue
		}
		components = append(components, Component{
			Type:    "library",
			Name:    name,
			Version: version,
			PURL:    pypiPURL(name, version),
		})
	}
	return components
}
// ─── Cargo.toml ──────────────────────────────────────────────────────────────
// ParseCargoToml parses the dependency sections of a Cargo.toml.
//
// Recognised sections: [dependencies], [dev-dependencies],
// [build-dependencies], [workspace.dependencies] and the target-specific
// variants such as [target.'cfg(unix)'.dependencies]. A comment after the
// section header is tolerated.
//
// Recognised entry forms:
//
//	name = "version"                      (a trailing # comment is allowed)
//	name = 'version'
//	name = { version = "x", ... }         (spacing around = is free)
//	name.version = "x"
//	name.workspace = true                 (reported with an empty version)
//
// Entries without a version (git/path dependencies, workspace-inherited ones)
// are still reported, with an empty version. The sub-table form
// ([dependencies.name] followed by key/value lines) is not parsed.
func ParseCargoToml(content []byte) []Component {
	var components []Component
	scanner := bufio.NewScanner(bytes.NewReader(content))
	inDeps := false
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// Section headers.
		if strings.HasPrefix(line, "[") {
			inDeps = isCargoDependencySection(line)
			continue
		}
		if !inDeps {
			continue
		}
		eqIdx := strings.Index(line, "=")
		if eqIdx < 0 {
			continue
		}
		key := strings.Trim(strings.TrimSpace(line[:eqIdx]), `"'`)
		value := strings.TrimSpace(line[eqIdx+1:])

		// Crate names cannot contain dots, so a dotted key is always
		// "<crate>.<attribute>".
		name, subKey := key, ""
		if dot := strings.Index(key, "."); dot >= 0 {
			name = strings.TrimSpace(key[:dot])
			subKey = strings.TrimSpace(key[dot+1:])
		}

		var version string
		switch {
		case subKey == "version":
			version = cargoQuotedValue(value)
		case subKey == "workspace":
			// Version is inherited from the workspace root; not known here.
		case subKey != "":
			// Other attributes (features, optional, ...) do not introduce a
			// dependency of their own.
			continue
		case strings.HasPrefix(value, "{"):
			version = cargoInlineVersion(value)
		default:
			version = cargoQuotedValue(value)
		}
		if name == "" {
			continue
		}
		components = append(components, Component{
			Type:    "library",
			Name:    name,
			Version: version,
			PURL:    cargoPURL(name, version),
		})
	}
	return components
}

// isCargoDependencySection reports whether a TOML table header line opens one
// of the dependency tables listed on ParseCargoToml.
func isCargoDependencySection(header string) bool {
	end := strings.Index(header, "]")
	if end < 1 {
		return false
	}
	section := strings.TrimSpace(header[1:end])
	if section == "workspace.dependencies" {
		return true
	}
	if strings.HasPrefix(section, "target.") {
		// [target.<triple or cfg>.<kind>] — only the last segment matters.
		section = section[strings.LastIndex(section, ".")+1:]
	}
	switch section {
	case "dependencies", "dev-dependencies", "build-dependencies":
		return true
	}
	return false
}

// cargoQuotedValue returns the contents of the quoted string (double or single
// quotes) that s starts with, ignoring anything after the closing quote. It
// returns "" when s does not start with a quote.
func cargoQuotedValue(s string) string {
	s = strings.TrimSpace(s)
	if s == "" || (s[0] != '"' && s[0] != '\'') {
		return ""
	}
	end := strings.IndexByte(s[1:], s[0])
	if end < 0 {
		// Unterminated string: keep what is there.
		return s[1:]
	}
	return s[1 : 1+end]
}

// cargoInlineVersion returns the value of the `version` key of an inline table
// such as `{ version = "1", features = ["x"] }`, or "" when there is none.
func cargoInlineVersion(table string) string {
	const key = "version"
	for start := 0; ; {
		rel := strings.Index(table[start:], key)
		if rel < 0 {
			return ""
		}
		pos := start + rel
		start = pos + len(key)
		// Require a key boundary so that e.g. "rust-version" does not match.
		if pos > 0 && !strings.ContainsRune("{, \t", rune(table[pos-1])) {
			continue
		}
		after := strings.TrimLeft(table[start:], " \t")
		if strings.HasPrefix(after, "=") {
			return cargoQuotedValue(after[1:])
		}
	}
}
// ─── Gemfile.lock ─────────────────────────────────────────────────────────────
// ParseGemfileLock parses a Gemfile.lock and extracts gem components.
// Entries are read from every "specs:" block; a block looks like this:
//
//	GEM
//	  remote: https://rubygems.org/
//	  specs:
//	    activesupport (7.1.0)
//	      concurrent-ruby (~> 1.0)
//
// Only the entries indented exactly four spaces are resolved gems. The deeper
// indented lines beneath them are their dependency constraints and are
// skipped. A specs block ends at the next non-indented, non-empty line.
// An entry whose parentheses are empty is reported with an empty version.
func ParseGemfileLock(content []byte) []Component {
	var components []Component
	scanner := bufio.NewScanner(bytes.NewReader(content))
	inSpecs := false
	for scanner.Scan() {
		line := scanner.Text()
		trimmed := strings.TrimSpace(line)
		if trimmed == "specs:" {
			inSpecs = true
			continue
		}
		if trimmed == "" {
			continue
		}
		// Count leading spaces instead of comparing against space-only string
		// literals, which are easy to corrupt and hard to read.
		indent := len(line) - len(strings.TrimLeft(line, " "))
		if indent == 0 {
			// Section names such as GEM, PLATFORMS or DEPENDENCIES.
			inSpecs = false
		}
		if !inSpecs || indent != 4 {
			continue
		}
		// Parse: "gemname (version)"
		oIdx := strings.Index(trimmed, " (")
		if oIdx < 0 {
			continue
		}
		name := trimmed[:oIdx]
		var version string
		// Guard the index: "name ()" would otherwise panic.
		if fields := strings.Fields(strings.TrimSuffix(trimmed[oIdx+2:], ")")); len(fields) > 0 {
			version = fields[0]
		}
		components = append(components, Component{
			Type:    "library",
			Name:    name,
			Version: version,
			PURL:    gemPURL(name, version),
		})
	}
	return components
}
// ─── pom.xml (minimal) ───────────────────────────────────────────────────────
// ParsePomXML does a lightweight line-scan extraction of Maven dependencies.
// It avoids pulling in an XML parser — it looks for <dependency> blocks and
// extracts groupId, artifactId, version tags.
//
// The scan assumes the usual one-element-per-line layout: <dependency>,
// </dependency> and each coordinate tag must sit on their own lines.
// Coordinates inside an <exclusions> block are ignored, because they describe
// artifacts that are excluded, not the dependency itself. A dependency is
// reported only when both groupId and artifactId were found; the version may
// be empty (for example when it is managed elsewhere). Component names have
// the form "groupId:artifactId".
func ParsePomXML(content []byte) []Component {
	var components []Component
	scanner := bufio.NewScanner(bytes.NewReader(content))

	// extract returns the trimmed text between <tag> and </tag> on one line,
	// or "" when the line does not hold the complete element.
	extract := func(line, tag string) string {
		openTag := "<" + tag + ">"
		closeTag := "</" + tag + ">"
		sIdx := strings.Index(line, openTag)
		eIdx := strings.Index(line, closeTag)
		if sIdx >= 0 && eIdx > sIdx {
			return strings.TrimSpace(line[sIdx+len(openTag) : eIdx])
		}
		return ""
	}

	var groupID, artifactID, version string
	inDep := false
	inExclusions := false
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if strings.Contains(line, "<dependency>") {
			inDep = true
			inExclusions = false
			groupID, artifactID, version = "", "", ""
			continue
		}
		if strings.Contains(line, "</dependency>") {
			if inDep && groupID != "" && artifactID != "" {
				name := groupID + ":" + artifactID
				components = append(components, Component{
					Type:    "library",
					Name:    name,
					Version: version,
					PURL:    mavenPURL(groupID, artifactID, version),
				})
			}
			inDep = false
			inExclusions = false
			continue
		}
		if !inDep {
			continue
		}
		// Skip everything between <exclusions> and </exclusions>; otherwise
		// the excluded artifact's groupId/artifactId would overwrite the
		// dependency's own coordinates.
		if strings.Contains(line, "<exclusions>") {
			inExclusions = true
		}
		if inExclusions {
			if strings.Contains(line, "</exclusions>") {
				inExclusions = false
			}
			continue
		}
		if v := extract(line, "groupId"); v != "" {
			groupID = v
		}
		if v := extract(line, "artifactId"); v != "" {
			artifactID = v
		}
		if v := extract(line, "version"); v != "" {
			version = v
		}
	}
	return components
}