package sbom import ( "bufio" "bytes" "encoding/json" "strings" ) // ParseResult holds components extracted from a single manifest file. type ParseResult struct { Ecosystem string Components []Component } // ─── go.mod ────────────────────────────────────────────────────────────────── // ParseGoMod parses a go.mod file and returns Go module components. // Handles both single-line `require x v1` and block `require ( ... )` forms. func ParseGoMod(content []byte) []Component { var components []Component scanner := bufio.NewScanner(bytes.NewReader(content)) inBlock := false for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) // Strip inline comments. if idx := strings.Index(line, "//"); idx >= 0 { line = strings.TrimSpace(line[:idx]) } if line == "" { continue } if line == "require (" { inBlock = true continue } if inBlock && line == ")" { inBlock = false continue } var modulePath, version string if inBlock { parts := strings.Fields(line) if len(parts) >= 2 { modulePath, version = parts[0], parts[1] } } else if strings.HasPrefix(line, "require ") { parts := strings.Fields(strings.TrimPrefix(line, "require ")) if len(parts) >= 2 { modulePath, version = parts[0], parts[1] } } if modulePath == "" { continue } // Indirect deps are still included — they are part of the supply chain. components = append(components, Component{ Type: "library", Name: modulePath, Version: version, PURL: golangPURL(modulePath, version), }) } return components } // ─── package.json ───────────────────────────────────────────────────────────── type packageJSON struct { Dependencies map[string]string `json:"dependencies"` DevDependencies map[string]string `json:"devDependencies"` PeerDependencies map[string]string `json:"peerDependencies"` } // ParsePackageJSON parses a package.json and returns npm components. func ParsePackageJSON(content []byte) []Component { var pkg packageJSON if err := json.Unmarshal(content, &pkg); err != nil { return nil } seen := make(map[string]bool) var components []Component add := func(name, version, scope string) { if seen[name] { return } seen[name] = true // Strip semver range prefixes: ^, ~, >=, >, <=, <, = clean := strings.TrimLeft(version, "^~>=<") components = append(components, Component{ Type: "library", Name: name, Version: clean, PURL: npmPURL(name, clean), Scope: scope, }) } for name, ver := range pkg.Dependencies { add(name, ver, "required") } for name, ver := range pkg.DevDependencies { add(name, ver, "optional") } for name, ver := range pkg.PeerDependencies { add(name, ver, "optional") } return components } // ─── requirements.txt ──────────────────────────────────────────────────────── // ParseRequirementsTxt parses a pip requirements.txt. // Handles: pkg==1.0, pkg>=1.0, pkg~=1.0, pkg (no version), comments, extras. func ParseRequirementsTxt(content []byte) []Component { var components []Component scanner := bufio.NewScanner(bytes.NewReader(content)) for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "-") { continue } // Strip inline comments. if idx := strings.Index(line, " #"); idx >= 0 { line = strings.TrimSpace(line[:idx]) } // Strip extras: package[extra]==1.0 → package, ==1.0 name := line version := "" for _, op := range []string{"==", ">=", "<=", "~=", "!=", ">", "<"} { if idx := strings.Index(line, op); idx >= 0 { name = strings.TrimSpace(line[:idx]) version = strings.TrimSpace(line[idx+len(op):]) // Take only the first version specifier. if commaIdx := strings.Index(version, ","); commaIdx >= 0 { version = version[:commaIdx] } break } } // Strip extras [extra1,extra2] from name. if bIdx := strings.Index(name, "["); bIdx >= 0 { name = name[:bIdx] } name = strings.ToLower(strings.TrimSpace(name)) if name == "" { continue } components = append(components, Component{ Type: "library", Name: name, Version: version, PURL: pypiPURL(name, version), }) } return components } // ─── Cargo.toml ────────────────────────────────────────────────────────────── // ParseCargoToml parses a Cargo.toml [dependencies] section. // Handles: name = "version" and name = { version = "x", ... }. func ParseCargoToml(content []byte) []Component { var components []Component scanner := bufio.NewScanner(bytes.NewReader(content)) inDeps := false for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) if strings.HasPrefix(line, "#") { continue } // Section headers. if strings.HasPrefix(line, "[") { inDeps = line == "[dependencies]" || line == "[dev-dependencies]" || line == "[build-dependencies]" continue } if !inDeps || line == "" { continue } eqIdx := strings.Index(line, "=") if eqIdx < 0 { continue } name := strings.TrimSpace(line[:eqIdx]) rest := strings.TrimSpace(line[eqIdx+1:]) var version string if strings.HasPrefix(rest, `"`) { // name = "version" version = strings.Trim(rest, `"`) } else if strings.HasPrefix(rest, "{") { // name = { version = "x", features = [...] } if vIdx := strings.Index(rest, `version = "`); vIdx >= 0 { vIdx += len(`version = "`) endIdx := strings.Index(rest[vIdx:], `"`) if endIdx >= 0 { version = rest[vIdx : vIdx+endIdx] } } } if name == "" { continue } components = append(components, Component{ Type: "library", Name: name, Version: version, PURL: cargoPURL(name, version), }) } return components } // ─── Gemfile.lock ───────────────────────────────────────────────────────────── // ParseGemfileLock parses a Gemfile.lock and extracts gem components. // The GEM section format is: // // GEM // remote: https://rubygems.org/ // specs: // activesupport (7.1.0) // ... func ParseGemfileLock(content []byte) []Component { var components []Component scanner := bufio.NewScanner(bytes.NewReader(content)) inSpecs := false for scanner.Scan() { line := scanner.Text() trimmed := strings.TrimSpace(line) if trimmed == "GEM" { continue } if trimmed == "specs:" { inSpecs = true continue } // Any non-indented non-empty line ends the specs block. if inSpecs && !strings.HasPrefix(line, " ") && trimmed != "" { inSpecs = false } if !inSpecs { continue } // Specs entries are indented exactly 4 spaces: " name (version)" // Sub-dependencies are indented 6+ spaces — skip them. if !strings.HasPrefix(line, " ") || strings.HasPrefix(line, " ") { continue } // Parse: " gemname (version)" entry := strings.TrimSpace(line) oIdx := strings.Index(entry, " (") if oIdx < 0 { continue } name := entry[:oIdx] versionFull := strings.TrimSuffix(entry[oIdx+2:], ")") version := strings.Fields(versionFull)[0] components = append(components, Component{ Type: "library", Name: name, Version: version, PURL: gemPURL(name, version), }) } return components } // ─── pom.xml (minimal) ─────────────────────────────────────────────────────── // ParsePomXML does a lightweight line-scan extraction of Maven dependencies. // It avoids pulling in an XML parser — it looks for blocks and // extracts groupId, artifactId, version tags. func ParsePomXML(content []byte) []Component { var components []Component scanner := bufio.NewScanner(bytes.NewReader(content)) var groupID, artifactID, version string inDep := false extract := func(line, tag string) string { open := "<" + tag + ">" close := "" sIdx := strings.Index(line, open) eIdx := strings.Index(line, close) if sIdx >= 0 && eIdx > sIdx { return line[sIdx+len(open) : eIdx] } return "" } for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) if strings.Contains(line, "") { inDep = true groupID, artifactID, version = "", "", "" continue } if strings.Contains(line, "") { if inDep && groupID != "" && artifactID != "" { name := groupID + ":" + artifactID components = append(components, Component{ Type: "library", Name: name, Version: version, PURL: mavenPURL(groupID, artifactID, version), }) } inDep = false continue } if !inDep { continue } if v := extract(line, "groupId"); v != "" { groupID = v } if v := extract(line, "artifactId"); v != "" { artifactID = v } if v := extract(line, "version"); v != "" { version = v } } return components }