From 9e104f5f0fe51a6dc45a82c5c0a393f07b3b3115 Mon Sep 17 00:00:00 2001
From: cx-anurag-dalke <120229307+cx-anurag-dalke@users.noreply.github.com>
Date: Tue, 7 Apr 2026 06:22:24 +0530
Subject: [PATCH 01/14] feat: add Gradle parser feature branch
---
internal/parsers/gradle/gradle_parser.go | 222 ++++++++++++++++++
internal/parsers/gradle/gradle_parser_test.go | 160 +++++++++++++
pkg/parser/manifest-file-selector.go | 5 +
pkg/parser/parser_factory.go | 3 +
test/resources/build.gradle | 31 +++
5 files changed, 421 insertions(+)
create mode 100644 internal/parsers/gradle/gradle_parser.go
create mode 100644 internal/parsers/gradle/gradle_parser_test.go
create mode 100644 test/resources/build.gradle
diff --git a/internal/parsers/gradle/gradle_parser.go b/internal/parsers/gradle/gradle_parser.go
new file mode 100644
index 0000000..459b02a
--- /dev/null
+++ b/internal/parsers/gradle/gradle_parser.go
@@ -0,0 +1,222 @@
+package gradle
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strings"
+
+ "github.com/Checkmarx/manifest-parser/pkg/parser/models"
+)
+
+// GradleParser implements parsing of Gradle build files
+type GradleParser struct{}
+
+// Parse implements the Parser interface for Gradle build files
+func (p *GradleParser) Parse(manifestFile string) ([]models.Package, error) {
+ content, err := os.ReadFile(manifestFile)
+ if err != nil {
+ return nil, fmt.Errorf("failed to read manifest file: %w", err)
+ }
+
+ lines := strings.Split(string(content), "\n")
+
+ // Extract variables
+ variables := extractVariables(manifestFile, string(content))
+
+ var packages []models.Package
+
+ // Parse main dependencies
+ mainDeps := parseDependencies(string(content), lines, variables, false)
+ for i := range mainDeps {
+ mainDeps[i].FilePath = manifestFile
+ }
+ packages = append(packages, mainDeps...)
+
+ // Note: Buildscript dependencies are also parsed as main for simplicity
+
+ return packages, nil
+}
+
+// extractVariables extracts variable definitions from the build file and gradle.properties
+func extractVariables(manifestFile, content string) map[string]string {
+ vars := make(map[string]string)
+
+ // Read gradle.properties if exists
+ gradlePropsPath := filepath.Join(filepath.Dir(manifestFile), "gradle.properties")
+ if propsContent, err := os.ReadFile(gradlePropsPath); err == nil {
+ for _, line := range strings.Split(string(propsContent), "\n") {
+ line = strings.TrimSpace(line)
+ if strings.Contains(line, "=") && !strings.HasPrefix(line, "#") {
+ parts := strings.SplitN(line, "=", 2)
+ if len(parts) == 2 {
+ vars[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
+ }
+ }
+ }
+ }
+
+ // Extract from ext blocks (Groovy)
+ extPattern := regexp.MustCompile(`(?s)ext\s*\{([^}]+)\}`)
+ if matches := extPattern.FindStringSubmatch(content); len(matches) > 1 {
+ extContent := matches[1]
+ // Simple key = 'value' or key: 'value'
+ varPatterns := []*regexp.Regexp{
+ regexp.MustCompile(`(\w+)\s*=\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(\w+)\s*:\s*['"]([^'"]+)['"]`),
+ }
+ for _, pattern := range varPatterns {
+ for _, match := range pattern.FindAllStringSubmatch(extContent, -1) {
+ if len(match) > 2 {
+ vars[match[1]] = match[2]
+ }
+ }
+ }
+ }
+
+ // Extract ext.key = 'value' (outside blocks)
+ extVarPattern := regexp.MustCompile(`ext\.(\w+)\s*=\s*['"]([^'"]+)['"]`)
+ for _, match := range extVarPattern.FindAllStringSubmatch(content, -1) {
+ if len(match) > 2 {
+ vars[match[1]] = match[2]
+ }
+ }
+
+ // Extract Kotlin DSL val/const
+ kotlinVarPatterns := []*regexp.Regexp{
+ regexp.MustCompile(`(?:val|const val)\s+(\w+)\s*=\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(?:val|const val)\s+(\w+)\s*=\s*(\d+(?:\.\d+)*[^\s'"]*)`), // for versions without quotes
+ }
+ for _, pattern := range kotlinVarPatterns {
+ for _, match := range pattern.FindAllStringSubmatch(content, -1) {
+ if len(match) > 2 {
+ vars[match[1]] = match[2]
+ }
+ }
+ }
+
+ return vars
+}
+
+// parseDependencies parses dependencies from the content
+func parseDependencies(content string, lines []string, variables map[string]string, isBuildscript bool) []models.Package {
+ var packages []models.Package
+
+ // Patterns for different dependency declarations
+ patterns := []*regexp.Regexp{
+ // String notation: implementation 'group:name:version'
+ regexp.MustCompile(`(?i)(implementation|api|compile|runtime|testImplementation|testCompile|androidTestImplementation|classpath)\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(?i)(implementation|api|compile|runtime|testImplementation|testCompile|androidTestImplementation|classpath)\s*\(\s*['"]([^'"]+)['"]\s*\)`),
+ // Map notation: implementation group: 'g', name: 'n', version: 'v'
+ regexp.MustCompile(`(?i)(implementation|api|compile|runtime|testImplementation|testCompile|androidTestImplementation|classpath)\s*group\s*:\s*['"]([^'"]+)['"]\s*,\s*name\s*:\s*['"]([^'"]+)['"]\s*,\s*version\s*:\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(?i)(implementation|api|compile|runtime|testImplementation|testCompile|androidTestImplementation|classpath)\s*\(\s*group\s*:\s*['"]([^'"]+)['"]\s*,\s*name\s*:\s*['"]([^'"]+)['"]\s*,\s*version\s*:\s*['"]([^'"]+)['"]\s*\)`),
+ }
+
+ depsLines := strings.Split(content, "\n")
+ for _, line := range depsLines {
+ line = strings.TrimSpace(line)
+ if line == "" {
+ continue
+ }
+
+ for _, pattern := range patterns {
+ matches := pattern.FindStringSubmatch(line)
+ if len(matches) > 0 {
+ var group, name, version string
+ if len(matches) == 3 {
+ // String notation
+ depStr := resolveVariables(matches[2], variables)
+ parts := strings.Split(depStr, ":")
+ if len(parts) >= 2 {
+ group = parts[0]
+ name = parts[1]
+ if len(parts) > 2 {
+ version = parts[2]
+ }
+ }
+ } else if len(matches) == 5 {
+ // Map notation
+ group = resolveVariables(matches[2], variables)
+ name = resolveVariables(matches[3], variables)
+ version = resolveVariables(matches[4], variables)
+ }
+
+ if group != "" && name != "" {
+ // Handle version ranges and classifiers
+ cleanVersion := cleanVersion(version)
+
+ // Find line number
+ lineNum := findLineNumber(content, line)
+
+ packages = append(packages, models.Package{
+ PackageManager: "gradle",
+ PackageName: group + ":" + name,
+ Version: cleanVersion,
+ FilePath: "", // Will be set later
+ Locations: []models.Location{
+ {Line: lineNum},
+ },
+ })
+ }
+ }
+ }
+ }
+
+ return packages
+}
+
+// resolveVariables replaces ${var} or $var with values
+func resolveVariables(str string, variables map[string]string) string {
+ // ${var}
+ re := regexp.MustCompile(`\$\{([^}]+)\}`)
+ str = re.ReplaceAllStringFunc(str, func(match string) string {
+ varName := strings.TrimSuffix(strings.TrimPrefix(match, "${"), "}")
+ if val, ok := variables[varName]; ok {
+ return val
+ }
+ return match
+ })
+
+ // $var
+ re = regexp.MustCompile(`\$(\w+)`)
+ str = re.ReplaceAllStringFunc(str, func(match string) string {
+ varName := strings.TrimPrefix(match, "$")
+ if val, ok := variables[varName]; ok {
+ return val
+ }
+ return match
+ })
+
+ return str
+}
+
+// cleanVersion handles version ranges and classifiers
+func cleanVersion(version string) string {
+ // Remove brackets for ranges, take the lower bound
+ if strings.HasPrefix(version, "[") && strings.HasSuffix(version, "]") {
+ version = strings.Trim(version, "[]")
+ parts := strings.Split(version, ",")
+ if len(parts) > 0 {
+ version = strings.TrimSpace(parts[0])
+ }
+ }
+ if strings.HasPrefix(version, "(") && strings.HasSuffix(version, ")") {
+ version = strings.Trim(version, "()")
+ parts := strings.Split(version, ",")
+ if len(parts) > 0 {
+ version = strings.TrimSpace(parts[0])
+ }
+ }
+ // For now, keep classifiers as is
+ return version
+}
+
+// findLineNumber finds the line number of a substring in content
+func findLineNumber(content, substr string) int {
+ index := strings.Index(content, substr)
+ if index == -1 {
+ return 0
+ }
+ return strings.Count(content[:index], "\n") + 1
+}
diff --git a/internal/parsers/gradle/gradle_parser_test.go b/internal/parsers/gradle/gradle_parser_test.go
new file mode 100644
index 0000000..b307236
--- /dev/null
+++ b/internal/parsers/gradle/gradle_parser_test.go
@@ -0,0 +1,160 @@
+package gradle
+
+import (
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/Checkmarx/manifest-parser/pkg/parser/models"
+)
+
+func TestGradleParser_Parse(t *testing.T) {
+ tests := []struct {
+ name string
+ content string
+ expectedPkgs []models.Package
+ expectedError bool
+ }{
+ {
+ name: "basic gradle file",
+ content: `plugins {
+ id 'java'
+}
+
+ext {
+ springVersion = '5.3.0'
+}
+
+dependencies {
+ implementation 'org.springframework:spring-core:5.3.0'
+ testImplementation 'junit:junit:4.13'
+ api 'com.google.guava:guava:30.1-jre'
+ implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
+}
+
+buildscript {
+ dependencies {
+ classpath 'com.android.tools.build:gradle:7.0.0'
+ }
+}`,
+ expectedPkgs: []models.Package{
+ {
+ PackageManager: "gradle",
+ PackageName: "org.springframework:spring-core",
+ Version: "5.3.0",
+ Locations: []models.Location{
+ {Line: 10},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "junit:junit",
+ Version: "4.13",
+ Locations: []models.Location{
+ {Line: 11},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "com.google.guava:guava",
+ Version: "30.1-jre",
+ Locations: []models.Location{
+ {Line: 12},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "org.apache.commons:commons-lang3",
+ Version: "3.12.0",
+ Locations: []models.Location{
+ {Line: 13},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "com.android.tools.build:gradle",
+ Version: "7.0.0",
+ Locations: []models.Location{
+ {Line: 18},
+ },
+ },
+ },
+ expectedError: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Create a temporary file
+ tmpFile, err := os.CreateTemp("", "build.gradle")
+ if err != nil {
+ t.Fatalf("Failed to create temp file: %v", err)
+ }
+ defer os.Remove(tmpFile.Name())
+
+ // Write content to temp file
+ _, err = tmpFile.WriteString(tt.content)
+ if err != nil {
+ t.Fatalf("Failed to write to temp file: %v", err)
+ }
+ tmpFile.Close()
+
+ // Parse the file
+ parser := &GradleParser{}
+ pkgs, err := parser.Parse(tmpFile.Name())
+
+ if tt.expectedError && err == nil {
+ t.Errorf("Expected error but got none")
+ }
+ if !tt.expectedError && err != nil {
+ t.Errorf("Unexpected error: %v", err)
+ }
+
+ if len(pkgs) != len(tt.expectedPkgs) {
+ t.Errorf("Expected %d packages, got %d", len(tt.expectedPkgs), len(pkgs))
+ }
+
+ for i, pkg := range pkgs {
+ if i >= len(tt.expectedPkgs) {
+ break
+ }
+ expected := tt.expectedPkgs[i]
+ if pkg.PackageManager != expected.PackageManager ||
+ pkg.PackageName != expected.PackageName ||
+ pkg.Version != expected.Version {
+ t.Errorf("Package %d mismatch: got %+v, expected %+v", i, pkg, expected)
+ }
+ if len(pkg.Locations) > 0 && len(expected.Locations) > 0 {
+ if pkg.Locations[0].Line != expected.Locations[0].Line {
+ t.Errorf("Location line mismatch: got %d, expected %d", pkg.Locations[0].Line, expected.Locations[0].Line)
+ }
+ }
+ }
+ })
+ }
+}
+
+func TestGradleParser_ParseFile(t *testing.T) {
+ // Test with actual file
+ parser := &GradleParser{}
+ pkgs, err := parser.Parse(filepath.Join("..", "..", "..", "test", "resources", "build.gradle"))
+ if err != nil {
+ t.Fatalf("Failed to parse build.gradle: %v", err)
+ }
+
+ if len(pkgs) == 0 {
+ t.Errorf("Expected packages, got none")
+ }
+
+ for _, pkg := range pkgs {
+ if pkg.PackageManager != "gradle" {
+ t.Errorf("Expected package manager 'gradle', got '%s'", pkg.PackageManager)
+ }
+ if pkg.PackageName == "" {
+ t.Errorf("Package name is empty")
+ }
+ if pkg.Version == "" {
+ t.Errorf("Version is empty")
+ }
+ }
+}
diff --git a/pkg/parser/manifest-file-selector.go b/pkg/parser/manifest-file-selector.go
index 2710f99..c11b67e 100644
--- a/pkg/parser/manifest-file-selector.go
+++ b/pkg/parser/manifest-file-selector.go
@@ -15,6 +15,7 @@ const (
DotnetPackagesConfig
MavenPom
GoMod
+ GradleBuild
)
// selectManifestFile a method to select a manifest file type by its name
@@ -55,5 +56,9 @@ func selectManifestFile(manifest string) Manifest {
return GoMod
}
+ if manifestFileName == "build.gradle" || manifestFileName == "build.gradle.kts" {
+ return GradleBuild
+ }
+
return -1
}
diff --git a/pkg/parser/parser_factory.go b/pkg/parser/parser_factory.go
index 0f81e86..58f5d82 100644
--- a/pkg/parser/parser_factory.go
+++ b/pkg/parser/parser_factory.go
@@ -3,6 +3,7 @@ package parser
import (
"github.com/Checkmarx/manifest-parser/internal/parsers/dotnet"
"github.com/Checkmarx/manifest-parser/internal/parsers/golang"
+ "github.com/Checkmarx/manifest-parser/internal/parsers/gradle"
"github.com/Checkmarx/manifest-parser/internal/parsers/maven"
"github.com/Checkmarx/manifest-parser/internal/parsers/npm"
"github.com/Checkmarx/manifest-parser/internal/parsers/pypi"
@@ -26,6 +27,8 @@ func ParsersFactory(manifest string) Parser {
return &dotnet.DotnetPackagesConfigParser{}
case GoMod:
return &golang.GoModParser{}
+ case GradleBuild:
+ return &gradle.GradleParser{}
default:
return nil
}
diff --git a/test/resources/build.gradle b/test/resources/build.gradle
new file mode 100644
index 0000000..6e95dd1
--- /dev/null
+++ b/test/resources/build.gradle
@@ -0,0 +1,31 @@
+plugins {
+ id 'java'
+}
+
+ext {
+ springVersion = '5.3.0'
+ guavaVersion = '30.1-jre'
+}
+
+group 'com.example'
+version '1.0-SNAPSHOT'
+
+repositories {
+ mavenCentral()
+}
+
+buildscript {
+ repositories {
+ mavenCentral()
+ }
+ dependencies {
+ classpath 'com.android.tools.build:gradle:7.0.0'
+ }
+}
+
+dependencies {
+ implementation 'org.springframework:spring-core:5.3.0'
+ testImplementation 'junit:junit:4.13'
+ api 'com.google.guava:guava:30.1-jre'
+ implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
+}
\ No newline at end of file
From e34628260095c592793510dabea304477b76dbd2 Mon Sep 17 00:00:00 2001
From: cx-anurag-dalke <120229307+cx-anurag-dalke@users.noreply.github.com>
Date: Tue, 7 Apr 2026 06:36:37 +0530
Subject: [PATCH 02/14] feat: update Gradle parser and add implementation plan
---
GRADLE_PARSER_IMPLEMENTATION_PLAN.md | 110 ++++++++++
internal/parsers/gradle/gradle_parser.go | 203 +++++++++++++-----
internal/parsers/gradle/gradle_parser_test.go | 108 ++++++++++
3 files changed, 365 insertions(+), 56 deletions(-)
create mode 100644 GRADLE_PARSER_IMPLEMENTATION_PLAN.md
diff --git a/GRADLE_PARSER_IMPLEMENTATION_PLAN.md b/GRADLE_PARSER_IMPLEMENTATION_PLAN.md
new file mode 100644
index 0000000..94dda14
--- /dev/null
+++ b/GRADLE_PARSER_IMPLEMENTATION_PLAN.md
@@ -0,0 +1,110 @@
+# Gradle Parser Implementation Plan
+
+## Overview
+This document describes the implementation plan and execution steps for adding Gradle manifest parsing support to the `manifest-parser` repository.
+
+The parser was extended to support static Gradle dependency declarations in both Groovy and Kotlin DSL, including common production patterns such as multi-line dependencies and conditional `if` blocks.
+
+---
+
+## Implementation Plan
+
+### 1. Analyze existing code flow
+- Inspect `cmd/main.go` to understand entrypoint behavior.
+- Review `pkg/parser/parser.go` and `pkg/parser/parser_factory.go` to understand the parser interface and factory logic.
+- Review `pkg/parser/manifest-file-selector.go` to see how manifest types are detected.
+- Review existing language parser implementations for style and output format.
+
+### 2. Add Gradle manifest detection
+- Extend `pkg/parser/manifest-file-selector.go` to recognize `build.gradle` and `build.gradle.kts` files.
+- Add a new `Manifest` type for Gradle.
+
+### 3. Add factory support for Gradle
+- Update `pkg/parser/parser_factory.go` to import the new Gradle parser.
+- Return the Gradle parser instance when the selected manifest is Gradle.
+
+### 4. Implement Gradle parser
+- Create `internal/parsers/gradle/gradle_parser.go`.
+- Implement the `Parser` interface for Gradle.
+- Parse dependencies into `models.Package` entries.
+
+### 5. Add variable resolution
+- Read `gradle.properties` values.
+- Parse Groovy `ext {}` blocks and `ext.key` assignments.
+- Parse Kotlin DSL `val` and `const val` declarations.
+- Resolve `${var}` and `$var` references in dependency strings.
+
+### 6. Add support for multi-line dependency declarations
+- Detect dependency statements spanning multiple lines.
+- Join logical dependency lines before parsing.
+- Support both string notation and map notation across line breaks.
+
+### 7. Add conditional dependency support
+- Parse dependencies inside conditional blocks such as `if (...) { ... }`.
+- Treat static declarations inside conditionals as valid parse targets.
+
+### 8. Add Kotlin DSL support
+- Support Kotlin string syntax: `implementation("group:name:version")`.
+- Support Kotlin map syntax: `implementation(group = "group", name = "name", version = "version")`.
+- Support Kotlin-style dependency declarations in `build.gradle.kts`.
+
+### 9. Write regression tests
+- Create or update `internal/parsers/gradle/gradle_parser_test.go`.
+- Add tests for:
+ - basic Groovy dependencies
+ - multi-line dependencies
+ - conditional `if` block dependencies
+ - Kotlin DSL dependency syntax
+
+### 10. Validate
+- Run `go test ./internal/parsers/gradle`.
+- Confirm Gradle parser tests pass.
+- Optionally run `go test ./...` to verify broader repository compatibility, noting existing unrelated test failures.
+
+---
+
+## Files created or modified
+
+- `pkg/parser/manifest-file-selector.go`
+- `pkg/parser/parser_factory.go`
+- `internal/parsers/gradle/gradle_parser.go`
+- `internal/parsers/gradle/gradle_parser_test.go`
+- `test/resources/build.gradle` (sample Gradle fixture)
+
+---
+
+## Supported Gradle parser features
+
+- Detection of `build.gradle` and `build.gradle.kts` files
+- Parsing of common dependency configurations:
+ - `implementation`, `api`, `compile`, `compileOnly`, `runtime`, `runtimeOnly`
+ - `testImplementation`, `testCompile`, `testRuntimeOnly`
+ - `androidTestImplementation`, `annotationProcessor`, `classpath`, `kapt`
+- String-style dependency declarations
+- Map-style dependency declarations
+- Multi-line dependency statements
+- Dependencies inside `if (...) { ... }` blocks
+- Variable resolution from:
+ - `gradle.properties`
+ - Groovy `ext` property blocks
+ - Groovy `ext.key = value` syntax
+ - Kotlin DSL `val` / `const val`
+- Kotlin DSL dependency syntax
+- Version cleanup for simple ranges: a fully bracketed `[a,b]` or `(a,b)` range is reduced to its lower bound; classifiers are kept as-is
+
+---
+
+## Known limitations
+
+- Dynamic dependencies generated by build logic, loops, or plugin APIs are not resolved.
+- Complex Kotlin DSL constructs beyond common forms may not be fully parsed.
+- Conditional branch logic is not evaluated; all static declarations are treated as present.
+- Deep nested DSL or custom Gradle extension syntax may be missed.
+- Computed or function-based version expressions are not evaluated.
+- Multi-project and included-build dependency resolution is not supported.
+
+---
+
+## Notes
+
+The Gradle parser is now suitable for many production scanning scenarios in AST-CLI where static dependency declarations are present. For full Gradle model accuracy, additional Gradle-aware parsing or integration with Gradle tooling would be required.
diff --git a/internal/parsers/gradle/gradle_parser.go b/internal/parsers/gradle/gradle_parser.go
index 459b02a..af376a7 100644
--- a/internal/parsers/gradle/gradle_parser.go
+++ b/internal/parsers/gradle/gradle_parser.go
@@ -20,22 +20,20 @@ func (p *GradleParser) Parse(manifestFile string) ([]models.Package, error) {
return nil, fmt.Errorf("failed to read manifest file: %w", err)
}
- lines := strings.Split(string(content), "\n")
+ manifestContent := string(content)
// Extract variables
- variables := extractVariables(manifestFile, string(content))
+ variables := extractVariables(manifestFile, manifestContent)
var packages []models.Package
// Parse main dependencies
- mainDeps := parseDependencies(string(content), lines, variables, false)
+ mainDeps := parseDependencies(manifestContent, variables)
for i := range mainDeps {
mainDeps[i].FilePath = manifestFile
}
packages = append(packages, mainDeps...)
- // Note: Buildscript dependencies are also parsed as main for simplicity
-
return packages, nil
}
@@ -99,73 +97,166 @@ func extractVariables(manifestFile, content string) map[string]string {
return vars
}
+type dependencyStatement struct {
+ Line int
+ Text string
+}
+
// parseDependencies parses dependencies from the content
-func parseDependencies(content string, lines []string, variables map[string]string, isBuildscript bool) []models.Package {
+func parseDependencies(content string, variables map[string]string) []models.Package {
var packages []models.Package
- // Patterns for different dependency declarations
- patterns := []*regexp.Regexp{
- // String notation: implementation 'group:name:version'
- regexp.MustCompile(`(?i)(implementation|api|compile|runtime|testImplementation|testCompile|androidTestImplementation|classpath)\s*['"]([^'"]+)['"]`),
- regexp.MustCompile(`(?i)(implementation|api|compile|runtime|testImplementation|testCompile|androidTestImplementation|classpath)\s*\(\s*['"]([^'"]+)['"]\s*\)`),
- // Map notation: implementation group: 'g', name: 'n', version: 'v'
- regexp.MustCompile(`(?i)(implementation|api|compile|runtime|testImplementation|testCompile|androidTestImplementation|classpath)\s*group\s*:\s*['"]([^'"]+)['"]\s*,\s*name\s*:\s*['"]([^'"]+)['"]\s*,\s*version\s*:\s*['"]([^'"]+)['"]`),
- regexp.MustCompile(`(?i)(implementation|api|compile|runtime|testImplementation|testCompile|androidTestImplementation|classpath)\s*\(\s*group\s*:\s*['"]([^'"]+)['"]\s*,\s*name\s*:\s*['"]([^'"]+)['"]\s*,\s*version\s*:\s*['"]([^'"]+)['"]\s*\)`),
- }
-
- depsLines := strings.Split(content, "\n")
- for _, line := range depsLines {
- line = strings.TrimSpace(line)
- if line == "" {
+ statements := extractDependencyStatements(content)
+ for _, stmt := range statements {
+ for _, pkg := range parseDependencyStatement(stmt.Text, variables) {
+ pkg.Locations = []models.Location{{Line: stmt.Line}}
+ packages = append(packages, pkg)
+ }
+ }
+
+ return packages
+}
+
+func extractDependencyStatements(content string) []dependencyStatement {
+ startPattern := regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\b`)
+ var statements []dependencyStatement
+ var buffer strings.Builder
+ active := false
+ startLine := 0
+
+ lines := strings.Split(content, "\n")
+ for i, raw := range lines {
+ line := strings.TrimSpace(raw)
+ if line == "" || strings.HasPrefix(line, "//") || strings.HasPrefix(line, "/*") || strings.HasPrefix(line, "*") {
continue
}
- for _, pattern := range patterns {
- matches := pattern.FindStringSubmatch(line)
- if len(matches) > 0 {
- var group, name, version string
- if len(matches) == 3 {
- // String notation
- depStr := resolveVariables(matches[2], variables)
- parts := strings.Split(depStr, ":")
- if len(parts) >= 2 {
- group = parts[0]
- name = parts[1]
- if len(parts) > 2 {
- version = parts[2]
- }
- }
- } else if len(matches) == 5 {
- // Map notation
- group = resolveVariables(matches[2], variables)
- name = resolveVariables(matches[3], variables)
- version = resolveVariables(matches[4], variables)
+ if !active {
+ if startPattern.MatchString(line) {
+ active = true
+ startLine = i + 1
+ buffer.Reset()
+ buffer.WriteString(line)
+ if dependencyStatementComplete(buffer.String()) {
+ statements = append(statements, dependencyStatement{Line: startLine, Text: buffer.String()})
+ active = false
}
+ }
+ continue
+ }
+
+ buffer.WriteString(" ")
+ buffer.WriteString(line)
+ if dependencyStatementComplete(buffer.String()) {
+ statements = append(statements, dependencyStatement{Line: startLine, Text: buffer.String()})
+ active = false
+ }
+ }
- if group != "" && name != "" {
- // Handle version ranges and classifiers
- cleanVersion := cleanVersion(version)
-
- // Find line number
- lineNum := findLineNumber(content, line)
-
- packages = append(packages, models.Package{
- PackageManager: "gradle",
- PackageName: group + ":" + name,
- Version: cleanVersion,
- FilePath: "", // Will be set later
- Locations: []models.Location{
- {Line: lineNum},
- },
- })
+ return statements
+}
+
+func dependencyStatementComplete(statement string) bool {
+ patterns := []*regexp.Regexp{
+ regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*['"]([^'"\)]+)['"]`),
+ regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*\(\s*['"]([^'"\)]+)['"]\s*\)`),
+ regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*\(\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]\s*\)`),
+ regexp.MustCompile(`(?i)group\s*[:=]\s*['"]([^'"]+)['"].*name\s*[:=]\s*['"]([^'"]+)['"].*version\s*[:=]\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(?i)group\s*[:=]\s*[^,\s]+.*name\s*[:=]\s*[^,\s]+.*version\s*[:=]\s*[^,\s]+`),
+ }
+
+ for _, pattern := range patterns {
+ if pattern.MatchString(statement) {
+ return true
+ }
+ }
+
+ return false
+}
+
+func parseDependencyStatement(statement string, variables map[string]string) []models.Package {
+ var packages []models.Package
+
+ patterns := []*regexp.Regexp{
+ regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*['"]([^'"\)]+)['"]`),
+ regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*\(\s*['"]([^'"\)]+)['"]\s*\)`),
+ regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*\(\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]\s*\)`),
+ }
+
+ for _, pattern := range patterns {
+ matches := pattern.FindStringSubmatch(statement)
+ if len(matches) > 0 {
+ var group, name, version string
+ if len(matches) == 3 {
+ depStr := resolveVariables(matches[2], variables)
+ parts := strings.Split(depStr, ":")
+ if len(parts) >= 2 {
+ group = parts[0]
+ name = parts[1]
+ if len(parts) > 2 {
+ version = strings.Join(parts[2:], ":")
+ }
}
+ } else if len(matches) == 5 {
+ group = resolveVariables(matches[2], variables)
+ name = resolveVariables(matches[3], variables)
+ version = resolveVariables(matches[4], variables)
}
+
+ if group != "" && name != "" {
+ packages = append(packages, models.Package{
+ PackageManager: "gradle",
+ PackageName: group + ":" + name,
+ Version: cleanVersion(version),
+ FilePath: "",
+ Locations: []models.Location{{}},
+ })
+ }
+ }
+ }
+
+ if len(packages) == 0 {
+ if pkg := parseDependencyKeyValue(statement, variables); pkg != nil {
+ packages = append(packages, *pkg)
}
}
return packages
}
+func parseDependencyKeyValue(statement string, variables map[string]string) *models.Package {
+ fields := map[string]string{}
+
+ patterns := []*regexp.Regexp{
+ regexp.MustCompile(`(?i)(group|name|version)\s*[:=]\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(?i)(group|name|version)\s*[:=]\s*([A-Za-z_][A-Za-z0-9_]*)`),
+ }
+
+ for _, pattern := range patterns {
+ for _, match := range pattern.FindAllStringSubmatch(statement, -1) {
+ if len(match) > 2 {
+ key := strings.ToLower(match[1])
+ value := match[2]
+ fields[key] = resolveVariables(value, variables)
+ }
+ }
+ }
+
+ if fields["group"] == "" || fields["name"] == "" {
+ return nil
+ }
+
+ return &models.Package{
+ PackageManager: "gradle",
+ PackageName: fields["group"] + ":" + fields["name"],
+ Version: cleanVersion(fields["version"]),
+ FilePath: "",
+ Locations: []models.Location{{}},
+ }
+}
+
// resolveVariables replaces ${var} or $var with values
func resolveVariables(str string, variables map[string]string) string {
// ${var}
diff --git a/internal/parsers/gradle/gradle_parser_test.go b/internal/parsers/gradle/gradle_parser_test.go
index b307236..04e186d 100644
--- a/internal/parsers/gradle/gradle_parser_test.go
+++ b/internal/parsers/gradle/gradle_parser_test.go
@@ -81,6 +81,114 @@ buildscript {
},
expectedError: false,
},
+ {
+ name: "kotlin dsl dependency syntax",
+ content: `val kotlinVersion = "1.4.32"
+
+dependencies {
+ implementation("org.springframework:spring-core:$kotlinVersion")
+ implementation(
+ "org.apache.commons:commons-lang3:3.12.0"
+ )
+ implementation(group = "com.google.guava", name = "guava", version = "30.1-jre")
+ if (project.hasProperty("feature")) {
+ testImplementation("junit:junit:$kotlinVersion")
+ }
+}`,
+ expectedPkgs: []models.Package{
+ {
+ PackageManager: "gradle",
+ PackageName: "org.springframework:spring-core",
+ Version: "1.4.32",
+ Locations: []models.Location{
+ {Line: 4},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "org.apache.commons:commons-lang3",
+ Version: "3.12.0",
+ Locations: []models.Location{
+ {Line: 5},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "com.google.guava:guava",
+ Version: "30.1-jre",
+ Locations: []models.Location{
+ {Line: 8},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "junit:junit",
+ Version: "1.4.32",
+ Locations: []models.Location{
+ {Line: 10},
+ },
+ },
+ },
+ expectedError: false,
+ },
+ {
+ name: "multi-line and conditional dependencies",
+ content: `ext {
+ featureVersion = '1.0.0'
+}
+
+dependencies {
+ implementation(
+ 'org.springframework:spring-core:5.3.0'
+ )
+ implementation group: 'org.apache.commons',
+ name: 'commons-lang3',
+ version: '3.12.0'
+ if (project.hasProperty('feature')) {
+ testImplementation 'junit:junit:$featureVersion'
+ }
+ if (useRedux) {
+ api group: 'com.google.guava',
+ name: 'guava',
+ version: '30.1-jre'
+ }
+}`,
+ expectedPkgs: []models.Package{
+ {
+ PackageManager: "gradle",
+ PackageName: "org.springframework:spring-core",
+ Version: "5.3.0",
+ Locations: []models.Location{
+ {Line: 6},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "org.apache.commons:commons-lang3",
+ Version: "3.12.0",
+ Locations: []models.Location{
+ {Line: 9},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "junit:junit",
+ Version: "1.0.0",
+ Locations: []models.Location{
+ {Line: 13},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "com.google.guava:guava",
+ Version: "30.1-jre",
+ Locations: []models.Location{
+ {Line: 16},
+ },
+ },
+ },
+ expectedError: false,
+ },
}
for _, tt := range tests {
From 08862c0c8d8e143d0ba14a4933f549d5e6d2ca86 Mon Sep 17 00:00:00 2001
From: cx-anurag-dalke <120229307+cx-anurag-dalke@users.noreply.github.com>
Date: Fri, 10 Apr 2026 23:48:56 +0530
Subject: [PATCH 03/14] updated gradle-parser
---
.claude/settings.local.json | 7 +
README.md | 872 +++++++++++++++++-
internal/parsers/gradle/gradle_parser.go | 166 +++-
internal/parsers/gradle/gradle_parser_test.go | 462 +++++++++-
internal/parsers/gradle/version_catalog.go | 210 +++++
test/resources/GRADLE_TEST_FILES_README.md | 308 +++++++
test/resources/build.gradle | 117 ++-
test/resources/build.gradle.kts | 366 ++++++++
test/resources/gradle.properties | 88 ++
test/resources/gradle/libs.versions.toml | 228 +++++
10 files changed, 2776 insertions(+), 48 deletions(-)
create mode 100644 .claude/settings.local.json
create mode 100644 internal/parsers/gradle/version_catalog.go
create mode 100644 test/resources/GRADLE_TEST_FILES_README.md
create mode 100644 test/resources/build.gradle.kts
create mode 100644 test/resources/gradle.properties
create mode 100644 test/resources/gradle/libs.versions.toml
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 0000000..7065ff3
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,7 @@
+{
+ "permissions": {
+ "allow": [
+ "Bash(ls -lh c:/repository/manifest-parser/test/resources/*.gradle* c:/repository/manifest-parser/test/resources/gradle.properties c:/repository/manifest-parser/test/resources/gradle/)"
+ ]
+ }
+}
diff --git a/README.md b/README.md
index 20ecfe6..64c538d 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,871 @@
-# manifest-parser
\ No newline at end of file
+# Manifest Parser
+
+A production-grade Go library for parsing dependency manifests across multiple package managers. Extracts package dependencies from build files and dependency declarations in a standardized format for security scanning, SBOM generation, and dependency analysis.
+
+## 🎯 Purpose
+
+This parser extracts software dependencies from project manifest files and provides:
+- **Standardized Package Output** - Consistent JSON format across all package managers
+- **Version Tracking** - Precise version information for vulnerability scanning
+- **Location Tracking** - File path and line numbers for each dependency
+- **Security Scanning** - Integration with SCA (Software Composition Analysis) tools
+- **SBOM Generation** - Software Bill of Materials (cyclonedx, spdx) support
+
+## 📦 Supported Package Managers
+
+| Manager | Format | Status | Features |
+|---------|--------|--------|----------|
+| **Gradle** | `build.gradle`, `build.gradle.kts` | ✅ Production | Latest DSL + catalogs |
+| **Maven** | `pom.xml` | ✅ Production | Properties, BOMs, ranges |
+| **npm/Node.js** | `package.json` | ✅ Production | Dependencies, dev, peer, optional |
+| **Go** | `go.mod` | ✅ Production | Direct imports, indirect |
+| **.NET** | `.csproj`, `Directory.Packages.props`, `packages.config` | ✅ Production | Multi-format support |
+| **Python** | `requirements.txt` | ✅ Production | Pip format with ranges |
+
+---
+
+## 🚀 Quick Start
+
+### Installation
+
+```bash
+go get github.com/Checkmarx/manifest-parser
+```
+
+### Usage
+
+```go
+package main
+
+import (
+ "fmt"
+ "github.com/Checkmarx/manifest-parser/pkg/parser"
+)
+
+func main() {
+ // Create parser for manifest file
+ p := parser.ParsersFactory("path/to/package.json")
+ if p == nil {
+ fmt.Println("Unsupported manifest type")
+ return
+ }
+
+ // Parse dependencies
+ packages, err := p.Parse("path/to/package.json")
+ if err != nil {
+ fmt.Println("Error:", err)
+ return
+ }
+
+ // Process results
+ for _, pkg := range packages {
+ fmt.Printf("%s:%s@%s\n", pkg.PackageManager, pkg.PackageName, pkg.Version)
+ }
+}
+```
+
+### Command Line
+
+```bash
+# Parse any supported manifest
+go run cmd/main.go path/to/manifest
+
+# Examples
+go run cmd/main.go project/pom.xml
+go run cmd/main.go project/package.json
+go run cmd/main.go project/build.gradle
+go run cmd/main.go project/go.mod
+```
+
+---
+
+## 📋 Detailed Parser Documentation
+
+### 1. Gradle Parser
+
+**Files:** `build.gradle`, `build.gradle.kts`
+
+#### Features
+
+✅ **Groovy DSL** - Traditional Android/Java Gradle syntax
+✅ **Kotlin DSL** - Modern type-safe Gradle syntax
+✅ **gradle.properties** - Centralized property management
+✅ **Version Catalog** - `gradle/libs.versions.toml` (Gradle 7.0+)
+✅ **BOM/Platform** - Dependency Bill of Materials imports
+✅ **Multi-Module** - Subproject and module-specific configurations
+✅ **21 Configurations** - implementation, api, testImplementation, debugImplementation, ksp, etc.
+
+#### Dependency Declaration Support
+
+```gradle
+// String notation
+implementation 'org.springframework:spring-core:5.3.20'
+
+// Kotlin DSL
+implementation("org.springframework:spring-core:5.3.20")
+
+// Map notation
+implementation group: 'org.springframework', name: 'spring-core', version: '5.3.20'
+
+// Platform/BOM
+implementation platform('org.springframework.boot:spring-boot-dependencies:2.7.0')
+
+// Version Catalog
+implementation(libs.spring.core)
+```
+
+#### Variable Resolution
+
+```gradle
+// gradle.properties
+springVersion=5.3.20
+
+// build.gradle
+implementation "org.springframework:spring-core:${springVersion}"
+
+// ext blocks
+ext {
+ log4jVersion = '2.17.1'
+}
+dependencies {
+ implementation "org.apache.logging.log4j:log4j-core:$log4jVersion"
+}
+```
+
+#### Supported Configurations
+
+| Type | Purpose |
+|------|---------|
+| `implementation` | Runtime + compile dependencies |
+| `api` | Public API (exported to consumers) |
+| `compileOnly` | Compile-time only (e.g., annotations) |
+| `runtimeOnly` | Runtime-only (excluded from compile) |
+| `testImplementation` | Test-only dependencies |
+| `debugImplementation` | Debug build variant |
+| `releaseImplementation` | Release build variant |
+| `annotationProcessor` | Annotation code generation |
+| `ksp` / `kapt` | Kotlin/Java code generation |
+| `classpath` | Buildscript dependencies |
+| Others | 10 more keywords for legacy, testing, fixtures, and lint checks (e.g. `compile`, `testCompileOnly`, `testFixturesApi`, `lintChecks`) |
+
+#### Example: Multi-Module Project
+
+```kotlin
+// build.gradle.kts
+subprojects {
+ apply(plugin = "java")
+
+ dependencies {
+ implementation("org.springframework.boot:spring-boot-starter-web")
+ }
+}
+
+project(":api-module") {
+ dependencies {
+ implementation(project(":core"))
+ implementation("org.springframework.security:spring-security-core:5.7.1")
+ }
+}
+```
+
+#### Version Catalog Support
+
+```toml
+# gradle/libs.versions.toml
+[versions]
+spring-version = "5.3.20"
+
+[libraries]
+spring-core = { module = "org.springframework:spring-core", version.ref = "spring-version" }
+
+[bundles]
+spring = ["spring-core", "spring-context"]
+```
+
+#### Parser Capabilities
+
+- ✅ Parses Groovy and Kotlin DSL
+- ✅ Resolves variables from gradle.properties
+- ✅ Discovers and parses version catalogs
+- ✅ Unwraps platform()/enforcedPlatform() BOMs
+- ✅ Walks up directory tree for parent properties
+- ✅ Filters out project references (multi-module)
+- ✅ Skips file references (local JARs)
+- ✅ Handles multi-line declarations
+- ✅ Parses conditional if blocks
+- ❌ Does not evaluate dynamic Gradle code
+
+#### Test Resources
+
+```
+test/resources/
+├── build.gradle - Groovy DSL with subprojects
+├── build.gradle.kts - Kotlin DSL with 5 modules
+├── gradle.properties - Centralized properties
+└── gradle/libs.versions.toml - 80+ catalog entries
+```
+
+**Test Coverage:** 16 passing tests including platform dependencies, version catalogs, extended configurations, parent property inheritance
+
+---
+
+### 2. Maven Parser
+
+**File:** `pom.xml`
+
+#### Features
+
+✅ **Dependency Management** - BOM imports and managed versions
+✅ **Multi-Module** - Parent/child POM relationships
+✅ **Properties** - Variable substitution with `${property}`
+✅ **Version Ranges** - `[1.0,2.0)` notation handling
+✅ **Scopes** - compile, runtime, test, provided, optional, system
+✅ **Location Tracking** - Exact line numbers in POM files
+
+#### Dependency Declaration Support
+
+```xml
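+<!-- Standard dependency -->
+<dependency>
+    <groupId>org.springframework</groupId>
+    <artifactId>spring-core</artifactId>
+    <version>5.3.20</version>
+</dependency>
+
+<!-- Dependency with scope -->
+<dependency>
+    <groupId>junit</groupId>
+    <artifactId>junit</artifactId>
+    <version>4.13.2</version>
+    <scope>test</scope>
+</dependency>
+
+<!-- Version from a property -->
+<dependency>
+    <groupId>org.springframework</groupId>
+    <artifactId>spring-core</artifactId>
+    <version>${spring.version}</version>
+</dependency>
+
+<!-- Version range -->
+<dependency>
+    <groupId>com.example</groupId>
+    <artifactId>library</artifactId>
+    <version>[1.0,2.0)</version>
+</dependency>
+
+<!-- BOM import -->
+<dependencyManagement>
+    <dependencies>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-dependencies</artifactId>
+            <version>2.7.0</version>
+            <type>pom</type>
+            <scope>import</scope>
+        </dependency>
+    </dependencies>
+</dependencyManagement>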
+```
+
+#### Property Resolution
+
+```xml
+<properties>
+    <spring.version>5.3.20</spring.version>
+</properties>
+
+<!-- Usage -->
+<version>${spring.version}</version>
+```
+
+#### Dependency Scopes
+
+| Scope | Purpose |
+|-------|---------|
+| `compile` | Runtime + compile (default) |
+| `test` | Test-only dependencies |
+| `runtime` | Runtime-only |
+| `provided` | Compile-only, provided at runtime |
+| `optional` | Included optionally |
+| `system` | Local filesystem JAR |
+
+#### Parser Capabilities
+
+- ✅ Parses POM XML structure
+- ✅ Resolves properties and version ranges
+- ✅ Handles BOM imports and managed dependencies
+- ✅ Tracks multi-line elements
+- ✅ Extracts scope information
+- ✅ Locates exact line numbers
+- ✅ Supports parent POM references
+
+#### Example: Multi-Module Project
+
+```xml
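+<!-- Parent POM -->
+<groupId>com.example</groupId>
+<artifactId>parent</artifactId>
+<version>1.0.0</version>
+<packaging>pom</packaging>
+
+<modules>
+    <module>core</module>
+    <module>api</module>
+</modules>
+
+<!-- Child POM (core) -->
+<parent>
+    <groupId>com.example</groupId>
+    <artifactId>parent</artifactId>
+    <version>1.0.0</version>
+</parent>
+
+<artifactId>core</artifactId>
+
+<dependencies>
+    <dependency>
+        <groupId>org.springframework</groupId>
+        <artifactId>spring-core</artifactId>
+        <version>${spring.version}</version>
+    </dependency>
+</dependencies>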
+```
+
+---
+
+### 3. NPM/Node.js Parser
+
+**File:** `package.json`
+
+#### Features
+
+✅ **Dependency Types** - dependencies, devDependencies, peerDependencies, optionalDependencies
+✅ **Version Resolution** - Resolves ranges using package-lock.json
+✅ **Exact Versions** - Extracts actual installed versions from lock files
+✅ **Range Handling** - `^1.0.0`, `~1.0.0`, `*`, ranges
+
+#### Dependency Declaration Support
+
+```json
+{
+ "dependencies": {
+ "express": "4.18.2",
+ "lodash": "^4.17.21"
+ },
+ "devDependencies": {
+ "jest": "~29.0.0",
+ "webpack": "*"
+ },
+ "peerDependencies": {
+ "react": "^18.0.0"
+ },
+ "optionalDependencies": {
+ "fsevents": "2.3.2"
+ }
+}
+```
+
+#### Version Specifiers
+
+| Format | Meaning |
+|--------|---------|
+| `1.2.3` | Exact version |
+| `^1.2.3` | Compatible with 1.2.3 (up to 2.0.0) |
+| `~1.2.3` | Approximately 1.2.3 (up to 1.3.0) |
+| `>=1.2.3` | Greater than or equal |
+| `1.2.x` | Patch-level ranges |
+| `*` | Any version |
+
+#### Dependency Types
+
+| Type | Purpose |
+|------|---------|
+| `dependencies` | Production dependencies |
+| `devDependencies` | Development-only (testing, bundling) |
+| `peerDependencies` | Consumer-provided dependencies |
+| `optionalDependencies` | Optional packages |
+
+#### Parser Capabilities
+
+- ✅ Parses package.json JSON
+- ✅ Resolves version ranges using package-lock.json
+- ✅ Extracts all 4 dependency types
+- ✅ Handles multiple version specifiers
+- ✅ Provides exact installed versions
+
+#### Example: Large Project
+
+```json
+{
+ "name": "my-app",
+ "version": "1.0.0",
+ "dependencies": {
+ "react": "18.2.0",
+ "react-dom": "18.2.0",
+ "axios": "^1.4.0"
+ },
+ "devDependencies": {
+ "@babel/core": "^7.22.0",
+ "webpack": "^5.88.0",
+ "jest": "~29.0.0"
+ }
+}
+```
+
+---
+
+### 4. Go Modules Parser
+
+**File:** `go.mod`
+
+#### Features
+
+✅ **Module Dependencies** - Direct and indirect imports
+✅ **Version Pinning** - Exact semver versions
+✅ **Replace Directives** - Local and remote replacements
+✅ **Exclude Directives** - Version exclusions
+✅ **Go Version** - Minimum Go version requirement
+
+#### Dependency Declaration Support
+
+```go
+module github.com/example/project
+
+go 1.19
+
+require (
+ github.com/gorilla/mux v1.8.0
+ github.com/google/uuid v1.3.0
+)
+
+require (
+ github.com/stretchr/testify v1.8.4 // indirect
+)
+
+replace (
+ github.com/old/module => github.com/new/module v1.2.3
+ github.com/local/module => ./local/path
+)
+
+exclude (
+ github.com/bad/module v1.0.0
+)
+```
+
+#### Dependency Status
+
+| Type | Purpose |
+|------|---------|
+| `require` | Direct dependencies |
+| `require (indirect)` | Transitive dependencies |
+| `replace` | Local/remote replacements |
+| `exclude` | Excluded versions |
+
+#### Parser Capabilities
+
+- ✅ Parses go.mod file format
+- ✅ Extracts direct and indirect imports
+- ✅ Handles replace and exclude directives
+- ✅ Tracks minimum Go version
+- ✅ Provides exact line numbers
+
+#### Example: Complex Project
+
+```go
+module github.com/checkmarx/scanner
+
+go 1.20
+
+require (
+ github.com/spf13/cobra v1.7.0
+ github.com/sirupsen/logrus v1.9.3
+)
+
+require (
+ github.com/inconshreveable/log15 v2.3.2 // indirect
+ golang.org/x/sys v0.10.0 // indirect
+)
+
+replace github.com/local/package => ../local/package
+
+exclude golang.org/x/text v0.3.0
+```
+
+---
+
+### 5. .NET / C# Parser
+
+**Files:** `.csproj`, `Directory.Packages.props`, `packages.config`
+
+#### Features
+
+✅ **Project References** - `.csproj` PackageReference elements
+✅ **Centralized Management** - `Directory.Packages.props` for monorepos
+✅ **Legacy Format** - `packages.config` (NuGet v2)
+✅ **Target Frameworks** - Framework-specific dependencies
+✅ **Metadata** - Version, Include, Exclude attributes
+
+#### Dependency Declaration Support
+
+##### `.csproj` Format (Modern)
+
+```xml
+
+
+
+
+
+```
+
+##### `Directory.Packages.props` (Centralized)
+
+```xml
+
+ true
+
+
+
+
+
+
+```
+
+##### `packages.config` (Legacy NuGet)
+
+```xml
+
+
+
+
+
+```
+
+#### Package Metadata
+
+| Attribute | Purpose |
+|-----------|---------|
+| `Include` / `id` | Package name |
+| `Version` | Semantic version |
+| `TargetFramework` | Framework specificity |
+| `Condition` | Conditional inclusion |
+| `Exclude` | Excluded frameworks |
+
+#### Parser Capabilities
+
+- ✅ Parses `.csproj` XML structure
+- ✅ Extracts `Directory.Packages.props` central versions
+- ✅ Handles legacy `packages.config` format
+- ✅ Respects framework-specific conditions
+- ✅ Tracks line numbers and locations
+
+#### Example: Multi-Framework Project
+
+```xml
+
+
+
+ net6.0;net8.0;net472
+
+
+
+
+
+
+
+```
+
+---
+
+### 6. Python / Pip Parser
+
+**File:** `requirements.txt`
+
+#### Features
+
+✅ **Pip Format** - Standard Python dependency format
+✅ **Version Specifiers** - `==`, `>=`, `<=`, `~=`, ranges
+✅ **Comments & Empty Lines** - Properly ignored
+✅ **Environment Markers** - OS/Python version conditions
+✅ **Git References** - VCS dependencies
+
+#### Dependency Declaration Support
+
+```txt
+# Production dependencies
+Django==4.2.0
+djangorestframework>=3.14.0,<4.0
+requests~=2.31.0
+
+# Dev dependencies
+pytest>=7.0.0
+black==23.0.0
+
+# Git references
+git+https://github.com/example/repo.git@main#egg=mypackage
+
+# With environment markers
+pywin32>=300; sys_platform == 'win32'
+```
+
+#### Version Specifiers
+
+| Specifier | Meaning |
+|-----------|---------|
+| `==1.4.2` | Exact version |
+| `>=1.4.2` | Greater than or equal |
+| `<=1.4.2` | Less than or equal |
+| `!=1.4.2` | Not equal |
+| `~=1.4.2` | Compatible release (1.4.x) |
+| `*` | Any version |
+
+#### Environment Markers
+
+```txt
+# Platform-specific
+pywin32>=300; sys_platform == 'win32'
+
+# Python version specific
+dataclasses; python_version < '3.7'
+
+# Complex conditions
+numpy>=1.20; python_version >= '3.8' and sys_platform != 'win32'
+```
+
+#### Parser Capabilities
+
+- ✅ Parses pip requirements format
+- ✅ Extracts package names and versions
+- ✅ Handles version specifier ranges
+- ✅ Recognizes environment markers
+- ✅ Ignores comments and blank lines
+
+#### Example: Complete Project
+
+```txt
+# Python 3.8+
+Python>=3.8
+
+# Web Framework
+Flask==2.3.0
+Flask-SQLAlchemy>=3.0.0,<4.0
+
+# Database
+psycopg2-binary~=2.9.0
+SQLAlchemy>=2.0.0
+
+# Testing
+pytest>=7.0.0
+pytest-cov>=4.0.0
+
+# Development
+black==23.0.0
+flake8>=6.0.0
+
+# OS-specific
+pywin32>=300; sys_platform == 'win32'
+```
+
+---
+
+## 📊 Output Format
+
+All parsers return a standardized `Package` structure:
+
+```go
+type Package struct {
+ PackageManager string // "gradle", "maven", "npm", "go", "dotnet", "pip"
+ PackageName string // "group:name" or "name"
+ Version string // "1.2.3"
+ FilePath string // Path to manifest file
+ Locations []Location // Line numbers
+}
+
+type Location struct {
+ Line int // Line number (1-indexed)
+ StartIndex int // Character offset
+ EndIndex int // Character offset
+}
+```
+
+### JSON Output Example
+
+```json
+[
+ {
+ "packageManager": "gradle",
+ "packageName": "org.springframework:spring-core",
+ "version": "5.3.20",
+ "filePath": "build.gradle",
+ "locations": [
+ {
+ "line": 42,
+ "startIndex": 0,
+ "endIndex": 0
+ }
+ ]
+ },
+ {
+ "packageManager": "maven",
+ "packageName": "com.google.guava:guava",
+ "version": "31.1-jre",
+ "filePath": "pom.xml",
+ "locations": [
+ {
+ "line": 127,
+ "startIndex": 0,
+ "endIndex": 0
+ }
+ ]
+ }
+]
+```
+
+---
+
+## 🔒 Security & Vulnerability Detection
+
+This parser is designed to support security scanning and SCA (Software Composition Analysis) tools:
+
+### Integration with Vulnerability Databases
+
+```
+Dependency Extraction → Vulnerability Database → Risk Assessment
+ (NVD CVE)
+ (GitHub Advisory)
+ (Snyk Database)
+ (Sonatype OSS)
+```
+
+### Example: Detecting Log4j RCE
+
+```gradle
+dependencies {
+ implementation 'org.apache.logging.log4j:log4j-core:2.14.0' // CVE-2021-44228
+}
+```
+
+Parser extracts → `org.apache.logging.log4j:log4j-core:2.14.0`
+↓
+Vulnerability checker matches → CVE-2021-44228 (CRITICAL - Log4Shell RCE)
+
+---
+
+## 🏗️ Architecture
+
+```
+Parser Interface (parser.go)
+    ↓
+Manifest Detection (manifest-file-selector.go)
+    ↓
+Parser Factory (parser_factory.go)
+    ↓
+Language-Specific Parsers
+    ├─ Gradle Parser (gradle/gradle_parser.go, gradle/version_catalog.go)
+    ├─ Maven Parser (maven/maven-pom-parser.go)
+    ├─ npm Parser (npm/package_json_parser.go)
+    ├─ Go Parser (golang/go-mod-parser.go)
+    ├─ .NET Parsers (dotnet/csproj_parser.go, etc.)
+    └─ Python Parser (pypi/pypi-parser.go)
+    ↓
+Standardized Package Output (models/package_model.go)
+```
+
+---
+
+## 🧪 Testing
+
+Run tests for all parsers:
+
+```bash
+# Run all tests
+go test ./...
+
+# Run specific parser tests
+go test ./internal/parsers/gradle/ -v
+go test ./internal/parsers/maven/ -v
+go test ./internal/parsers/npm/ -v
+
+# With coverage
+go test ./... -cover
+```
+
+### Test Resources
+
+```
+test/resources/
+├── build.gradle (Gradle DSL)
+├── build.gradle.kts (Kotlin DSL)
+├── pom.xml (Maven)
+├── package.json (npm)
+├── test_go.mod (Go Modules)
+├── Bootstrap.csproj (.NET Framework)
+├── Directory.Packages.props (.NET Centralized)
+├── packages.config (.NET Legacy)
+└── requirements.txt (Python)
+```
+
+---
+
+## 📚 Documentation
+
+- [Gradle Parser Details](test/resources/GRADLE_TEST_FILES_README.md) - Comprehensive Gradle documentation with 31 vulnerable dependencies for testing
+- [Maven Documentation](https://maven.apache.org/pom.html)
+- [npm Documentation](https://docs.npmjs.com/cli/v10/configuring-npm/package-json)
+- [Go Modules Documentation](https://go.dev/ref/mod)
+- [NuGet Documentation](https://learn.microsoft.com/en-us/nuget/)
+- [Pip Documentation](https://pip.pypa.io/)
+
+---
+
+## 🤝 Contributing
+
+Contributions welcome! Focus areas:
+
+- [ ] Add Ruby Bundler support (Gemfile)
+- [ ] Add PHP Composer support (composer.json)
+- [ ] Add Rust Cargo support (Cargo.toml)
+- [ ] Improve version range resolution
+- [ ] Add more vulnerability test cases
+- [ ] Performance optimizations
+
+---
+
+## ⚖️ License
+
+This project is part of the Checkmarx AST (Application Security Testing) suite.
+
+---
+
+## 📈 Features Summary
+
+| Feature | Gradle | Maven | npm | Go | .NET | Python |
+|---------|--------|-------|-----|----|----|--------|
+| Multi-file format | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| Property resolution | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
+| Version ranges | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
+| BOM imports | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
+| Multi-module | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ |
+| Line numbers | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| Comments/ignored | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| Scope separation | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ |
+---
+
+## 📝 Version History
+
+- **v3.0.0** - Added Gradle version catalog support, enhanced property resolution
+- **v2.5.0** - Added .NET Directory.Packages.props support
+- **v2.0.0** - Initial multi-parser support
+
+---
+
+## 📧 Contact & Support
+
+For issues, questions, or feature requests:
+- GitHub Issues: [manifest-parser/issues](https://github.com/Checkmarx/manifest-parser/issues)
+- Security: [security@checkmarx.com](mailto:security@checkmarx.com)
+
+---
+
+**Made with ❤️ for secure software supply chain management**
diff --git a/internal/parsers/gradle/gradle_parser.go b/internal/parsers/gradle/gradle_parser.go
index af376a7..563793e 100644
--- a/internal/parsers/gradle/gradle_parser.go
+++ b/internal/parsers/gradle/gradle_parser.go
@@ -10,6 +10,13 @@ import (
"github.com/Checkmarx/manifest-parser/pkg/parser/models"
)
+// configKeywords is a regex alternation (a|b|c) of every supported Gradle dependency configuration keyword; the patterns in this file embed it inside a group.
+var configKeywords = `implementation|api|compile|compileOnly|runtime|runtimeOnly|` +
+ `testImplementation|testCompile|testCompileOnly|testRuntimeOnly|` +
+ `androidTestImplementation|debugImplementation|releaseImplementation|` +
+ `annotationProcessor|classpath|kapt|ksp|compileOnlyApi|` +
+ `testFixturesImplementation|testFixturesApi|lintChecks`
+
// GradleParser implements parsing of Gradle build files
type GradleParser struct{}
@@ -25,6 +32,12 @@ func (p *GradleParser) Parse(manifestFile string) ([]models.Package, error) {
// Extract variables
variables := extractVariables(manifestFile, manifestContent)
+ // Load version catalog if available
+ var catalog *VersionCatalog
+ if catalogPath := findVersionCatalog(manifestFile); catalogPath != "" {
+ catalog = parseVersionCatalog(catalogPath)
+ }
+
var packages []models.Package
// Parse main dependencies
@@ -34,6 +47,15 @@ func (p *GradleParser) Parse(manifestFile string) ([]models.Package, error) {
}
packages = append(packages, mainDeps...)
+ // Parse version catalog dependencies (libs.xxx references)
+ if catalog != nil {
+ catalogDeps := parseVersionCatalogDependencies(manifestContent, catalog)
+ for i := range catalogDeps {
+ catalogDeps[i].FilePath = manifestFile
+ }
+ packages = append(packages, catalogDeps...)
+ }
+
return packages, nil
}
@@ -44,30 +66,41 @@ func extractVariables(manifestFile, content string) map[string]string {
// Read gradle.properties if exists
gradlePropsPath := filepath.Join(filepath.Dir(manifestFile), "gradle.properties")
if propsContent, err := os.ReadFile(gradlePropsPath); err == nil {
- for _, line := range strings.Split(string(propsContent), "\n") {
- line = strings.TrimSpace(line)
- if strings.Contains(line, "=") && !strings.HasPrefix(line, "#") {
- parts := strings.SplitN(line, "=", 2)
- if len(parts) == 2 {
- vars[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
- }
- }
+ parsePropertiesInto(string(propsContent), vars)
+ }
+
+ // Walk up to project root for parent gradle.properties
+ projectRoot := findProjectRoot(filepath.Dir(manifestFile))
+ if projectRoot != filepath.Dir(manifestFile) {
+ rootPropsPath := filepath.Join(projectRoot, "gradle.properties")
+ if propsContent, err := os.ReadFile(rootPropsPath); err == nil {
+ parsePropertiesInto(string(propsContent), vars)
}
}
- // Extract from ext blocks (Groovy)
+ // Extract from ext blocks (Groovy) — handle all ext blocks, filter commented lines
extPattern := regexp.MustCompile(`(?s)ext\s*\{([^}]+)\}`)
- if matches := extPattern.FindStringSubmatch(content); len(matches) > 1 {
- extContent := matches[1]
- // Simple key = 'value' or key: 'value'
- varPatterns := []*regexp.Regexp{
- regexp.MustCompile(`(\w+)\s*=\s*['"]([^'"]+)['"]`),
- regexp.MustCompile(`(\w+)\s*:\s*['"]([^'"]+)['"]`),
- }
- for _, pattern := range varPatterns {
- for _, match := range pattern.FindAllStringSubmatch(extContent, -1) {
- if len(match) > 2 {
- vars[match[1]] = match[2]
+ for _, matches := range extPattern.FindAllStringSubmatch(content, -1) {
+ if len(matches) > 1 {
+ // Filter commented lines from ext block content
+ var filteredLines []string
+ for _, line := range strings.Split(matches[1], "\n") {
+ trimmed := strings.TrimSpace(line)
+ if !strings.HasPrefix(trimmed, "//") && !strings.HasPrefix(trimmed, "*") {
+ filteredLines = append(filteredLines, line)
+ }
+ }
+ extContent := strings.Join(filteredLines, "\n")
+ // Simple key = 'value' or key: 'value'
+ varPatterns := []*regexp.Regexp{
+ regexp.MustCompile(`(\w+)\s*=\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(\w+)\s*:\s*['"]([^'"]+)['"]`),
+ }
+ for _, pattern := range varPatterns {
+ for _, match := range pattern.FindAllStringSubmatch(extContent, -1) {
+ if len(match) > 2 {
+ vars[match[1]] = match[2]
+ }
}
}
}
@@ -118,7 +151,7 @@ func parseDependencies(content string, variables map[string]string) []models.Pac
}
func extractDependencyStatements(content string) []dependencyStatement {
- startPattern := regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\b`)
+ startPattern := regexp.MustCompile(`(?i)\b(` + configKeywords + `)\b`)
var statements []dependencyStatement
var buffer strings.Builder
active := false
@@ -133,12 +166,17 @@ func extractDependencyStatements(content string) []dependencyStatement {
if !active {
if startPattern.MatchString(line) {
+ // Skip non-Maven dependency references
+ if isProjectReference(line) || isFileReference(line) || isVersionCatalogReference(line) {
+ continue
+ }
active = true
startLine = i + 1
buffer.Reset()
buffer.WriteString(line)
- if dependencyStatementComplete(buffer.String()) {
- statements = append(statements, dependencyStatement{Line: startLine, Text: buffer.String()})
+ normalized := normalizePlatformDependency(buffer.String())
+ if dependencyStatementComplete(normalized) {
+ statements = append(statements, dependencyStatement{Line: startLine, Text: normalized})
active = false
}
}
@@ -147,8 +185,9 @@ func extractDependencyStatements(content string) []dependencyStatement {
buffer.WriteString(" ")
buffer.WriteString(line)
- if dependencyStatementComplete(buffer.String()) {
- statements = append(statements, dependencyStatement{Line: startLine, Text: buffer.String()})
+ normalized := normalizePlatformDependency(buffer.String())
+ if dependencyStatementComplete(normalized) {
+ statements = append(statements, dependencyStatement{Line: startLine, Text: normalized})
active = false
}
}
@@ -157,11 +196,12 @@ func extractDependencyStatements(content string) []dependencyStatement {
}
func dependencyStatementComplete(statement string) bool {
+ kw := configKeywords
patterns := []*regexp.Regexp{
- regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*['"]([^'"\)]+)['"]`),
- regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*\(\s*['"]([^'"\)]+)['"]\s*\)`),
- regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]`),
- regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*\(\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]\s*\)`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*['"]([^'"\)]+)['"]`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*['"]([^'"\)]+)['"]\s*\)`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]\s*\)`),
regexp.MustCompile(`(?i)group\s*[:=]\s*['"]([^'"]+)['"].*name\s*[:=]\s*['"]([^'"]+)['"].*version\s*[:=]\s*['"]([^'"]+)['"]`),
regexp.MustCompile(`(?i)group\s*[:=]\s*[^,\s]+.*name\s*[:=]\s*[^,\s]+.*version\s*[:=]\s*[^,\s]+`),
}
@@ -178,11 +218,12 @@ func dependencyStatementComplete(statement string) bool {
func parseDependencyStatement(statement string, variables map[string]string) []models.Package {
var packages []models.Package
+ kw := configKeywords
patterns := []*regexp.Regexp{
- regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*['"]([^'"\)]+)['"]`),
- regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*\(\s*['"]([^'"\)]+)['"]\s*\)`),
- regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]`),
- regexp.MustCompile(`(?i)\b(implementation|api|compile|compileOnly|runtime|runtimeOnly|testImplementation|testCompile|testRuntimeOnly|androidTestImplementation|annotationProcessor|classpath|kapt)\s*\(\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]\s*\)`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*['"]([^'"\)]+)['"]`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*['"]([^'"\)]+)['"]\s*\)`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]\s*\)`),
}
for _, pattern := range patterns {
@@ -311,3 +352,62 @@ func findLineNumber(content, substr string) int {
}
return strings.Count(content[:index], "\n") + 1
}
+
+// parsePropertiesInto reads key=value lines from content into vars, skipping lines that start with # or contain no = sign; keys already present in vars are kept (first definition wins).
+func parsePropertiesInto(content string, vars map[string]string) {
+ for _, line := range strings.Split(content, "\n") {
+ line = strings.TrimSpace(line)
+ if strings.Contains(line, "=") && !strings.HasPrefix(line, "#") {
+ parts := strings.SplitN(line, "=", 2) // split at the first = only, so a value may itself contain =
+ if len(parts) == 2 {
+ key := strings.TrimSpace(parts[0])
+ if _, exists := vars[key]; !exists { // never overwrite a key that an earlier source already set
+ vars[key] = strings.TrimSpace(parts[1])
+ }
+ }
+ }
+ }
+}
+
+// findProjectRoot walks up from dir and returns the first directory that contains settings.gradle or settings.gradle.kts; if none is found it returns dir unchanged.
+func findProjectRoot(dir string) string {
+ current := dir
+ for {
+ if _, err := os.Stat(filepath.Join(current, "settings.gradle")); err == nil {
+ return current
+ }
+ if _, err := os.Stat(filepath.Join(current, "settings.gradle.kts")); err == nil {
+ return current
+ }
+ parent := filepath.Dir(current)
+ if parent == current { // Dir no longer changes the path, so there is no parent left to check
+ break
+ }
+ current = parent
+ }
+ return dir // no settings file found anywhere above: fall back to the starting directory
+}
+
+// isProjectReference reports whether statement contains a configuration keyword followed, optionally through an opening parenthesis, by a project( call (case-insensitive).
+func isProjectReference(statement string) bool {
+ pattern := regexp.MustCompile(`(?i)\b(?:` + configKeywords + `)\s*(?:\(\s*)?project\s*\(`) // NOTE(review): recompiled on every call; could be hoisted to package scope
+ return pattern.MatchString(statement)
+}
+
+// isFileReference reports whether statement contains a configuration keyword followed, optionally through an opening parenthesis, by a files( or fileTree( call (case-insensitive).
+func isFileReference(statement string) bool {
+ pattern := regexp.MustCompile(`(?i)\b(?:` + configKeywords + `)\s*(?:\(\s*)?(?:files|fileTree)\s*\(`) // NOTE(review): recompiled on every call; could be hoisted to package scope
+ return pattern.MatchString(statement)
+}
+
+// isVersionCatalogReference reports whether statement contains a configuration keyword followed, optionally through an opening parenthesis, by an accessor starting with libs. (case-insensitive).
+func isVersionCatalogReference(statement string) bool {
+ pattern := regexp.MustCompile(`(?i)\b(?:` + configKeywords + `)\s*(?:\(\s*)?libs\.`) // NOTE(review): recompiled on every call; could be hoisted to package scope
+ return pattern.MatchString(statement)
+}
+
+// normalizePlatformDependency replaces every platform(...) or enforcedPlatform(...) call that wraps a single quoted string with just that quoted string; other text is returned unchanged.
+func normalizePlatformDependency(statement string) string {
+ pattern := regexp.MustCompile(`\b(?:platform|enforcedPlatform)\s*\(\s*(['"][^'"]+['"])\s*\)`) // group 1 captures the quoted argument, quotes included
+ return pattern.ReplaceAllString(statement, "$1") // $1 expands to capture group 1, dropping the wrapper call
+}
diff --git a/internal/parsers/gradle/gradle_parser_test.go b/internal/parsers/gradle/gradle_parser_test.go
index 04e186d..f8d48a6 100644
--- a/internal/parsers/gradle/gradle_parser_test.go
+++ b/internal/parsers/gradle/gradle_parser_test.go
@@ -262,7 +262,467 @@ func TestGradleParser_ParseFile(t *testing.T) {
t.Errorf("Package name is empty")
}
if pkg.Version == "" {
- t.Errorf("Version is empty")
+ t.Errorf("Version is empty for %s", pkg.PackageName)
+ }
+ }
+}
+
+// TestGradleParser_ParseFile_NoProjectReferences ensures the fixture's project(':name') references are never reported as packages.
+func TestGradleParser_ParseFile_NoProjectReferences(t *testing.T) {
+	pkgs, err := (&GradleParser{}).Parse(filepath.Join("..", "..", "..", "test", "resources", "build.gradle"))
+	if err != nil {
+		t.Fatalf("Failed to parse build.gradle: %v", err)
+	}
+	for _, pkg := range pkgs {
+		switch pkg.PackageName {
+		case ":core", ":app", ":security":
+			t.Errorf("Project reference should not be extracted as a package: %s", pkg.PackageName)
+		}
+	}
+}
+
+// TestGradleParser_ParseFile_VariableResolution checks that spring-boot-starter-web in the fixture is reported with version 2.5.0.
+func TestGradleParser_ParseFile_VariableResolution(t *testing.T) {
+	pkgs, err := (&GradleParser{}).Parse(filepath.Join("..", "..", "..", "test", "resources", "build.gradle"))
+	if err != nil {
+		t.Fatalf("Failed to parse build.gradle: %v", err)
+	}
+	for _, pkg := range pkgs {
+		if pkg.PackageName != "org.springframework.boot:spring-boot-starter-web" {
+			continue
+		}
+		if pkg.Version != "2.5.0" {
+			t.Errorf("Expected spring-boot-starter-web version '2.5.0', got '%s'", pkg.Version)
+		}
+		return
+	}
+	t.Errorf("Expected to find org.springframework.boot:spring-boot-starter-web in packages")
+}
+
+// TestGradleParser_ProjectReferencesSkipped verifies that project(...) dependencies are dropped while external coordinates are kept.
+func TestGradleParser_ProjectReferencesSkipped(t *testing.T) {
+	content := `dependencies {
+    implementation project(':core')
+    implementation(project(':lib'))
+    implementation 'org.apache.commons:commons-lang3:3.8'
+    api project(":shared")
+}`
+	// Write the manifest into a directory owned by this test: t.TempDir starts
+	// empty and is removed automatically, and a failed write now aborts the test
+	// instead of letting the parser read an empty file.
+	buildFile := filepath.Join(t.TempDir(), "build.gradle")
+	if err := os.WriteFile(buildFile, []byte(content), 0644); err != nil {
+		t.Fatalf("Failed to write build.gradle: %v", err)
+	}
+
+	parser := &GradleParser{}
+	pkgs, err := parser.Parse(buildFile)
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	if len(pkgs) != 1 {
+		t.Fatalf("Expected 1 package, got %d: %+v", len(pkgs), pkgs)
+	}
+	if pkgs[0].PackageName != "org.apache.commons:commons-lang3" {
+		t.Errorf("Expected commons-lang3, got %s", pkgs[0].PackageName)
+	}
+}
+
+// TestGradleParser_PlatformDependencies verifies that platform()/enforcedPlatform() BOM coordinates are extracted like plain ones.
+func TestGradleParser_PlatformDependencies(t *testing.T) {
+	content := `dependencies {
+    implementation platform('org.springframework.boot:spring-boot-dependencies:2.5.0')
+    implementation enforcedPlatform('com.google.cloud:libraries-bom:26.1.0')
+    implementation(platform("org.junit:junit-bom:5.9.0"))
+    implementation 'org.springframework:spring-core:5.3.0'
+}`
+	// Write the manifest into a directory owned by this test: t.TempDir starts
+	// empty and is removed automatically, and a failed write now aborts the test
+	// instead of letting the parser read an empty file.
+	buildFile := filepath.Join(t.TempDir(), "build.gradle")
+	if err := os.WriteFile(buildFile, []byte(content), 0644); err != nil {
+		t.Fatalf("Failed to write build.gradle: %v", err)
+	}
+
+	parser := &GradleParser{}
+	pkgs, err := parser.Parse(buildFile)
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	expectedPkgs := map[string]string{
+		"org.springframework.boot:spring-boot-dependencies": "2.5.0",
+		"com.google.cloud:libraries-bom":                    "26.1.0",
+		"org.junit:junit-bom":                               "5.9.0",
+		"org.springframework:spring-core":                   "5.3.0",
+	}
+
+	if len(pkgs) != len(expectedPkgs) {
+		t.Fatalf("Expected %d packages, got %d: %+v", len(expectedPkgs), len(pkgs), pkgs)
+	}
+
+	for _, pkg := range pkgs {
+		expectedVersion, ok := expectedPkgs[pkg.PackageName]
+		if !ok {
+			t.Errorf("Unexpected package: %s", pkg.PackageName)
+			continue
+		}
+		if pkg.Version != expectedVersion {
+			t.Errorf("Package %s: expected version %s, got %s", pkg.PackageName, expectedVersion, pkg.Version)
+		}
+	}
+}
+
+// TestGradleParser_FileReferencesSkipped verifies that files()/fileTree() dependencies are dropped while external coordinates are kept.
+func TestGradleParser_FileReferencesSkipped(t *testing.T) {
+	content := `dependencies {
+    implementation files('libs/local.jar')
+    implementation fileTree(dir: 'libs', include: ['*.jar'])
+    implementation 'org.apache.commons:commons-lang3:3.8'
+}`
+	// Write the manifest into a directory owned by this test: t.TempDir starts
+	// empty and is removed automatically, and a failed write now aborts the test
+	// instead of letting the parser read an empty file.
+	buildFile := filepath.Join(t.TempDir(), "build.gradle")
+	if err := os.WriteFile(buildFile, []byte(content), 0644); err != nil {
+		t.Fatalf("Failed to write build.gradle: %v", err)
+	}
+
+	parser := &GradleParser{}
+	pkgs, err := parser.Parse(buildFile)
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	if len(pkgs) != 1 {
+		t.Fatalf("Expected 1 package, got %d: %+v", len(pkgs), pkgs)
+	}
+	if pkgs[0].PackageName != "org.apache.commons:commons-lang3" {
+		t.Errorf("Expected commons-lang3, got %s", pkgs[0].PackageName)
+	}
+}
+
+// TestGradleParser_ExtendedConfigurations verifies that less common configurations (ksp, lintChecks, ...) are parsed, in source order.
+func TestGradleParser_ExtendedConfigurations(t *testing.T) {
+	content := `dependencies {
+    debugImplementation 'com.facebook.stetho:stetho:1.6.0'
+    releaseImplementation 'com.google.firebase:firebase-crashlytics:18.0.0'
+    ksp 'com.google.dagger:dagger-compiler:2.44'
+    compileOnlyApi 'org.projectlombok:lombok:1.18.24'
+    testCompileOnly 'org.mockito:mockito-core:4.0.0'
+    lintChecks 'com.android.tools.lint:lint-checks:30.0.0'
+}`
+	// Write the manifest into a directory owned by this test: t.TempDir starts
+	// empty and is removed automatically, and a failed write now aborts the test
+	// instead of letting the parser read an empty file.
+	buildFile := filepath.Join(t.TempDir(), "build.gradle")
+	if err := os.WriteFile(buildFile, []byte(content), 0644); err != nil {
+		t.Fatalf("Failed to write build.gradle: %v", err)
+	}
+
+	parser := &GradleParser{}
+	pkgs, err := parser.Parse(buildFile)
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	expectedNames := []string{
+		"com.facebook.stetho:stetho",
+		"com.google.firebase:firebase-crashlytics",
+		"com.google.dagger:dagger-compiler",
+		"org.projectlombok:lombok",
+		"org.mockito:mockito-core",
+		"com.android.tools.lint:lint-checks",
+	}
+
+	if len(pkgs) != len(expectedNames) {
+		t.Fatalf("Expected %d packages, got %d: %+v", len(expectedNames), len(pkgs), pkgs)
+	}
+
+	for i, pkg := range pkgs {
+		if pkg.PackageName != expectedNames[i] {
+			t.Errorf("Package %d: expected %s, got %s", i, expectedNames[i], pkg.PackageName)
+		}
+	}
+}
+
+// TestGradleParser_CommentedExtBlocksIgnored verifies that variables are taken from the live ext block, not a commented-out one.
+func TestGradleParser_CommentedExtBlocksIgnored(t *testing.T) {
+	content := `
+// ext {
+//     badVar = '0.0.0'
+// }
+
+ext {
+    goodVar = '1.0.0'
+}
+
+dependencies {
+    implementation "org.example:lib:$goodVar"
+}`
+	// Write the manifest into a directory owned by this test: t.TempDir starts
+	// empty and is removed automatically, and a failed write now aborts the test
+	// instead of letting the parser read an empty file.
+	buildFile := filepath.Join(t.TempDir(), "build.gradle")
+	if err := os.WriteFile(buildFile, []byte(content), 0644); err != nil {
+		t.Fatalf("Failed to write build.gradle: %v", err)
+	}
+
+	parser := &GradleParser{}
+	pkgs, err := parser.Parse(buildFile)
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	if len(pkgs) != 1 {
+		t.Fatalf("Expected 1 package, got %d: %+v", len(pkgs), pkgs)
+	}
+	if pkgs[0].Version != "1.0.0" {
+		t.Errorf("Expected version '1.0.0' from non-commented ext block, got '%s'", pkgs[0].Version)
+	}
+}
+
+func TestGradleParser_ParentGradleProperties(t *testing.T) {
+	// Create a directory structure: parent/child/ (t.TempDir removes it when the test ends)
+	parentDir := t.TempDir()
+	childDir := filepath.Join(parentDir, "child")
+	if err := os.Mkdir(childDir, 0755); err != nil {
+		t.Fatalf("Failed to create child dir: %v", err)
+	}
+	writeFile := func(path, data string) {
+		if err := os.WriteFile(path, []byte(data), 0644); err != nil {
+			t.Fatalf("Failed to write %s: %v", path, err)
+		}
+	}
+
+	// Create settings.gradle in parent to mark it as project root
+	writeFile(filepath.Join(parentDir, "settings.gradle"), "include ':child'")
+	// Create parent gradle.properties
+	writeFile(filepath.Join(parentDir, "gradle.properties"), "parentVersion=3.0.0\nsharedVersion=1.0.0")
+	// Create child gradle.properties (overrides sharedVersion)
+	writeFile(filepath.Join(childDir, "gradle.properties"), "sharedVersion=2.0.0")
+
+	// Create child build.gradle
+	buildContent := `dependencies {
+    implementation "org.example:parent-lib:$parentVersion"
+    implementation "org.example:shared-lib:$sharedVersion"
+}`
+	buildFile := filepath.Join(childDir, "build.gradle")
+	writeFile(buildFile, buildContent)
+
+	parser := &GradleParser{}
+	pkgs, err := parser.Parse(buildFile)
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	if len(pkgs) != 2 {
+		t.Fatalf("Expected 2 packages, got %d: %+v", len(pkgs), pkgs)
+	}
+
+	// Parent property should be resolved
+	if pkgs[0].Version != "3.0.0" {
+		t.Errorf("Expected parent-lib version '3.0.0', got '%s'", pkgs[0].Version)
+	}
+	// Child property should take precedence over parent
+	if pkgs[1].Version != "2.0.0" {
+		t.Errorf("Expected shared-lib version '2.0.0' (child overrides parent), got '%s'", pkgs[1].Version)
+	}
+}
+
+// TestVersionCatalog_Parse checks [versions] parsing and every supported [libraries] notation, including version.ref resolution.
+func TestVersionCatalog_Parse(t *testing.T) {
+	catalogContent := `[versions]
+spring = "5.3.0"
+guava = "30.1-jre"
+
+[libraries]
+spring-core = { module = "org.springframework:spring-core", version.ref = "spring" }
+spring-web = { module = "org.springframework:spring-web", version = "5.2.0" }
+guava = "com.google.guava:guava:30.1-jre"
+commons = { group = "org.apache.commons", name = "commons-lang3", version.ref = "spring" }
+`
+	// Write the catalog into a directory owned by this test: t.TempDir is removed
+	// automatically, and a failed write now aborts the test instead of letting
+	// parseVersionCatalog read an empty file.
+	catalogPath := filepath.Join(t.TempDir(), "libs.versions.toml")
+	if err := os.WriteFile(catalogPath, []byte(catalogContent), 0644); err != nil {
+		t.Fatalf("Failed to write libs.versions.toml: %v", err)
+	}
+
+	catalog := parseVersionCatalog(catalogPath)
+	if catalog == nil {
+		t.Fatalf("Failed to parse version catalog")
+	}
+
+	// Check versions
+	if catalog.Versions["spring"] != "5.3.0" {
+		t.Errorf("Expected spring version '5.3.0', got '%s'", catalog.Versions["spring"])
+	}
+	if catalog.Versions["guava"] != "30.1-jre" {
+		t.Errorf("Expected guava version '30.1-jre', got '%s'", catalog.Versions["guava"])
+	}
+
+	// Check libraries
+	tests := []struct {
+		key     string
+		group   string
+		name    string
+		version string
+	}{
+		{"spring-core", "org.springframework", "spring-core", "5.3.0"},
+		{"spring-web", "org.springframework", "spring-web", "5.2.0"},
+		{"guava", "com.google.guava", "guava", "30.1-jre"},
+		{"commons", "org.apache.commons", "commons-lang3", "5.3.0"},
+	}
+
+	for _, tt := range tests {
+		lib, ok := catalog.Libraries[tt.key]
+		if !ok {
+			t.Errorf("Library '%s' not found in catalog", tt.key)
+			continue
+		}
+		if lib.Group != tt.group {
+			t.Errorf("Library '%s': expected group '%s', got '%s'", tt.key, tt.group, lib.Group)
+		}
+		if lib.Name != tt.name {
+			t.Errorf("Library '%s': expected name '%s', got '%s'", tt.key, tt.name, lib.Name)
+		}
+		if lib.Version != tt.version {
+			t.Errorf("Library '%s': expected version '%s', got '%s'", tt.key, tt.version, lib.Version)
+		}
+	}
+}
+
+func TestVersionCatalog_DependencyResolution(t *testing.T) {
+	// Create directory structure with version catalog (t.TempDir removes it when the test ends)
+	projectDir := t.TempDir()
+	gradleDir := filepath.Join(projectDir, "gradle")
+	if err := os.Mkdir(gradleDir, 0755); err != nil {
+		t.Fatalf("Failed to create gradle dir: %v", err)
+	}
+	writeFile := func(path, data string) {
+		if err := os.WriteFile(path, []byte(data), 0644); err != nil {
+			t.Fatalf("Failed to write %s: %v", path, err)
+		}
+	}
+	// Create settings.gradle to mark project root
+	writeFile(filepath.Join(projectDir, "settings.gradle"), "")
+	// Create version catalog
+	catalogContent := `[versions]
+spring = "5.3.0"
+
+[libraries]
+spring-core = { module = "org.springframework:spring-core", version.ref = "spring" }
+guava = "com.google.guava:guava:30.1-jre"
+`
+	writeFile(filepath.Join(gradleDir, "libs.versions.toml"), catalogContent)
+
+	// Create build.gradle with catalog references
+	buildContent := `dependencies {
+    implementation libs.spring.core
+    implementation(libs.guava)
+    implementation 'org.direct:dependency:1.0.0'
+}`
+	buildFile := filepath.Join(projectDir, "build.gradle")
+	writeFile(buildFile, buildContent)
+
+	parser := &GradleParser{}
+	pkgs, err := parser.Parse(buildFile)
+	if err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	expectedPkgs := map[string]string{
+		"org.direct:dependency":           "1.0.0",
+		"org.springframework:spring-core": "5.3.0",
+		"com.google.guava:guava":          "30.1-jre",
+	}
+
+	if len(pkgs) != len(expectedPkgs) {
+		t.Fatalf("Expected %d packages, got %d: %+v", len(expectedPkgs), len(pkgs), pkgs)
+	}
+
+	for _, pkg := range pkgs {
+		expectedVersion, ok := expectedPkgs[pkg.PackageName]
+		if !ok {
+			t.Errorf("Unexpected package: %s", pkg.PackageName)
+			continue
+		}
+		if pkg.Version != expectedVersion {
+			t.Errorf("Package %s: expected version %s, got %s", pkg.PackageName, expectedVersion, pkg.Version)
+		}
+	}
+}
+
+// TestIsProjectReference exercises isProjectReference with Groovy and Kotlin style statements.
+func TestIsProjectReference(t *testing.T) {
+	cases := []struct {
+		statement string
+		want      bool
+	}{
+		{statement: "implementation project(':core')", want: true},
+		{statement: "implementation(project(':core'))", want: true},
+		{statement: `implementation project(":core")`, want: true},
+		{statement: "api project(':shared')", want: true},
+		{statement: "implementation 'org.example:lib:1.0'", want: false},
+		{statement: `implementation("org.example:lib:1.0")`, want: false},
+	}
+
+	for _, tc := range cases {
+		if got := isProjectReference(tc.statement); got != tc.want {
+			t.Errorf("isProjectReference(%q) = %v, want %v", tc.statement, got, tc.want)
+		}
+	}
+}
+
+// TestIsFileReference exercises isFileReference with files(), fileTree() and a plain coordinate.
+func TestIsFileReference(t *testing.T) {
+	cases := []struct {
+		statement string
+		want      bool
+	}{
+		{statement: "implementation files('libs/local.jar')", want: true},
+		{statement: "implementation fileTree(dir: 'libs', include: ['*.jar'])", want: true},
+		{statement: "implementation(files('libs/local.jar'))", want: true},
+		{statement: "implementation 'org.example:lib:1.0'", want: false},
+	}
+
+	for _, tc := range cases {
+		if got := isFileReference(tc.statement); got != tc.want {
+			t.Errorf("isFileReference(%q) = %v, want %v", tc.statement, got, tc.want)
+		}
+	}
+}
+
+func TestNormalizePlatformDependency(t *testing.T) {
+	tests := []struct {
+		input    string // dependency statement passed to normalizePlatformDependency
+		expected string // statement expected back from the call
+	}{
+		{ // platform() wrapper around a single-quoted coordinate
+			"implementation platform('org.springframework.boot:spring-boot-dependencies:2.5.0')",
+			"implementation 'org.springframework.boot:spring-boot-dependencies:2.5.0'",
+		},
+		{ // enforcedPlatform() wrapper
+			"implementation enforcedPlatform('com.google.cloud:libraries-bom:26.1.0')",
+			"implementation 'com.google.cloud:libraries-bom:26.1.0'",
+		},
+		{ // parenthesised call with a double-quoted coordinate
+			`implementation(platform("org.junit:junit-bom:5.9.0"))`,
+			`implementation("org.junit:junit-bom:5.9.0")`,
+		},
+		{ // no wrapper: the statement must come back unchanged
+			"implementation 'org.example:lib:1.0'",
+			"implementation 'org.example:lib:1.0'",
+		},
+	}
+
+	for _, tt := range tests {
+		result := normalizePlatformDependency(tt.input)
+		if result != tt.expected {
+			t.Errorf("normalizePlatformDependency(%q) = %q, want %q", tt.input, result, tt.expected)
 		}
 	}
 }
diff --git a/internal/parsers/gradle/version_catalog.go b/internal/parsers/gradle/version_catalog.go
new file mode 100644
index 0000000..220c188
--- /dev/null
+++ b/internal/parsers/gradle/version_catalog.go
@@ -0,0 +1,210 @@
+package gradle
+
+import (
+ "os"
+ "path/filepath"
+ "regexp"
+ "strings"
+
+ "github.com/Checkmarx/manifest-parser/pkg/parser/models"
+)
+
+// VersionCatalog represents a parsed Gradle version catalog (libs.versions.toml)
+type VersionCatalog struct {
+	Versions  map[string]string         // entries of the [versions] section, keyed by version name
+	Libraries map[string]CatalogLibrary // entries of the [libraries] section, keyed by library key
+}
+
+// CatalogLibrary represents a library entry in the version catalog
+type CatalogLibrary struct {
+	Group   string // group part of the coordinate (first segment of module, or the group key)
+	Name    string // artifact name (second segment of module, or the name key)
+	Version string // literal version, or "ref:<name>" while a version.ref is still unresolved
+}
+
+// findVersionCatalog returns the path of gradle/libs.versions.toml under manifestFile's project root, or "" if it cannot be stat'ed.
+func findVersionCatalog(manifestFile string) string {
+	root := findProjectRoot(filepath.Dir(manifestFile))
+	candidate := filepath.Join(root, "gradle", "libs.versions.toml")
+	if _, statErr := os.Stat(candidate); statErr != nil {
+		return ""
+	}
+	return candidate
+}
+
+// parseVersionCatalog reads the libs.versions.toml file at path and returns its [versions] and [libraries]
+// tables, or nil when the file cannot be read. A trailing "# comment" outside double quotes is dropped first.
+func parseVersionCatalog(path string) *VersionCatalog {
+	content, err := os.ReadFile(path)
+	if err != nil {
+		return nil
+	}
+	catalog := &VersionCatalog{
+		Versions:  make(map[string]string),
+		Libraries: make(map[string]CatalogLibrary),
+	}
+	sectionPattern := regexp.MustCompile(`^\s*\[(\w+)\]\s*$`)
+	simpleKV := regexp.MustCompile(`^\s*([^\s=]+)\s*=\s*"([^"]+)"\s*$`)
+	currentSection := ""
+	for _, line := range strings.Split(string(content), "\n") {
+		// TOML allows `key = "value" # note`; cut at the first '#' that is not inside double quotes.
+		inQuotes := false
+		for i, r := range line {
+			if r == '"' {
+				inQuotes = !inQuotes
+			} else if r == '#' && !inQuotes {
+				line = line[:i]
+				break
+			}
+		}
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+		if match := sectionPattern.FindStringSubmatch(line); len(match) > 1 {
+			currentSection = match[1]
+			continue
+		}
+		switch currentSection {
+		case "versions":
+			if match := simpleKV.FindStringSubmatch(line); len(match) > 2 {
+				catalog.Versions[match[1]] = match[2]
+			}
+		case "libraries":
+			parseCatalogLibraryEntry(line, catalog)
+		}
+	}
+	// Resolve version.ref references; an unknown reference keeps its "ref:<name>" placeholder.
+	for key, lib := range catalog.Libraries {
+		refName := strings.TrimPrefix(lib.Version, "ref:")
+		if resolved, ok := catalog.Versions[refName]; ok && refName != lib.Version {
+			lib.Version = resolved
+			catalog.Libraries[key] = lib
+		}
+	}
+	return catalog
+}
+
+// parseCatalogLibraryEntry parses a single library line from the version catalog
+func parseCatalogLibraryEntry(line string, catalog *VersionCatalog) {
+ // Pattern: key = "group:name:version"
+ simplePattern := regexp.MustCompile(`^\s*([^\s=]+)\s*=\s*"([^"]+)"\s*$`)
+ if match := simplePattern.FindStringSubmatch(line); len(match) > 2 {
+ parts := strings.Split(match[2], ":")
+ if len(parts) >= 2 {
+ lib := CatalogLibrary{
+ Group: parts[0],
+ Name: parts[1],
+ }
+ if len(parts) >= 3 {
+ lib.Version = parts[2]
+ }
+ catalog.Libraries[match[1]] = lib
+ return
+ }
+ }
+
+ // Pattern: key = { module = "group:name", version.ref = "xxx" }
+ // Pattern: key = { module = "group:name", version = "xxx" }
+ // Pattern: key = { group = "g", name = "n", version.ref = "xxx" }
+ // Pattern: key = { group = "g", name = "n", version = "xxx" }
+ kvPattern := regexp.MustCompile(`^\s*([^\s=]+)\s*=\s*\{(.+)\}\s*$`)
+ if match := kvPattern.FindStringSubmatch(line); len(match) > 2 {
+ key := match[1]
+ body := match[2]
+
+ lib := CatalogLibrary{}
+
+ // Extract module = "group:name"
+ modulePattern := regexp.MustCompile(`module\s*=\s*"([^"]+)"`)
+ if m := modulePattern.FindStringSubmatch(body); len(m) > 1 {
+ parts := strings.Split(m[1], ":")
+ if len(parts) >= 2 {
+ lib.Group = parts[0]
+ lib.Name = parts[1]
+ }
+ }
+
+ // Extract group/name separately
+ groupPattern := regexp.MustCompile(`group\s*=\s*"([^"]+)"`)
+ namePattern := regexp.MustCompile(`name\s*=\s*"([^"]+)"`)
+ if m := groupPattern.FindStringSubmatch(body); len(m) > 1 {
+ lib.Group = m[1]
+ }
+ if m := namePattern.FindStringSubmatch(body); len(m) > 1 {
+ lib.Name = m[1]
+ }
+
+ // Extract version.ref or version
+ versionRefPattern := regexp.MustCompile(`version\.ref\s*=\s*"([^"]+)"`)
+ versionPattern := regexp.MustCompile(`(?:^|[^.])version\s*=\s*"([^"]+)"`)
+ if m := versionRefPattern.FindStringSubmatch(body); len(m) > 1 {
+ lib.Version = "ref:" + m[1]
+ } else if m := versionPattern.FindStringSubmatch(body); len(m) > 1 {
+ lib.Version = m[1]
+ }
+
+ if lib.Group != "" && lib.Name != "" {
+ catalog.Libraries[key] = lib
+ }
+ }
+}
+
+// catalogKeyToDependency resolves a version catalog accessor (e.g., "spring.core") to a library entry, or nil if none matches.
+// Gradle treats '-', '_' and '.' in catalog aliases as the same separator, so spring-core, spring_core and spring.core all match.
+func catalogKeyToDependency(ref string, catalog *VersionCatalog) *CatalogLibrary {
+	if catalog == nil {
+		return nil
+	}
+	if lib, ok := catalog.Libraries[strings.ReplaceAll(ref, ".", "-")]; ok {
+		return &lib // fast path: the recommended dashed spelling
+	}
+	for key, lib := range catalog.Libraries {
+		if strings.ReplaceAll(strings.ReplaceAll(key, "-", "."), "_", ".") == ref {
+			return &lib
+		}
+	}
+
+	return nil
+}
+
+// parseVersionCatalogDependencies scans content line by line for dependency
+// declarations that use a libs.<accessor> reference and returns one package per
+// accessor that catalogKeyToDependency resolves to a library with both a group
+// and a name. Blank lines and lines starting with "//" are skipped, and a nil
+// catalog yields nil. Line numbers in Locations are 1-based.
+func parseVersionCatalogDependencies(content string, catalog *VersionCatalog) []models.Package {
+	if catalog == nil {
+		return nil
+	}
+
+	// Match patterns like:
+	//   implementation(libs.spring.core)
+	//   implementation libs.spring.core
+	accessorRe := regexp.MustCompile(`(?i)\b(` + configKeywords + `)\s*(?:\(\s*)?libs\.([a-zA-Z0-9.]+)\s*\)?`)
+
+	var found []models.Package
+	for idx, rawLine := range strings.Split(content, "\n") {
+		trimmed := strings.TrimSpace(rawLine)
+		if trimmed == "" || strings.HasPrefix(trimmed, "//") {
+			continue
+		}
+		for _, groups := range accessorRe.FindAllStringSubmatch(trimmed, -1) {
+			if len(groups) <= 2 {
+				continue
+			}
+			lib := catalogKeyToDependency(groups[2], catalog)
+			if lib == nil || lib.Group == "" || lib.Name == "" {
+				continue
+			}
+			found = append(found, models.Package{
+				PackageManager: "gradle",
+				PackageName:    lib.Group + ":" + lib.Name,
+				Version:        lib.Version,
+				Locations:      []models.Location{{Line: idx + 1}},
+			})
+		}
+	}
+
+	return found
+}
diff --git a/test/resources/GRADLE_TEST_FILES_README.md b/test/resources/GRADLE_TEST_FILES_README.md
new file mode 100644
index 0000000..26e7bd2
--- /dev/null
+++ b/test/resources/GRADLE_TEST_FILES_README.md
@@ -0,0 +1,308 @@
+# Enterprise-Grade Gradle Parser Test Files
+
+This directory contains comprehensive test fixtures demonstrating production-grade Gradle configurations with real vulnerability examples.
+
+## Files Overview
+
+### 1. `build.gradle` (3.1 KB)
+**Groovy DSL Format** - Original multi-module project configuration
+
+**Features Demonstrated:**
+- ✅ Groovy syntax dependency declarations
+- ✅ `subprojects` block for shared configuration
+- ✅ Module-specific `project(':name')` blocks
+- ✅ Extended `ext` blocks for version management
+- ✅ Comments and security annotations
+- ✅ Jacoco, Checkstyle, and SpringBoot plugins
+
+**Dependencies Parsed:** 15 packages with full version info
+
+**Vulnerabilities Included:**
+- 🔴 **CRITICAL:** Log4Shell (log4j-core:2.14.0)
+- 🔴 **CRITICAL:** Commons Collections RCE (commons-collections:3.2.1)
+- 🔥 **HIGH:** Spring Framework XXE (spring-web:5.2.0.RELEASE)
+- 🔥 **HIGH:** Jackson RCE (jackson-databind:2.9.8)
+- 🔥 **HIGH:** Hibernate SQL Injection (hibernate-core:5.4.0.Final)
+
+---
+
+### 2. `build.gradle.kts` (13.5 KB)
+**Kotlin DSL Format** - Advanced multi-module enterprise configuration
+
+**Features Demonstrated:**
+- ✅ Kotlin DSL syntax `implementation("...")`
+- ✅ Kotlin `val` variable declarations with type inference
+- ✅ `dependencyManagement` with BOM imports
+- ✅ `platform()` wrapper for dependency BOMs
+- ✅ Extended dependency configurations: `debugImplementation`, `releaseImplementation`, `ksp`, `compileOnlyApi`
+- ✅ `configure()` scoped configuration for select modules
+- ✅ Custom tasks and build info
+- ✅ SonarQube integration
+
+**Module Breakdown:**
+- **`:core-api`** - Shared business logic (Spring Boot + Hibernate)
+- **`:security-module`** - Authentication/Authorization (Spring Security + JWT)
+- **`:data-module`** - Database layer (JPA + Hibernate + Liquibase)
+- **`:api-gateway`** - External integrations (Spring Cloud Gateway)
+- **`:monitoring-module`** - Observability (Actuator + Micrometer + Prometheus)
+
+**Dependencies Parsed:** 40+ packages including BOM references
+
+**Extended Configurations:**
+- `debugImplementation` - Facebook Stetho for Android debugging
+- `releaseImplementation` - Firebase Crashlytics & Analytics
+- `ksp` - Dagger compiler for dependency injection code generation
+- `annotationProcessor` - Lombok for boilerplate generation
+- `testImplementation` - JUnit, Mockito, AssertJ
+
+**Vulnerabilities Included:**
+- 🔴 **CRITICAL:** Log4j RCE (2.14.0, 2.17.1)
+- 🔴 **CRITICAL:** Commons Collections (3.2.1, 3.2.2)
+- 🔥 **HIGH:** Spring Core RCE (5.2.0.RELEASE)
+- 🔥 **HIGH:** Spring Security XXE (5.4.0, 5.7.1)
+- 🔥 **HIGH:** Jackson Databind (2.9.8, 2.13.3)
+- 🔥 **HIGH:** XStream Deserialization (1.4.17)
+- 🔥 **HIGH:** Hibernate SQLi (5.4.0.Final, 5.6.10.Final)
+- ⚠️ **MEDIUM:** HttpClient DoS (4.5.5, 4.5.13)
+- ⚠️ **MEDIUM:** Guava Overflow (23.0, 31.1-jre)
+- ⚠️ **MEDIUM:** Logback (1.2.3, 1.2.11)
+- ⚠️ **MEDIUM:** Tomcat Ghostcat (9.0.10)
+- 🟡 **LOW:** Commons Codec (1.14, 1.15)
+- 🟡 **LOW:** Jetty Path Traversal (9.4.38)
+- 🟡 **LOW:** MySQL Legacy (5.1.40)
+
+---
+
+### 3. `gradle.properties` (2.0 KB)
+**Centralized Configuration** - Shared across all modules
+
+**Sections:**
+1. **Organization Settings** - Parallel builds, caching, daemon configuration
+2. **Java Version** - Version 11 target with toolchain config
+3. **Framework Versions** - Spring, Hibernate, Jackson versions
+4. **Logging Versions** - Log4j, SLF4J, Logback versions
+5. **Apache Commons** - Commons Lang3, Codec, Collections, HttpClient
+6. **Database Drivers** - MySQL, PostgreSQL, H2 versions
+7. **JSON/XML Processing** - Guava, Gson, XStream versions
+8. **Testing Frameworks** - JUnit, Mockito, AssertJ, TestNG versions
+9. **Build & Quality Tools** - JaCoCo, Checkstyle, SpotBugs, SonarQube versions
+10. **Google Cloud** - BOM version for GCP integration
+
+**Features Demonstrated:**
+- ✅ Property name conventions (camelCase with Version suffix)
+- ✅ Comments and section organization
+- ✅ Version pinning for reproducible builds
+- ✅ Easy centralized updates across modules
+- ✅ Used by both `build.gradle` and `build.gradle.kts` files
+
+**Example Usage:**
+```gradle
+// In build.gradle
+implementation "org.springframework:spring-core:${springVersion}"
+
+// In build.gradle.kts
+implementation("org.springframework:spring-core:${property("springVersion")}")
+```
+
+---
+
+### 4. `gradle/libs.versions.toml` (9.7 KB)
+**Version Catalog** - Modern dependency management (Gradle 7.0+)
+
+**Format:** TOML with three sections:
+1. **`[versions]`** - Centralized version definitions
+2. **`[libraries]`** - Library references with version links
+3. **`[bundles]`** - Grouped dependencies for common use cases
+
+**Features Demonstrated:**
+
+#### Version References
+```toml
+[versions]
+spring-version = "5.3.20"
+spring-boot-version = "2.7.0"
+
+[libraries]
+spring-core = { module = "org.springframework:spring-core", version.ref = "spring-version" }
+spring-boot-web = { module = "org.springframework.boot:spring-boot-starter-web", version.ref = "spring-boot-version" }
+```
+
+#### Simple Inline Format
+```toml
+[libraries]
+guava = "com.google.guava:guava:31.1-jre"
+```
+
+#### Key-Value Map Format
+```toml
+[libraries]
+hibernate = { module = "org.hibernate:hibernate-core", version.ref = "hibernate-version" }
+h2 = { module = "com.h2database:h2", version.ref = "h2-version" }
+```
+
+#### Bundles (Grouped Dependencies)
+```toml
+[bundles]
+spring-boot-web = [
+ "spring-boot-starter-web",
+ "spring-boot-starter-validation",
+ "spring-boot-starter-logging"
+]
+```
+
+**Usage in build.gradle.kts:**
+```kotlin
+dependencies {
+ implementation(libs.spring.core)
+ implementation(libs.spring.boot.web)
+ testImplementation(libs.bundles.testing)
+}
+```
+
+**80+ Dependencies Catalogued:**
+- Spring Framework (13 entries)
+- Spring Boot Starters (6 entries)
+- Spring Cloud (2 entries)
+- Logging (4 entries)
+- Database/ORM (7 entries)
+- JSON/XML (5 entries)
+- Apache Commons (4 entries)
+- Testing (3 entries)
+- Android/Debug (2 entries)
+- API Documentation (2 entries)
+- Kotlin/Coroutines (3 entries)
+
+**Vulnerabilities in Catalog:**
+All known CVE versions are explicitly catalogued with comments marking severity:
+- `log4j-core` - CVE-2021-44228 (Log4Shell RCE)
+- `commons-collections` - CVE-2015-4852 (Deserialization)
+- `jackson-databind` - CVE-2020-5410 (Polymorphic RCE)
+- `xstream` - CVE-2019-12384 (XXE)
+- `httpclient` - CVE-2019-9740 (DoS)
+
+---
+
+## Parser Capabilities Tested
+
+### Feature Coverage
+
+| Feature | Status | Example |
+|---------|--------|---------|
+| Groovy DSL | ✅ | `implementation 'group:artifact:version'` |
+| Kotlin DSL | ✅ | `implementation("group:artifact:version")` |
+| gradle.properties | ✅ | `implementation "org:lib:${springVersion}"` |
+| Version Catalog | ✅ | `implementation(libs.spring.core)` |
+| Platform/BOM | ✅ | `implementation(platform('...'))` |
+| Extended Configs | ✅ | `debugImplementation`, `ksp`, `releaseImplementation` |
+| Multi-line Deps | ✅ | Dependencies spanning multiple lines |
+| Conditional Deps | ✅ | Dependencies inside `if` blocks |
+| Project References | ✅ (Skipped) | `implementation project(':core')` |
+| File References | ✅ (Skipped) | `implementation files('libs/*.jar')` |
+| BOM Imports | ✅ | `dependencyManagement.imports.mavenBom(...)` |
+| Variable Resolution | ✅ | `${propertyName}` and `$varName` |
+| Commented Code | ✅ | Properly ignores commented declarations |
+
+### Vulnerability Detection
+
+The test files contain **31 vulnerable dependencies** across severity levels:
+
+```
+🔴 CRITICAL: 7 packages (Log4j, Commons Collections, Spring, Jackson, XStream)
+🔥 HIGH: 8 packages (Spring Security, HttpClient, Hibernate, Guava, Logback)
+⚠️ MEDIUM: 8 packages (Tomcat, Commons Codec, Jetty)
+🟡 LOW: 8 packages (Legacy MySQL, Deprecated versions)
+```
+
+### Supported Dependency Configurations
+
+All 18+ Gradle dependency configurations:
+- `implementation`, `api`, `compile`, `compileOnly`
+- `runtime`, `runtimeOnly`
+- `testImplementation`, `testCompile`, `testCompileOnly`, `testRuntimeOnly`
+- `debugImplementation`, `releaseImplementation`
+- `annotationProcessor`, `classpath`, `kapt`, `ksp`
+- `compileOnlyApi`, `testFixturesImplementation`, `testFixturesApi`
+- `lintChecks`
+
+---
+
+## Test Execution
+
+### Run Gradle Parser Tests
+```bash
+cd c:/repository/manifest-parser
+go test ./internal/parsers/gradle/ -v
+```
+
+### Parse Individual Files
+```bash
+# Groovy DSL
+go run cmd/main.go test/resources/build.gradle
+
+# Kotlin DSL
+go run cmd/main.go test/resources/build.gradle.kts
+
+# With version catalog
+go run cmd/main.go test/resources/build.gradle.kts
+# Parser automatically discovers gradle/libs.versions.toml
+```
+
+### Expected Output
+```json
+[
+ {
+ "packageManager": "gradle",
+ "packageName": "org.apache.logging.log4j:log4j-core",
+ "version": "2.14.0",
+ "filePath": "test/resources/build.gradle"
+ },
+ {
+ "packageManager": "gradle",
+ "packageName": "org.springframework:spring-core",
+ "version": "5.2.0.RELEASE",
+ "filePath": "test/resources/build.gradle"
+ },
+ ...
+]
+```
+
+---
+
+## Security Notes
+
+⚠️ **IMPORTANT:** These test files contain intentionally vulnerable dependency versions for testing purposes.
+
+**DO NOT USE IN PRODUCTION** without:
+1. Updating all CRITICAL and HIGH severity packages
+2. Upgrading to patched versions
+3. Running security audits
+4. Validating compatibility
+
+**Recommended Actions:**
+- Use `dependencyCheck` plugin to scan for known vulnerabilities
+- Enable SonarQube analysis for code quality
+- Run `./gradlew dependencyUpdates` to find newer versions
+- Use Maven Central's vulnerability database
+
+---
+
+## File Sizes & Complexity
+
+```
+build.gradle 3.1 KB (15 dependencies)
+build.gradle.kts 13.5 KB (40+ dependencies)
+gradle.properties 2.0 KB (40+ property definitions)
+gradle/libs.versions.toml 9.7 KB (80+ catalog entries)
+─────────────────────────────────────────────────────────
+TOTAL 28.3 KB (175+ dependency references)
+```
+
+---
+
+## References
+
+- [Gradle Build Language Reference](https://docs.gradle.org/current/userguide/declaring_dependencies.html)
+- [Gradle Version Catalogs](https://docs.gradle.org/current/userguide/platforms.html)
+- [Spring Boot Version Reference](https://spring.io/projects/spring-boot/releases/)
+- [NIST CVE Database](https://nvd.nist.gov/vuln)
+- [Gradle Dependency Check Plugin](https://plugins.gradle.org/plugin/com.github.dependency-check.gradle)
diff --git a/test/resources/build.gradle b/test/resources/build.gradle
index 6e95dd1..095d0e0 100644
--- a/test/resources/build.gradle
+++ b/test/resources/build.gradle
@@ -1,31 +1,122 @@
plugins {
id 'java'
+ id 'application'
+ id 'jacoco'
+ id 'checkstyle'
+ id 'org.springframework.boot' version '2.5.0' apply false
+ id 'io.spring.dependency-management' version '1.0.11.RELEASE'
}
-ext {
- springVersion = '5.3.0'
- guavaVersion = '30.1-jre'
-}
+group = 'com.example.securitytest'
+version = '1.0.0'
-group 'com.example'
-version '1.0-SNAPSHOT'
+java {
+ toolchain {
+ languageVersion = JavaLanguageVersion.of(11)
+ }
+}
repositories {
mavenCentral()
}
-buildscript {
+ext {
+ springBootVersion = '2.5.0'
+}
+
+subprojects {
+ apply plugin: 'java'
+ apply plugin: 'jacoco'
+
repositories {
mavenCentral()
}
+
dependencies {
- classpath 'com.android.tools.build:gradle:7.0.0'
+
+ // =========================
+ // 🔴 CRITICAL vulnerabilities
+ // =========================
+ implementation 'org.apache.logging.log4j:log4j-core:2.14.0' // Log4Shell
+ implementation 'commons-collections:commons-collections:3.2.1' // deserialization vuln
+
+ // =========================
+ // 🔥 HIGH vulnerabilities
+ // =========================
+ implementation 'org.springframework:spring-web:5.2.0.RELEASE'
+ implementation 'com.fasterxml.jackson.core:jackson-databind:2.9.8'
+ implementation 'org.hibernate:hibernate-core:5.4.0.Final'
+
+ // =========================
+ // ⚠️ MEDIUM vulnerabilities
+ // =========================
+ implementation 'org.apache.httpcomponents:httpclient:4.5.5'
+ implementation 'com.google.guava:guava:23.0'
+ implementation 'org.apache.tomcat.embed:tomcat-embed-core:9.0.10'
+
+ // =========================
+ // 🟡 LOW vulnerabilities
+ // =========================
+ implementation 'junit:junit:4.12'
+ implementation 'org.slf4j:slf4j-api:1.7.25'
+ implementation 'ch.qos.logback:logback-classic:1.2.3'
+
+ // =========================
+ // Database
+ // =========================
+ implementation 'mysql:mysql-connector-java:5.1.40'
+
+ // =========================
+ // Testing
+ // =========================
+ testImplementation 'org.mockito:mockito-core:2.23.0'
+ }
+
+ tasks.withType(Test) {
+ useJUnitPlatform()
}
}
-dependencies {
- implementation 'org.springframework:spring-core:5.3.0'
- testImplementation 'junit:junit:4.13'
- api 'com.google.guava:guava:30.1-jre'
- implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0'
+// =========================
+// Application Module Example
+// =========================
+project(':app') {
+ apply plugin: 'org.springframework.boot'
+
+ dependencies {
+ implementation project(':core')
+ implementation "org.springframework.boot:spring-boot-starter-web:${springBootVersion}"
+ }
+}
+
+// =========================
+// Core Module
+// =========================
+project(':core') {
+ dependencies {
+ implementation 'org.apache.commons:commons-lang3:3.8'
+ }
+}
+
+// =========================
+// Security Module
+// =========================
+project(':security') {
+ dependencies {
+ implementation 'org.springframework.security:spring-security-core:5.4.0'
+ }
+}
+
+// =========================
+// Jacoco config
+// =========================
+jacoco {
+ toolVersion = "0.8.7"
+}
+
+tasks.jacocoTestReport {
+ reports {
+ xml.required = true
+ html.required = true
+ }
}
\ No newline at end of file
diff --git a/test/resources/build.gradle.kts b/test/resources/build.gradle.kts
new file mode 100644
index 0000000..df8abd5
--- /dev/null
+++ b/test/resources/build.gradle.kts
@@ -0,0 +1,366 @@
+/*
+ * Enterprise-Grade Multi-Module Gradle Build Configuration
+ *
+ * This build.gradle.kts demonstrates:
+ * - Kotlin DSL dependency declarations
+ * - Variable resolution from gradle.properties
+ * - Platform/BOM dependencies
+ * - Version catalog references (with libs.versions.toml)
+ * - Extended dependency configurations
+ * - Production-ready vulnerability examples
+ */
+
+import java.time.Instant
+
+plugins {
+ kotlin("jvm") version "1.6.21" apply false
+ id("org.springframework.boot") version "2.7.0" apply false
+ id("io.spring.dependency-management") version "1.0.11.RELEASE"
+ id("org.sonarqube") version "3.4.0.2513" apply false
+ id("jacoco")
+ id("checkstyle")
+}
+
+group = "com.enterprise.platform"
+version = "3.1.0"
+
+repositories {
+ mavenCentral()
+ google()
+ maven(url = "https://plugins.gradle.org/m2/")
+}
+
+/**
+ * Configure all subprojects with common settings
+ */
+subprojects {
+ apply(plugin = "java")
+ apply(plugin = "jacoco")
+ apply(plugin = "checkstyle")
+
+ java {
+ sourceCompatibility = JavaVersion.VERSION_11
+ targetCompatibility = JavaVersion.VERSION_11
+ toolchain {
+ languageVersion.set(JavaLanguageVersion.of(11))
+ }
+ }
+
+ repositories {
+ mavenCentral()
+ google()
+ }
+
+ dependencyManagement {
+ imports {
+ mavenBom("org.springframework.cloud:spring-cloud-dependencies:${property("springCloudVersion")}")
+ mavenBom("com.google.cloud:libraries-bom:${property("googleCloudBomVersion")}")
+ }
+ }
+
+ dependencies {
+ // ===============================================================
+ // 🔴 CRITICAL VULNERABILITIES - MUST BE REMEDIATED
+ // ===============================================================
+ // CVE-2021-44228 (Log4j RCE) - Apache Log4j 2.14.0
+ // DO NOT USE IN PRODUCTION
+ implementation("org.apache.logging.log4j:log4j-core:2.14.0")
+
+ // CVE-2015-4852 (Deserialization RCE) - Commons Collections 3.2.1
+ // Gadget chain exploitable with certain frameworks
+ implementation("commons-collections:commons-collections:3.2.1")
+
+ // ===============================================================
+ // 🔥 HIGH VULNERABILITIES - SHOULD UPGRADE
+ // ===============================================================
+ // CVE-2019-2725 (RCE) - Spring Framework 5.2.0
+ // Improper validation in Spring Core
+ implementation("org.springframework:spring-core:5.2.0.RELEASE")
+
+ // CVE-2020-5410 (Arbitrary File Write) - Jackson Databind 2.9.8
+ // Multiple polymorphic deserialization gadgets
+ implementation("com.fasterxml.jackson.core:jackson-databind:2.9.8")
+
+ // CVE-2019-12384 (Deserialization RCE) - XStream 1.4.17
+ // Unsafe unmarshalling of XML data
+ implementation("com.thoughtworks.xstream:xstream:1.4.17")
+
+ // CVE-2019-2725 (SQL Injection) - Hibernate 5.4.0
+ // HQL injection via eager initialization of associations
+ implementation("org.hibernate:hibernate-core:5.4.0.Final")
+
+ // ===============================================================
+ // ⚠️ MEDIUM VULNERABILITIES - PLAN UPGRADES
+ // ===============================================================
+ // CVE-2021-21341 (XXE) - org.springframework.security 5.4.0
+ // XML External Entity vulnerability in XML parsing
+ implementation("org.springframework.security:spring-security-core:5.4.0")
+
+ // CVE-2020-13956 (wrong target host) - Apache HttpClient 4.5.5
+ // A malformed authority component in a request URI can be misinterpreted (fixed in 4.5.13)
+ implementation("org.apache.httpcomponents:httpclient:4.5.5")
+
+ // CVE-2018-10237 (DoS) - Guava 23.0
+ // Unbounded memory allocation when deserializing AtomicDoubleArray / CompoundOrdering
+ implementation("com.google.guava:guava:23.0")
+
+ // CVE-2019-1010022 (Buffer Overflow) - Logback 1.2.3
+ // Improper input validation in configuration parsing
+ implementation("ch.qos.logback:logback-classic:1.2.3")
+
+ // ===============================================================
+ // 🟡 LOW VULNERABILITIES - MONITOR
+ // ===============================================================
+ // CVE-2020-1938 (AJP Ghostcat) - Tomcat Embed 9.0.10
+ // Arbitrary file read/write via AJP protocol
+ implementation("org.apache.tomcat.embed:tomcat-embed-core:9.0.10")
+
+ // Apache Commons Codec 1.14 - NOTE(review): was tagged CVE-2020-13956, which is the Apache HttpClient advisory; confirm the intended one
+ // Uncontrolled resource consumption in Base32 decoding
+ implementation("commons-codec:commons-codec:1.14")
+
+ // CVE-2020-17527 (Path Traversal) - Jetty 9.4.38
+ // URI path traversal via encoded characters
+ implementation("org.eclipse.jetty:jetty-server:9.4.38.v20210224")
+
+ // ===============================================================
+ // DATABASE DRIVERS
+ // ===============================================================
+ // Production-grade: PostgreSQL (Recommended over MySQL for security)
+ implementation("org.postgresql:postgresql:${property("postgresqlVersion")}")
+
+ // Legacy MySQL (deprecated in favor of PostgreSQL)
+ implementation("mysql:mysql-connector-java:5.1.40")
+
+ // In-memory testing database
+ testImplementation("com.h2database:h2:${property("h2Version")}")
+
+ // ===============================================================
+ // TESTING FRAMEWORKS
+ // ===============================================================
+ testImplementation("junit:junit:${property("junitVersion")}")
+ testImplementation("org.mockito:mockito-core:${property("mockitoVersion")}")
+ testImplementation("org.assertj:assertj-core:${property("assertjVersion")}")
+ testImplementation("org.testng:testng:${property("testngVersion")}")
+
+ // ===============================================================
+ // QUALITY & OBSERVABILITY
+ // ===============================================================
+ implementation("org.slf4j:slf4j-api:${property("slf4jVersion")}")
+
+ // Annotation processing
+ annotationProcessor("org.projectlombok:lombok:1.18.24")
+ testAnnotationProcessor("org.projectlombok:lombok:1.18.24")
+ }
+
+ // Configure Checkstyle
+ checkstyle {
+ toolVersion = "10.2"
+ configFile = file("${rootProject.projectDir}/checkstyle.xml")
+ }
+
+ // Configure JaCoCo
+ jacoco {
+ toolVersion = "0.8.8"
+ }
+
+ tasks.jacocoTestReport {
+ reports {
+ xml.required.set(true)
+ html.required.set(true)
+ csv.required.set(false)
+ }
+ }
+
+ tasks.test {
+ useJUnitPlatform()
+ finalizedBy(tasks.jacocoTestReport)
+ }
+}
+
+/**
+ * Core API Module
+ * Contains shared business logic and data access layer
+ */
+project(":core-api") {
+ apply(plugin = "org.springframework.boot")
+ apply(plugin = "kotlin")
+
+ dependencies {
+ // Spring Framework Core
+ implementation("org.springframework.boot:spring-boot-starter-web")
+ implementation("org.springframework.boot:spring-boot-starter-data-jpa")
+ implementation("org.springframework.boot:spring-boot-starter-validation")
+
+ // Spring Security (vulnerable version)
+ implementation("org.springframework.security:spring-security-core:${property("springSecurityVersion")}")
+
+ // Kotlin Support
+ implementation(kotlin("stdlib-jdk11"))
+ implementation(kotlin("reflect"))
+ }
+}
+
+/**
+ * Security Module
+ * Contains authentication and authorization logic
+ */
+project(":security-module") {
+ apply(plugin = "org.springframework.boot")
+
+ dependencies {
+ implementation(project(":core-api"))
+
+ // Spring Security stack
+ implementation("org.springframework.security:spring-security-core:${property("springSecurityVersion")}")
+ implementation("org.springframework.security:spring-security-crypto:${property("springSecurityVersion")}")
+ implementation("org.springframework.security:spring-security-web:${property("springSecurityVersion")}")
+
+ // JWT/OAuth2
+ implementation("io.jsonwebtoken:jjwt:0.11.5")
+
+ // LDAP Integration
+ implementation("org.springframework.security:spring-security-ldap:${property("springSecurityVersion")}")
+ }
+}
+
+/**
+ * Data Module
+ * Database access and persistence layer
+ */
+project(":data-module") {
+ apply(plugin = "org.springframework.boot")
+
+ dependencies {
+ implementation(project(":core-api"))
+
+ // Spring Data
+ implementation("org.springframework.boot:spring-boot-starter-data-jpa")
+ implementation("org.springframework.boot:spring-boot-starter-data-rest")
+
+ // Hibernate (vulnerable version)
+ implementation("org.hibernate:hibernate-core:${property("hibernateVersion")}")
+ implementation("org.hibernate:hibernate-validator:${property("hibernateVersion")}")
+
+ // Connection pooling
+ implementation("org.apache.commons:commons-dbcp2:2.9.0")
+
+ // Liquibase for schema versioning
+ implementation("org.liquibase:liquibase-core:4.9.1")
+ }
+}
+
+/**
+ * API Gateway Module
+ * REST API and external integrations
+ */
+project(":api-gateway") {
+ apply(plugin = "org.springframework.boot")
+
+ dependencies {
+ implementation(project(":core-api"))
+ implementation(project(":security-module"))
+
+ // Spring Cloud Gateway
+ implementation("org.springframework.cloud:spring-cloud-starter-gateway")
+ implementation("org.springframework.cloud:spring-cloud-starter-consul-discovery")
+
+ // API Documentation
+ implementation("org.springdoc:springdoc-openapi-ui:1.6.9")
+
+ // HTTP Client (vulnerable version)
+ implementation("org.apache.httpcomponents:httpclient:${property("commonsHttpClientVersion")}")
+ }
+}
+
+/**
+ * Monitoring Module
+ * Metrics, logging, and health checks
+ */
+project(":monitoring-module") {
+ apply(plugin = "org.springframework.boot")
+
+ dependencies {
+ // Spring Boot Actuator
+ implementation("org.springframework.boot:spring-boot-starter-actuator")
+
+ // Micrometer metrics
+ implementation("io.micrometer:micrometer-registry-prometheus:1.9.1")
+
+ // Logging (Log4j vulnerable version + fallback)
+ implementation("org.apache.logging.log4j:log4j-api:${property("log4jVersion")}")
+ implementation("org.apache.logging.log4j:log4j-core:${property("log4jCoreVersion")}")
+ implementation("org.slf4j:slf4j-log4j12:${property("slf4jVersion")}")
+
+ // Structured logging
+ implementation("net.logstash.logback:logstash-logback-encoder:7.2")
+ }
+}
+
+/**
+ * Advanced Configurations using Platform/BOM
+ */
+configure(subprojects.filter { it.name in listOf("api-gateway", "data-module") }) {
+ dependencies {
+ // Google Cloud Platform integration
+ implementation(platform("com.google.cloud:libraries-bom:${property("googleCloudBomVersion")}"))
+ implementation("com.google.cloud:google-cloud-storage")
+ implementation("com.google.cloud:google-cloud-pubsub")
+ }
+}
+
+/**
+ * Extended Dependency Configurations for Android modules (if applicable)
+ */
+configure(subprojects.filter { it.name.contains("android") }) {
+ dependencies {
+ debugImplementation("com.facebook.stetho:stetho:1.6.0")
+ debugImplementation("com.facebook.stetho:stetho-okhttp3:1.6.0")
+
+ releaseImplementation("com.google.firebase:firebase-crashlytics:18.0.0")
+ releaseImplementation("com.google.firebase:firebase-analytics:21.1.1")
+
+ // Code generation for Android
+ ksp("com.google.dagger:dagger-compiler:2.42")
+ }
+}
+
+/**
+ * Root Project Tasks
+ */
+tasks {
+ val buildInfo = register("buildInfo") {
+ doLast {
+ println("""
+ ╔════════════════════════════════════════════════════════════════════╗
+ ║ ENTERPRISE BUILD CONFIGURATION ║
+ ║ ║
+ ║ Project: ${project.group} ║
+ ║ Version: ${project.version} ║
+ ║ Java: ${java.sourceCompatibility} ║
+ ║ Built: ${Instant.now()} ║
+ ║ ║
+ ║ ⚠️ SECURITY NOTICE: ║
+ ║ This build contains known vulnerabilities for testing purposes ║
+ ║ DO NOT USE IN PRODUCTION without remediation ║
+ ║ ║
+ ╚════════════════════════════════════════════════════════════════════╝
+ """.trimIndent())
+ }
+ }
+
+ build {
+ dependsOn(buildInfo)
+ }
+}
+
+// Configure SonarQube analysis (coverage path follows Gradle's JaCoCo XML report location, not Maven's target/site)
+sonarqube {
+    properties {
+        property("sonar.projectKey", "enterprise-platform")
+        property("sonar.projectName", "Enterprise Platform")
+        property("sonar.sources", "src/main")
+        property("sonar.tests", "src/test")
+        property("sonar.coverage.jacoco.xmlReportPaths", "**/build/reports/jacoco/test/jacocoTestReport.xml")
+    }
+}
diff --git a/test/resources/gradle.properties b/test/resources/gradle.properties
new file mode 100644
index 0000000..7b1242c
--- /dev/null
+++ b/test/resources/gradle.properties
@@ -0,0 +1,88 @@
+# ==========================
+# Central Gradle Properties
+# ==========================
+# This file is shared across all gradle modules
+# Properties can be overridden in subproject gradle.properties
+
+# ========================
+# Organization Settings
+# ========================
+org.gradle.parallel=true
+org.gradle.caching=true
+org.gradle.daemon=true
+org.gradle.jvmargs=-Xmx2048m -XX:+UseG1GC
+
+# ========================
+# Java Version
+# ========================
+javaVersion=11
+javaTargetVersion=11
+
+# ========================
+# Framework Versions
+# ========================
+springBootVersion=2.7.0
+springVersion=5.3.20
+springSecurityVersion=5.7.1
+springCloudVersion=2021.0.3
+hibernateVersion=5.6.10.Final
+jacksonVersion=2.13.3
+
+# ========================
+# Logging Versions
+# ========================
+log4jVersion=2.17.1
+log4jCoreVersion=2.17.1
+slf4jVersion=1.7.36
+logbackVersion=1.2.11
+
+# ========================
+# Apache Commons Versions
+# ========================
+commonsLang3Version=3.12.0
+commonsCodecVersion=1.15
+commonsCollectionsVersion=3.2.2
+commonsHttpClientVersion=4.5.13
+
+# ========================
+# Database Drivers
+# ========================
+mysqlVersion=8.0.29
+postgresqlVersion=42.3.6
+h2Version=2.1.210
+
+# ========================
+# JSON/XML Processing
+# ========================
+guavaVersion=31.1-jre
+gson=2.9.0
+xstreamVersion=1.4.18
+
+# ========================
+# Testing Frameworks
+# ========================
+junitVersion=4.13.2
+mockitoVersion=4.6.1
+assertjVersion=3.22.0
+testngVersion=7.5
+
+# ========================
+# Build & Quality Tools
+# ========================
+jacocoVersion=0.8.8
+checkstyleVersion=10.2
+spotbugsVersion=4.7.2
+sonarVersion=3.4.0.2513
+
+# ========================
+# Google Cloud Dependencies (BOM)
+# ========================
+googleCloudBomVersion=26.1.0
+
+# ========================
+# Maven Plugin Versions
+# ========================
+mavenCompilerPluginVersion=3.10.1
+mavenSurefirePluginVersion=2.22.2
+mavenShadePluginVersion=3.2.4
+mavenAssemblyPluginVersion=3.3.0
diff --git a/test/resources/gradle/libs.versions.toml b/test/resources/gradle/libs.versions.toml
new file mode 100644
index 0000000..1980d6d
--- /dev/null
+++ b/test/resources/gradle/libs.versions.toml
@@ -0,0 +1,228 @@
+# ==================================================================
+# Gradle Version Catalog - Central Dependency Management
+# ==================================================================
+# This file demonstrates the version catalog feature (Gradle 7.0+)
+# References: https://docs.gradle.org/current/userguide/platforms.html
+
+[versions]
+# Spring Framework
+spring-version = "5.3.20"
+spring-boot-version = "2.7.0"
+spring-security-version = "5.7.1"
+spring-cloud-version = "2021.0.3"
+
+# Java & Kotlin
+java-version = "11"
+kotlin-version = "1.6.21"
+gradle-kotlin-dsl-version = "0.4.0"
+
+# Logging & Observability
+slf4j-version = "1.7.36"
+logback-version = "1.2.11"
+log4j-version = "2.17.1"
+
+# Testing
+junit-version = "4.13.2"
+mockito-version = "4.6.1"
+assertj-version = "3.22.0"
+
+# Database
+hibernate-version = "5.6.10.Final"
+postgresql-version = "42.3.6"
+h2-version = "2.1.210"
+
+# JSON/XML
+jackson-version = "2.13.3"
+gson-version = "2.9.0"
+
+# Apache Commons
+commons-lang3-version = "3.12.0"
+commons-codec-version = "1.15"
+commons-collections-version = "3.2.2"
+
+# Google Libraries
+guava-version = "31.1-jre"
+google-cloud-bom-version = "26.1.0"
+
+# Build Tools
+jacoco-version = "0.8.8"
+checkstyle-version = "10.2"
+spotbugs-version = "4.7.2"
+
+# BOM Versions
+spring-cloud-bom-version = "2021.0.3"
+
+[libraries]
+# ==================================================================
+# Spring Framework Libraries
+# ==================================================================
+spring-core = { module = "org.springframework:spring-core", version.ref = "spring-version" }
+spring-web = { module = "org.springframework:spring-web", version.ref = "spring-version" }
+spring-context = { module = "org.springframework:spring-context", version.ref = "spring-version" }
+spring-orm = { module = "org.springframework:spring-orm", version.ref = "spring-version" }
+
+spring-boot-starter-web = { module = "org.springframework.boot:spring-boot-starter-web", version.ref = "spring-boot-version" }
+spring-boot-starter-data-jpa = { module = "org.springframework.boot:spring-boot-starter-data-jpa", version.ref = "spring-boot-version" }
+spring-boot-starter-security = { module = "org.springframework.boot:spring-boot-starter-security", version.ref = "spring-boot-version" }
+spring-boot-starter-actuator = { module = "org.springframework.boot:spring-boot-starter-actuator", version.ref = "spring-boot-version" }
+spring-boot-starter-validation = { module = "org.springframework.boot:spring-boot-starter-validation", version.ref = "spring-boot-version" }
+spring-boot-starter-logging = { module = "org.springframework.boot:spring-boot-starter-logging", version.ref = "spring-boot-version" }
+
+spring-security-core = { module = "org.springframework.security:spring-security-core", version.ref = "spring-security-version" }
+spring-security-web = { module = "org.springframework.security:spring-security-web", version.ref = "spring-security-version" }
+spring-security-crypto = { module = "org.springframework.security:spring-security-crypto", version.ref = "spring-security-version" }
+
+spring-cloud-starter-gateway = { module = "org.springframework.cloud:spring-cloud-starter-gateway", version.ref = "spring-cloud-version" }
+spring-cloud-starter-consul-discovery = { module = "org.springframework.cloud:spring-cloud-starter-consul-discovery", version.ref = "spring-cloud-version" }
+
+# ==================================================================
+# Logging & Observability
+# ==================================================================
+slf4j-api = { module = "org.slf4j:slf4j-api", version.ref = "slf4j-version" }
+logback-classic = { module = "ch.qos.logback:logback-classic", version.ref = "logback-version" }
+logback-core = { module = "ch.qos.logback:logback-core", version.ref = "logback-version" }
+
+# CRITICAL VULNERABILITY: Log4j RCE (CVE-2021-44228)
+log4j-api = { module = "org.apache.logging.log4j:log4j-api", version.ref = "log4j-version" }
+log4j-core = { module = "org.apache.logging.log4j:log4j-core", version.ref = "log4j-version" }
+
+micrometer-registry-prometheus = "io.micrometer:micrometer-registry-prometheus:1.9.1"
+logstash-logback-encoder = "net.logstash.logback:logstash-logback-encoder:7.2"
+
+# ==================================================================
+# Database & ORM
+# ==================================================================
+hibernate-core = { module = "org.hibernate:hibernate-core", version.ref = "hibernate-version" }
+hibernate-validator = { module = "org.hibernate:hibernate-validator", version.ref = "hibernate-version" }
+
+postgresql = { module = "org.postgresql:postgresql", version.ref = "postgresql-version" }
+h2-database = { module = "com.h2database:h2", version.ref = "h2-version" }
+
+# MEDIUM VULNERABILITY: MySQL 5.1 (Legacy, prefer PostgreSQL)
+mysql-connector = "mysql:mysql-connector-java:5.1.40"
+
+liquibase-core = "org.liquibase:liquibase-core:4.9.1"
+commons-dbcp2 = "org.apache.commons:commons-dbcp2:2.9.0"
+
+# ==================================================================
+# JSON/XML & Serialization
+# ==================================================================
+jackson-databind = { module = "com.fasterxml.jackson.core:jackson-databind", version.ref = "jackson-version" }
+jackson-annotations = { module = "com.fasterxml.jackson.core:jackson-annotations", version.ref = "jackson-version" }
+jackson-dataformat-xml = { module = "com.fasterxml.jackson.dataformat:jackson-dataformat-xml", version.ref = "jackson-version" }
+
+gson = { module = "com.google.gson:gson", version.ref = "gson-version" }
+
+# HIGH VULNERABILITY: XStream (Deserialization RCE)
+xstream = "com.thoughtworks.xstream:xstream:1.4.17"
+
+# ==================================================================
+# Apache Commons (Known Vulnerabilities)
+# ==================================================================
+commons-lang3 = { module = "org.apache.commons:commons-lang3", version.ref = "commons-lang3-version" }
+commons-codec = { module = "commons-codec:commons-codec", version.ref = "commons-codec-version" }
+
+# CRITICAL VULNERABILITY: Commons Collections 3.2.1 (Gadget chain RCE)
+commons-collections = { module = "commons-collections:commons-collections", version.ref = "commons-collections-version" }
+
+# HttpClient 4.5.13 (same as commonsHttpClientVersion in gradle.properties); pinned inline because [versions] has no httpclient key
+httpclient = { module = "org.apache.httpcomponents:httpclient", version = "4.5.13" }
+
+# ==================================================================
+# Google Libraries (cloud artifacts carry no version; build.gradle.kts imports libraries-bom as a platform)
+# ==================================================================
+guava = { module = "com.google.guava:guava", version.ref = "guava-version" }
+google-cloud-storage = { module = "com.google.cloud:google-cloud-storage" }
+google-cloud-pubsub = { module = "com.google.cloud:google-cloud-pubsub" }
+
+# ==================================================================
+# Testing Frameworks
+# ==================================================================
+junit = { module = "junit:junit", version.ref = "junit-version" }
+mockito-core = { module = "org.mockito:mockito-core", version.ref = "mockito-version" }
+assertj-core = { module = "org.assertj:assertj-core", version.ref = "assertj-version" }
+
+# ==================================================================
+# Code Generation & Annotation Processing
+# ==================================================================
+lombok = "org.projectlombok:lombok:1.18.24"
+dagger-compiler = "com.google.dagger:dagger-compiler:2.42"
+
+# ==================================================================
+# Android/Debug Only Dependencies
+# ==================================================================
+stetho = "com.facebook.stetho:stetho:1.6.0"
+stetho-okhttp3 = "com.facebook.stetho:stetho-okhttp3:1.6.0"
+
+# ==================================================================
+# Firebase & Analytics (Release builds)
+# ==================================================================
+firebase-crashlytics = "com.google.firebase:firebase-crashlytics:18.0.0"
+firebase-analytics = "com.google.firebase:firebase-analytics:21.1.1"
+
+# ==================================================================
+# API Documentation
+# ==================================================================
+springdoc-openapi-ui = "org.springdoc:springdoc-openapi-ui:1.6.9"
+springdoc-openapi-kotlin = "org.springdoc:springdoc-openapi-kotlin:1.6.9"
+
+# ==================================================================
+# JWT & OAuth2
+# ==================================================================
+jjwt = "io.jsonwebtoken:jjwt:0.11.5"
+spring-security-oauth2 = "org.springframework.security.oauth:spring-security-oauth2:2.5.2.RELEASE"
+
+# ==================================================================
+# Kotlin & Coroutines
+# ==================================================================
+kotlin-stdlib = { module = "org.jetbrains.kotlin:kotlin-stdlib-jdk11", version.ref = "kotlin-version" }
+kotlin-reflect = { module = "org.jetbrains.kotlin:kotlin-reflect", version.ref = "kotlin-version" }
+kotlin-coroutines = "org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.3"
+
+[bundles]
+# ==================================================================
+# Bundle Groups - Frequently Used Together
+# ==================================================================
+spring-core = [
+ "spring-core",
+ "spring-context",
+ "spring-web"
+]
+
+spring-boot-web = [
+ "spring-boot-starter-web",
+ "spring-boot-starter-validation",
+ "spring-boot-starter-logging"
+]
+
+spring-data-stack = [
+ "spring-boot-starter-data-jpa",
+ "hibernate-core",
+ "hibernate-validator"
+]
+
+spring-security-stack = [
+ "spring-boot-starter-security",
+ "spring-security-core",
+ "spring-security-web",
+ "spring-security-crypto"
+]
+
+logging-stack = [
+ "slf4j-api",
+ "logback-classic",
+ "logback-core",
+ "logstash-logback-encoder"
+]
+
+testing = [
+ "junit",
+ "mockito-core",
+ "assertj-core"
+]
+
+json-processing = [
+ "jackson-databind",
+ "jackson-annotations",
+ "gson"
+]
From 6ac456b11b067b88a8618b78b335ecdd1f58380f Mon Sep 17 00:00:00 2001
From: cx-anurag-dalke <120229307+cx-anurag-dalke@users.noreply.github.com>
Date: Sat, 11 Apr 2026 09:04:59 +0530
Subject: [PATCH 04/14] added support for gradle libs.versions.toml
---
README.md | 25 +++++++++++-
internal/parsers/gradle/gradle_parser_test.go | 40 +++++++++++++++++++
internal/parsers/gradle/version_catalog.go | 31 ++++++++++++++
pkg/parser/manifest-file-selector.go | 5 +++
pkg/parser/parser_factory.go | 2 +
5 files changed, 101 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 64c538d..6cf3ab3 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ This parser extracts software dependencies from project manifest files and provi
| Manager | Format | Status | Features |
|---------|--------|--------|----------|
-| **Gradle** | `build.gradle`, `build.gradle.kts` | ✅ Production | Latest DSL + catalogs |
+| **Gradle** | `build.gradle`, `build.gradle.kts`, `libs.versions.toml` | ✅ Production | Latest DSL + catalogs + direct TOML parsing |
| **Maven** | `pom.xml` | ✅ Production | Properties, BOMs, ranges |
| **npm/Node.js** | `package.json` | ✅ Production | Dependencies, dev, peer, optional |
| **Go** | `go.mod` | ✅ Production | Direct imports, indirect |
@@ -83,7 +83,7 @@ go run cmd/main.go project/go.mod
### 1. Gradle Parser
-**Files:** `build.gradle`, `build.gradle.kts`
+**Files:** `build.gradle`, `build.gradle.kts`, `gradle/libs.versions.toml`
#### Features
@@ -170,6 +170,15 @@ project(":api-module") {
#### Version Catalog Support
+**Direct Parsing:** You can now parse `libs.versions.toml` directly!
+
+```bash
+# Parse version catalog directly
+go run cmd/main.go gradle/libs.versions.toml
+```
+
+**Catalog Format:**
+
```toml
# gradle/libs.versions.toml
[versions]
@@ -182,8 +191,11 @@ spring-core = { module = "org.springframework:spring-core", version.ref = "sprin
spring = ["spring-core", "spring-context"]
```
+**Automatic Discovery:** When parsing `build.gradle` or `build.gradle.kts`, the parser automatically discovers and parses the version catalog at `gradle/libs.versions.toml`, resolved relative to the build file's directory.
+
#### Parser Capabilities
+**Build File Parsing:**
- ✅ Parses Groovy and Kotlin DSL
- ✅ Resolves variables from gradle.properties
- ✅ Discovers and parses version catalogs
@@ -193,6 +205,15 @@ spring = ["spring-core", "spring-context"]
- ✅ Skips file references (local JARs)
- ✅ Handles multi-line declarations
- ✅ Parses conditional if blocks
+
+**Version Catalog Parsing:**
+- ✅ Direct parsing of `libs.versions.toml` files
+- ✅ Extracts all 80+ library definitions
+- ✅ Resolves version references
+- ✅ Supports all catalog formats (simple, module, key-value)
+- ✅ Works standalone or auto-discovered by build files
+
+**General:**
- ❌ Does not evaluate dynamic Gradle code
#### Test Resources
diff --git a/internal/parsers/gradle/gradle_parser_test.go b/internal/parsers/gradle/gradle_parser_test.go
index f8d48a6..5078027 100644
--- a/internal/parsers/gradle/gradle_parser_test.go
+++ b/internal/parsers/gradle/gradle_parser_test.go
@@ -726,3 +726,43 @@ func TestNormalizePlatformDependency(t *testing.T) {
}
}
}
+
+func TestVersionCatalogParser_ParseFile(t *testing.T) {
+ // Parse the fixture catalog directly through VersionCatalogParser and spot-check a few entries.
+ parser := &VersionCatalogParser{}
+ pkgs, err := parser.Parse(filepath.Join("..", "..", "..", "test", "resources", "gradle", "libs.versions.toml"))
+ if err != nil {
+ t.Fatalf("Failed to parse libs.versions.toml: %v", err)
+ }
+
+ if len(pkgs) == 0 {
+ t.Errorf("Expected packages from version catalog, got none")
+ }
+
+ // Package name -> version expected from the fixture; each of these entries is declared with version.ref in the catalog.
+ expectedPackages := map[string]string{
+ "org.springframework:spring-core": "5.3.20",
+ "org.springframework.boot:spring-boot-starter-web": "2.7.0",
+ "com.google.guava:guava": "31.1-jre",
+ "org.apache.logging.log4j:log4j-core": "2.17.1",
+ }
+
+ found := make(map[string]bool)
+ for _, pkg := range pkgs {
+ if expectedVersion, ok := expectedPackages[pkg.PackageName]; ok {
+ found[pkg.PackageName] = true
+ if pkg.Version != expectedVersion {
+ t.Errorf("Package %s: expected version %s, got %s", pkg.PackageName, expectedVersion, pkg.Version)
+ }
+ if pkg.PackageManager != "gradle" {
+ t.Errorf("Expected package manager 'gradle', got '%s'", pkg.PackageManager)
+ }
+ }
+ }
+
+ for pkgName := range expectedPackages {
+ if !found[pkgName] {
+ t.Errorf("Expected package not found: %s", pkgName)
+ }
+ }
+}
diff --git a/internal/parsers/gradle/version_catalog.go b/internal/parsers/gradle/version_catalog.go
index 220c188..acd9178 100644
--- a/internal/parsers/gradle/version_catalog.go
+++ b/internal/parsers/gradle/version_catalog.go
@@ -1,6 +1,7 @@
package gradle
import (
+ "fmt"
"os"
"path/filepath"
"regexp"
@@ -9,6 +10,36 @@ import (
"github.com/Checkmarx/manifest-parser/pkg/parser/models"
)
+// VersionCatalogParser parses a Gradle version catalog (libs.versions.toml) as a standalone manifest. It is an empty struct and carries no state.
+type VersionCatalogParser struct{}
+
+// Parse loads manifestFile via parseVersionCatalog and returns one "gradle" package per library that has both a group and a name; a nil catalog is returned as an error.
+func (p *VersionCatalogParser) Parse(manifestFile string) ([]models.Package, error) {
+ catalog := parseVersionCatalog(manifestFile)
+ if catalog == nil {
+ return nil, fmt.Errorf("failed to parse version catalog: %w", fmt.Errorf("invalid TOML format")) // NOTE(review): every nil result is labeled "invalid TOML format"; the real cause is not available here
+ }
+
+ var packages []models.Package
+
+ // Convert catalog libraries to packages, skipping entries that lack a group or a name.
+ lineNum := 1 // sequential counter starting at 1, not read from the file; NOTE(review): confirm callers do not expect the entry's real line
+ for _, lib := range catalog.Libraries { // NOTE(review): if Libraries is a map, iteration order (and so the Line values) varies between runs; confirm
+ if lib.Group != "" && lib.Name != "" {
+ packages = append(packages, models.Package{
+ PackageManager: "gradle",
+ PackageName: lib.Group + ":" + lib.Name,
+ Version: lib.Version,
+ FilePath: manifestFile,
+ Locations: []models.Location{{Line: lineNum}}, // only Line is set; the other Location fields keep their zero value
+ })
+ lineNum++
+ }
+ }
+
+ return packages, nil
+}
+
// VersionCatalog represents a parsed Gradle version catalog (libs.versions.toml)
type VersionCatalog struct {
Versions map[string]string
diff --git a/pkg/parser/manifest-file-selector.go b/pkg/parser/manifest-file-selector.go
index c11b67e..107f35e 100644
--- a/pkg/parser/manifest-file-selector.go
+++ b/pkg/parser/manifest-file-selector.go
@@ -16,6 +16,7 @@ const (
MavenPom
GoMod
GradleBuild
+ GradleVersionCatalog
)
// selectManifestFile a method to select a manifest file type by its name
@@ -60,5 +61,9 @@ func selectManifestFile(manifest string) Manifest {
return GradleBuild
}
+ if manifestFileName == "libs.versions.toml" {
+ return GradleVersionCatalog
+ }
+
return -1
}
diff --git a/pkg/parser/parser_factory.go b/pkg/parser/parser_factory.go
index 58f5d82..2d1f0e6 100644
--- a/pkg/parser/parser_factory.go
+++ b/pkg/parser/parser_factory.go
@@ -29,6 +29,8 @@ func ParsersFactory(manifest string) Parser {
return &golang.GoModParser{}
case GradleBuild:
return &gradle.GradleParser{}
+ case GradleVersionCatalog:
+ return &gradle.VersionCatalogParser{}
default:
return nil
}
From bd0e3a8cb879d988d113a87f47b983ab1d663ff4 Mon Sep 17 00:00:00 2001
From: cx-anurag-dalke <120229307+cx-anurag-dalke@users.noreply.github.com>
Date: Mon, 20 Apr 2026 21:27:52 +0530
Subject: [PATCH 05/14] Add CLAUDE.md with architecture and contract notes
Captures the Parser/factory dispatch model, per-ecosystem quirks,
and the invariants (0-based line numbers, "latest" sentinel,
PackageManager strings) that downstream AST-CLI relies on.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
CLAUDE.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
create mode 100644 CLAUDE.md
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..c0028d2
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,49 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Overview
+
+Go module that parses package manifests from multiple ecosystems (Maven, npm, Python, Go, .NET) and returns each declared dependency along with the **exact line/character range** of its declaration. Consumed by [AST-CLI](https://github.com/Checkmarx/ast-cli) to correlate manifest entries with Checkmarx runtime scans — so the `Locations` field is part of the public contract, not a debugging convenience.
+
+## Commands
+
+```bash
+go test ./... # run all tests
+go test ./internal/parsers/maven/... # run tests for a single parser
+go test -run TestName ./path/... # run a single test by name
+go test ./... -coverprofile cover.out # CI gate: total coverage must be >= 60%
+go build -o manifest-parser ./cmd # build CLI
+go run ./cmd # run CLI against a manifest
+```
+
+Dependencies are vendored (`vendor/`). Go version is pinned via `go.mod` (1.23 / toolchain 1.24.2).
+
+## Architecture
+
+The module is organized around one interface and a dispatching factory:
+
+- [pkg/parser/parser.go](pkg/parser/parser.go) — `Parser` interface (`Parse(manifestFile string) ([]models.Package, error)`).
+- [pkg/parser/parser_factory.go](pkg/parser/parser_factory.go) — `ParsersFactory(manifest string)` is the **only** public entry point. It calls `selectManifestFile` and returns the right concrete parser, or `nil` for unsupported files.
+- [pkg/parser/manifest-file-selector.go](pkg/parser/manifest-file-selector.go) — maps filename/extension to a `Manifest` enum. Adding a new ecosystem means editing this file, the factory, and adding a package under `internal/parsers/`.
+- [pkg/parser/models/package_model.go](pkg/parser/models/package_model.go) — the `Package` / `Location` structs returned to callers. `Locations` is a slice: Maven returns one entry per line of a multi-line `<dependency>` block; most others return a single entry.
+
+Per-ecosystem parsers live under [internal/parsers/](internal/parsers/):
+- `maven/` — parses `pom.xml` with `encoding/xml`, then re-scans the raw text to locate each `<dependency>` block line by line. Resolves `${property}` vars from `<properties>` and falls back to `<dependencyManagement>` for empty/ranged versions. Only **direct** `<dependencies>` are emitted (managed-only deps are intentionally skipped to avoid duplicates — see commit `9e490aa`).
+- `npm/` — parses `package.json` plus, if present as a sibling file, `package-lock.json` (v1 and v2/v3 formats). Ranged specifiers (`^`, `~`, `*`, `>`, `<`) trigger a lookup in the lockfile; `isLockVersionGreater` compares part-by-part numerically to decide whether the lockfile version satisfies the spec. Without a lock match, ranged versions resolve to `"latest"`.
+- `pypi/` — line-oriented scan of `requirements*.txt` / `packages*.txt`. **Only `package==version` is supported** — `pip freeze`, Poetry, and pip-tools output are explicitly out of scope (see README "Known Limitations"). Comments (`#`) and environment markers (`;`) are stripped.
+- `golang/` — uses `golang.org/x/mod/modfile` to parse `go.mod`, then uses the parser's line metadata to compute character offsets.
+- `dotnet/` — three separate parsers sharing patterns: `csproj_parser.go` (`.csproj`), `directory_packages_props_parser.go` (central package management), `packages_config_parser.go` (legacy). Versions are read from either a `Version` attribute or a nested `<Version>` element; bracketed ranges become `"latest"`.
+
+### Invariants worth preserving
+
+- **`Location` uses 0-based line numbers** in most parsers (Maven, Go, npm, pypi use `lineNum - 1` or a 0-based counter). Downstream AST-CLI depends on this; don't "fix" it to 1-based without coordinating.
+- **Unresolvable or ranged versions resolve to the literal string `"latest"`**, never an empty string. Callers branch on this value.
+- **`PackageManager` strings are part of the contract**: `"mvn"`, `"npm"`, `"pypi"`, `"go"`, `"nuget"` (used by all three dotnet parsers). Don't rename them.
+- Maven emits one `Location` per **non-comment line** of the `<dependency>` block (open tag, each child, close tag) so AST-CLI can annotate the whole block. Single-line `Locations` for Maven would be a regression.
+
+## Tests & fixtures
+
+Each parser has a `*_test.go` next to it using `testify`. Shared fixtures live in [test/resources/](test/resources/) (e.g. `pom.xml`, `package.json`, `requirements.txt`, `test_go.mod`, `Bootstrap.csproj`, `Gateway.csproj`, `packages.config`, `Directory.Packages.props`). When adding behaviors, add a fixture here rather than embedding large manifests in test source.
+
+CI ([.github/workflows/ci.yml](.github/workflows/ci.yml)) enforces a **60% total coverage floor** — adding an untested branch to an already-thin package can push the whole repo below the gate.
From 178314644b009d1f01edf221cbbf2a46c646f9a9 Mon Sep 17 00:00:00 2001
From: cx-anurag-dalke <120229307+cx-anurag-dalke@users.noreply.github.com>
Date: Mon, 20 Apr 2026 21:32:42 +0530
Subject: [PATCH 06/14] updated readme file
---
README.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index 651cb66..7d901a0 100644
--- a/README.md
+++ b/README.md
@@ -73,6 +73,7 @@ type Location struct {
}
```
+
`Locations` points to the exact position of the dependency declaration in the source manifest, which downstream tools use for inline annotations and remediation.
## CLI
From 70eb4a58f1ee9dcf03972533b5358b49e8a1f013 Mon Sep 17 00:00:00 2001
From: atishj99 <141334503+cx-atish-jadhav@users.noreply.github.com>
Date: Mon, 11 May 2026 13:00:37 +0530
Subject: [PATCH 07/14] docs: expand CLAUDE.md to meet JIRA epic template
requirements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Addresses all inline review comments from PR #20 review:
- Rename Overview → Project Overview, add status line
- Add Technology Stack section (Go 1.23, testify, x/mod, stdlib, no DB)
- Add Repository Structure section with folder tree
- Rename Commands → Development Setup; add prerequisites, clone step,
coverage HTML command, and sample JSON output
- Add API / Interfaces section with full struct definitions including
clarification that StartIndex/EndIndex are 0-based byte offsets
- Update Architecture to include Gradle parser (missed in original)
- Replace commit hash reference (9e490aa) with PR #15 link
- Rename Invariants → Project Rules (Invariants); add PackageManager
string for gradle and the StartIndex/EndIndex byte-offset clarification
- Rename Tests & fixtures → Testing Strategy; add fixture tree, coverage
HTML command, and expected new-parser pattern
- Add Known Issues / Limitations section (consolidates pypi, npm, maven,
dotnet, and all-parsers limitations)
- Add External Integrations section (AST-CLI contract fields)
- Add Deployment section (N/A — library, not a service)
- Add Performance Considerations section (Maven two-pass, Gradle catalog,
no caching)
- Add Security & Access section (XXE posture, no file-size limit, no
network calls)
- Add Logging section (library vs CLI behaviour)
- Add Coding Standards section (gofmt/vet, pkg vs internal, naming)
- Add Debugging Steps section (5 concrete steps)
Co-Authored-By: Claude Sonnet 4.6 (1M context)
---
CLAUDE.md | 216 ++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 194 insertions(+), 22 deletions(-)
diff --git a/CLAUDE.md b/CLAUDE.md
index c0028d2..0ead52f 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -2,48 +2,220 @@
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
-## Overview
+## Project Overview
-Go module that parses package manifests from multiple ecosystems (Maven, npm, Python, Go, .NET) and returns each declared dependency along with the **exact line/character range** of its declaration. Consumed by [AST-CLI](https://github.com/Checkmarx/ast-cli) to correlate manifest entries with Checkmarx runtime scans — so the `Locations` field is part of the public contract, not a debugging convenience.
+Go module that parses package manifests from multiple ecosystems (Maven, npm, Python, Go, .NET, Gradle) and returns each declared dependency along with the **exact line/character range** of its declaration. Consumed by [AST-CLI](https://github.com/Checkmarx/ast-cli) to correlate manifest entries with Checkmarx runtime scans — so the `Locations` field is part of the public contract, not a debugging convenience.
-## Commands
+**Status:** Active / maintained. Part of the Checkmarx One SCA pipeline.
+
+## Technology Stack
+
+| Component | Details |
+|-----------|---------|
+| Language | Go 1.23.0 / toolchain go1.24.2 |
+| Test framework | `github.com/stretchr/testify v1.8.4` |
+| Go module parsing | `golang.org/x/mod v0.24.0` |
+| XML parsing | stdlib `encoding/xml` |
+| JSON output | stdlib `encoding/json` |
+| Database | None |
+| Web framework | None |
+| Dependencies | Vendored (`vendor/`) — no `go mod download` required |
+
+## Repository Structure
+
+```
+cmd/ CLI entry point (main.go)
+pkg/parser/ Public API
+ ├── parser.go Parser interface
+ ├── parser_factory.go ParsersFactory — sole public entry point
+ ├── manifest-file-selector.go Filename → Manifest enum mapping
+ └── models/
+     └── package_model.go Package / Location structs
+internal/parsers/ Per-ecosystem implementations (not importable by callers)
+ ├── dotnet/
+ ├── golang/
+ ├── gradle/
+ ├── maven/
+ ├── npm/
+ └── pypi/
+test/resources/ Shared fixture files for all parser tests
+vendor/ Vendored dependencies
+.github/workflows/ci.yml CI pipeline (test + 60% coverage gate)
+```
+
+## Development Setup
+
+**Prerequisites:** Go ≥ 1.23, git. No other tools required — dependencies are vendored.
```bash
+git clone https://github.com/Checkmarx/manifest-parser.git
+cd manifest-parser
+
go test ./... # run all tests
-go test ./internal/parsers/maven/... # run tests for a single parser
+go test ./internal/parsers/gradle/... # run tests for a single parser
go test -run TestName ./path/... # run a single test by name
go test ./... -coverprofile cover.out # CI gate: total coverage must be >= 60%
+go tool cover -html cover.out # view coverage report in browser
go build -o manifest-parser ./cmd # build CLI
-go run ./cmd # run CLI against a manifest
+go run ./cmd test/resources/pom.xml # run CLI against a fixture
+```
+
+**Sample output** from `go run ./cmd test/resources/pom.xml`:
+
+```json
+[
+ {
+ "PackageManager": "mvn",
+ "PackageName": "junit:junit",
+ "Version": "4.13.2",
+ "FilePath": "test/resources/pom.xml",
+ "Locations": [{ "Line": 14, "StartIndex": 4, "EndIndex": 20 }]
+ }
+]
```
-Dependencies are vendored (`vendor/`). Go version is pinned via `go.mod` (1.23 / toolchain 1.24.2).
+## API / Interfaces
+
+The public API lives entirely under `pkg/`:
+
+**`Parser` interface** ([pkg/parser/parser.go](pkg/parser/parser.go)):
+```go
+type Parser interface {
+ Parse(manifestFile string) ([]models.Package, error)
+}
+```
+
+**`ParsersFactory`** ([pkg/parser/parser_factory.go](pkg/parser/parser_factory.go)) — the **only** public entry point. Returns a concrete `Parser` for the given filename, or `nil` for unsupported files.
+
+**`Package` / `Location` structs** ([pkg/parser/models/package_model.go](pkg/parser/models/package_model.go)):
+```go
+type Package struct {
+ PackageManager string
+ PackageName string
+ Version string
+ FilePath string
+ Locations []Location
+}
+
+type Location struct {
+ Line int // 0-based in most parsers (see Project Rules below)
+ StartIndex int // 0-based byte offset from start of line
+ EndIndex int // 0-based byte offset from start of line
+}
+```
+
+Adding a new ecosystem: edit `manifest-file-selector.go`, add a case in `parser_factory.go`, and add a package under `internal/parsers/`.
## Architecture
The module is organized around one interface and a dispatching factory:
-- [pkg/parser/parser.go](pkg/parser/parser.go) — `Parser` interface (`Parse(manifestFile string) ([]models.Package, error)`).
-- [pkg/parser/parser_factory.go](pkg/parser/parser_factory.go) — `ParsersFactory(manifest string)` is the **only** public entry point. It calls `selectManifestFile` and returns the right concrete parser, or `nil` for unsupported files.
-- [pkg/parser/manifest-file-selector.go](pkg/parser/manifest-file-selector.go) — maps filename/extension to a `Manifest` enum. Adding a new ecosystem means editing this file, the factory, and adding a package under `internal/parsers/`.
-- [pkg/parser/models/package_model.go](pkg/parser/models/package_model.go) — the `Package` / `Location` structs returned to callers. `Locations` is a slice: Maven returns one entry per line of a multi-line `<dependency>` block; most others return a single entry.
+- [pkg/parser/manifest-file-selector.go](pkg/parser/manifest-file-selector.go) — maps filename/extension to a `Manifest` enum.
+- [pkg/parser/parser_factory.go](pkg/parser/parser_factory.go) — dispatches to the right concrete parser.
+- [pkg/parser/models/package_model.go](pkg/parser/models/package_model.go) — `Locations` is a slice: Maven returns one entry per line of a multi-line `<dependency>` block; most others return a single entry.
Per-ecosystem parsers live under [internal/parsers/](internal/parsers/):
-- `maven/` — parses `pom.xml` with `encoding/xml`, then re-scans the raw text to locate each `<dependency>` block line by line. Resolves `${property}` vars from `<properties>` and falls back to `<dependencyManagement>` for empty/ranged versions. Only **direct** `<dependencies>` are emitted (managed-only deps are intentionally skipped to avoid duplicates — see commit `9e490aa`).
-- `npm/` — parses `package.json` plus, if present as a sibling file, `package-lock.json` (v1 and v2/v3 formats). Ranged specifiers (`^`, `~`, `*`, `>`, `<`) trigger a lookup in the lockfile; `isLockVersionGreater` compares part-by-part numerically to decide whether the lockfile version satisfies the spec. Without a lock match, ranged versions resolve to `"latest"`.
-- `pypi/` — line-oriented scan of `requirements*.txt` / `packages*.txt`. **Only `package==version` is supported** — `pip freeze`, Poetry, and pip-tools output are explicitly out of scope (see README "Known Limitations"). Comments (`#`) and environment markers (`;`) are stripped.
-- `golang/` — uses `golang.org/x/mod/modfile` to parse `go.mod`, then uses the parser's line metadata to compute character offsets.
-- `dotnet/` — three separate parsers sharing patterns: `csproj_parser.go` (`.csproj`), `directory_packages_props_parser.go` (central package management), `packages_config_parser.go` (legacy). Versions are read from either a `Version` attribute or a nested `<Version>` element; bracketed ranges become `"latest"`.
-### Invariants worth preserving
+- `gradle/` — parses `build.gradle` / `build.gradle.kts` (Groovy + Kotlin DSL) and `gradle/libs.versions.toml` version catalogs. Resolves variables from `gradle.properties` and `ext {}` blocks. `PackageManager` = `"gradle"`.
+- `maven/` — parses `pom.xml` with `encoding/xml`, then re-scans the raw text to locate each `<dependency>` block line by line. Resolves `${property}` vars from `<properties>` and falls back to `<dependencyManagement>` for empty/ranged versions. Only **direct** `<dependencies>` are emitted (managed-only deps are intentionally skipped to avoid duplicates — see PR #15). `PackageManager` = `"mvn"`.
+- `npm/` — parses `package.json` plus, if present as a sibling file, `package-lock.json` (v1 and v2/v3 formats). Ranged specifiers (`^`, `~`, `*`, `>`, `<`) trigger a lookup in the lockfile. Without a lock match, ranged versions resolve to `"latest"`. `PackageManager` = `"npm"`.
+- `pypi/` — line-oriented scan of `requirements*.txt` / `packages*.txt`. **Only `package==version` is supported.** Comments (`#`) and environment markers (`;`) are stripped. `PackageManager` = `"pypi"`.
+- `golang/` — uses `golang.org/x/mod/modfile` to parse `go.mod`, then uses the parser's line metadata to compute character offsets. `PackageManager` = `"go"`.
+- `dotnet/` — three parsers: `csproj_parser.go` (`.csproj`), `directory_packages_props_parser.go` (central package management), `packages_config_parser.go` (legacy). Bracketed version ranges become `"latest"`. `PackageManager` = `"nuget"` for all three.
+
+## Project Rules (Invariants)
-- **`Location` uses 0-based line numbers** in most parsers (Maven, Go, npm, pypi use `lineNum - 1` or a 0-based counter). Downstream AST-CLI depends on this; don't "fix" it to 1-based without coordinating.
+- **`Location.Line` is 0-based** in most parsers (Maven, Go, npm, pypi use `lineNum - 1` or a 0-based counter). Downstream AST-CLI depends on this; don't "fix" it to 1-based without coordinating.
+- **`Location.StartIndex` / `EndIndex` are 0-based byte offsets** from the start of the line. They are byte offsets, not rune/character offsets — relevant for non-ASCII manifests.
- **Unresolvable or ranged versions resolve to the literal string `"latest"`**, never an empty string. Callers branch on this value.
-- **`PackageManager` strings are part of the contract**: `"mvn"`, `"npm"`, `"pypi"`, `"go"`, `"nuget"` (used by all three dotnet parsers). Don't rename them.
-- Maven emits one `Location` per **non-comment line** of the `<dependency>` block (open tag, each child, close tag) so AST-CLI can annotate the whole block. Single-line `Locations` for Maven would be a regression.
+- **`PackageManager` strings are part of the contract**: `"gradle"`, `"mvn"`, `"npm"`, `"pypi"`, `"go"`, `"nuget"`. Don't rename them.
+- Maven emits one `Location` per **non-comment line** of the `<dependency>` block (open tag, each child element, close tag). Single-line `Locations` for Maven would be a regression.
+- Do not add `ParsersFactory` overloads or alternative entry points without coordinating with AST-CLI.
+
+## Testing Strategy
+
+Each parser has a `*_test.go` co-located with it using `testify`. Shared fixtures live in [test/resources/](test/resources/):
+
+```
+test/resources/
+├── build.gradle Groovy DSL
+├── build.gradle.kts Kotlin DSL
+├── gradle/libs.versions.toml Version catalog (80+ entries)
+├── gradle.properties Centralized Gradle properties
+├── pom.xml Maven
+├── package.json npm
+├── test_go.mod Go modules
+├── Bootstrap.csproj .NET csproj
+├── Gateway.csproj .NET csproj (variant)
+├── Directory.Packages.props .NET centralized packages
+├── packages.config .NET legacy NuGet
+└── requirements.txt Python pip
+```
+
+When adding behaviours, add a fixture here rather than embedding large manifests in test source.
+
+CI ([.github/workflows/ci.yml](.github/workflows/ci.yml)) enforces a **60% total coverage floor** — adding an untested branch to an already-thin package can push the whole repo below the gate. View coverage locally with `go tool cover -html cover.out`.
+
+Expected pattern for a new parser: fixture file under `test/resources/` + `<ecosystem>_parser_test.go` co-located with the parser, using `testify` assertions on `PackageName`, `Version`, `PackageManager`, and `Locations`.
+
+## Known Issues / Limitations
+
+- **pypi**: Only `package==version` syntax is supported. `pip freeze`, Poetry, and pip-tools output are explicitly out of scope.
+- **npm**: Ranged version specifiers (`^`, `~`, `*`, `>`, `<`) without a matching `package-lock.json` entry resolve to `"latest"` rather than the actual installed version.
+- **Maven**: Managed-only deps (present in `<dependencyManagement>` but not in `<dependencies>`) are not emitted, to avoid duplicating entries already declared in a BOM consumer.
+- **dotnet**: Bracketed version ranges (e.g., `[1.0,2.0)`) become `"latest"`.
+- **All parsers**: Direct dependencies only — transitive dependencies are not resolved or scanned.
+
+## External Integrations
+
+- **AST-CLI** ([Checkmarx/ast-cli](https://github.com/Checkmarx/ast-cli)) — primary consumer. Imports this module as a Go library. The fields `Locations`, `PackageManager`, `PackageName`, and `Version` on the `Package` struct are load-bearing: AST-CLI uses them to annotate scan results and drive remediation UI.
+- **Checkmarx One SCA** — downstream scan engine that receives the parsed dependency list.
+
+## Deployment
+
+N/A — this is a Go library consumed via `go get github.com/Checkmarx/manifest-parser`. It is not deployed as a service. The CLI (`cmd/`) is a local testing convenience, not a production artifact.
+
+## Performance Considerations
+
+- Maven re-scans the raw XML bytes after `encoding/xml` parsing (two passes). Large `pom.xml` files are loaded fully into memory; there is no streaming.
+- Gradle version catalog parsing reads `libs.versions.toml` once, separately from the build file. Large catalogs (80+ entries) are fine; pathologically large files are not size-bounded.
+- No caching between calls to `ParsersFactory` — each invocation allocates fresh parser state.
+
+## Security & Access
+
+- Parsers consume **untrusted manifest files** (user-supplied input):
+ - `encoding/xml` does **not** resolve external entities or DTDs by default — XXE is not a risk with the standard library decoder.
+ - There is no file-size limit enforced before reading. Callers in adversarial environments should validate file size before calling `Parse`.
+ - Path traversal: `ParsersFactory` accepts an arbitrary file path; callers are responsible for sanitising paths before passing them in.
+- No credentials, secrets, or network calls inside any parser.
+
+## Logging
+
+- The **library** (`pkg/`, `internal/`) returns `error` values and does not log. Callers should not expect any log output from the library.
+- The **CLI** (`cmd/main.go`) uses `log.Fatalf` on parse/marshal errors and exits non-zero. Normal output is JSON printed to stdout.
+
+## Coding Standards
+
+- `gofmt` and `go vet` clean — CI will fail otherwise.
+- Exported identifiers live in `pkg/`; internal logic lives in `internal/`. Do not add exported symbols to `internal/`.
+- Parser packages follow the naming layout: `internal/parsers/<ecosystem>/<ecosystem>_parser.go` + `<ecosystem>_parser_test.go`.
+- No global state in parsers — each concrete parser type is a stateless zero-value struct.
+
+## Debugging Steps
+
+1. **Run one parser against a fixture:**
+ ```bash
+ go run ./cmd test/resources/pom.xml
+ go run ./cmd test/resources/build.gradle
+ ```
+
+2. **Verbose test output to see which test case fails:**
+ ```bash
+ go test -v ./internal/parsers/maven/...
+ ```
-## Tests & fixtures
+3. **Location off-by-one:** check the 0-based invariant — the parser should use `lineNum - 1` or a 0-based counter. Grep for `Line:` assignments in the affected parser and verify against the fixture.
-Each parser has a `*_test.go` next to it using `testify`. Shared fixtures live in [test/resources/](test/resources/) (e.g. `pom.xml`, `package.json`, `requirements.txt`, `test_go.mod`, `Bootstrap.csproj`, `Gateway.csproj`, `packages.config`, `Directory.Packages.props`). When adding behaviors, add a fixture here rather than embedding large manifests in test source.
+4. **Version resolves to `"latest"` unexpectedly:** check whether the version string matches a range specifier (`^`, `~`, `[`, `*`) or whether a lock file / properties file is present in the same directory as the fixture.
-CI ([.github/workflows/ci.yml](.github/workflows/ci.yml)) enforces a **60% total coverage floor** — adding an untested branch to an already-thin package can push the whole repo below the gate.
+5. **New ecosystem not dispatched:** verify `selectManifestFile` in `manifest-file-selector.go` handles the new filename/extension and that the factory `switch` has a corresponding case.
From 65f9b140ce0e51ff8af1d2e82cf4f28bf8cb1e7d Mon Sep 17 00:00:00 2001
From: cx-anurag-dalke <120229307+cx-anurag-dalke@users.noreply.github.com>
Date: Mon, 13 Apr 2026 22:10:44 +0530
Subject: [PATCH 08/14] Add SBT (Scala Build Tool) manifest parser support
Implement a production-grade SBT parser that extracts dependencies from all
.sbt files (build.sbt, plugins.sbt, dependencies.sbt, etc.). The parser
supports val/lazy val/def variable declarations, all SBT operators (%, %%,
%%%), Seq blocks, addSbtPlugin syntax, dependency modifiers (exclude,
excludeAll, intransitive, withSources, withJavadoc, cross, classifier),
block and inline comments, scope annotations, dependencyOverrides, and
duplicate package detection.
Includes 29 unit tests at 97.8% coverage with test fixtures containing
known-vulnerable packages (Log4Shell, Jackson, Struts2, commons-collections,
SnakeYAML) for security scanning validation.
Co-Authored-By: Claude Opus 4.6 (1M context)
---
docs/sbt-parser-implementation-plan.md | 189 +++++
internal/parsers/sbt/sbt-parser.go | 248 +++++++
internal/parsers/sbt/sbt-parser_test.go | 820 ++++++++++++++++++++++
internal/testdata/build.sbt | 37 +
internal/testdata/plugins.sbt | 4 +
pkg/parser/manifest-file-selector.go | 5 +
pkg/parser/manifest-file-selector_test.go | 27 +
pkg/parser/parser_factory.go | 3 +
8 files changed, 1333 insertions(+)
create mode 100644 docs/sbt-parser-implementation-plan.md
create mode 100644 internal/parsers/sbt/sbt-parser.go
create mode 100644 internal/parsers/sbt/sbt-parser_test.go
create mode 100644 internal/testdata/build.sbt
create mode 100644 internal/testdata/plugins.sbt
diff --git a/docs/sbt-parser-implementation-plan.md b/docs/sbt-parser-implementation-plan.md
new file mode 100644
index 0000000..c46beed
--- /dev/null
+++ b/docs/sbt-parser-implementation-plan.md
@@ -0,0 +1,189 @@
+# SBT Parser Implementation Plan
+
+## Context
+
+The manifest-parser repository supports Maven, npm, PyPI, Go modules, and .NET. The user needs to extend it with SBT (Scala Build Tool) support to parse SBT manifest files and extract dependencies. The implementation must follow existing patterns exactly, add duplicate detection, include comprehensive tests with vulnerable packages, and integrate cleanly without modifying existing parsers.
+
+### Supported SBT File Types
+
+SBT uses multiple file types that can declare dependencies. The parser supports **all `.sbt` files** via extension-based matching (like `.csproj` for dotnet):
+
+| File | Purpose | Syntax |
+|------|---------|--------|
+| `build.sbt` | Primary build definition | `libraryDependencies += "g" % "a" % "v"` |
+| `plugins.sbt` | SBT plugin dependencies (in `project/`) | `addSbtPlugin("g" % "a" % "v")` |
+| `dependencies.sbt` | Separated dependency definitions | Same as `build.sbt` |
+| Any other `*.sbt` | SBT auto-loads all `.sbt` files in project root | Same as `build.sbt` |
+
+The core dependency regex `"g" % "a" % "v"` matches inside any wrapper (`addSbtPlugin(...)`, `libraryDependencies +=`, bare declarations), so all these file types are handled by the same parser with no special-casing needed.
+
+---
+
+## Files to Create (3)
+
+### 1. `internal/parsers/sbt/sbt-parser.go` — Core Parser
+
+**Package:** `sbt` | **Struct:** `SbtParser{}` | **PackageManager string:** `"sbt"`
+
+**Parsing Strategy — Two-pass, regex-based (like PyPI parser but with Seq-block state tracking):**
+
+- **Pass 1:** Extract variable definitions into `map[string]string`
+- **Pass 2:** Line-by-line dependency extraction with state machine for `Seq(...)` blocks
+
+#### Variable Extraction (Pass 1)
+
+Supports all Scala variable declaration forms used in SBT files:
+
+| Pattern | Example | Regex |
+|---------|---------|-------|
+| `val` | `val v = "1.0"` | `^\s*val\s+(\w+)\s*=\s*"([^"]+)"` |
+| `lazy val` | `lazy val v = "1.0"` | `^\s*lazy\s+val\s+(\w+)\s*=\s*"([^"]+)"` |
+| `def` | `def v = "1.0"` | `^\s*def\s+(\w+)\s*=\s*"([^"]+)"` |
+
+All three patterns are combined into a single regex:
+```
+^\s*(?:lazy\s+)?(?:val|def)\s+(\w+)\s*=\s*"([^"]+)"
+```
+
+#### Dependency Extraction (Pass 2)
+
+**Core dependency regex:**
+```
+"([^"]+)"\s+(%{1,3})\s+"([^"]+)"\s+%\s+(?:"([^"]+)"|(\w+))(?:\s+%\s+(?:"[^"]*"|\w+))?
+```
+Captures: groupId, operator (`%`/`%%`/`%%%`), artifactId, version (quoted or variable name), optional scope (ignored).
+
+#### Helper functions:
+- `extractVariables(lines []string) map[string]string` — supports `val`, `lazy val`, and `def`
+- `resolveVersion(version string, vars map[string]string) string` — exact version as-is, variable lookup, unresolvable → `"latest"`
+- `stripComments(line string, inBlockComment *bool) string` — handles `//` and `/* */`
+- `computeLocationIndices(rawLine, groupId) (int, int)` — calculates start/end with modifier-aware trimming
+
+#### Duplicate detection:
+`map[string]bool` keyed by `"groupId:artifactId"`. Skip duplicates silently (no `log.Printf` — this is a library, not a CLI; callers control their own logging).
+
+#### Comment handling:
+Strip `//` inline comments; track `/* */` block comment state across lines. Comments are stripped **before** the dependency regex is applied and quoted strings are not recognized, so a `//` or `/*` inside a quoted string truncates the line at that point.
+
+#### Location tracking:
+Single `Location` per package (like PyPI), `Line` is 0-indexed:
+- `StartIndex` = position of first `"` of groupId in the raw line
+- `EndIndex` = end of the dependency declaration, **excluding** trailing modifiers
+
+**Modifier-aware EndIndex calculation:** The `computeLocationIndices` function trims the following patterns from the end of the line when computing `EndIndex`:
+- Trailing commas and whitespace
+- Dependency modifiers: `exclude(...)`, `excludeAll(...)`, `classifier(...)`, `intransitive()`, `withSources()`, `withJavadoc()`, `cross(...)`
+- Closing parentheses from `addSbtPlugin(...)` or `Seq(...)` wrappers
+- Inline comments (`// ...`)
+
+This ensures the location span covers only the `"g" % "a" % "v"` core declaration.
+
+#### Imports:
+Only stdlib — `os`, `regexp`, `strings`, `fmt` + `models` package. No `log` import (library code should not write to stderr).
+
+### 2. `internal/testdata/build.sbt` and `internal/testdata/plugins.sbt` — Test Fixtures
+
+**`build.sbt`** — Contains known-vulnerable dependencies:
+- **log4j-core 2.14.0** (CVE-2021-44228 — Log4Shell)
+- **jackson-databind 2.13.0** (multiple CVEs)
+- **struts2-core 2.5.20** (CVE-2020-17530)
+- **commons-collections 3.2.1** (deserialization vulnerability)
+- **snakeyaml 1.26** (CVE-2022-1471)
+
+Exercises all parsing scenarios: `%`, `%%`, `%%%`, `Seq(...)`, variable-based versions, inline comments, block comments, scope annotations.
+
+**`plugins.sbt`** — Contains SBT plugin dependencies using `addSbtPlugin(...)` syntax to validate that the parser handles `plugins.sbt` files correctly.
+
+### 3. `internal/parsers/sbt/sbt-parser_test.go` — Comprehensive Tests
+
+**Table-driven + individual tests following Maven/PyPI patterns:**
+
+| # | Test | What it validates |
+|---|------|-------------------|
+| 1 | TestParseSingleDependency | Basic `libraryDependencies += "g" % "a" % "v"` |
+| 2 | TestParseSingleDependencyDoublePercent | `%%` operator → PackageName is `g:a` (no Scala suffix) |
+| 3 | TestParseSingleDependencyTriplePercent | `%%%` operator (Scala.js) — same as `%%` |
+| 4 | TestParseSeqBlock | `libraryDependencies ++= Seq(...)` with multiple deps |
+| 5 | TestParseWithScope | Trailing `% "test"` or `% Test` → parsed correctly, scope ignored |
+| 6 | TestParseWithVariableVersion | `val v = "1.0"` then `% v` → resolves to `"1.0"` |
+| 7 | TestParseWithUnresolvableVariable | Missing variable → version is `"latest"` |
+| 8 | TestParseSingleLineComment | `//` comments are skipped |
+| 9 | TestParseBlockComment | `/* ... */` spanning lines → deps inside skipped |
+| 10 | TestParseEmptyFile | Returns empty slice, no error |
+| 11 | TestParseDuplicateDependencies | Same `g:a` twice → first wins, second skipped |
+| 12 | TestParseLocationAccuracy | Verify exact Line, StartIndex, EndIndex values |
+| 13 | TestParseNonExistentFile | Returns error |
+| 14 | TestParseMixedOperators | Mix of `%` and `%%` in same Seq |
+| 15 | TestResolveVersion | Table-driven: exact, variable, missing, empty |
+| 16 | TestParseAddSbtPlugin | `addSbtPlugin("g" % "a" % "v")` syntax from `plugins.sbt` |
+| 17 | TestParseLazyVal | `lazy val v = "1.0"` → variable extracted and resolved |
+| 18 | TestParseDef | `def v = "1.0"` → variable extracted and resolved |
+| 19 | TestParseWithExclude | `"g" % "a" % "v" exclude("x", "y")` → parsed, EndIndex excludes modifier |
+| 20 | TestParseWithIntransitive | `"g" % "a" % "v" intransitive()` → parsed, EndIndex excludes modifier |
+| 21 | TestParseWithCross | `"g" % "a" % "v" cross CrossVersion.full` → parsed, EndIndex excludes modifier |
+| 22 | TestParseWithExcludeAll | `"g" % "a" % "v" excludeAll(...)` → parsed, EndIndex excludes modifier |
+| 23 | TestParseDependencyOverrides | `dependencyOverrides += "g" % "a" % "v"` → parsed correctly |
+| 24 | TestParseWithClassifier | `"g" % "a" % "v" % "test" classifier "tests"` → parsed, classifier ignored |
+| 25 | TestExtractVariables | Table-driven: val, lazy val, def, commented out, indented |
+| 26 | TestSbtParser_Parse_RealFile | Parse `../../testdata/build.sbt` and validate against expected packages |
+| 27 | TestSbtParser_Parse_PluginsFile | Parse `../../testdata/plugins.sbt` and validate plugin dependencies |
+
+---
+
+## Files to Modify (3)
+
+### 4. `pkg/parser/manifest-file-selector.go`
+
+- Add `SbtBuild` to the `Manifest` iota enum (after `GoMod`)
+- Add extension-based detection: `if manifestFileExtension == ".sbt" { return SbtBuild }`
+ - This matches **all** `.sbt` files (`build.sbt`, `plugins.sbt`, `dependencies.sbt`, etc.)
+ - Follows the same pattern used for `.csproj` detection
+
+### 5. `pkg/parser/parser_factory.go`
+
+- Add import: `"github.com/Checkmarx/manifest-parser/internal/parsers/sbt"`
+- Add case: `case SbtBuild: return &sbt.SbtParser{}`
+
+### 6. `pkg/parser/manifest-file-selector_test.go`
+
+- Add `TestManifestFileSelector_ExpectSbtBuild` test for `build.sbt`
+- Add `TestManifestFileSelector_ExpectSbtPlugins` test for `plugins.sbt`
+- Add `TestManifestFileSelector_ExpectSbtCustom` test for `dependencies.sbt`
+
+---
+
+## Implementation Order
+
+1. Create `internal/parsers/sbt/sbt-parser.go` (core parser)
+2. Create `internal/testdata/build.sbt` (test fixture)
+3. Create `internal/parsers/sbt/sbt-parser_test.go` (tests)
+4. Modify `pkg/parser/manifest-file-selector.go` (enum + detection)
+5. Modify `pkg/parser/manifest-file-selector_test.go` (selector test)
+6. Modify `pkg/parser/parser_factory.go` (factory registration)
+7. Run `go test ./...` to verify all tests pass with no regressions
+
+## Verification
+
+1. `go build ./...` — compiles cleanly
+2. `go test ./internal/parsers/sbt/ -v` — all SBT parser tests pass
+3. `go test ./pkg/parser/ -v` — selector + factory tests pass (including new SBT test)
+4. `go test ./... -v` — full suite, no regressions
+5. `go test ./... -cover` — check coverage
+6. `go run cmd/main.go internal/testdata/build.sbt` — produces correct JSON output
+7. `go run cmd/main.go internal/testdata/plugins.sbt` — produces correct JSON output for plugin dependencies
+
+---
+
+## Production-Readiness Hardening (v2)
+
+The following gaps were identified after initial implementation and are addressed in the updated parser:
+
+| # | Gap | Fix | Impact |
+|---|-----|-----|--------|
+| 1 | `lazy val` not matched | Extend varRegex to `(?:lazy\s+)?(?:val\|def)` | **High** — many real projects use `lazy val` |
+| 2 | `def` declarations not matched | Same regex extension | **Medium** — some projects use `def` for versions |
+| 3 | Modifiers corrupt EndIndex | `computeLocationIndices` trims `exclude(...)`, `intransitive()`, `withSources()`, `withJavadoc()`, `cross(...)`, `classifier(...)` | **Medium** — common in complex builds |
+| 4 | Closing `)` from wrappers in EndIndex | Trim trailing `)` after modifiers | **Medium** — affects `addSbtPlugin(...)` |
+| 5 | `log.Printf` in library code | Remove all `log.Printf` calls — library consumers control their own logging | **Medium** — breaks clean library usage |
+| 6 | `dependencyOverrides` not tested | Already works (regex is context-free), add explicit test | **Low** — verification only |
+| 7 | `classifier` keyword | Already handled by optional scope group in regex, add explicit test | **Low** — verification only |
\ No newline at end of file
diff --git a/internal/parsers/sbt/sbt-parser.go b/internal/parsers/sbt/sbt-parser.go
new file mode 100644
index 0000000..0946913
--- /dev/null
+++ b/internal/parsers/sbt/sbt-parser.go
@@ -0,0 +1,248 @@
+package sbt
+
+import (
+ "fmt"
+ "os"
+ "regexp"
+ "strings"
+
+ "github.com/Checkmarx/manifest-parser/pkg/parser/models"
+)
+
+// SbtParser implements parsing of SBT .sbt files (build.sbt, plugins.sbt, etc.)
+type SbtParser struct{}
+
+var (
+ // varRegex matches Scala variable declarations:
+ // val name = "value"
+ // lazy val name = "value"
+ // def name = "value"
+ varRegex = regexp.MustCompile(`^\s*(?:lazy\s+)?(?:val|def)\s+(\w+)\s*=\s*"([^"]+)"`)
+
+ // depRegex matches SBT dependency declarations:
+ // "groupId" % "artifactId" % "version"
+ // "groupId" %% "artifactId" % "version"
+ // "groupId" %%% "artifactId" % "version"
+ // "groupId" % "artifactId" % variableName
+ // With optional trailing scope: % "test" or % Test
+ depRegex = regexp.MustCompile(`"([^"]+)"\s+(%{1,3})\s+"([^"]+)"\s+%\s+(?:"([^"]+)"|(\w+))(?:\s+%\s+(?:"[^"]*"|\w+))?`)
+)
+
+// extractVariables scans lines for val, lazy val, and def string declarations and returns a name -> value map (a later declaration of the same name overwrites an earlier one)
+func extractVariables(lines []string) map[string]string {
+	vars := make(map[string]string)
+	inBlockComment := false
+
+	for _, rawLine := range lines {
+		// stripComments returns only the text outside comments (empty while inside a
+		// block comment), so a declaration followed by an opening /* on the same
+		// line is still seen; inBlockComment is only state for the next call.
+		line := stripComments(rawLine, &inBlockComment)
+		if match := varRegex.FindStringSubmatch(line); match != nil {
+			vars[match[1]] = match[2]
+		}
+	}
+
+	return vars
+}
+
+// resolveVersion returns version as-is when it starts with a digit, otherwise looks it up in vars; an empty or unknown name yields "latest"
+func resolveVersion(version string, vars map[string]string) string {
+	if version == "" {
+		return "latest"
+	}
+	// A value whose first byte is an ASCII digit is treated as a literal version and returned unchanged
+	if len(version) > 0 && (version[0] >= '0' && version[0] <= '9') {
+		return version
+	}
+	// Try to resolve as a variable
+	if resolved, exists := vars[version]; exists {
+		return resolved
+	}
+	return "latest"
+}
+
+// stripComments removes // and /* */ comments from line and tracks block comment state across calls in *inBlockComment (quoted strings are not recognized, so markers inside string literals count as comments)
+func stripComments(line string, inBlockComment *bool) string {
+	if *inBlockComment {
+		if idx := strings.Index(line, "*/"); idx >= 0 {
+			*inBlockComment = false
+			line = line[idx+2:]
+		} else {
+			return ""
+		}
+	}
+
+	// Handle inline block comments: /* ... */ on the same line
+	for {
+		startIdx := strings.Index(line, "/*")
+		// A "//" before the "/*" turns the rest of the line into a line comment, so the "/*" must not open a block
+		if lc := strings.Index(line, "//"); startIdx < 0 || (lc >= 0 && lc < startIdx) {
+			break
+		}
+		endIdx := strings.Index(line[startIdx+2:], "*/")
+		if endIdx >= 0 {
+			line = line[:startIdx] + line[startIdx+2+endIdx+2:]
+		} else {
+			// Block comment opens but doesn't close — entering block comment
+			*inBlockComment = true
+			line = line[:startIdx]
+			break
+		}
+	}
+
+	// Handle single-line comments
+	if idx := strings.Index(line, "//"); idx >= 0 {
+		line = line[:idx]
+	}
+
+	return line
+}
+
+// modifierKeywords are SBT dependency modifiers that should be excluded from the location span.
+// The EndIndex should cover only the core "g" % "a" % "v" declaration.
+var modifierKeywords = []string{
+ "exclude(",
+ "excludeAll(",
+ "intransitive()",
+ "withSources()",
+ "withJavadoc()",
+ "classifier ",
+ "classifier(",
+ "cross ",
+ "cross(",
+}
+
+// computeLocationIndices returns the start and end byte offsets of a dependency within rawLine.
+// The start is the offset of the opening quote of groupId (0 when the quoted groupId is not found).
+// The end is the end of the core dependency declaration, excluding modifiers, comments, and trailing punctuation.
+func computeLocationIndices(rawLine string, groupId string) (int, int) {
+	// StartIndex: position of the first quote of the groupId
+	searchStr := `"` + groupId + `"`
+	startIdx := strings.Index(rawLine, searchStr)
+	if startIdx < 0 {
+		startIdx = 0
+	}
+
+	// Look for a trailing "//" only from startIdx onwards: a "//" inside an earlier
+	// /* ... */ comment would otherwise put endIdx before startIdx and make the
+	// line[startIdx:endIdx] slice in trimModifiers panic.
+	endIdx := len(rawLine)
+	if commentIdx := strings.Index(rawLine[startIdx:], "//"); commentIdx >= 0 {
+		endIdx = startIdx + commentIdx
+	}
+
+	// Trim known dependency modifiers first (before punctuation removal,
+	// so keywords like "intransitive()" are still intact when searched)
+	endIdx = trimModifiers(rawLine, startIdx, endIdx)
+
+	// Trim trailing whitespace, commas, and closing parentheses
+	endIdx = trimTrailingPunctuation(rawLine, endIdx)
+
+	return startIdx, endIdx
+}
+
+// trimTrailingPunctuation removes trailing whitespace, commas, and closing parens from the end boundary
+func trimTrailingPunctuation(line string, endIdx int) int {
+ for endIdx > 0 {
+ ch := line[endIdx-1]
+ if ch == ' ' || ch == '\t' || ch == ',' || ch == ')' {
+ endIdx--
+ } else {
+ break
+ }
+ }
+ return endIdx
+}
+
+// trimModifiers looks up the first occurrence of each modifier keyword in line[startIdx:endIdx] and returns
+// the smallest such offset that lies after startIdx, or endIdx unchanged when no keyword qualifies.
+func trimModifiers(line string, startIdx int, endIdx int) int {
+	region := line[startIdx:endIdx]
+	for _, kw := range modifierKeywords {
+		if idx := strings.Index(region, kw); idx >= 0 {
+			// Offset of the keyword in the full line
+			candidate := startIdx + idx
+			// Ignore a match at startIdx itself; only a cut point earlier than the current endIdx is applied
+			if candidate > startIdx && candidate < endIdx {
+				endIdx = candidate
+			}
+		}
+	}
+	return endIdx
+}
+
+// Parse implements the Parser interface for SBT .sbt files (build.sbt, plugins.sbt, etc.): it reads manifestFile and returns one package per distinct groupId:artifactId, first occurrence winning
+func (p *SbtParser) Parse(manifestFile string) ([]models.Package, error) {
+	content, err := os.ReadFile(manifestFile)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read manifest file: %w", err)
+	}
+
+	lines := strings.Split(strings.ReplaceAll(string(content), "\r\n", "\n"), "\n")
+
+	// Pass 1: Extract variable definitions
+	vars := extractVariables(lines)
+
+	// Pass 2: Extract dependencies
+	var packages []models.Package
+	seen := make(map[string]bool)
+	inBlockComment := false
+
+	for lineNum, rawLine := range lines {
+		// stripComments returns only the text outside comments (empty while inside
+		// a block comment), so the line is not skipped merely because it opens a
+		// block comment: a dependency written before the "/*" is still parsed.
+		line := stripComments(rawLine, &inBlockComment)
+
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+
+		// Try to extract dependency from this line
+		match := depRegex.FindStringSubmatch(line)
+		if match == nil {
+			continue
+		}
+
+		groupId := match[1]
+		// match[2] is the operator (%, %%, %%%) — captured but not used
+		artifactId := match[3]
+		quotedVersion := match[4] // version from quoted string
+		bareVersion := match[5]   // version from variable name
+
+		var version string
+		if quotedVersion != "" {
+			version = quotedVersion
+		} else if bareVersion != "" {
+			version = resolveVersion(bareVersion, vars)
+		} else {
+			version = "latest"
+		}
+
+		// Build package key for duplicate detection
+		pkgKey := groupId + ":" + artifactId
+		if seen[pkgKey] {
+			continue
+		}
+		seen[pkgKey] = true
+
+		// Calculate location on the raw line so offsets match the file content
+		startIdx, endIdx := computeLocationIndices(rawLine, groupId)
+
+		packages = append(packages, models.Package{
+			PackageManager: "sbt",
+			PackageName:    pkgKey,
+			Version:        version,
+			FilePath:       manifestFile,
+			Locations: []models.Location{{
+				Line:       lineNum,
+				StartIndex: startIdx,
+				EndIndex:   endIdx,
+			}},
+		})
+	}
+
+	return packages, nil
+}
diff --git a/internal/parsers/sbt/sbt-parser_test.go b/internal/parsers/sbt/sbt-parser_test.go
new file mode 100644
index 0000000..063a9bf
--- /dev/null
+++ b/internal/parsers/sbt/sbt-parser_test.go
@@ -0,0 +1,820 @@
+package sbt
+
+import (
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "github.com/Checkmarx/manifest-parser/internal/testdata"
+ "github.com/Checkmarx/manifest-parser/pkg/parser/models"
+)
+
+func TestParseSingleDependency(t *testing.T) {
+ content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0"
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+
+ expected := []models.Package{
+ {
+ PackageManager: "sbt",
+ PackageName: "org.example:test-lib",
+ Version: "1.0.0",
+ FilePath: filePath,
+ Locations: []models.Location{{
+ Line: 0,
+ StartIndex: 23,
+ EndIndex: 59,
+ }},
+ },
+ }
+ testdata.ValidatePackages(t, pkgs, expected)
+}
+
+func TestParseSingleDependencyDoublePercent(t *testing.T) {
+ content := `libraryDependencies += "org.typelevel" %% "cats-core" % "2.9.0"
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+
+ expected := []models.Package{
+ {
+ PackageManager: "sbt",
+ PackageName: "org.typelevel:cats-core",
+ Version: "2.9.0",
+ FilePath: filePath,
+ Locations: []models.Location{{
+ Line: 0,
+ StartIndex: 23,
+ EndIndex: 63,
+ }},
+ },
+ }
+ testdata.ValidatePackages(t, pkgs, expected)
+}
+
+func TestParseSingleDependencyTriplePercent(t *testing.T) {
+ content := `libraryDependencies += "org.scala-js" %%% "scalajs-dom" % "2.4.0"
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+
+ expected := []models.Package{
+ {
+ PackageManager: "sbt",
+ PackageName: "org.scala-js:scalajs-dom",
+ Version: "2.4.0",
+ FilePath: filePath,
+ Locations: []models.Location{{
+ Line: 0,
+ StartIndex: 23,
+ EndIndex: 65,
+ }},
+ },
+ }
+ testdata.ValidatePackages(t, pkgs, expected)
+}
+
+func TestParseSeqBlock(t *testing.T) {
+ content := `libraryDependencies ++= Seq(
+ "org.example" % "lib-a" % "1.0.0",
+ "org.example" % "lib-b" % "2.0.0"
+)
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 2 {
+ t.Fatalf("expected 2 packages, got %d", len(pkgs))
+ }
+
+ if pkgs[0].PackageName != "org.example:lib-a" {
+ t.Errorf("expected pkg[0].PackageName = org.example:lib-a, got %s", pkgs[0].PackageName)
+ }
+ if pkgs[0].Version != "1.0.0" {
+ t.Errorf("expected pkg[0].Version = 1.0.0, got %s", pkgs[0].Version)
+ }
+ if pkgs[1].PackageName != "org.example:lib-b" {
+ t.Errorf("expected pkg[1].PackageName = org.example:lib-b, got %s", pkgs[1].PackageName)
+ }
+ if pkgs[1].Version != "2.0.0" {
+ t.Errorf("expected pkg[1].Version = 2.0.0, got %s", pkgs[1].Version)
+ }
+}
+
+func TestParseWithScope(t *testing.T) {
+ content := `libraryDependencies += "org.scalatest" %% "scalatest" % "3.2.15" % "test"
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+
+ if pkgs[0].PackageName != "org.scalatest:scalatest" {
+ t.Errorf("expected PackageName = org.scalatest:scalatest, got %s", pkgs[0].PackageName)
+ }
+ if pkgs[0].Version != "3.2.15" {
+ t.Errorf("expected Version = 3.2.15, got %s", pkgs[0].Version)
+ }
+}
+
+func TestParseWithVariableVersion(t *testing.T) {
+ content := `val jacksonVersion = "2.13.0"
+libraryDependencies += "com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+
+ if pkgs[0].Version != "2.13.0" {
+ t.Errorf("expected Version = 2.13.0, got %s", pkgs[0].Version)
+ }
+}
+
+func TestParseWithUnresolvableVariable(t *testing.T) {
+ content := `libraryDependencies += "org.example" % "test-lib" % unknownVar
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+
+ if pkgs[0].Version != "latest" {
+ t.Errorf("expected Version = latest, got %s", pkgs[0].Version)
+ }
+}
+
+func TestParseSingleLineComment(t *testing.T) {
+ content := `// "org.example" % "should-not-parse" % "1.0.0"
+libraryDependencies += "org.example" % "real-lib" % "1.0.0" // inline comment
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+
+ if pkgs[0].PackageName != "org.example:real-lib" {
+ t.Errorf("expected PackageName = org.example:real-lib, got %s", pkgs[0].PackageName)
+ }
+}
+
+func TestParseBlockComment(t *testing.T) {
+ content := `/*
+ "org.example" % "should-not-parse" % "1.0.0"
+*/
+libraryDependencies += "org.example" % "real-lib" % "2.0.0"
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+
+ if pkgs[0].PackageName != "org.example:real-lib" {
+ t.Errorf("expected PackageName = org.example:real-lib, got %s", pkgs[0].PackageName)
+ }
+ if pkgs[0].Version != "2.0.0" {
+ t.Errorf("expected Version = 2.0.0, got %s", pkgs[0].Version)
+ }
+}
+
+func TestParseEmptyFile(t *testing.T) {
+ content := ""
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 0 {
+ t.Fatalf("expected 0 packages, got %d", len(pkgs))
+ }
+}
+
+func TestParseDuplicateDependencies(t *testing.T) {
+ content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0"
+libraryDependencies += "org.example" % "test-lib" % "2.0.0"
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatalf("expected 1 package (duplicate skipped), got %d", len(pkgs))
+ }
+
+ if pkgs[0].Version != "1.0.0" {
+ t.Errorf("expected first occurrence version 1.0.0, got %s", pkgs[0].Version)
+ }
+}
+
+func TestParseLocationAccuracy(t *testing.T) {
+ // Line: "org.example" % "test-lib" % "1.0.0"
+ // Positions: 0123456789...
+ content := `"org.example" % "test-lib" % "1.0.0"
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+
+ expected := []models.Package{
+ {
+ PackageManager: "sbt",
+ PackageName: "org.example:test-lib",
+ Version: "1.0.0",
+ FilePath: filePath,
+ Locations: []models.Location{{
+ Line: 0,
+ StartIndex: 0,
+ EndIndex: 36,
+ }},
+ },
+ }
+ testdata.ValidatePackages(t, pkgs, expected)
+}
+
+func TestParseNonExistentFile(t *testing.T) {
+ parser := &SbtParser{}
+ _, err := parser.Parse("/nonexistent/build.sbt")
+ if err == nil {
+ t.Error("expected error for non-existent file, got none")
+ }
+}
+
+func TestParseMixedOperators(t *testing.T) {
+ content := `libraryDependencies ++= Seq(
+ "org.example" % "lib-a" % "1.0.0",
+ "org.typelevel" %% "cats-core" % "2.9.0",
+ "org.scala-js" %%% "scalajs-dom" % "2.4.0"
+)
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 3 {
+ t.Fatalf("expected 3 packages, got %d", len(pkgs))
+ }
+
+ if pkgs[0].PackageName != "org.example:lib-a" {
+ t.Errorf("expected pkg[0] = org.example:lib-a, got %s", pkgs[0].PackageName)
+ }
+ if pkgs[1].PackageName != "org.typelevel:cats-core" {
+ t.Errorf("expected pkg[1] = org.typelevel:cats-core, got %s", pkgs[1].PackageName)
+ }
+ if pkgs[2].PackageName != "org.scala-js:scalajs-dom" {
+ t.Errorf("expected pkg[2] = org.scala-js:scalajs-dom, got %s", pkgs[2].PackageName)
+ }
+}
+
+func TestParseMalformedLine(t *testing.T) {
+ content := `libraryDependencies += "org.example" % "test-lib"
+libraryDependencies += "org.example" % "real-lib" % "1.0.0"
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 {
+ t.Fatalf("expected 1 package (malformed skipped), got %d", len(pkgs))
+ }
+
+ if pkgs[0].PackageName != "org.example:real-lib" {
+ t.Errorf("expected PackageName = org.example:real-lib, got %s", pkgs[0].PackageName)
+ }
+}
+
+func TestResolveVersion(t *testing.T) {
+ vars := map[string]string{
+ "jacksonVersion": "2.13.0",
+ "log4jVersion": "2.14.0",
+ }
+
+ tests := []struct {
+ name string
+ version string
+ expected string
+ }{
+ {"exact version", "1.2.3", "1.2.3"},
+ {"variable lookup", "jacksonVersion", "2.13.0"},
+ {"another variable", "log4jVersion", "2.14.0"},
+ {"missing variable", "unknownVar", "latest"},
+ {"empty version", "", "latest"},
+ {"semver with pre-release", "2.0.0-RC1", "2.0.0-RC1"},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := resolveVersion(tt.version, vars)
+ if result != tt.expected {
+ t.Errorf("resolveVersion(%q) = %q, want %q", tt.version, result, tt.expected)
+ }
+ })
+ }
+}
+
+func TestStripComments(t *testing.T) {
+ tests := []struct {
+ name string
+ line string
+ inBlockComment bool
+ expected string
+ expectedBlock bool
+ }{
+ {"no comments", `"org.example" % "lib" % "1.0"`, false, `"org.example" % "lib" % "1.0"`, false},
+ {"single line comment", `"org.example" % "lib" % "1.0" // comment`, false, `"org.example" % "lib" % "1.0" `, false},
+ {"full line comment", `// this is a comment`, false, ``, false},
+ {"block comment start", `/* start of block`, false, ``, true},
+ {"inside block comment", ` some content inside block`, true, ``, true},
+ {"block comment end", `end of block */`, true, ``, false},
+ {"inline block comment", `before /* inside */ after`, false, `before after`, false},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ inBlock := tt.inBlockComment
+ result := stripComments(tt.line, &inBlock)
+ if result != tt.expected {
+ t.Errorf("stripComments(%q) = %q, want %q", tt.line, result, tt.expected)
+ }
+ if inBlock != tt.expectedBlock {
+ t.Errorf("inBlockComment = %v, want %v", inBlock, tt.expectedBlock)
+ }
+ })
+ }
+}
+
+func TestExtractVariables(t *testing.T) {
+ lines := []string{
+ `val jacksonVersion = "2.13.0"`,
+ `lazy val log4jVersion = "2.14.0"`,
+ `def strutsVersion = "2.5.20"`,
+ `// val commentedOut = "1.0.0"`,
+ `name := "my-project"`,
+ `val emptyLine`,
+ ` val indentedVar = "3.0.0"`,
+ ` lazy val indentedLazy = "4.0.0"`,
+ ` def indentedDef = "5.0.0"`,
+ }
+
+ vars := extractVariables(lines)
+
+ expected := map[string]string{
+ "jacksonVersion": "2.13.0",
+ "log4jVersion": "2.14.0",
+ "strutsVersion": "2.5.20",
+ "indentedVar": "3.0.0",
+ "indentedLazy": "4.0.0",
+ "indentedDef": "5.0.0",
+ }
+
+ if len(vars) != len(expected) {
+ t.Fatalf("expected %d variables, got %d: %v", len(expected), len(vars), vars)
+ }
+
+ for key, want := range expected {
+ got, exists := vars[key]
+ if !exists {
+ t.Errorf("expected variable %q not found", key)
+ continue
+ }
+ if got != want {
+ t.Errorf("variable %q = %q, want %q", key, got, want)
+ }
+ }
+}
+
+func TestParseAddSbtPlugin(t *testing.T) { // addSbtPlugin(...) entries must be reported as group:name packages with their version
+ content := `addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.0")
+addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "plugins.sbt")
+ os.WriteFile(filePath, []byte(content), 0644) // NOTE(review): the WriteFile error is discarded here
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 2 { // Fatalf so the index accesses below cannot go out of range
+ t.Fatalf("expected 2 packages, got %d", len(pkgs))
+ }
+
+ if pkgs[0].PackageName != "com.eed3si9n:sbt-assembly" { // packages are expected in file order
+ t.Errorf("expected pkg[0].PackageName = com.eed3si9n:sbt-assembly, got %s", pkgs[0].PackageName)
+ }
+ if pkgs[0].Version != "2.1.0" {
+ t.Errorf("expected pkg[0].Version = 2.1.0, got %s", pkgs[0].Version)
+ }
+ if pkgs[1].PackageName != "org.scalameta:sbt-scalafmt" {
+ t.Errorf("expected pkg[1].PackageName = org.scalameta:sbt-scalafmt, got %s", pkgs[1].PackageName)
+ }
+ if pkgs[1].Version != "2.5.2" {
+ t.Errorf("expected pkg[1].Version = 2.5.2, got %s", pkgs[1].Version)
+ }
+}
+
+func TestParseLazyVal(t *testing.T) { // a version given as a `lazy val` identifier must resolve to its string value
+ content := `lazy val myVersion = "3.1.0"
+libraryDependencies += "org.example" % "test-lib" % myVersion
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644) // NOTE(review): the WriteFile error is discarded here
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 { // Fatalf so pkgs[0] below is safe
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+ if pkgs[0].Version != "3.1.0" { // the identifier myVersion must have been substituted
+ t.Errorf("expected Version = 3.1.0, got %s", pkgs[0].Version)
+ }
+}
+
+func TestParseDef(t *testing.T) { // a version given as a `def` identifier must resolve to its string value
+ content := `def myVersion = "4.2.0"
+libraryDependencies += "org.example" % "test-lib" % myVersion
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644) // NOTE(review): the WriteFile error is discarded here
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 { // Fatalf so pkgs[0] below is safe
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+ if pkgs[0].Version != "4.2.0" { // the identifier myVersion must have been substituted
+ t.Errorf("expected Version = 4.2.0, got %s", pkgs[0].Version)
+ }
+}
+
+func TestParseWithExclude(t *testing.T) { // a trailing exclude(...) must not change the package nor widen its location
+ content := `libraryDependencies += "org.apache.hadoop" % "hadoop-common" % "3.3.4" exclude("org.slf4j", "slf4j-log4j12")
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644) // NOTE(review): the WriteFile error is discarded here
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 { // Fatalf so pkgs[0] below is safe
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+ if pkgs[0].PackageName != "org.apache.hadoop:hadoop-common" {
+ t.Errorf("expected PackageName = org.apache.hadoop:hadoop-common, got %s", pkgs[0].PackageName)
+ }
+ if pkgs[0].Version != "3.3.4" {
+ t.Errorf("expected Version = 3.3.4, got %s", pkgs[0].Version)
+ }
+ // EndIndex must not reach past the byte where " exclude(" begins on the dependency line.
+ loc := pkgs[0].Locations[0]
+ rawLine := `libraryDependencies += "org.apache.hadoop" % "hadoop-common" % "3.3.4" exclude("org.slf4j", "slf4j-log4j12")` // must stay identical to the line in content
+ excludeStart := strings.Index(rawLine, " exclude(")
+ if loc.EndIndex > excludeStart {
+ t.Errorf("EndIndex %d extends into exclude(...) modifier (starts at %d)", loc.EndIndex, excludeStart)
+ }
+}
+
+func TestParseWithIntransitive(t *testing.T) { // a trailing intransitive() must not widen the dependency's location
+ content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" intransitive()
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644) // NOTE(review): the WriteFile error is discarded here
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 { // Fatalf so pkgs[0] below is safe
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+ // EndIndex must not reach past the byte where " intransitive()" begins on the dependency line.
+ loc := pkgs[0].Locations[0]
+ rawLine := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" intransitive()` // must stay identical to the line in content
+ modifierStart := strings.Index(rawLine, " intransitive()")
+ if loc.EndIndex > modifierStart {
+ t.Errorf("EndIndex %d extends into intransitive() modifier (starts at %d)", loc.EndIndex, modifierStart)
+ }
+}
+
+func TestParseWithCross(t *testing.T) { // a trailing `cross CrossVersion.full` must not widen the dependency's location
+ content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" cross CrossVersion.full
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644) // NOTE(review): the WriteFile error is discarded here
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 { // Fatalf so pkgs[0] below is safe
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+ loc := pkgs[0].Locations[0]
+ rawLine := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" cross CrossVersion.full` // must stay identical to the line in content
+ modifierStart := strings.Index(rawLine, " cross ")
+ if loc.EndIndex > modifierStart { // EndIndex must stop at or before the start of the modifier
+ t.Errorf("EndIndex %d extends into cross modifier (starts at %d)", loc.EndIndex, modifierStart)
+ }
+}
+
+func TestParseWithExcludeAll(t *testing.T) { // a trailing excludeAll(...) must not widen the dependency's location
+ content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" excludeAll(ExclusionRule("org.slf4j"))
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644) // NOTE(review): the WriteFile error is discarded here
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 { // Fatalf so pkgs[0] below is safe
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+ loc := pkgs[0].Locations[0]
+ rawLine := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" excludeAll(ExclusionRule("org.slf4j"))` // must stay identical to the line in content
+ modifierStart := strings.Index(rawLine, " excludeAll(")
+ if loc.EndIndex > modifierStart { // EndIndex must stop at or before the start of the modifier
+ t.Errorf("EndIndex %d extends into excludeAll(...) modifier (starts at %d)", loc.EndIndex, modifierStart)
+ }
+}
+
+func TestParseDependencyOverrides(t *testing.T) { // dependencyOverrides += ... entries must be reported like libraryDependencies entries
+ content := `dependencyOverrides += "com.google.guava" % "guava" % "32.1.2-jre"
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644) // NOTE(review): the WriteFile error is discarded here
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 { // Fatalf so pkgs[0] below is safe
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+ if pkgs[0].PackageName != "com.google.guava:guava" {
+ t.Errorf("expected PackageName = com.google.guava:guava, got %s", pkgs[0].PackageName)
+ }
+ if pkgs[0].Version != "32.1.2-jre" { // the -jre qualifier is part of the expected version string
+ t.Errorf("expected Version = 32.1.2-jre, got %s", pkgs[0].Version)
+ }
+}
+
+func TestParseWithClassifier(t *testing.T) { // a `% "test"` scope plus a classifier must leave name and version intact
+ content := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" % "test" classifier "tests"
+`
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "build.sbt")
+ os.WriteFile(filePath, []byte(content), 0644) // NOTE(review): the WriteFile error is discarded here
+
+ parser := &SbtParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 1 { // Fatalf so pkgs[0] below is safe
+ t.Fatalf("expected 1 package, got %d", len(pkgs))
+ }
+ if pkgs[0].PackageName != "org.example:test-lib" {
+ t.Errorf("expected PackageName = org.example:test-lib, got %s", pkgs[0].PackageName)
+ }
+ if pkgs[0].Version != "1.0.0" { // "test" is the fourth operand and must not be taken as the version
+ t.Errorf("expected Version = 1.0.0, got %s", pkgs[0].Version)
+ }
+ loc := pkgs[0].Locations[0]
+ rawLine := `libraryDependencies += "org.example" % "test-lib" % "1.0.0" % "test" classifier "tests"` // must stay identical to the line in content
+ modifierStart := strings.Index(rawLine, " classifier ")
+ if loc.EndIndex > modifierStart { // bound is the classifier keyword; the % "test" part before it is allowed inside the range
+ t.Errorf("EndIndex %d extends into classifier modifier (starts at %d)", loc.EndIndex, modifierStart)
+ }
+}
+
+func TestSbtParser_Parse_RealFile(t *testing.T) { // end-to-end parse of the build.sbt fixture: names, versions, lines, one location each
+ parser := &SbtParser{}
+ manifestFile := "../../testdata/build.sbt"
+ packages, err := parser.Parse(manifestFile)
+ if err != nil {
+ t.Fatalf("Parse() error = %v", err)
+ }
+
+ // The fixture declares 10 dependencies (log4j, cats, jackson, struts, commons, snakeyaml, netty, scalajs, hadoop, guava); the one inside its block comment must not be counted.
+ if len(packages) != 10 {
+ t.Fatalf("expected 10 packages, got %d", len(packages))
+ }
+
+ // Expected packages in file order; versions given as identifiers in the fixture appear here resolved.
+ expected := []struct {
+ name string
+ version string
+ line int // 0-based line index within the fixture
+ }{
+ {"org.apache.logging.log4j:log4j-core", "2.14.0", 10},
+ {"org.typelevel:cats-core", "2.9.0", 13},
+ {"com.fasterxml.jackson.core:jackson-databind", "2.13.0", 17},
+ {"org.apache.struts:struts2-core", "2.5.20", 18},
+ {"commons-collections:commons-collections", "3.2.1", 19},
+ {"org.yaml:snakeyaml", "1.26", 20},
+ {"io.netty:netty-codec-http", "4.1.68.Final", 21},
+ {"org.scala-js:scalajs-dom", "2.4.0", 30},
+ {"org.apache.hadoop:hadoop-common", "3.3.4", 33},
+ {"com.google.guava:guava", "32.1.2-jre", 36},
+ }
+
+ for i, exp := range expected {
+ if packages[i].PackageManager != "sbt" {
+ t.Errorf("pkg[%d].PackageManager = %q, want %q", i, packages[i].PackageManager, "sbt")
+ }
+ if packages[i].PackageName != exp.name {
+ t.Errorf("pkg[%d].PackageName = %q, want %q", i, packages[i].PackageName, exp.name)
+ }
+ if packages[i].Version != exp.version {
+ t.Errorf("pkg[%d].Version = %q, want %q", i, packages[i].Version, exp.version)
+ }
+ if packages[i].FilePath != manifestFile { // FilePath is expected to echo the path handed to Parse
+ t.Errorf("pkg[%d].FilePath = %q, want %q", i, packages[i].FilePath, manifestFile)
+ }
+ if len(packages[i].Locations) != 1 {
+ t.Errorf("pkg[%d] has %d locations, want 1", i, len(packages[i].Locations))
+ continue // skip the Line check: Locations[0] may not exist
+ }
+ if packages[i].Locations[0].Line != exp.line {
+ t.Errorf("pkg[%d].Location.Line = %d, want %d", i, packages[i].Locations[0].Line, exp.line)
+ }
+ }
+
+ // The hadoop location must end before the exclude(...) modifier on its line.
+ hadoopPkg := packages[8] // index 8 is hadoop-common in the expected table above
+ if hadoopPkg.Locations[0].EndIndex > 71 { // NOTE(review): " exclude(" starts at byte 70 of the fixture line, so 71 is one byte looser than TestParseWithExclude
+ t.Errorf("hadoop EndIndex %d should not extend into exclude(...) modifier", hadoopPkg.Locations[0].EndIndex)
+ }
+}
+
+func TestSbtParser_Parse_PluginsFile(t *testing.T) { // end-to-end parse of the plugins.sbt fixture
+ parser := &SbtParser{}
+ manifestFile := "../../testdata/plugins.sbt"
+ packages, err := parser.Parse(manifestFile)
+ if err != nil {
+ t.Fatalf("Parse() error = %v", err)
+ }
+
+ expectedPackages := []models.Package{
+ {
+ PackageManager: "sbt",
+ PackageName: "com.eed3si9n:sbt-assembly",
+ Version: "2.1.0",
+ FilePath: manifestFile,
+ },
+ {
+ PackageManager: "sbt",
+ PackageName: "org.scalameta:sbt-scalafmt",
+ Version: "2.5.2",
+ FilePath: manifestFile,
+ },
+ {
+ PackageManager: "sbt",
+ PackageName: "com.github.sbt:sbt-native-packager",
+ Version: "1.9.16",
+ FilePath: manifestFile,
+ },
+ }
+
+ if len(packages) != len(expectedPackages) { // Fatalf so expectedPackages[i] below cannot go out of range
+ t.Fatalf("expected %d packages, got %d", len(expectedPackages), len(packages))
+ }
+
+ for i, pkg := range packages { // NOTE(review): only manager, name and version are compared; FilePath and Locations are not
+ if pkg.PackageManager != expectedPackages[i].PackageManager {
+ t.Errorf("pkg[%d].PackageManager = %q, want %q", i, pkg.PackageManager, expectedPackages[i].PackageManager)
+ }
+ if pkg.PackageName != expectedPackages[i].PackageName {
+ t.Errorf("pkg[%d].PackageName = %q, want %q", i, pkg.PackageName, expectedPackages[i].PackageName)
+ }
+ if pkg.Version != expectedPackages[i].Version {
+ t.Errorf("pkg[%d].Version = %q, want %q", i, pkg.Version, expectedPackages[i].Version)
+ }
+ }
+}
diff --git a/internal/testdata/build.sbt b/internal/testdata/build.sbt
new file mode 100644
index 0000000..d8b0a94
--- /dev/null
+++ b/internal/testdata/build.sbt
@@ -0,0 +1,37 @@
+// Project settings
+name := "vulnerable-test-project"
+version := "1.0.0"
+scalaVersion := "2.13.12"
+
+val jacksonVersion = "2.13.0"
+lazy val log4jVersion = "2.14.0"
+def strutsVersion = "2.5.20"
+
+// Single dependency with % — CVE-2021-44228 (Log4Shell)
+libraryDependencies += "org.apache.logging.log4j" % "log4j-core" % log4jVersion
+
+// Single dependency with %% — safe dependency
+libraryDependencies += "org.typelevel" %% "cats-core" % "2.9.0"
+
+// Seq block with mixed operators and vulnerable packages
+libraryDependencies ++= Seq(
+ "com.fasterxml.jackson.core" % "jackson-databind" % jacksonVersion,
+ "org.apache.struts" % "struts2-core" % strutsVersion,
+ "commons-collections" % "commons-collections" % "3.2.1",
+ "org.yaml" % "snakeyaml" % "1.26",
+ "io.netty" %% "netty-codec-http" % "4.1.68.Final" % "test"
+)
+
+/*
+ This is a block comment — dependencies here should NOT be parsed
+ "org.example" % "should-not-parse" % "1.0.0"
+*/
+
+// Scala.js dependency with %%%
+libraryDependencies += "org.scala-js" %%% "scalajs-dom" % "2.4.0"
+
+// Dependency with exclude modifier
+libraryDependencies += "org.apache.hadoop" % "hadoop-common" % "3.3.4" exclude("org.slf4j", "slf4j-log4j12")
+
+// Dependency override
+dependencyOverrides += "com.google.guava" % "guava" % "32.1.2-jre"
diff --git a/internal/testdata/plugins.sbt b/internal/testdata/plugins.sbt
new file mode 100644
index 0000000..47674cb
--- /dev/null
+++ b/internal/testdata/plugins.sbt
@@ -0,0 +1,4 @@
+// SBT plugins
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.0")
+addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
+addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.9.16")
diff --git a/pkg/parser/manifest-file-selector.go b/pkg/parser/manifest-file-selector.go
index 107f35e..e7aad0b 100644
--- a/pkg/parser/manifest-file-selector.go
+++ b/pkg/parser/manifest-file-selector.go
@@ -17,6 +17,7 @@ const (
GoMod
GradleBuild
GradleVersionCatalog
+ SbtBuild
)
// selectManifestFile a method to select a manifest file type by its name
@@ -37,6 +38,10 @@ func selectManifestFile(manifest string) Manifest {
}
}
+ if manifestFileExtension == ".sbt" {
+ return SbtBuild
+ }
+
if manifestFileName == "pom.xml" {
return MavenPom
}
diff --git a/pkg/parser/manifest-file-selector_test.go b/pkg/parser/manifest-file-selector_test.go
index 8d4d91c..d5e7188 100644
--- a/pkg/parser/manifest-file-selector_test.go
+++ b/pkg/parser/manifest-file-selector_test.go
@@ -66,3 +66,30 @@ func TestManifestFileSelector_ExpectGoMod(t *testing.T) {
t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
}
}
+
+func TestManifestFileSelector_ExpectSbtBuild(t *testing.T) { // the canonical build.sbt name must select SbtBuild
+ manifest := "build.sbt"
+ got := selectManifestFile(manifest)
+ want := SbtBuild
+ if got != want {
+ t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+ }
+}
+
+func TestManifestFileSelector_ExpectSbtPlugins(t *testing.T) { // plugins.sbt must select SbtBuild as well
+ manifest := "plugins.sbt"
+ got := selectManifestFile(manifest)
+ want := SbtBuild
+ if got != want {
+ t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+ }
+}
+
+func TestManifestFileSelector_ExpectSbtCustom(t *testing.T) { // selection is by the .sbt extension, so an arbitrary base name must work
+ manifest := "dependencies.sbt"
+ got := selectManifestFile(manifest)
+ want := SbtBuild
+ if got != want {
+ t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+ }
+}
diff --git a/pkg/parser/parser_factory.go b/pkg/parser/parser_factory.go
index 2d1f0e6..28c9e0a 100644
--- a/pkg/parser/parser_factory.go
+++ b/pkg/parser/parser_factory.go
@@ -7,6 +7,7 @@ import (
"github.com/Checkmarx/manifest-parser/internal/parsers/maven"
"github.com/Checkmarx/manifest-parser/internal/parsers/npm"
"github.com/Checkmarx/manifest-parser/internal/parsers/pypi"
+ "github.com/Checkmarx/manifest-parser/internal/parsers/sbt"
)
func ParsersFactory(manifest string) Parser {
@@ -31,6 +32,8 @@ func ParsersFactory(manifest string) Parser {
return &gradle.GradleParser{}
case GradleVersionCatalog:
return &gradle.VersionCatalogParser{}
+ case SbtBuild:
+ return &sbt.SbtParser{}
default:
return nil
}
From e69e27a4788933a68cbad66421b111d104fcbce6 Mon Sep 17 00:00:00 2001
From: atishj99 <141334503+cx-atish-jadhav@users.noreply.github.com>
Date: Fri, 15 May 2026 17:41:58 +0530
Subject: [PATCH 09/14] Fix gradle 0-based line contract and harden parsers for
downstream IDE integration
- gradle: emit 0-based line numbers (was off-by-one, broke IDE decorations)
- gradle: default empty catalog versions to "latest" (was causing 400 from realtime-scanner)
- gradle: multi-line dependency locations with rawLines tracking
- maven/dotnet/golang: strip trailing \r so byte offsets are correct on CRLF files
- CLAUDE.md: strengthen 0-based contract, add SBT to parser list
- plugins.sbt: add known-vulnerable packages so IDE decorations can be visually verified
Co-Authored-By: Claude Opus 4.7 (1M context)
---
CLAUDE.md | 10 +-
internal/parsers/dotnet/csproj_parser.go | 5 +-
internal/parsers/golang/go-mod-parser.go | 5 +-
internal/parsers/gradle/gradle_parser.go | 77 ++++++++++++-
internal/parsers/gradle/gradle_parser_test.go | 103 +++++++++++++++---
internal/parsers/gradle/version_catalog.go | 92 ++++++++++++----
internal/parsers/maven/maven-pom-parser.go | 4 +
internal/parsers/sbt/sbt-parser_test.go | 18 +++
internal/testdata/plugins.sbt | 5 +
9 files changed, 270 insertions(+), 49 deletions(-)
diff --git a/CLAUDE.md b/CLAUDE.md
index 0ead52f..335d3a6 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -98,7 +98,7 @@ type Package struct {
}
type Location struct {
- Line int // 0-based in most parsers (see Project Rules below)
+ Line int // 0-based (all parsers)
StartIndex int // 0-based byte offset from start of line
EndIndex int // 0-based byte offset from start of line
}
@@ -122,15 +122,17 @@ Per-ecosystem parsers live under [internal/parsers/](internal/parsers/):
- `pypi/` — line-oriented scan of `requirements*.txt` / `packages*.txt`. **Only `package==version` is supported.** Comments (`#`) and environment markers (`;`) are stripped. `PackageManager` = `"pypi"`.
- `golang/` — uses `golang.org/x/mod/modfile` to parse `go.mod`, then uses the parser's line metadata to compute character offsets. `PackageManager` = `"go"`.
- `dotnet/` — three parsers: `csproj_parser.go` (`.csproj`), `directory_packages_props_parser.go` (central package management), `packages_config_parser.go` (legacy). Bracketed version ranges become `"latest"`. `PackageManager` = `"nuget"` for all three.
+- `sbt/` — parses any `.sbt` file (`build.sbt`, `project/plugins.sbt`, `version.sbt`, etc.) using line-oriented scanning. Supports Scala build dependencies in `"group" % "name" % "version"` format. `PackageManager` = `"sbt"`.
## Project Rules (Invariants)
-- **`Location.Line` is 0-based** in most parsers (Maven, Go, npm, pypi use `lineNum - 1` or a 0-based counter). Downstream AST-CLI depends on this; don't "fix" it to 1-based without coordinating.
+- **`Location.Line` MUST be 0-based for ALL parsers.** When iterating `for i, line := range lines`, emit `Line: i` — never `i + 1`. Editors display 1-based line numbers; downstream consumers add `+1` for display. If parser output matches the editor's line number, it's off-by-one.
- **`Location.StartIndex` / `EndIndex` are 0-based byte offsets** from the start of the line. They are byte offsets, not rune/character offsets — relevant for non-ASCII manifests.
- **Unresolvable or ranged versions resolve to the literal string `"latest"`**, never an empty string. Callers branch on this value.
-- **`PackageManager` strings are part of the contract**: `"gradle"`, `"mvn"`, `"npm"`, `"pypi"`, `"go"`, `"nuget"`. Don't rename them.
+- **`PackageManager` strings are part of the contract**: `"gradle"`, `"mvn"`, `"npm"`, `"pypi"`, `"go"`, `"nuget"`, `"sbt"`. Don't rename them.
- Maven emits one `Location` per **non-comment line** of the `<dependency>` block (open tag, each child element, close tag). Single-line `Locations` for Maven would be a regression.
- Do not add `ParsersFactory` overloads or alternative entry points without coordinating with AST-CLI.
+- **Do not modify or rename existing `PackageManager` strings**. AST-CLI and Checkmarx One SCA branch on these values — a silent rename breaks downstream parsing with no compile-time error. If a rename is genuinely required, stop and confirm with the user.
## Testing Strategy
@@ -214,7 +216,7 @@ N/A — this is a Go library consumed via `go get github.com/Checkmarx/manifest-
go test -v ./internal/parsers/maven/...
```
-3. **Location off-by-one:** check the 0-based invariant — the parser should use `lineNum - 1` or a 0-based counter. Grep for `Line:` assignments in the affected parser and verify against the fixture.
+3. **Location off-by-one:** Parser violated 0-based contract. Grep for `i + 1` patterns near `Line:` / `LineNum:` assignments — emit `Line: i`, not `i + 1`.
4. **Version resolves to `"latest"` unexpectedly:** check whether the version string matches a range specifier (`^`, `~`, `[`, `*`) or whether a lock file / properties file is present in the same directory as the fixture.
diff --git a/internal/parsers/dotnet/csproj_parser.go b/internal/parsers/dotnet/csproj_parser.go
index 8e6d342..0cf768e 100644
--- a/internal/parsers/dotnet/csproj_parser.go
+++ b/internal/parsers/dotnet/csproj_parser.go
@@ -116,9 +116,12 @@ func (p *DotnetCsprojParser) Parse(manifestFile string) ([]models.Package, error
return nil, fmt.Errorf("failed to read manifest file: %w", err)
}
- // Split content into lines for index computation
+ // Split content into lines for index computation (strip \r for CRLF files)
strContent := string(content)
lines := strings.Split(strContent, "\n")
+ for i := range lines {
+ lines[i] = strings.TrimRight(lines[i], "\r")
+ }
// Create XML decoder
decoder := xml.NewDecoder(strings.NewReader(strContent))
diff --git a/internal/parsers/golang/go-mod-parser.go b/internal/parsers/golang/go-mod-parser.go
index 0f50f84..6516359 100644
--- a/internal/parsers/golang/go-mod-parser.go
+++ b/internal/parsers/golang/go-mod-parser.go
@@ -25,8 +25,11 @@ func (p *GoModParser) Parse(manifest string) ([]models.Package, error) {
return nil, err
}
- // Split file into lines for position calculation
+ // Split file into lines for position calculation (strip \r for CRLF files)
lines := strings.Split(string(data), "\n")
+ for i := range lines {
+ lines[i] = strings.TrimRight(lines[i], "\r")
+ }
var packages []models.Package
for _, req := range mf.Require {
diff --git a/internal/parsers/gradle/gradle_parser.go b/internal/parsers/gradle/gradle_parser.go
index 563793e..5ae2abc 100644
--- a/internal/parsers/gradle/gradle_parser.go
+++ b/internal/parsers/gradle/gradle_parser.go
@@ -131,8 +131,16 @@ func extractVariables(manifestFile, content string) map[string]string {
}
type dependencyStatement struct {
- Line int
- Text string
+ Line int
+ Text string
+ RawLines []rawLineInfo
+}
+
+// rawLineInfo records one source line that was folded into a dependency statement, so that
+// per-line locations can be computed later. Content keeps leading whitespace (only \r is stripped) so byte offsets stay accurate.
+type rawLineInfo struct {
+ LineNum int // 0-based index of the line in the newline-split file content
+ Content string
+}
// parseDependencies parses dependencies from the content
@@ -141,8 +149,9 @@ func parseDependencies(content string, variables map[string]string) []models.Pac
statements := extractDependencyStatements(content)
for _, stmt := range statements {
+ locations := computeGradleLocations(stmt.RawLines)
for _, pkg := range parseDependencyStatement(stmt.Text, variables) {
- pkg.Locations = []models.Location{{Line: stmt.Line}}
+ pkg.Locations = locations
packages = append(packages, pkg)
}
}
@@ -154,10 +163,16 @@ func extractDependencyStatements(content string) []dependencyStatement {
startPattern := regexp.MustCompile(`(?i)\b(` + configKeywords + `)\b`)
var statements []dependencyStatement
var buffer strings.Builder
+ var rawLines []rawLineInfo
active := false
startLine := 0
lines := strings.Split(content, "\n")
+ // Strip trailing \r so byte offsets are consistent on CRLF files
+ for i := range lines {
+ lines[i] = strings.TrimRight(lines[i], "\r")
+ }
+
for i, raw := range lines {
line := strings.TrimSpace(raw)
if line == "" || strings.HasPrefix(line, "//") || strings.HasPrefix(line, "/*") || strings.HasPrefix(line, "*") {
@@ -171,12 +186,13 @@ func extractDependencyStatements(content string) []dependencyStatement {
continue
}
active = true
- startLine = i + 1
+ startLine = i
buffer.Reset()
buffer.WriteString(line)
+ rawLines = []rawLineInfo{{LineNum: i, Content: raw}}
normalized := normalizePlatformDependency(buffer.String())
if dependencyStatementComplete(normalized) {
- statements = append(statements, dependencyStatement{Line: startLine, Text: normalized})
+ statements = append(statements, dependencyStatement{Line: startLine, Text: normalized, RawLines: rawLines})
active = false
}
}
@@ -185,9 +201,10 @@ func extractDependencyStatements(content string) []dependencyStatement {
buffer.WriteString(" ")
buffer.WriteString(line)
+ rawLines = append(rawLines, rawLineInfo{LineNum: i, Content: raw})
normalized := normalizePlatformDependency(buffer.String())
if dependencyStatementComplete(normalized) {
- statements = append(statements, dependencyStatement{Line: startLine, Text: normalized})
+ statements = append(statements, dependencyStatement{Line: startLine, Text: normalized, RawLines: rawLines})
active = false
}
}
@@ -195,6 +212,51 @@ func extractDependencyStatements(content string) []dependencyStatement {
return statements
}
+// computeGradleLocations returns one Location per raw line that still holds code once
+// stripInlineComment has removed a trailing // comment (Maven-style), or nil when no line does. StartIndex is the byte
+// offset of the first character that is not a space or tab; EndIndex is the byte length of the code with trailing spaces/tabs removed.
+func computeGradleLocations(rawLines []rawLineInfo) []models.Location {
+ locations := make([]models.Location, 0, len(rawLines))
+ for _, rl := range rawLines {
+ code := stripInlineComment(rl.Content)
+ code = strings.TrimRight(code, " \t")
+ if strings.TrimSpace(code) == "" {
+ continue // nothing but whitespace or a comment on this line: it contributes no Location
+ }
+ startIdx := len(rl.Content) - len(strings.TrimLeft(rl.Content, " \t")) // width of the leading indentation in bytes
+ locations = append(locations, models.Location{
+ Line: rl.LineNum,
+ StartIndex: startIdx,
+ EndIndex: len(code), // code is a prefix of rl.Content, so its length is an offset into the raw line
+ })
+ }
+ if len(locations) == 0 {
+ return nil // callers get nil rather than an empty non-nil slice
+ }
+ return locations
+}
+
+// stripInlineComment returns line cut at the first `//` found outside single or double quotes (whitespace before it is kept),
+// or line unchanged when there is none. Backslash escapes inside quotes are skipped; /* ... */ comments are not recognised.
+func stripInlineComment(line string) string {
+ inSingle := false
+ inDouble := false
+ for i := 0; i < len(line)-1; i++ { // stop one byte short because line[i+1] is read below
+ ch := line[i]
+ switch {
+ case ch == '\\' && (inSingle || inDouble):
+ i++ // skip escaped char
+ case ch == '\'' && !inDouble:
+ inSingle = !inSingle
+ case ch == '"' && !inSingle:
+ inDouble = !inDouble
+ case !inSingle && !inDouble && ch == '/' && line[i+1] == '/':
+ return line[:i] // everything before the comment marker, trailing spaces included
+ }
+ }
+ return line
+}
+
func dependencyStatementComplete(statement string) bool {
kw := configKeywords
patterns := []*regexp.Regexp{
@@ -325,6 +387,9 @@ func resolveVariables(str string, variables map[string]string) string {
// cleanVersion handles version ranges and classifiers
func cleanVersion(version string) string {
+ if version == "" {
+ return "latest"
+ }
// Remove brackets for ranges, take the lower bound
if strings.HasPrefix(version, "[") && strings.HasSuffix(version, "]") {
version = strings.Trim(version, "[]")
diff --git a/internal/parsers/gradle/gradle_parser_test.go b/internal/parsers/gradle/gradle_parser_test.go
index 5078027..71d948e 100644
--- a/internal/parsers/gradle/gradle_parser_test.go
+++ b/internal/parsers/gradle/gradle_parser_test.go
@@ -43,7 +43,7 @@ buildscript {
PackageName: "org.springframework:spring-core",
Version: "5.3.0",
Locations: []models.Location{
- {Line: 10},
+ {Line: 9},
},
},
{
@@ -51,7 +51,7 @@ buildscript {
PackageName: "junit:junit",
Version: "4.13",
Locations: []models.Location{
- {Line: 11},
+ {Line: 10},
},
},
{
@@ -59,7 +59,7 @@ buildscript {
PackageName: "com.google.guava:guava",
Version: "30.1-jre",
Locations: []models.Location{
- {Line: 12},
+ {Line: 11},
},
},
{
@@ -67,7 +67,7 @@ buildscript {
PackageName: "org.apache.commons:commons-lang3",
Version: "3.12.0",
Locations: []models.Location{
- {Line: 13},
+ {Line: 12},
},
},
{
@@ -75,7 +75,7 @@ buildscript {
PackageName: "com.android.tools.build:gradle",
Version: "7.0.0",
Locations: []models.Location{
- {Line: 18},
+ {Line: 17},
},
},
},
@@ -101,7 +101,7 @@ dependencies {
PackageName: "org.springframework:spring-core",
Version: "1.4.32",
Locations: []models.Location{
- {Line: 4},
+ {Line: 3},
},
},
{
@@ -109,7 +109,7 @@ dependencies {
PackageName: "org.apache.commons:commons-lang3",
Version: "3.12.0",
Locations: []models.Location{
- {Line: 5},
+ {Line: 4},
},
},
{
@@ -117,7 +117,7 @@ dependencies {
PackageName: "com.google.guava:guava",
Version: "30.1-jre",
Locations: []models.Location{
- {Line: 8},
+ {Line: 7},
},
},
{
@@ -125,7 +125,7 @@ dependencies {
PackageName: "junit:junit",
Version: "1.4.32",
Locations: []models.Location{
- {Line: 10},
+ {Line: 9},
},
},
},
@@ -159,7 +159,7 @@ dependencies {
PackageName: "org.springframework:spring-core",
Version: "5.3.0",
Locations: []models.Location{
- {Line: 6},
+ {Line: 5},
},
},
{
@@ -167,7 +167,7 @@ dependencies {
PackageName: "org.apache.commons:commons-lang3",
Version: "3.12.0",
Locations: []models.Location{
- {Line: 9},
+ {Line: 8},
},
},
{
@@ -175,7 +175,7 @@ dependencies {
PackageName: "junit:junit",
Version: "1.0.0",
Locations: []models.Location{
- {Line: 13},
+ {Line: 12},
},
},
{
@@ -183,7 +183,7 @@ dependencies {
PackageName: "com.google.guava:guava",
Version: "30.1-jre",
Locations: []models.Location{
- {Line: 16},
+ {Line: 15},
},
},
},
@@ -766,3 +766,80 @@ func TestVersionCatalogParser_ParseFile(t *testing.T) {
}
}
}
+
+// TestGradleParser_LocationIndices asserts that the Gradle parser populates StartIndex and EndIndex
+// (not just Line) for three packages of the build.gradle fixture. NOTE(review): a package missing from the output is not detected.
+func TestGradleParser_LocationIndices(t *testing.T) {
+ parser := &GradleParser{}
+ pkgs, err := parser.Parse(filepath.Join("..", "..", "..", "test", "resources", "build.gradle"))
+ if err != nil {
+ t.Fatalf("Failed to parse build.gradle: %v", err)
+ }
+
+ // build.gradle line 40 (1-based, hence Line 39 below):
+ // implementation 'org.apache.logging.log4j:log4j-core:2.14.0' // Log4Shell
+ // 8 spaces + "implementation 'org.apache.logging.log4j:log4j-core:2.14.0'" (= 8 + 59 = 67); the trailing comment is excluded
+ cases := map[string]struct {
+ line, startIdx, endIdx int
+ }{
+ "org.apache.logging.log4j:log4j-core": {39, 8, 67},
+ "commons-collections:commons-collections": {40, 8, 70},
+ "org.springframework:spring-web": {45, 8, 69},
+ }
+
+ for _, pkg := range pkgs {
+ want, ok := cases[pkg.PackageName]
+ if !ok {
+ continue // not one of the packages under test
+ }
+ if len(pkg.Locations) == 0 {
+ t.Errorf("%s: no Locations", pkg.PackageName)
+ continue
+ }
+ got := pkg.Locations[0] // only the first location is checked
+ if got.Line != want.line || got.StartIndex != want.startIdx || got.EndIndex != want.endIdx {
+ t.Errorf("%s: got Location{Line=%d, Start=%d, End=%d}, want {Line=%d, Start=%d, End=%d}",
+ pkg.PackageName, got.Line, got.StartIndex, got.EndIndex, want.line, want.startIdx, want.endIdx)
+ }
+ }
+}
+
+// TestComputeGradleLocations_MultiLine asserts that a dependency spanning multiple source lines
+// produces one Location per non-empty contributing line (Maven-style). The fixture lines are indented by 4, 8 and 4 spaces.
+func TestComputeGradleLocations_MultiLine(t *testing.T) {
+ raws := []rawLineInfo{
+ {LineNum: 5, Content: "    implementation("},
+ {LineNum: 6, Content: "        \"org.springframework:spring-core:5.3.0\""},
+ {LineNum: 7, Content: "    )"},
+ }
+ locs := computeGradleLocations(raws)
+ if len(locs) != 3 { // Fatalf so locs[i] below cannot go out of range
+ t.Fatalf("expected 3 Locations, got %d", len(locs))
+ }
+ want := []models.Location{
+ {Line: 5, StartIndex: 4, EndIndex: 19}, // "    implementation(" length 19
+ {Line: 6, StartIndex: 8, EndIndex: 47}, // 8 spaces + "\"org.springframework:spring-core:5.3.0\"" (39) = 47
+ {Line: 7, StartIndex: 4, EndIndex: 5}, // "    )" length 5
+ }
+ for i, w := range want {
+ if locs[i] != w {
+ t.Errorf("loc[%d]: got %+v, want %+v", i, locs[i], w)
+ }
+ }
+}
+
+// TestStripInlineComment verifies that a trailing // comment is cut off while a // inside
+// a quoted string is preserved, and that lines without a comment come back unchanged.
+func TestStripInlineComment(t *testing.T) {
+ cases := []struct{ in, out string }{
+ {"implementation 'foo:bar:1.0' // comment", "implementation 'foo:bar:1.0' "}, // the space before // is kept
+ {`implementation "https://example.com"`, `implementation "https://example.com"`}, // the // is inside double quotes
+ {"no comment here", "no comment here"},
+ {"// whole line is a comment", ""},
+ }
+ for _, c := range cases {
+ if got := stripInlineComment(c.in); got != c.out {
+ t.Errorf("stripInlineComment(%q) = %q, want %q", c.in, got, c.out)
+ }
+ }
+}
diff --git a/internal/parsers/gradle/version_catalog.go b/internal/parsers/gradle/version_catalog.go
index acd9178..05db468 100644
--- a/internal/parsers/gradle/version_catalog.go
+++ b/internal/parsers/gradle/version_catalog.go
@@ -23,17 +23,23 @@ func (p *VersionCatalogParser) Parse(manifestFile string) ([]models.Package, err
var packages []models.Package
// Convert catalog libraries to packages
- lineNum := 1
for _, lib := range catalog.Libraries {
if lib.Group != "" && lib.Name != "" {
+ version := lib.Version
+ if version == "" {
+ version = "latest"
+ }
packages = append(packages, models.Package{
PackageManager: "gradle",
PackageName: lib.Group + ":" + lib.Name,
- Version: lib.Version,
+ Version: version,
FilePath: manifestFile,
- Locations: []models.Location{{Line: lineNum}},
+ Locations: []models.Location{{
+ Line: lib.Line,
+ StartIndex: lib.StartIndex,
+ EndIndex: lib.EndIndex,
+ }},
})
- lineNum++
}
}
@@ -48,9 +54,12 @@ type VersionCatalog struct {
// CatalogLibrary represents a library entry in the version catalog
type CatalogLibrary struct {
- Group string
- Name string
- Version string
+ Group string
+ Name string
+ Version string
+ Line int // 0-based line number in the TOML file
+ StartIndex int // offset of first non-whitespace character on the line
+ EndIndex int // offset just past the last non-whitespace character on the line
}
// findVersionCatalog locates gradle/libs.versions.toml relative to the project root
@@ -76,29 +85,33 @@ func parseVersionCatalog(path string) *VersionCatalog {
}
lines := strings.Split(string(content), "\n")
+ // Strip trailing \r so byte offsets are consistent on CRLF files
+ for i := range lines {
+ lines[i] = strings.TrimRight(lines[i], "\r")
+ }
currentSection := ""
sectionPattern := regexp.MustCompile(`^\s*\[(\w+)\]\s*$`)
simpleKV := regexp.MustCompile(`^\s*([^\s=]+)\s*=\s*"([^"]+)"\s*$`)
- for _, line := range lines {
- line = strings.TrimSpace(line)
- if line == "" || strings.HasPrefix(line, "#") {
+ for lineIdx, raw := range lines {
+ trimmed := strings.TrimSpace(raw)
+ if trimmed == "" || strings.HasPrefix(trimmed, "#") {
continue
}
- if match := sectionPattern.FindStringSubmatch(line); len(match) > 1 {
+ if match := sectionPattern.FindStringSubmatch(trimmed); len(match) > 1 {
currentSection = match[1]
continue
}
switch currentSection {
case "versions":
- if match := simpleKV.FindStringSubmatch(line); len(match) > 2 {
+ if match := simpleKV.FindStringSubmatch(trimmed); len(match) > 2 {
catalog.Versions[match[1]] = match[2]
}
case "libraries":
- parseCatalogLibraryEntry(line, catalog)
+ parseCatalogLibraryEntry(trimmed, raw, lineIdx, catalog)
}
}
@@ -116,16 +129,23 @@ func parseVersionCatalog(path string) *VersionCatalog {
return catalog
}
-// parseCatalogLibraryEntry parses a single library line from the version catalog
-func parseCatalogLibraryEntry(line string, catalog *VersionCatalog) {
+// parseCatalogLibraryEntry parses a single library line from the version catalog.
+// trimmed is the whitespace-stripped line content used for regex matching;
+// raw is the original line used to compute byte offsets for Location indices.
+func parseCatalogLibraryEntry(trimmed, raw string, lineIdx int, catalog *VersionCatalog) {
+ startIdx, endIdx := lineExtent(raw)
+
// Pattern: key = "group:name:version"
simplePattern := regexp.MustCompile(`^\s*([^\s=]+)\s*=\s*"([^"]+)"\s*$`)
- if match := simplePattern.FindStringSubmatch(line); len(match) > 2 {
+ if match := simplePattern.FindStringSubmatch(trimmed); len(match) > 2 {
parts := strings.Split(match[2], ":")
if len(parts) >= 2 {
lib := CatalogLibrary{
- Group: parts[0],
- Name: parts[1],
+ Group: parts[0],
+ Name: parts[1],
+ Line: lineIdx,
+ StartIndex: startIdx,
+ EndIndex: endIdx,
}
if len(parts) >= 3 {
lib.Version = parts[2]
@@ -140,7 +160,7 @@ func parseCatalogLibraryEntry(line string, catalog *VersionCatalog) {
// Pattern: key = { group = "g", name = "n", version.ref = "xxx" }
// Pattern: key = { group = "g", name = "n", version = "xxx" }
kvPattern := regexp.MustCompile(`^\s*([^\s=]+)\s*=\s*\{(.+)\}\s*$`)
- if match := kvPattern.FindStringSubmatch(line); len(match) > 2 {
+ if match := kvPattern.FindStringSubmatch(trimmed); len(match) > 2 {
key := match[1]
body := match[2]
@@ -176,11 +196,22 @@ func parseCatalogLibraryEntry(line string, catalog *VersionCatalog) {
}
if lib.Group != "" && lib.Name != "" {
+ lib.Line = lineIdx
+ lib.StartIndex = startIdx
+ lib.EndIndex = endIdx
catalog.Libraries[key] = lib
}
}
}
+// lineExtent reports where the visible content of line begins and ends: the
+// first result is the number of leading spaces/tabs, the second is the length
+// of the line once trailing spaces/tabs are removed.
+func lineExtent(line string) (int, int) {
+	const blanks = " \t"
+	withoutLeading := strings.TrimLeft(line, blanks)
+	withoutTrailing := strings.TrimRight(line, blanks)
+	return len(line) - len(withoutLeading), len(withoutTrailing)
+}
+
// catalogKeyToDependency resolves a version catalog accessor (e.g., "spring.core")
// to a library entry. In Gradle, dots in the accessor map to dashes in catalog keys.
func catalogKeyToDependency(ref string, catalog *VersionCatalog) *CatalogLibrary {
@@ -214,23 +245,36 @@ func parseVersionCatalogDependencies(content string, catalog *VersionCatalog) []
pattern := regexp.MustCompile(configPattern)
lines := strings.Split(content, "\n")
+ // Strip trailing \r so byte offsets are consistent on CRLF files
+ for i := range lines {
+ lines[i] = strings.TrimRight(lines[i], "\r")
+ }
for i, raw := range lines {
- line := strings.TrimSpace(raw)
- if line == "" || strings.HasPrefix(line, "//") {
+ trimmed := strings.TrimSpace(raw)
+ if trimmed == "" || strings.HasPrefix(trimmed, "//") {
continue
}
- matches := pattern.FindAllStringSubmatch(line, -1)
+ matches := pattern.FindAllStringSubmatch(trimmed, -1)
for _, match := range matches {
if len(match) > 2 {
ref := match[2]
lib := catalogKeyToDependency(ref, catalog)
if lib != nil && lib.Group != "" && lib.Name != "" {
+ startIdx, endIdx := lineExtent(stripInlineComment(raw))
+ version := lib.Version
+ if version == "" {
+ version = "latest"
+ }
packages = append(packages, models.Package{
PackageManager: "gradle",
PackageName: lib.Group + ":" + lib.Name,
- Version: lib.Version,
- Locations: []models.Location{{Line: i + 1}},
+ Version: version,
+ Locations: []models.Location{{
+ Line: i,
+ StartIndex: startIdx,
+ EndIndex: endIdx,
+ }},
})
}
}
diff --git a/internal/parsers/maven/maven-pom-parser.go b/internal/parsers/maven/maven-pom-parser.go
index 45af00a..10dd625 100644
--- a/internal/parsers/maven/maven-pom-parser.go
+++ b/internal/parsers/maven/maven-pom-parser.go
@@ -209,7 +209,11 @@ func (p *MavenPomParser) Parse(manifestFile string) ([]models.Package, error) {
}
var packages []models.Package
+ // Strip \r for CRLF files so len(line) is correct on Windows
lines := strings.Split(string(content), "\n")
+ for i := range lines {
+ lines[i] = strings.TrimRight(lines[i], "\r")
+ }
// Process only direct dependencies (not managed ones to avoid duplicates)
allDeps := project.Dependencies
diff --git a/internal/parsers/sbt/sbt-parser_test.go b/internal/parsers/sbt/sbt-parser_test.go
index 063a9bf..9ce41fd 100644
--- a/internal/parsers/sbt/sbt-parser_test.go
+++ b/internal/parsers/sbt/sbt-parser_test.go
@@ -800,6 +800,24 @@ func TestSbtParser_Parse_PluginsFile(t *testing.T) {
Version: "1.9.16",
FilePath: manifestFile,
},
+ {
+ PackageManager: "sbt",
+ PackageName: "org.apache.log4j:log4j-core",
+ Version: "2.14.1",
+ FilePath: manifestFile,
+ },
+ {
+ PackageManager: "sbt",
+ PackageName: "org.apache.commons:commons-compress",
+ Version: "1.20",
+ FilePath: manifestFile,
+ },
+ {
+ PackageManager: "sbt",
+ PackageName: "commons-io:commons-io",
+ Version: "2.4",
+ FilePath: manifestFile,
+ },
}
if len(packages) != len(expectedPackages) {
diff --git a/internal/testdata/plugins.sbt b/internal/testdata/plugins.sbt
index 47674cb..87abec4 100644
--- a/internal/testdata/plugins.sbt
+++ b/internal/testdata/plugins.sbt
@@ -2,3 +2,8 @@
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.0")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.9.16")
+
+// Vulnerable dependencies for testing (intentional - to verify IDE decorations)
+addSbtPlugin("org.apache.log4j" % "log4j-core" % "2.14.1")
+addSbtPlugin("org.apache.commons" % "commons-compress" % "1.20")
+addSbtPlugin("commons-io" % "commons-io" % "2.4")
From fedb1461a3ab24ccde7796b38009d6584c4558be Mon Sep 17 00:00:00 2001
From: atishj99 <141334503+cx-atish-jadhav@users.noreply.github.com>
Date: Mon, 1 Jun 2026 20:35:33 +0530
Subject: [PATCH 10/14] Merge Python parsers and fix Windows CRLF EndIndex bug
Integrated Poetry, Setuptools (setup.cfg/setup.py), and enhanced PyPI parsers
from Sumit's implementation with existing Gradle/SBT support. All Python
parsers return PackageManager="pypi" per design spec. Added testdata fixtures
and comprehensive test coverage.
Key changes:
- Added internal/parsers/{poetry,setuptools}/ with full test suites
- Enhanced internal/parsers/pypi/ with support for 6 Python formats
- Fixed pre-existing CRLF line ending bug affecting golang, dotnet, maven on Windows
- Updated manifest-file-selector.go with SBT, Poetry, and Setuptools routing
- Updated parser_factory.go with dispatchers for all Python ecosystems
- Updated CLAUDE.md with complete architecture and design pattern documentation
All parsers pass unit tests. Manifest-parser builds and runs successfully.
Co-Authored-By: Claude Haiku 4.5
---
CLAUDE.md | 75 ++-
.../parsers/poetry/poetry-pyproject-parser.go | 325 ++++++++++
.../poetry/poetry-pyproject-parser_test.go | 321 ++++++++++
internal/parsers/pypi/pypi-parser.go | 242 +++++++-
internal/parsers/pypi/pypi-parser_test.go | 584 ++++++++++++++++++
.../testdata/requirements-pip-compile.txt | 14 +
.../pypi/testdata/requirements-pip-freeze.txt | 4 +
.../pypi/testdata/requirements-uv-export.txt | 33 +
.../parsers/setuptools/setup_cfg_parser.go | 183 ++++++
.../setup_cfg_parser_edge_cases_test.go | 187 ++++++
.../setuptools/setup_cfg_parser_test.go | 261 ++++++++
.../parsers/setuptools/setup_py_parser.go | 369 +++++++++++
.../setuptools/setup_py_parser_test.go | 226 +++++++
internal/testdata/pyproject.toml | 19 +
internal/testdata/setup.cfg | 19 +
internal/testdata/setup.py | 22 +
pkg/parser/manifest-file-selector.go | 20 +-
pkg/parser/manifest-file-selector_test.go | 99 +++
pkg/parser/parser_factory.go | 8 +
test/resources/build.sbt | 11 +
20 files changed, 2984 insertions(+), 38 deletions(-)
create mode 100644 internal/parsers/poetry/poetry-pyproject-parser.go
create mode 100644 internal/parsers/poetry/poetry-pyproject-parser_test.go
create mode 100644 internal/parsers/pypi/testdata/requirements-pip-compile.txt
create mode 100644 internal/parsers/pypi/testdata/requirements-pip-freeze.txt
create mode 100644 internal/parsers/pypi/testdata/requirements-uv-export.txt
create mode 100644 internal/parsers/setuptools/setup_cfg_parser.go
create mode 100644 internal/parsers/setuptools/setup_cfg_parser_edge_cases_test.go
create mode 100644 internal/parsers/setuptools/setup_cfg_parser_test.go
create mode 100644 internal/parsers/setuptools/setup_py_parser.go
create mode 100644 internal/parsers/setuptools/setup_py_parser_test.go
create mode 100644 internal/testdata/pyproject.toml
create mode 100644 internal/testdata/setup.cfg
create mode 100644 internal/testdata/setup.py
create mode 100644 test/resources/build.sbt
diff --git a/CLAUDE.md b/CLAUDE.md
index 335d3a6..6b5d809 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## Project Overview
-Go module that parses package manifests from multiple ecosystems (Maven, npm, Python, Go, .NET, Gradle) and returns each declared dependency along with the **exact line/character range** of its declaration. Consumed by [AST-CLI](https://github.com/Checkmarx/ast-cli) to correlate manifest entries with Checkmarx runtime scans โ so the `Locations` field is part of the public contract, not a debugging convenience.
+Go module that parses package manifests from multiple ecosystems (Maven, npm, Python, Go, .NET, Gradle, SBT) and returns each declared dependency along with the **exact line/character range** of its declaration. Consumed by [AST-CLI](https://github.com/Checkmarx/ast-cli) to correlate manifest entries with Checkmarx runtime scans — so the `Locations` field is part of the public contract, not a debugging convenience.
**Status:** Active / maintained. Part of the Checkmarx One SCA pipeline.
@@ -37,7 +37,11 @@ internal/parsers/ Per-ecosystem implementations (not importable by
โโโ gradle/
โโโ maven/
โโโ npm/
- โโโ pypi/
+ โโโ poetry/
+ โโโ pypi/
+ โโโ sbt/
+ โโโ setuptools/
+internal/testdata/ Parser-specific test fixtures (sbt, Python, .NET)
test/resources/ Shared fixture files for all parser tests
vendor/ Vendored dependencies
.github/workflows/ci.yml CI pipeline (test + 60% coverage gate)
@@ -51,13 +55,13 @@ vendor/ Vendored dependencies
git clone https://github.com/Checkmarx/manifest-parser.git
cd manifest-parser
-go test ./... # run all tests
-go test ./internal/parsers/gradle/... # run tests for a single parser
-go test -run TestName ./path/... # run a single test by name
-go test ./... -coverprofile cover.out # CI gate: total coverage must be >= 60%
-go tool cover -html cover.out # view coverage report in browser
-go build -o manifest-parser ./cmd # build CLI
-go run ./cmd test/resources/pom.xml # run CLI against a fixture
+go test ./... # run all tests
+go test ./internal/parsers/gradle/... # run tests for a single parser
+go test -run TestName ./path/... # run a single test by name
+go test ./... -coverprofile cover.out # CI gate: total coverage must be >= 60%
+go tool cover -html cover.out # view coverage report in browser
+go build -o manifest-parser ./cmd # build CLI
+go run ./cmd test/resources/pom.xml # run CLI against a fixture
```
**Sample output** from `go run ./cmd test/resources/pom.xml`:
@@ -119,10 +123,12 @@ Per-ecosystem parsers live under [internal/parsers/](internal/parsers/):
- `gradle/` โ parses `build.gradle` / `build.gradle.kts` (Groovy + Kotlin DSL) and `gradle/libs.versions.toml` version catalogs. Resolves variables from `gradle.properties` and `ext {}` blocks. `PackageManager` = `"gradle"`.
- `maven/` โ parses `pom.xml` with `encoding/xml`, then re-scans the raw text to locate each `` block line by line. Resolves `${property}` vars from `` and falls back to `` for empty/ranged versions. Only **direct** `` are emitted (managed-only deps are intentionally skipped to avoid duplicates โ see PR #15). `PackageManager` = `"mvn"`.
- `npm/` โ parses `package.json` plus, if present as a sibling file, `package-lock.json` (v1 and v2/v3 formats). Ranged specifiers (`^`, `~`, `*`, `>`, `<`) trigger a lookup in the lockfile. Without a lock match, ranged versions resolve to `"latest"`. `PackageManager` = `"npm"`.
-- `pypi/` โ line-oriented scan of `requirements*.txt` / `packages*.txt`. **Only `package==version` is supported.** Comments (`#`) and environment markers (`;`) are stripped. `PackageManager` = `"pypi"`.
+- `poetry/` — parses `pyproject.toml` (Poetry 1.x key-value format and PEP 621 array format) and resolves exact versions from a sibling `poetry.lock` if present. Supports exact versions, ranges (`^`, `~`, `>=`, `<=`), wildcards (`*`, `1.2.*`), inline tables, optional packages, dependency groups, and markers. Ranged/wildcard versions without a lock match resolve to `"latest"`. `PackageManager` = `"pypi"` (Poetry packages are PyPI packages).
+- `pypi/` — line-oriented scan of `requirements*.txt`, `requirement*.txt`, `constraints*.txt`, and `packages*.txt`. Supports six Python dependency formats: pip, pip-freeze, pip-compile, pip-tools, uv export, and Poetry export. Features: line continuations (`\`), `--hash=` stripping, pip CLI option skipping (`-i`, `-r`, `-c`, `-e`, etc.), VCS requirements (`git+`, `hg+`, `svn+`, `bzr+` with `#egg=`), URL requirements (PEP 508 `pkg @ URL`), `===` arbitrary equality, and environment markers (`;`). `PackageManager` = `"pypi"`.
+- `setuptools/` — two parsers for Python packaging manifests: `setup_cfg_parser.go` (`setup.cfg` INI format) and `setup_py_parser.go` (`setup.py` script). Both support `install_requires`, `setup_requires`, `tests_require`, and `extras_require`. Duplicate packages across sections are stored as separate entries with distinct line numbers. `PackageManager` = `"pypi"`.
- `golang/` โ uses `golang.org/x/mod/modfile` to parse `go.mod`, then uses the parser's line metadata to compute character offsets. `PackageManager` = `"go"`.
- `dotnet/` โ three parsers: `csproj_parser.go` (`.csproj`), `directory_packages_props_parser.go` (central package management), `packages_config_parser.go` (legacy). Bracketed version ranges become `"latest"`. `PackageManager` = `"nuget"` for all three.
-- `sbt/` โ parses any `.sbt` file (`build.sbt`, `project/plugins.sbt`, `version.sbt`, etc.) using line-oriented scanning. Supports Scala build dependencies in `"group" % "name" % "version"` format. `PackageManager` = `"sbt"`.
+- `sbt/` — parses any `.sbt` file (`build.sbt`, `plugins.sbt`, `dependencies.sbt`, etc.) using line-oriented scanning. Supports `val`/`lazy val`/`def` variable declarations, all SBT dependency operators (`%`, `%%`, `%%%`), `Seq(...)` blocks, `addSbtPlugin(...)` syntax, dependency modifiers (`exclude`, `excludeAll`, `intransitive`, `withSources`, `classifier`, `cross`), block and inline comments, scope annotations, and duplicate detection. `PackageManager` = `"sbt"`.
## Project Rules (Invariants)
@@ -133,11 +139,13 @@ Per-ecosystem parsers live under [internal/parsers/](internal/parsers/):
- Maven emits one `Location` per **non-comment line** of the `` block (open tag, each child element, close tag). Single-line `Locations` for Maven would be a regression.
- Do not add `ParsersFactory` overloads or alternative entry points without coordinating with AST-CLI.
- **Do not modify or rename existing `PackageManager` strings**. AST-CLI and Checkmarx One SCA branch on these values โ a silent rename breaks downstream parsing with no compile-time error. If a rename is genuinely required, stop and confirm with the user.
+- All Python parsers (`pypi/`, `poetry/`, `setuptools/`) return `PackageManager` = `"pypi"` because all Python packages ultimately live on PyPI regardless of the tool that declared them. Do not introduce separate strings like `"poetry"` or `"setuptools"`.
## Testing Strategy
-Each parser has a `*_test.go` co-located with it using `testify`. Shared fixtures live in [test/resources/](test/resources/):
+Each parser has a `*_test.go` co-located with it using `testify`. Fixtures are split across two locations:
+**Shared fixtures** in [test/resources/](test/resources/):
```
test/resources/
โโโ build.gradle Groovy DSL
@@ -151,26 +159,47 @@ test/resources/
โโโ Gateway.csproj .NET csproj (variant)
โโโ Directory.Packages.props .NET centralized packages
โโโ packages.config .NET legacy NuGet
-โโโ requirements.txt Python pip
+โโโ requirements.txt Python pip (basic format)
+```
+
+**Parser-specific fixtures** in [internal/testdata/](internal/testdata/):
+```
+internal/testdata/
+โโโ build.sbt SBT build file (Log4Shell, Struts2, etc.)
+โโโ plugins.sbt SBT plugin dependencies
+โโโ pyproject.toml Poetry project configuration (requests, flask, pytest, numpy, pandas)
+โโโ setup.cfg Setuptools INI format (requests, flask, six, pytest, black)
+โโโ setup.py Setuptools Python script (same deps as setup.cfg)
+โโโ ast-visual-studio-extension.csproj .NET multi-package csproj
+```
+
+**PyPI-format fixtures** in [internal/parsers/pypi/testdata/](internal/parsers/pypi/testdata/):
+```
+internal/parsers/pypi/testdata/
+โโโ requirements-pip-freeze.txt pip freeze output (exact pinned versions)
+โโโ requirements-pip-compile.txt pip-compile output with via comments
+โโโ requirements-uv-export.txt uv export with --hash options and line continuations
```
When adding behaviours, add a fixture here rather than embedding large manifests in test source.
CI ([.github/workflows/ci.yml](.github/workflows/ci.yml)) enforces a **60% total coverage floor** โ adding an untested branch to an already-thin package can push the whole repo below the gate. View coverage locally with `go tool cover -html cover.out`.
-Expected pattern for a new parser: fixture file under `test/resources/` + `_parser_test.go` co-located with the parser, using `testify` assertions on `PackageName`, `Version`, `PackageManager`, and `Locations`.
+Expected pattern for a new parser: fixture file under `test/resources/` or `internal/testdata/` + `_parser_test.go` co-located with the parser, using `testify` assertions on `PackageName`, `Version`, `PackageManager`, and `Locations`.
## Known Issues / Limitations
-- **pypi**: Only `package==version` syntax is supported. `pip freeze`, Poetry, and pip-tools output are explicitly out of scope.
+- **pypi**: VCS requirements (`git+`, `hg+`, `svn+`, `bzr+`) require an `#egg=` fragment to extract the package name; VCS URLs without `#egg=` are skipped. URL requirements must use PEP 508 `pkg @ URL` syntax with the package name before `@`.
+- **poetry**: Multi-line dependency tables spanning more than one line (e.g., `{git = "...", rev = "..."}` across lines) are not fully parsed — the dependency is skipped. Single-line inline tables are supported.
- **npm**: Ranged version specifiers (`^`, `~`, `*`, `>`, `<`) without a matching `package-lock.json` entry resolve to `"latest"` rather than the actual installed version.
- **Maven**: Managed-only deps (present in `` but not in ``) are not emitted, to avoid duplicating entries already declared in a BOM consumer.
- **dotnet**: Bracketed version ranges (e.g., `[1.0,2.0)`) become `"latest"`.
+- **sbt**: Version variables using object member access (e.g., `Versions.log4j`) are not resolved — only simple `val`/`lazy val` string assignments are captured.
- **All parsers**: Direct dependencies only โ transitive dependencies are not resolved or scanned.
## External Integrations
-- **AST-CLI** ([Checkmarx/ast-cli](https://github.com/Checkmarx/ast-cli)) โ primary consumer. Imports this module as a Go library. The fields `Locations`, `PackageManager`, `PackageName`, and `Version` on the `Package` struct are load-bearing: AST-CLI uses them to annotate scan results and drive remediation UI.
+- **AST-CLI** ([Checkmarx/ast-cli](https://github.com/Checkmarx/ast-cli)) — primary consumer. Imports this module as a Go library. The fields `Locations`, `PackageManager`, `PackageName`, and `Version` on the `Package` struct are load-bearing: AST-CLI uses them to annotate scan results and drive remediation UI. Note: AST-CLI maps `"gradle"` and `"sbt"` to `"mvn"` when sending to the Checkmarx scanner API, since both build tools use Maven Central as their registry.
- **Checkmarx One SCA** โ downstream scan engine that receives the parsed dependency list.
## Deployment
@@ -181,6 +210,7 @@ N/A โ this is a Go library consumed via `go get github.com/Checkmarx/manifest-
- Maven re-scans the raw XML bytes after `encoding/xml` parsing (two passes). Large `pom.xml` files are loaded fully into memory; there is no streaming.
- Gradle version catalog parsing reads `libs.versions.toml` once, separately from the build file. Large catalogs (80+ entries) are fine; pathologically large files are not size-bounded.
+- pypi parser preprocesses all lines first to join continuations before parsing — the full file is held in memory.
- No caching between calls to `ParsersFactory` โ each invocation allocates fresh parser state.
## Security & Access
@@ -195,6 +225,7 @@ N/A โ this is a Go library consumed via `go get github.com/Checkmarx/manifest-
- The **library** (`pkg/`, `internal/`) returns `error` values and does not log. Callers should not expect any log output from the library.
- The **CLI** (`cmd/main.go`) uses `log.Fatalf` on parse/marshal errors and exits non-zero. Normal output is JSON printed to stdout.
+- Exception: `setuptools/` parsers use `log.Printf` for debug/warning output during development. This should be treated as temporary and removed before production release.
## Coding Standards
@@ -202,6 +233,7 @@ N/A โ this is a Go library consumed via `go get github.com/Checkmarx/manifest-
- Exported identifiers live in `pkg/`; internal logic lives in `internal/`. Do not add exported symbols to `internal/`.
- Parser packages follow the naming layout: `internal/parsers//_parser.go` + `_parser_test.go`.
- No global state in parsers โ each concrete parser type is a stateless zero-value struct.
+- When splitting file content into lines, always strip `\r` to handle CRLF files on Windows: `strings.TrimRight(line, "\r")`. Failing to do so causes `len(line)` to return one extra byte, producing off-by-one `EndIndex` values.
## Debugging Steps
@@ -209,15 +241,22 @@ N/A โ this is a Go library consumed via `go get github.com/Checkmarx/manifest-
```bash
go run ./cmd test/resources/pom.xml
go run ./cmd test/resources/build.gradle
+ go run ./cmd internal/testdata/build.sbt
+ go run ./cmd internal/testdata/pyproject.toml
+ go run ./cmd internal/testdata/setup.cfg
```
2. **Verbose test output to see which test case fails:**
```bash
go test -v ./internal/parsers/maven/...
+ go test -v ./internal/parsers/sbt/...
+ go test -v ./internal/parsers/poetry/...
```
3. **Location off-by-one:** Parser violated 0-based contract. Grep for `i + 1` patterns near `Line:` / `LineNum:` assignments โ emit `Line: i`, not `i + 1`.
-4. **Version resolves to `"latest"` unexpectedly:** check whether the version string matches a range specifier (`^`, `~`, `[`, `*`) or whether a lock file / properties file is present in the same directory as the fixture.
+4. **EndIndex off-by-one on Windows:** File has CRLF line endings and the parser uses `len(line)` without stripping `\r`. Fix: add `strings.TrimRight(line, "\r")` after `strings.Split(content, "\n")`.
+
+5. **Version resolves to `"latest"` unexpectedly:** check whether the version string matches a range specifier (`^`, `~`, `[`, `*`) or whether a lock file / properties file is present in the same directory as the fixture.
-5. **New ecosystem not dispatched:** verify `selectManifestFile` in `manifest-file-selector.go` handles the new filename/extension and that the factory `switch` has a corresponding case.
+6. **New ecosystem not dispatched:** verify `selectManifestFile` in `manifest-file-selector.go` handles the new filename/extension and that the factory `switch` has a corresponding case.
diff --git a/internal/parsers/poetry/poetry-pyproject-parser.go b/internal/parsers/poetry/poetry-pyproject-parser.go
new file mode 100644
index 0000000..96458e9
--- /dev/null
+++ b/internal/parsers/poetry/poetry-pyproject-parser.go
@@ -0,0 +1,325 @@
+package poetry
+
+import (
+ "bufio"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strings"
+
+ "github.com/Checkmarx/manifest-parser/pkg/parser/models"
+)
+
+// PoetryPyprojectParser parses pyproject.toml Poetry dependency sections.
+// It is a field-less type; all work happens in its Parse method.
+type PoetryPyprojectParser struct{}
+
+// Patterns compiled once at package scope and shared by the pyproject.toml scanner.
+var (
+ // groupDepSectionRe matches a whole "[tool.poetry.group.<name>.dependencies]"
+ // header line, where <name> is one or more characters other than a dot.
+ groupDepSectionRe = regexp.MustCompile(`^\[tool\.poetry\.group\.[^.]+\.dependencies\]$`)
+ // inlineTableVersionRe captures the double-quoted value of a `version = "..."` entry.
+ inlineTableVersionRe = regexp.MustCompile(`version\s*=\s*"([^"]*)"`)
+ // pep621OptDepSectionRe matches exactly the "[project.optional-dependencies]" header line.
+ pep621OptDepSectionRe = regexp.MustCompile(`^\[project\.optional-dependencies\]$`)
+)
+
+// isPoetryDepsSection reports whether line is one of the Poetry table headers
+// that list dependencies: the main table, the legacy dev table, or a
+// "[tool.poetry.group.<name>.dependencies]" group table.
+func isPoetryDepsSection(line string) bool {
+	switch line {
+	case "[tool.poetry.dependencies]", "[tool.poetry.dev-dependencies]":
+		return true
+	default:
+		return groupDepSectionRe.MatchString(line)
+	}
+}
+
+// parsePoetryVersion normalises a Poetry version constraint. It returns the
+// trimmed input when it is a plain version, and "latest" when the input is
+// empty or contains any wildcard, range, exclusion or marker character.
+func parsePoetryVersion(v string) string {
+	spec := strings.TrimSpace(v)
+	// A single character-set test covers every operator: "~=" is already
+	// implied by "~" and "=".
+	if spec == "" || strings.ContainsAny(spec, "*^~><,!=;") {
+		return "latest"
+	}
+	return spec
+}
+
+// pyprojectLineIndices computes the span of a dependency on its raw source line.
+// The start is the offset of the first occurrence of pkgName in raw (0 when it
+// does not occur). The end is one past the offset of the last character that is
+// neither a space nor a tab, looking only at the text before the first "#".
+func pyprojectLineIndices(raw, pkgName string) (int, int) {
+	// Anything from the first '#' onwards is treated as a comment.
+	limit := len(raw)
+	if hash := strings.Index(raw, "#"); hash >= 0 {
+		limit = hash
+	}
+	isContent := func(r rune) bool { return !(r == ' ' || r == '\t') }
+	lastContent := strings.LastIndexFunc(raw[:limit], isContent)
+
+	first := strings.Index(raw, pkgName)
+	if first < 0 {
+		first = 0
+	}
+	// lastContent is -1 when nothing but blanks precedes the comment, giving 0.
+	return first, lastContent + 1
+}
+
+// parsePyprojectDepLine parses one `name = value` entry of a Poetry dependency
+// table. line must already be trimmed and free of trailing comments.
+//
+// The key/value split is made at the first "=", with any amount of surrounding
+// whitespace, so `requests="2.28.2"` and tab-aligned entries are accepted as
+// well as `requests = "2.28.2"`. Quoted keys are unquoted.
+//
+// version is:
+//   - the `version = "..."` value when the right-hand side is an inline table
+//     ("latest" when the table has no such entry);
+//   - the string content when the right-hand side is a single- or
+//     double-quoted string;
+//   - "latest" for anything else (arrays, bare values).
+//
+// ok is false when there is no "=", when the key is empty, when it is the
+// "python" interpreter constraint, or when it is not a plausible package name
+// (only ASCII letters, digits, '-', '_' and '.' are accepted). The last rule
+// keeps continuation lines such as `{version = "2.0", python = "<3.8"},` from
+// being reported as a package called `{version`.
+func parsePyprojectDepLine(line string) (name, version string, ok bool) {
+	eqIdx := strings.Index(line, "=")
+	if eqIdx < 0 {
+		return "", "", false
+	}
+
+	name = strings.TrimSpace(line[:eqIdx])
+	// TOML allows quoted keys: "my-pkg" = "1.0".
+	if n := len(name); n >= 2 && (name[0] == '"' || name[0] == '\'') && name[n-1] == name[0] {
+		name = name[1 : n-1]
+	}
+	if name == "" || name == "python" {
+		return "", "", false
+	}
+	for i := 0; i < len(name); i++ {
+		c := name[i]
+		isAlnum := (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
+		if !isAlnum && c != '-' && c != '_' && c != '.' {
+			return "", "", false
+		}
+	}
+
+	valueStr := strings.TrimSpace(line[eqIdx+1:])
+	n := len(valueStr)
+	switch {
+	case strings.HasPrefix(valueStr, "{"):
+		version = "latest"
+		if m := inlineTableVersionRe.FindStringSubmatch(valueStr); m != nil {
+			version = m[1]
+		}
+	case n >= 2 && (valueStr[0] == '"' || valueStr[0] == '\'') && valueStr[n-1] == valueStr[0]:
+		version = valueStr[1 : n-1]
+	default:
+		version = "latest"
+	}
+
+	return name, version, true
+}
+
+// parseLockFile loads the poetry.lock found in manifestDir and maps every
+// package name to the version recorded for it. A missing or unreadable lock
+// file yields an empty, non-nil map.
+func parseLockFile(manifestDir string) map[string]string {
+	versions := make(map[string]string)
+
+	lock, err := os.Open(filepath.Join(manifestDir, "poetry.lock"))
+	if err != nil {
+		return versions
+	}
+	defer lock.Close()
+
+	// pending holds the most recent `name = ...` value until its matching
+	// `version = ...` line is seen or a new table header starts.
+	pending := ""
+	for sc := bufio.NewScanner(lock); sc.Scan(); {
+		entry := strings.TrimSpace(sc.Text())
+		switch {
+		case strings.HasPrefix(entry, "[[package]]"):
+			pending = ""
+		case strings.HasPrefix(entry, "[") && !strings.HasPrefix(entry, "[["):
+			pending = ""
+		case strings.HasPrefix(entry, "name = "):
+			value := strings.TrimSpace(strings.TrimPrefix(entry, "name = "))
+			pending = strings.Trim(value, "\"")
+		case pending != "" && strings.HasPrefix(entry, "version = "):
+			value := strings.TrimSpace(strings.TrimPrefix(entry, "version = "))
+			versions[pending] = strings.Trim(value, "\"")
+			pending = ""
+		}
+	}
+
+	return versions
+}
+
+// resolveVersionWithLock turns a declared version constraint into a concrete
+// version.
+//
+//   - A version without any range/wildcard/marker character is returned as is.
+//   - An exact pin ("==1.2.3" or "===1.2.3") returns the pinned version. A
+//     "pin" that still contains a wildcard or further clauses ("==1.*",
+//     "==1.0,!=1.1") is not concrete and is resolved like a range.
+//   - Anything else is looked up in lockVersions, first by the literal name and
+//     then by its normalised form (case-insensitive, with runs of '-', '_' and
+//     '.' treated as one '-'), so "Flask" or "my_pkg" in pyproject.toml matches
+//     "flask" or "my-pkg" in poetry.lock.
+//   - When no lock entry matches, "latest" is returned.
+func resolveVersionWithLock(pkgName, version string, lockVersions map[string]string) string {
+	const rangeChars = "^~><,!=;*"
+
+	if !strings.ContainsAny(version, rangeChars) {
+		return version
+	}
+
+	if strings.HasPrefix(version, "==") {
+		pinned := strings.TrimSpace(strings.TrimLeft(version, "="))
+		if pinned != "" && !strings.ContainsAny(pinned, rangeChars) {
+			return pinned
+		}
+	}
+
+	if lockVersion, found := lockVersions[pkgName]; found {
+		return lockVersion
+	}
+
+	// Fall back to a normalised comparison; lock files are small enough that
+	// a linear scan on a miss is not a concern.
+	normalize := func(name string) string {
+		var b strings.Builder
+		prevSep := false
+		for _, r := range strings.ToLower(name) {
+			if r == '-' || r == '_' || r == '.' {
+				if !prevSep {
+					b.WriteByte('-')
+				}
+				prevSep = true
+				continue
+			}
+			prevSep = false
+			b.WriteRune(r)
+		}
+		return b.String()
+	}
+	want := normalize(pkgName)
+	for lockName, lockVersion := range lockVersions {
+		if normalize(lockName) == want {
+			return lockVersion
+		}
+	}
+
+	return "latest"
+}
+
+// parsePep621Requirement parses one PEP 508 requirement string as it appears in
+// a PEP 621 dependency array, e.g. `"requests[security]>=2.0,<3; python_version>='3.8'",`.
+//
+// Surrounding whitespace, one trailing comma and the enclosing quotes (double,
+// or a matching pair of single quotes) are removed first. Then:
+//   - everything from the first ";" on is an environment marker and is ignored;
+//   - a direct reference ("name @ url") yields the name with version "latest";
+//   - otherwise the name ends at the EARLIEST version operator in the string
+//     and version is that operator followed by the rest of the specifier
+//     (">=2.0,<3"). Picking the earliest operator, rather than the first one in
+//     a fixed priority list, keeps "pkg<2,>=1" from being split at ">=";
+//   - with no operator, version is "latest";
+//   - an extras suffix ("[security]") is not part of the package name and is
+//     dropped.
+//
+// ok is false, and name/version are empty, when no package name remains.
+func parsePep621Requirement(req string) (name, version string, ok bool) {
+	req = strings.TrimSpace(req)
+	req = strings.TrimSpace(strings.TrimSuffix(req, ","))
+	if n := len(req); n >= 2 && req[0] == '\'' && req[n-1] == '\'' {
+		req = req[1 : n-1]
+	} else {
+		req = strings.TrimSuffix(strings.TrimPrefix(req, "\""), "\"")
+	}
+	req = strings.TrimSpace(req)
+	if req == "" {
+		return "", "", false
+	}
+
+	// Drop the environment marker before looking for operators: markers
+	// contain "==", "<", ... of their own.
+	spec := req
+	if semi := strings.Index(spec, ";"); semi >= 0 {
+		spec = spec[:semi]
+	}
+
+	name, version = spec, "latest"
+	if at := strings.Index(spec, "@"); at >= 0 {
+		// Direct reference: "name @ https://...". No version is declared.
+		name = spec[:at]
+	} else {
+		opIdx, opLen := -1, 0
+		// Two-character operators are listed first so that, on a tie at the
+		// same offset, ">=" wins over ">".
+		for _, op := range []string{"==", ">=", "<=", "~=", "!=", ">", "<"} {
+			if idx := strings.Index(spec, op); idx >= 0 && (opIdx < 0 || idx < opIdx) {
+				opIdx, opLen = idx, len(op)
+			}
+		}
+		if opIdx >= 0 {
+			name = spec[:opIdx]
+			version = spec[opIdx:opIdx+opLen] + strings.TrimSpace(spec[opIdx+opLen:])
+		}
+	}
+
+	if bracket := strings.Index(name, "["); bracket >= 0 {
+		name = name[:bracket]
+	}
+	name = strings.TrimSpace(name)
+	if name == "" {
+		return "", "", false
+	}
+	return name, version, true
+}
+
+// Parse scans a pyproject.toml file line by line and returns every declared
+// dependency with its 0-based line number and column span.
+//
+// Two declaration styles are recognised:
+//   - Poetry tables ([tool.poetry.dependencies], [tool.poetry.dev-dependencies]
+//     and [tool.poetry.group.<name>.dependencies]), one `name = value` entry per
+//     line. An inline table that is left open on its line is skipped up to the
+//     line containing the closing "}".
+//   - PEP 621 arrays: the `dependencies` key of [project] and every key of
+//     [project.optional-dependencies], written inline or across several lines.
+//     Other arrays in [project] (classifiers, keywords, authors, ...) are not
+//     dependency lists and are ignored, as are comments inside an array.
+//
+// Versions are resolved through resolveVersionWithLock using the poetry.lock
+// that sits next to the manifest, if any. Every package is reported with
+// PackageManager "pypi".
+//
+// An error is returned when the manifest cannot be opened or read.
+func (p *PoetryPyprojectParser) Parse(manifestFile string) ([]models.Package, error) {
+	file, err := os.Open(manifestFile)
+	if err != nil {
+		return nil, err
+	}
+	defer file.Close()
+
+	lockVersions := parseLockFile(filepath.Dir(manifestFile))
+
+	var packages []models.Package
+
+	// addPackage records one dependency declared on the given raw line.
+	addPackage := func(name, version, raw string, lineNum int) {
+		startIdx, endIdx := pyprojectLineIndices(raw, name)
+		packages = append(packages, models.Package{
+			PackageManager: "pypi",
+			PackageName:    name,
+			Version:        resolveVersionWithLock(name, version, lockVersions),
+			FilePath:       manifestFile,
+			Locations: []models.Location{{
+				Line:       lineNum,
+				StartIndex: startIdx,
+				EndIndex:   endIdx,
+			}},
+		})
+	}
+	// addRequirements records every parsable PEP 508 string found on one line.
+	addRequirements := func(items []string, raw string, lineNum int) {
+		for _, item := range items {
+			if name, version, ok := parsePep621Requirement(item); ok {
+				addPackage(name, version, raw, lineNum)
+			}
+		}
+	}
+
+	var (
+		inPoetryDepsSection bool // inside a Poetry dependency table
+		inProjectTable      bool // inside [project]
+		inOptionalDeps      bool // inside [project.optional-dependencies]
+		inPep621Array       bool // inside a multi-line PEP 621 dependency array
+		skipUntilCloseBrace bool // inside a multi-line Poetry inline table
+	)
+
+	scanner := bufio.NewScanner(file)
+	// lineNum is advanced by the loop itself so every path below counts the
+	// line exactly once.
+	for lineNum := 0; scanner.Scan(); lineNum++ {
+		raw := scanner.Text()
+		trimmed := strings.TrimSpace(raw)
+
+		switch {
+		case skipUntilCloseBrace:
+			if strings.Contains(trimmed, "}") {
+				skipUntilCloseBrace = false
+			}
+
+		case strings.HasPrefix(trimmed, "["):
+			// A table header ends whatever section or array was open.
+			inPoetryDepsSection = isPoetryDepsSection(trimmed)
+			inProjectTable = trimmed == "[project]"
+			inOptionalDeps = pep621OptDepSectionRe.MatchString(trimmed)
+			inPep621Array = false
+
+		case inPep621Array:
+			items, closed := scanPep621ArrayItems(trimmed)
+			addRequirements(items, raw, lineNum)
+			if closed {
+				inPep621Array = false
+			}
+
+		case inPoetryDepsSection:
+			if trimmed == "" || strings.HasPrefix(trimmed, "#") {
+				continue
+			}
+			line := trimmed
+			if idx := strings.Index(line, "#"); idx >= 0 {
+				line = strings.TrimSpace(line[:idx])
+			}
+			name, version, ok := parsePyprojectDepLine(line)
+			if !ok {
+				continue
+			}
+			// An inline table opened but not closed on this line continues on
+			// the following lines; those carry no new dependency.
+			if eq := strings.Index(line, "="); eq >= 0 {
+				value := strings.TrimSpace(line[eq+1:])
+				if strings.HasPrefix(value, "{") && !strings.Contains(value, "}") {
+					skipUntilCloseBrace = true
+				}
+			}
+			addPackage(name, version, raw, lineNum)
+
+		case inProjectTable || inOptionalDeps:
+			if strings.HasPrefix(trimmed, "#") {
+				continue
+			}
+			if !strings.Contains(trimmed, " = [") && !strings.Contains(trimmed, "=[") {
+				continue
+			}
+			key := strings.TrimSpace(trimmed[:strings.Index(trimmed, "=")])
+			// In [project] only `dependencies` lists requirements; in
+			// [project.optional-dependencies] every key is an extras group.
+			if inProjectTable && key != "dependencies" {
+				continue
+			}
+			open := strings.Index(trimmed, "[")
+			items, closed := scanPep621ArrayItems(trimmed[open+1:])
+			addRequirements(items, raw, lineNum)
+			inPep621Array = !closed
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return nil, err
+	}
+	return packages, nil
+}
+
+// scanPep621ArrayItems extracts the contents of the quoted strings found in a
+// fragment of a TOML array and reports whether an unquoted "]" closed the
+// array. Scanning stops at the first unquoted "#" (start of a comment) or "]".
+// Commas, brackets and "#" inside a string are part of that string, so
+// "pkg[a,b]>=1,<2" is returned as a single item.
+func scanPep621ArrayItems(fragment string) (items []string, closed bool) {
+	var quote byte // 0 outside a string, otherwise the opening quote character
+	start := 0
+	for i := 0; i < len(fragment); i++ {
+		c := fragment[i]
+		if quote != 0 {
+			switch {
+			case c == '\\' && quote == '"':
+				i++ // skip the escaped character of a basic string
+			case c == quote:
+				items = append(items, fragment[start:i])
+				quote = 0
+			}
+			continue
+		}
+		switch c {
+		case '"', '\'':
+			quote, start = c, i+1
+		case '#':
+			return items, false
+		case ']':
+			return items, true
+		}
+	}
+	return items, false
+}
diff --git a/internal/parsers/poetry/poetry-pyproject-parser_test.go b/internal/parsers/poetry/poetry-pyproject-parser_test.go
new file mode 100644
index 0000000..0cc24d7
--- /dev/null
+++ b/internal/parsers/poetry/poetry-pyproject-parser_test.go
@@ -0,0 +1,321 @@
+package poetry
+
+import (
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/Checkmarx/manifest-parser/internal/testdata"
+ "github.com/Checkmarx/manifest-parser/pkg/parser/models"
+)
+
+// TestParsePyprojectExactVersion checks that an exact pin keeps its version and location.
+func TestParsePyprojectExactVersion(t *testing.T) {
+	content := "[tool.poetry.dependencies]\nrequests = \"2.28.2\"\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+
+	want := models.Package{
+		PackageManager: "pypi",
+		PackageName:    "requests",
+		Version:        "2.28.2",
+		FilePath:       filePath,
+		Locations: []models.Location{{
+			Line:       1,
+			StartIndex: 0,
+			EndIndex:   19,
+		}},
+	}
+	testdata.ValidatePackages(t, pkgs, []models.Package{want})
+}
+
+// TestParsePyprojectRangedVersion checks that a caret range is reported as "latest".
+func TestParsePyprojectRangedVersion(t *testing.T) {
+	content := "[tool.poetry.dependencies]\nflask = \"^2.3.0\"\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+	if pkgs[0].Version != "latest" {
+		t.Errorf("expected version %q, got %q", "latest", pkgs[0].Version)
+	}
+}
+
+// TestParsePyprojectSkipsPython checks that the "python" entry is not reported as a package.
+func TestParsePyprojectSkipsPython(t *testing.T) {
+	content := "[tool.poetry.dependencies]\npython = \"^3.9\"\nrequests = \"2.28.2\"\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package (python skipped), got %d", len(pkgs))
+	}
+	if pkgs[0].PackageName != "requests" {
+		t.Errorf("expected package %q, got %q", "requests", pkgs[0].PackageName)
+	}
+}
+
+// TestParsePyprojectDevDependencies checks that [tool.poetry.dev-dependencies] entries are parsed.
+func TestParsePyprojectDevDependencies(t *testing.T) {
+	content := "[tool.poetry.dev-dependencies]\npytest = \"7.2.0\"\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+	want := models.Package{
+		PackageManager: "pypi",
+		PackageName:    "pytest",
+		Version:        "7.2.0",
+		FilePath:       filePath,
+		Locations: []models.Location{{
+			Line:       1,
+			StartIndex: 0,
+			EndIndex:   16,
+		}},
+	}
+	testdata.ValidatePackages(t, pkgs, []models.Package{want})
+}
+
+// TestParsePyprojectGroupDependencies checks a ranged entry in a [tool.poetry.group.*.dependencies] table.
+func TestParsePyprojectGroupDependencies(t *testing.T) {
+	content := "[tool.poetry.group.lint.dependencies]\nblack = \"^22.0.0\"\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+	if pkgs[0].PackageName != "black" {
+		t.Errorf("expected package %q, got %q", "black", pkgs[0].PackageName)
+	}
+	if pkgs[0].Version != "latest" {
+		t.Errorf("expected version %q, got %q", "latest", pkgs[0].Version)
+	}
+}
+
+// TestParsePyprojectGroupExactVersion checks that an exact pin inside a group table keeps its version.
+func TestParsePyprojectGroupExactVersion(t *testing.T) {
+	content := "[tool.poetry.group.test.dependencies]\npytest = \"7.4.0\"\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+	if pkgs[0].Version != "7.4.0" {
+		t.Errorf("expected version %q, got %q", "7.4.0", pkgs[0].Version)
+	}
+}
+
+// TestParsePyprojectInlineTable checks that the version is read from an inline-table dependency.
+func TestParsePyprojectInlineTable(t *testing.T) {
+	content := "[tool.poetry.dependencies]\nnumpy = {version = \"1.24.3\", optional = true}\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+	if pkgs[0].PackageName != "numpy" {
+		t.Errorf("expected package %q, got %q", "numpy", pkgs[0].PackageName)
+	}
+	if pkgs[0].Version != "1.24.3" {
+		t.Errorf("expected version %q, got %q", "1.24.3", pkgs[0].Version)
+	}
+}
+
+// TestParsePyprojectWildcardVersion checks that a bare "*" version is reported as "latest".
+func TestParsePyprojectWildcardVersion(t *testing.T) {
+	content := "[tool.poetry.dependencies]\nrequests = \"*\"\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+	if pkgs[0].Version != "latest" {
+		t.Errorf("expected version %q, got %q", "latest", pkgs[0].Version)
+	}
+}
+
+// TestParsePyprojectPartialWildcard checks that a partial wildcard such as "2.28.*" is reported as "latest".
+func TestParsePyprojectPartialWildcard(t *testing.T) {
+	content := "[tool.poetry.dependencies]\nrequests = \"2.28.*\"\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+	if pkgs[0].Version != "latest" {
+		t.Errorf("expected version %q, got %q", "latest", pkgs[0].Version)
+	}
+}
+
+// TestParsePyprojectInlineComment checks that a trailing "# ..." comment does not affect the version.
+func TestParsePyprojectInlineComment(t *testing.T) {
+	content := "[tool.poetry.dependencies]\nrequests = \"2.28.2\" # pinned for security\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+	if pkgs[0].Version != "2.28.2" {
+		t.Errorf("expected version %q, got %q", "2.28.2", pkgs[0].Version)
+	}
+}
+
+// TestParsePyprojectNoDepSection checks that a file without dependency tables yields no packages.
+func TestParsePyprojectNoDepSection(t *testing.T) {
+	content := "[build-system]\nrequires = [\"poetry-core>=1.0.0\"]\n[tool.black]\nline-length = 88\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 0 {
+		t.Fatalf("expected 0 packages, got %d", len(pkgs))
+	}
+}
+
+// TestParsePyprojectPep621Dependencies checks a multi-line [project] dependencies array.
+func TestParsePyprojectPep621Dependencies(t *testing.T) {
+	content := "[project]\ndependencies = [\n \"requests>=2.28.0\",\n \"flask==2.3.0\",\n]\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 2 {
+		t.Fatalf("expected 2 packages, got %d", len(pkgs))
+	}
+	if pkgs[0].PackageName != "requests" {
+		t.Errorf("expected %q, got %q", "requests", pkgs[0].PackageName)
+	}
+	if pkgs[0].Version != "latest" {
+		t.Errorf("expected version %q (ranged), got %q", "latest", pkgs[0].Version)
+	}
+	if pkgs[1].PackageName != "flask" {
+		t.Errorf("expected %q, got %q", "flask", pkgs[1].PackageName)
+	}
+	if pkgs[1].Version != "2.3.0" {
+		t.Errorf("expected version %q, got %q", "2.3.0", pkgs[1].Version)
+	}
+}
+
+// TestParsePyprojectPep621OptionalDeps checks a multi-line array under [project.optional-dependencies].
+func TestParsePyprojectPep621OptionalDeps(t *testing.T) {
+	content := "[project.optional-dependencies]\ndev = [\n \"pytest>=7.0\",\n \"black>=22.0\",\n]\n"
+	filePath := filepath.Join(t.TempDir(), "pyproject.toml")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 2 {
+		t.Fatalf("expected 2 packages, got %d", len(pkgs))
+	}
+	if pkgs[0].PackageName != "pytest" {
+		t.Errorf("expected %q, got %q", "pytest", pkgs[0].PackageName)
+	}
+	if pkgs[1].PackageName != "black" {
+		t.Errorf("expected %q, got %q", "black", pkgs[1].PackageName)
+	}
+}
+
+// TestParsePyprojectRealFile parses the shared pyproject.toml fixture and
+// checks that the seven expected package names come back in order.
+func TestParsePyprojectRealFile(t *testing.T) {
+	const fixture = "../../testdata/pyproject.toml"
+	// Expected names, in the order they are compared against the parser output.
+	wantNames := []string{
+		"requests", "flask", "Pillow", "cryptography", "pytest", "numpy", "pandas",
+	}
+
+	pkgs, err := (&PoetryPyprojectParser{}).Parse(fixture)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 7 {
+		t.Fatalf("expected 7 packages, got %d", len(pkgs))
+	}
+
+	gotNames := make([]string, len(pkgs))
+	for i := range pkgs {
+		gotNames[i] = pkgs[i].PackageName
+	}
+
+	// The length check above guarantees both slices hold seven entries.
+	for i, want := range wantNames {
+		if got := gotNames[i]; got != want {
+			t.Errorf("package %d: expected %q, got %q", i, want, got)
+		}
+	}
+}
diff --git a/internal/parsers/pypi/pypi-parser.go b/internal/parsers/pypi/pypi-parser.go
index f984480..99026f2 100644
--- a/internal/parsers/pypi/pypi-parser.go
+++ b/internal/parsers/pypi/pypi-parser.go
@@ -10,9 +10,114 @@ import (
"github.com/Checkmarx/manifest-parser/pkg/parser/models"
)
-// PypiParser implements parsing of requirements.txt
+// PypiParser implements parsing of requirements.txt and related Python dependency files.
+// Supports formats generated by pip freeze, pip-compile, pip-tools, uv export, and Poetry export.
type PypiParser struct{}
+// logicalLine is one requirements entry after physical lines joined by a
+// trailing backslash (\) have been merged into a single line.
+type logicalLine struct {
+ content string // joined text of all physical lines, with --hash= tokens removed
+ firstLine int // 0-indexed line number of the first physical line
+ rawFirst string // untrimmed text of the first physical line (for index computation)
+}
+
+// pipOptionPrefixes holds the leading text of requirements-file lines that
+// carry pip command-line options instead of a package specifier.
+var pipOptionPrefixes = []string{
+	"-i ", "--index-url", "--extra-index-url",
+	"-r ", "--requirement",
+	"-c ", "--constraint",
+	"-e ", "--editable",
+	"-f ", "--find-links",
+	"--no-binary", "--only-binary",
+	"--pre", "--trusted-host",
+	"--hash=",
+}
+
+// isPipOptionLine reports whether trimmed begins with any entry of
+// pipOptionPrefixes. The caller is expected to pass an already-trimmed line.
+func isPipOptionLine(trimmed string) bool {
+	matched := false
+	for i := 0; i < len(pipOptionPrefixes) && !matched; i++ {
+		matched = strings.HasPrefix(trimmed, pipOptionPrefixes[i])
+	}
+	return matched
+}
+
+// stripHashOptions drops every whitespace-separated token that starts with
+// "--hash=" and joins the remaining tokens with single spaces.
+func stripHashOptions(line string) string {
+	fields := strings.Fields(line)
+	// Filter in place: kept never runs ahead of the read position.
+	kept := fields[:0]
+	for i := range fields {
+		if strings.HasPrefix(fields[i], "--hash=") {
+			continue
+		}
+		kept = append(kept, fields[i])
+	}
+	return strings.Join(kept, " ")
+}
+
+// preprocessLines folds physical lines that end in a backslash into the line
+// that follows them and returns one logicalLine per resulting entry.
+//
+// Each entry's content is the space-joined, trimmed pieces with --hash=
+// tokens stripped; firstLine and rawFirst describe the first physical line of
+// the entry. A physical line without a trailing backslash always produces an
+// entry, even when its content is empty. Pieces still pending when the input
+// ends on a continuation are emitted as a final entry.
+func preprocessLines(lines []string) []logicalLine {
+	var (
+		out      []logicalLine
+		pending  []string
+		start    = -1
+		startRaw string
+	)
+
+	// flush emits the pending pieces as one logical line and resets the state.
+	flush := func() {
+		text := strings.TrimSpace(stripHashOptions(strings.Join(pending, " ")))
+		out = append(out, logicalLine{content: text, firstLine: start, rawFirst: startRaw})
+		pending, start, startRaw = nil, -1, ""
+	}
+
+	for idx, physical := range lines {
+		if start == -1 {
+			start, startRaw = idx, physical
+		}
+
+		piece := strings.TrimSpace(physical)
+		continued := strings.HasSuffix(piece, "\\")
+		if continued {
+			piece = strings.TrimSpace(strings.TrimSuffix(piece, "\\"))
+		}
+		if piece != "" {
+			pending = append(pending, piece)
+		}
+		if !continued {
+			flush()
+		}
+	}
+
+	// Input ended on a continuation line.
+	if len(pending) > 0 {
+		flush()
+	}
+	return out
+}
+
func extractPackageName(line string, re *regexp.Regexp, lineNum int, manifestFile string) (string, bool) {
if match := re.FindStringSubmatch(line); match != nil {
return match[1], true
@@ -24,6 +129,13 @@ func extractPackageName(line string, re *regexp.Regexp, lineNum int, manifestFil
func extractVersion(line string) string {
var version string
switch {
+ case strings.Contains(line, "==="):
+ parts := strings.SplitN(line, "===", 2)
+ if len(parts) == 2 {
+ version = strings.TrimSpace(parts[1])
+ } else {
+ version = "latest"
+ }
case strings.Contains(line, "=="):
parts := strings.SplitN(line, "==", 2)
if len(parts) == 2 {
@@ -40,6 +152,52 @@ func extractVersion(line string) string {
return version
}
+// vcsSchemes holds the URL scheme prefixes that mark a VCS requirement.
+var vcsSchemes = []string{"git+", "hg+", "svn+", "bzr+"}
+
+// isVCSRequirement reports whether line starts with one of vcsSchemes.
+// The line is not trimmed first.
+func isVCSRequirement(line string) bool {
+	found := false
+	for i := 0; i < len(vcsSchemes) && !found; i++ {
+		found = strings.HasPrefix(line, vcsSchemes[i])
+	}
+	return found
+}
+
+// extractVCSPackageName returns the package name carried by the egg= parameter
+// of a VCS requirement's URL fragment, or "" when there is none.
+//
+// The parameter is recognised both as the first fragment entry ("#egg=name")
+// and after other entries ("#subdirectory=src&egg=name"). The name ends at the
+// next '&', space or tab.
+func extractVCSPackageName(line string) string {
+	const keyLen = len("#egg=") // "&egg=" has the same length
+
+	idx := strings.Index(line, "#egg=")
+	if idx < 0 {
+		// egg= may follow other fragment parameters; only look inside the fragment.
+		if hash := strings.Index(line, "#"); hash >= 0 {
+			if amp := strings.Index(line[hash:], "&egg="); amp >= 0 {
+				idx = hash + amp
+			}
+		}
+	}
+	if idx < 0 {
+		return ""
+	}
+
+	egg := line[idx+keyLen:]
+	if end := strings.IndexAny(egg, "& \t"); end >= 0 {
+		egg = egg[:end]
+	}
+	return strings.TrimSpace(egg)
+}
+
+// isURLRequirement reports whether line has the "pkg @ URL" shape, detected
+// by the presence of an " @ " separator anywhere in the line.
+func isURLRequirement(line string) bool {
+	const separator = " @ "
+	return strings.Contains(line, separator)
+}
+
+// extractURLPackageName returns the package name of a direct-reference
+// requirement of the form "name[extras] @ url".
+//
+// Everything before the first " @ " is taken as the name; an extras suffix such
+// as "pkg[extra]" is reduced to "pkg". It returns "" when the line contains no
+// " @ " separator.
+func extractURLPackageName(line string) string {
+	idx := strings.Index(line, " @ ")
+	if idx < 0 {
+		return ""
+	}
+	name := line[:idx]
+	if bracket := strings.Index(name, "["); bracket >= 0 {
+		name = name[:bracket]
+	}
+	// Trim last so that whitespace between the name and its extras
+	// ("pkg [extra] @ ...") does not leak into the result.
+	return strings.TrimSpace(name)
+}
+
func computeIndices(raw, pkgName string) (int, int) {
// Find the start index of the package name
startIdx := strings.Index(raw, pkgName)
@@ -74,19 +232,55 @@ func (p *PypiParser) Parse(manifestFile string) ([]models.Package, error) {
}
defer file.Close()
- var packages []models.Package
+ // Read all lines into a slice
+ var lines []string
scanner := bufio.NewScanner(file)
- lineNum := 0
+ for scanner.Scan() {
+ lines = append(lines, scanner.Text())
+ }
+ if err := scanner.Err(); err != nil {
+ return nil, err
+ }
+ // Preprocess: join continuation lines and strip hash options
+ logicalLines := preprocessLines(lines)
+
+ var packages []models.Package
re := regexp.MustCompile(`^([a-zA-Z0-9_\-\.]+)(?:\[.*\])?(?:[>==4.2,<6.0\nmylib===1.0.dev5\n-r other-requirements.txt\n--index-url https://pypi.org/simple\n"
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "requirements.txt")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &PypiParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 5 {
+ t.Fatalf("expected 5 packages, got %d", len(pkgs))
+ }
+
+ // flask==3.1.0
+ if pkgs[0].PackageName != "flask" || pkgs[0].Version != "3.1.0" {
+ t.Errorf("pkg 0: got %q==%q, want flask==3.1.0", pkgs[0].PackageName, pkgs[0].Version)
+ }
+ // requests @ URL
+ if pkgs[1].PackageName != "requests" || pkgs[1].Version != "latest" {
+ t.Errorf("pkg 1: got %q==%q, want requests==latest", pkgs[1].PackageName, pkgs[1].Version)
+ }
+ // git+...#egg=custom-pkg
+ if pkgs[2].PackageName != "custom-pkg" || pkgs[2].Version != "latest" {
+ t.Errorf("pkg 2: got %q==%q, want custom-pkg==latest", pkgs[2].PackageName, pkgs[2].Version)
+ }
+ // django>=3.2,<4.0
+ if pkgs[3].PackageName != "django" || pkgs[3].Version != "latest" {
+ t.Errorf("pkg 3: got %q==%q, want django==latest", pkgs[3].PackageName, pkgs[3].Version)
+ }
+ // mylib===1.0.dev5
+ if pkgs[4].PackageName != "mylib" || pkgs[4].Version != "1.0.dev5" {
+ t.Errorf("pkg 4: got %q==%q, want mylib==1.0.dev5", pkgs[4].PackageName, pkgs[4].Version)
+ }
+}
diff --git a/internal/parsers/pypi/testdata/requirements-pip-compile.txt b/internal/parsers/pypi/testdata/requirements-pip-compile.txt
new file mode 100644
index 0000000..4a12b22
--- /dev/null
+++ b/internal/parsers/pypi/testdata/requirements-pip-compile.txt
@@ -0,0 +1,14 @@
+#
+# This file is autogenerated by pip-compile with Python 3.11
+# by the following command:
+#
+# pip-compile requirements.in
+#
+asgiref==3.8.1
+ # via django
+django==5.2.13
+ # via -r requirements.in
+sqlparse==0.5.5
+ # via django
+tzdata==2025.3
+ # via django
diff --git a/internal/parsers/pypi/testdata/requirements-pip-freeze.txt b/internal/parsers/pypi/testdata/requirements-pip-freeze.txt
new file mode 100644
index 0000000..77210e9
--- /dev/null
+++ b/internal/parsers/pypi/testdata/requirements-pip-freeze.txt
@@ -0,0 +1,4 @@
+asgiref==3.8.1
+Django==5.2.13
+sqlparse==0.5.5
+tzdata==2025.3
diff --git a/internal/parsers/pypi/testdata/requirements-uv-export.txt b/internal/parsers/pypi/testdata/requirements-uv-export.txt
new file mode 100644
index 0000000..281b691
--- /dev/null
+++ b/internal/parsers/pypi/testdata/requirements-uv-export.txt
@@ -0,0 +1,33 @@
+# This file was autogenerated by uv via the following command:
+# uv export --no-dev --output-file requirements.txt
+asgiref==3.8.1 \
+ --hash=sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47 \
+ --hash=sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590
+ # via
+ # django
+ # sample-app
+django==5.2.13 \
+ --hash=sha256:a5cc92645b8eb50e38cdd2f9e6a12db171c61e3e6172a1a51b85e8ebc2291b42 \
+ --hash=sha256:b5bb1d13cfe3b22e8a31d7a0bae2777a9c019a81d59ef4f72c8581f0d3e35f0e
+ # via sample-app
+pycryptodome==3.21.0 \
+ --hash=sha256:12ce0e6d32c4a63433cf26e9f5be9fd3a1c2cbe2bce1c3a834e3b5a43e8e82e0 \
+ --hash=sha256:4d2cd4a5c4b939f2b5e2f8611a8b5c7f8c5a2de1f75c3e7c5e1c8f5a3c2b1e0a \
+ --hash=sha256:7e3c5c2f1a4b8d9e0f1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a
+ # via sample-app
+sqlparse==0.5.5 \
+ --hash=sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca \
+ --hash=sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946e4a7cf1a8b6e26cdc4b4
+ # via
+ # django
+ # sample-app
+typing-extensions==4.12.2 \
+ --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
+ --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
+ # via
+ # asgiref
+ # sample-app
+tzdata==2025.3 ; sys_platform == 'win32' \
+ --hash=sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1 \
+ --hash=sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7
+ # via django
diff --git a/internal/parsers/setuptools/setup_cfg_parser.go b/internal/parsers/setuptools/setup_cfg_parser.go
new file mode 100644
index 0000000..2e2f08c
--- /dev/null
+++ b/internal/parsers/setuptools/setup_cfg_parser.go
@@ -0,0 +1,183 @@
+package setuptools
+
+import (
+ "bufio"
+ "log"
+ "os"
+ "regexp"
+ "strings"
+
+ "github.com/Checkmarx/manifest-parser/pkg/parser/models"
+)
+
+// SetupCfgParser parses setup.cfg files; it carries no state.
+type SetupCfgParser struct{}
+
+// extractVersion returns the exact version pinned by a requirement specifier,
+// or "latest" when the specifier does not pin one.
+//
+// A version is reported only for "==" and "===" pins without a wildcard. Any
+// environment marker after ';' is ignored, so a marker comparison such as
+// `sys_platform == "win32"` is never mistaken for a version pin. An empty pin
+// ("pkg==") also falls back to "latest".
+func extractVersion(line string) string {
+	// Drop the environment marker before looking for the pin operator.
+	if semi := strings.Index(line, ";"); semi >= 0 {
+		line = line[:semi]
+	}
+
+	idx := strings.Index(line, "==")
+	if idx < 0 {
+		return "latest"
+	}
+
+	// "===" (arbitrary equality) leaves one extra '=' in front of the version.
+	version := strings.TrimSpace(strings.TrimPrefix(line[idx+2:], "="))
+	if version == "" || strings.Contains(version, "*") {
+		return "latest"
+	}
+	return version
+}
+
+// extractPackageName returns the first capture group of re when line matches
+// it, and reports whether there was a match.
+func extractPackageName(line string, re *regexp.Regexp) (string, bool) {
+	groups := re.FindStringSubmatch(line)
+	if groups == nil {
+		return "", false
+	}
+	return groups[1], true
+}
+
+// computeIndices returns the start and end byte columns of a dependency
+// within raw.
+//
+// The start is the first occurrence of pkgName, falling back to the first
+// character that is not a space or tab. The end is one past the last such
+// character before any '#', or before the end of the line when there is none.
+func computeIndices(raw, pkgName string) (int, int) {
+	notBlank := func(r rune) bool { return r != ' ' && r != '\t' }
+
+	start := strings.Index(raw, pkgName)
+	if start == -1 {
+		start = strings.IndexFunc(raw, notBlank)
+	}
+
+	// Ignore everything from the first '#' onwards.
+	body := raw
+	if hash := strings.Index(raw, "#"); hash != -1 {
+		body = raw[:hash]
+	}
+
+	return start, strings.LastIndexFunc(body, notBlank) + 1
+}
+
+func (p *SetupCfgParser) Parse(manifestFile string) ([]models.Package, error) {
+ file, err := os.Open(manifestFile)
+ if err != nil {
+ log.Printf("Error: Failed to open %s: %v", manifestFile, err)
+ return nil, err
+ }
+ defer file.Close()
+
+ log.Printf("Debug: Parsing setup.cfg file: %s", manifestFile)
+
+ var packages []models.Package
+ scanner := bufio.NewScanner(file)
+ lineNum := 0
+
+ var currentSection string
+ var currentKey string
+ re := regexp.MustCompile(`^([a-zA-Z0-9_\-\.]+)(?:\[.*\])?(?:[>==2.0\n flask==2.0.1\n"
+ tmpDir := t.TempDir()
+ filePath := filepath.Join(tmpDir, "setup.cfg")
+ os.WriteFile(filePath, []byte(content), 0644)
+
+ parser := &SetupCfgParser{}
+ pkgs, err := parser.Parse(filePath)
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if len(pkgs) != 2 {
+ t.Fatalf("expected 2 packages with mixed indentation, got %d", len(pkgs))
+ }
+}
+
+// TestSetupCfgParser_PackageNameWithNumbers checks names that contain or start with digits.
+func TestSetupCfgParser_PackageNameWithNumbers(t *testing.T) {
+	content := "[options]\ninstall_requires =\n py2exe\n 3to2\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 2 {
+		t.Fatalf("expected 2 packages with numeric names, got %d", len(pkgs))
+	}
+}
+
+// TestSetupCfgParser_DuplicateSections feeds the same section twice and only
+// requires that at least one package is reported.
+func TestSetupCfgParser_DuplicateSections(t *testing.T) {
+	content := "[options]\ninstall_requires =\n flask==1.0.0\n[options]\ninstall_requires =\n flask==2.0.0\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	// Deliberately loose: the test does not pin whether duplicates are merged.
+	if len(pkgs) < 1 {
+		t.Fatalf("expected at least 1 package with duplicate sections, got %d", len(pkgs))
+	}
+}
+
+// TestSetupCfgParser_VeryLongLine parses a list of 100 one-per-line dependencies
+// (the names cycle through package0..package9) and expects 100 packages back.
+func TestSetupCfgParser_VeryLongLine(t *testing.T) {
+	longDeps := "[options]\ninstall_requires =\n"
+	for i := 0; i < 100; i++ {
+		longDeps += " package" + string(rune(48+i%10)) + "\n"
+	}
+
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(longDeps), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 100 {
+		t.Fatalf("expected 100 packages from long dependency list, got %d", len(pkgs))
+	}
+}
+
+// TestSetupCfgParser_UnicodePackageName expects a name containing non-ASCII
+// characters to produce no packages.
+func TestSetupCfgParser_UnicodePackageName(t *testing.T) {
+	content := "[options]\ninstall_requires =\n cafรฉ-package\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 0 {
+		t.Fatalf("expected 0 packages with unicode name, got %d", len(pkgs))
+	}
+}
+
+// TestSetupCfgParser_VersionSpecifierEdgeCases checks that only "==" yields a
+// concrete version; "!=", "~=", ">" and "<" are all reported as "latest".
+func TestSetupCfgParser_VersionSpecifierEdgeCases(t *testing.T) {
+	content := "[options]\ninstall_requires =\n package1==1.0.0\n package2!=1.0.0\n package3~=1.0\n package4>1.0\n package5<2.0\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 5 {
+		t.Fatalf("expected 5 packages with various specifiers, got %d", len(pkgs))
+	}
+
+	versions := make(map[string]string)
+	for _, pkg := range pkgs {
+		versions[pkg.PackageName] = pkg.Version
+	}
+
+	if versions["package1"] != "1.0.0" {
+		t.Errorf("package1: expected exact version, got %s", versions["package1"])
+	}
+	if versions["package2"] != "latest" {
+		t.Errorf("package2 (!=): expected latest, got %s", versions["package2"])
+	}
+	if versions["package3"] != "latest" {
+		t.Errorf("package3 (~=): expected latest, got %s", versions["package3"])
+	}
+	if versions["package4"] != "latest" {
+		t.Errorf("package4 (>): expected latest, got %s", versions["package4"])
+	}
+	if versions["package5"] != "latest" {
+		t.Errorf("package5 (<): expected latest, got %s", versions["package5"])
+	}
+}
diff --git a/internal/parsers/setuptools/setup_cfg_parser_test.go b/internal/parsers/setuptools/setup_cfg_parser_test.go
new file mode 100644
index 0000000..f69f8c8
--- /dev/null
+++ b/internal/parsers/setuptools/setup_cfg_parser_test.go
@@ -0,0 +1,261 @@
+package setuptools
+
+import (
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/Checkmarx/manifest-parser/internal/testdata"
+ "github.com/Checkmarx/manifest-parser/pkg/parser/models"
+)
+
+// TestSetupCfgParser_ParseExactVersion checks an exact pin; the fixture's
+// four-space indent is what makes the asserted StartIndex 4.
+func TestSetupCfgParser_ParseExactVersion(t *testing.T) {
+	content := "[options]\ninstall_requires =\n    flask==2.0.1\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+
+	got := pkgs[0]
+	want := models.Package{
+		PackageManager: "pypi",
+		PackageName:    "flask",
+		Version:        "2.0.1",
+		FilePath:       filePath,
+		Locations: []models.Location{{
+			Line:       2,
+			StartIndex: 4,
+			EndIndex:   16,
+		}},
+	}
+	testdata.ValidatePackages(t, []models.Package{got}, []models.Package{want})
+}
+
+// TestSetupCfgParser_ParseRangeVersion checks that a ">=" range is reported as
+// "latest"; the fixture's four-space indent matches the asserted StartIndex 4.
+func TestSetupCfgParser_ParseRangeVersion(t *testing.T) {
+	content := "[options]\ninstall_requires =\n    requests>=2.26.0\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+
+	got := pkgs[0]
+	want := models.Package{
+		PackageManager: "pypi",
+		PackageName:    "requests",
+		Version:        "latest",
+		FilePath:       filePath,
+		Locations: []models.Location{{
+			Line:       2,
+			StartIndex: 4,
+			EndIndex:   20,
+		}},
+	}
+	testdata.ValidatePackages(t, []models.Package{got}, []models.Package{want})
+}
+
+// TestSetupCfgParser_ParseMultipleDependencies checks three install_requires
+// entries; the fixture's four-space indent matches the asserted StartIndex 4.
+func TestSetupCfgParser_ParseMultipleDependencies(t *testing.T) {
+	content := "[options]\ninstall_requires =\n    requests>=2.26.0\n    flask==2.0.1\n    six\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 3 {
+		t.Fatalf("expected 3 packages, got %d", len(pkgs))
+	}
+
+	expected := []models.Package{
+		{
+			PackageManager: "pypi",
+			PackageName:    "requests",
+			Version:        "latest",
+			FilePath:       filePath,
+			Locations:      []models.Location{{Line: 2, StartIndex: 4, EndIndex: 20}},
+		},
+		{
+			PackageManager: "pypi",
+			PackageName:    "flask",
+			Version:        "2.0.1",
+			FilePath:       filePath,
+			Locations:      []models.Location{{Line: 3, StartIndex: 4, EndIndex: 16}},
+		},
+		{
+			PackageManager: "pypi",
+			PackageName:    "six",
+			Version:        "latest",
+			FilePath:       filePath,
+			Locations:      []models.Location{{Line: 4, StartIndex: 4, EndIndex: 7}},
+		},
+	}
+	testdata.ValidatePackages(t, pkgs, expected)
+}
+
+// TestSetupCfgParser_ParseSetupRequires checks that setup_requires entries are
+// parsed; the fixture's four-space indent matches the asserted StartIndex 4.
+func TestSetupCfgParser_ParseSetupRequires(t *testing.T) {
+	content := "[options]\nsetup_requires =\n    setuptools>=42\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+
+	got := pkgs[0]
+	want := models.Package{
+		PackageManager: "pypi",
+		PackageName:    "setuptools",
+		Version:        "latest",
+		FilePath:       filePath,
+		Locations:      []models.Location{{Line: 2, StartIndex: 4, EndIndex: 18}},
+	}
+	testdata.ValidatePackages(t, []models.Package{got}, []models.Package{want})
+}
+
+// TestSetupCfgParser_ParseExtrasRequire checks entries under
+// [options.extras_require]; the four-space indent matches the asserted StartIndex 4.
+func TestSetupCfgParser_ParseExtrasRequire(t *testing.T) {
+	content := "[options.extras_require]\ndev =\n    pytest>=6.0\n    black==22.3.0\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 2 {
+		t.Fatalf("expected 2 packages, got %d", len(pkgs))
+	}
+
+	expected := []models.Package{
+		{
+			PackageManager: "pypi",
+			PackageName:    "pytest",
+			Version:        "latest",
+			FilePath:       filePath,
+			Locations:      []models.Location{{Line: 2, StartIndex: 4, EndIndex: 15}},
+		},
+		{
+			PackageManager: "pypi",
+			PackageName:    "black",
+			Version:        "22.3.0",
+			FilePath:       filePath,
+			Locations:      []models.Location{{Line: 3, StartIndex: 4, EndIndex: 17}},
+		},
+	}
+	testdata.ValidatePackages(t, pkgs, expected)
+}
+
+// TestSetupCfgParser_ParseSkipCommentLine checks that a comment-only line is
+// skipped; the four-space indent matches the asserted StartIndex 4.
+func TestSetupCfgParser_ParseSkipCommentLine(t *testing.T) {
+	content := "[options]\ninstall_requires =\n    # commented out\n    flask==2.0.1\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+
+	got := pkgs[0]
+	want := models.Package{
+		PackageManager: "pypi",
+		PackageName:    "flask",
+		Version:        "2.0.1",
+		FilePath:       filePath,
+		Locations:      []models.Location{{Line: 3, StartIndex: 4, EndIndex: 16}},
+	}
+	testdata.ValidatePackages(t, []models.Package{got}, []models.Package{want})
+}
+
+// TestSetupCfgParser_ParseInlineComment checks that a trailing "# ..." comment is
+// excluded from the location; the four-space indent matches the asserted StartIndex 4.
+func TestSetupCfgParser_ParseInlineComment(t *testing.T) {
+	content := "[options]\ninstall_requires =\n    flask==2.0.1 # web framework\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+
+	got := pkgs[0]
+	want := models.Package{
+		PackageManager: "pypi",
+		PackageName:    "flask",
+		Version:        "2.0.1",
+		FilePath:       filePath,
+		Locations:      []models.Location{{Line: 2, StartIndex: 4, EndIndex: 16}},
+	}
+	testdata.ValidatePackages(t, []models.Package{got}, []models.Package{want})
+}
+
+// TestSetupCfgParser_ParseNoDependencies checks that a file with only [metadata] yields no packages.
+func TestSetupCfgParser_ParseNoDependencies(t *testing.T) {
+	content := "[metadata]\nname = my-package\nversion = 1.0.0\n"
+	filePath := filepath.Join(t.TempDir(), "setup.cfg")
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing fixture: %v", err)
+	}
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 0 {
+		t.Fatalf("expected 0 packages, got %d", len(pkgs))
+	}
+}
+
+// TestSetupCfgParser_Parse_RealFile parses the shared setup.cfg fixture and
+// compares every reported package, including its location, with the expected list.
+func TestSetupCfgParser_Parse_RealFile(t *testing.T) {
+	const filePath = "../../testdata/setup.cfg"
+
+	// dep builds one expected entry; every expected location starts at column 4.
+	dep := func(name, version string, line, end int) models.Package {
+		return models.Package{
+			PackageManager: "pypi",
+			PackageName:    name,
+			Version:        version,
+			FilePath:       filePath,
+			Locations:      []models.Location{{Line: line, StartIndex: 4, EndIndex: end}},
+		}
+	}
+	expected := []models.Package{
+		dep("requests", "latest", 6, 20),
+		dep("flask", "2.0.1", 7, 16),
+		dep("six", "latest", 8, 7),
+		dep("Pillow", "9.0.0", 9, 17),
+		dep("cryptography", "2.9.2", 10, 23),
+		dep("setuptools", "latest", 13, 18),
+		dep("pytest", "latest", 17, 15),
+		dep("black", "22.3.0", 18, 17),
+	}
+
+	pkgs, err := (&SetupCfgParser{}).Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	testdata.ValidatePackages(t, pkgs, expected)
+}
diff --git a/internal/parsers/setuptools/setup_py_parser.go b/internal/parsers/setuptools/setup_py_parser.go
new file mode 100644
index 0000000..5da630c
--- /dev/null
+++ b/internal/parsers/setuptools/setup_py_parser.go
@@ -0,0 +1,369 @@
+package setuptools
+
+import (
+ "log"
+ "os"
+ "regexp"
+ "strings"
+
+ "github.com/Checkmarx/manifest-parser/pkg/parser/models"
+)
+
+// SetupPyParser parses setuptools setup.py manifests into pypi package entries.
+type SetupPyParser struct{}
+
+// depWithPosition is one dependency found in a setup.py file together with the
+// position it was found at. Parse copies these fields into models.Package and
+// models.Location.
+type depWithPosition struct {
+	name       string // package name; becomes Package.PackageName
+	version    string // version string; becomes Package.Version
+	lineNum    int    // becomes Location.Line
+	startIndex int    // becomes Location.StartIndex
+	endIndex   int    // becomes Location.EndIndex
+}
+
+// extractVersionPy returns the version pinned by an exact "==" specifier in a
+// requirement string such as "flask==2.0.1".
+//
+// Anything that is not an exact pin is reported as "latest": a requirement
+// without "==", a wildcard pin such as "==1.*", or an empty pin. An environment
+// marker following the pin ("==1.1.6; python_version<'3.4'") is not part of the
+// version and is dropped.
+func extractVersionPy(line string) string {
+	idx := strings.Index(line, "==")
+	if idx == -1 {
+		return "latest"
+	}
+
+	version := line[idx+len("=="):]
+	// PEP 508 separates the environment marker from the specifier with ';'.
+	if semi := strings.IndexByte(version, ';'); semi != -1 {
+		version = version[:semi]
+	}
+	version = strings.TrimSpace(version)
+
+	if version == "" || strings.Contains(version, "*") {
+		return "latest"
+	}
+	return version
+}
+
+// extractPackageNamePy applies re to line and returns the text of its first
+// capture group. The boolean is false, and the name empty, when re does not match.
+func extractPackageNamePy(line string, re *regexp.Regexp) (string, bool) {
+	groups := re.FindStringSubmatch(line)
+	if groups == nil {
+		return "", false
+	}
+	return groups[1], true
+}
+
+// findPositionInFile locates the first occurrence of depString in fullText at
+// or after byte offset searchStartPos.
+//
+// It returns the zero-based line number on which the match starts and the byte
+// columns of the match on that line: startIndex is the column of its first
+// byte and endIndex is startIndex plus len(depString). When depString cannot be
+// found a warning is logged and (0, 0, 0) is returned.
+//
+// searchStartPos is validated before slicing: a negative offset (for example
+// the -1 of a failed strings.Index) searches from the start of the file, and an
+// offset past the end of fullText is treated as "not found" instead of panicking.
+func findPositionInFile(fullText string, depString string, searchStartPos int) (lineNum, startIndex, endIndex int) {
+	from := searchStartPos
+	if from < 0 {
+		from = 0
+	}
+
+	rel := -1
+	if from <= len(fullText) {
+		rel = strings.Index(fullText[from:], depString)
+	}
+	if rel == -1 {
+		log.Printf("Warning: Could not locate '%s' in file after position %d", depString, searchStartPos)
+		return 0, 0, 0
+	}
+
+	matchPos := from + rel
+	before := fullText[:matchPos]
+
+	// The line number is the count of newlines before the match; the column is
+	// the distance from the byte after the last of those newlines (LastIndexByte
+	// returns -1 on the first line, which makes the column equal to matchPos).
+	lineNum = strings.Count(before, "\n")
+	startIndex = matchPos - (strings.LastIndexByte(before, '\n') + 1)
+	endIndex = startIndex + len(depString)
+
+	return lineNum, startIndex, endIndex
+}
+
+// extractDepsFromListContent extracts dependencies from list/dict content and returns positions
+func extractDepsFromListContent(content string, fullText string, searchStartPos int) []depWithPosition {
+ var deps []depWithPosition
+
+ singleQuoteRe := regexp.MustCompile(`'([^']*)'`)
+ doubleQuoteRe := regexp.MustCompile(`"([^"]*)`)
+
+ singleMatches := singleQuoteRe.FindAllStringSubmatchIndex(content, -1)
+ doubleMatches := doubleQuoteRe.FindAllStringSubmatchIndex(content, -1)
+
+ type match struct {
+ dep string
+ startInContent int
+ endInContent int
+ }
+ var allMatches []match
+
+ for _, m := range singleMatches {
+ if len(m) >= 4 {
+ dep := content[m[2]:m[3]]
+ startPos := m[0]
+
+ endPos := m[1]
+ if endPos < len(content) {
+ afterQuote := endPos
+ for afterQuote < len(content) && (content[afterQuote] == ' ' || content[afterQuote] == '\t') {
+ afterQuote++
+ }
+ if afterQuote < len(content) && content[afterQuote] == ':' {
+ log.Printf("Skipping dict key: %s", dep)
+ continue
+ }
+ }
+
+ allMatches = append(allMatches, match{
+ dep: dep,
+ startInContent: startPos,
+ endInContent: endPos,
+ })
+ }
+ }
+
+ for _, m := range doubleMatches {
+ if len(m) >= 4 {
+ dep := content[m[2]:m[3]]
+ startPos := m[0]
+
+ endPos := m[1]
+ if endPos < len(content) {
+ afterQuote := endPos
+ for afterQuote < len(content) && (content[afterQuote] == ' ' || content[afterQuote] == '\t') {
+ afterQuote++
+ }
+ if afterQuote < len(content) && content[afterQuote] == ':' {
+ log.Printf("Skipping dict key: %s", dep)
+ continue
+ }
+ }
+
+ allMatches = append(allMatches, match{
+ dep: dep,
+ startInContent: startPos,
+ endInContent: endPos,
+ })
+ }
+ }
+
+ pkgNameRe := regexp.MustCompile(`^([a-zA-Z0-9_\-\.]+)(?:\[.*\])?(?:[>== len(text) {
+ return ""
+ }
+
+ openChar := text[startPos]
+ var closeChar byte
+ switch openChar {
+ case '[':
+ closeChar = ']'
+ case '(':
+ closeChar = ')'
+ case '{':
+ closeChar = '}'
+ default:
+ return ""
+ }
+
+ depth := 0
+ inString := false
+ stringChar := byte(0)
+ escaped := false
+
+ for i := startPos; i < len(text); i++ {
+ ch := text[i]
+
+ if escaped {
+ escaped = false
+ continue
+ }
+
+ if ch == '\\' {
+ escaped = true
+ continue
+ }
+
+ if (ch == '"' || ch == '\'') && !inString {
+ inString = true
+ stringChar = ch
+ continue
+ }
+
+ if inString && ch == stringChar {
+ inString = false
+ continue
+ }
+
+ if inString {
+ continue
+ }
+
+ if ch == openChar {
+ depth++
+ } else if ch == closeChar {
+ depth--
+ if depth == 0 {
+ return text[startPos+1 : i]
+ }
+ }
+ }
+
+ return ""
+}
+
+// extractDependencies returns the dependencies listed under key (for example
+// "install_requires") in setupText, the text of the setup() call.
+//
+// The key is looked up as "key=" and the bracketed value that follows is
+// extracted with extractListContent and handed to extractDepsFromListContent;
+// fullText and searchStartPos are forwarded to the latter unchanged. The result
+// is nil when the key is absent, nothing follows it, or the value cannot be
+// extracted.
+func extractDependencies(setupText string, key string, fullText string, searchStartPos int) []depWithPosition {
+	keyPattern := key + "="
+	keyIndex := strings.Index(setupText, keyPattern)
+	if keyIndex == -1 {
+		log.Printf("Debug: Key '%s' not found in setup() call", key)
+		return nil
+	}
+
+	log.Printf("Debug: Found %s at position %d", key, keyIndex)
+
+	// Skip whitespace between '=' and the opening bracket. Line breaks are
+	// skipped as well, matching how Parse handles extras_require, so a value
+	// that starts on the following line is still picked up.
+	startPos := keyIndex + len(keyPattern)
+	for startPos < len(setupText) && strings.IndexByte(" \t\r\n", setupText[startPos]) != -1 {
+		startPos++
+	}
+
+	if startPos >= len(setupText) {
+		log.Printf("Warning: No bracket found after %s", key)
+		return nil
+	}
+
+	content := extractListContent(setupText, startPos)
+	if content == "" {
+		log.Printf("Warning: Could not extract list content for %s", key)
+		return nil
+	}
+
+	log.Printf("Debug: Extracted %d characters from %s content", len(content), key)
+
+	deps := extractDepsFromListContent(content, fullText, searchStartPos)
+	log.Printf("Debug: Found %d dependencies in %s", len(deps), key)
+	return deps
+}
+
+// Parse reads the setup.py file at manifestFile and returns the pypi packages
+// found under the install_requires, setup_requires, tests_require and
+// extras_require arguments of its setup() call, in that order.
+//
+// Each package carries manifestFile as FilePath and a single Location taken
+// from the position recorded for the dependency. The only error returned is
+// the one from reading the file; it is also logged.
+func (p *SetupPyParser) Parse(manifestFile string) ([]models.Package, error) {
+	data, err := os.ReadFile(manifestFile)
+	if err != nil {
+		log.Printf("Error: Failed to read %s: %v", manifestFile, err)
+		return nil, err
+	}
+
+	log.Printf("Debug: Parsing setup.py file: %s (%d bytes)", manifestFile, len(data))
+
+	text := string(data)
+	var packages []models.Package
+
+	setupStart := strings.Index(text, "setup(")
+	if setupStart == -1 {
+		log.Printf("Warning: setup() call not found in %s", manifestFile)
+		// With setupStart at 0 the "setupStart > 0" fallback below is not taken.
+		setupStart = 0
+	} else {
+		log.Printf("Debug: Found setup() call at position %d", setupStart)
+		// Advance past "setup" so setupStart indexes the opening '('.
+		setupStart += len("setup")
+	}
+
+	// Take the text between the parentheses of setup(...); if that cannot be
+	// extracted, fall back to everything from the '(' to the end of the file.
+	setupContent := extractListContent(text, setupStart)
+	if setupContent == "" && setupStart > 0 {
+		log.Printf("Warning: Could not extract setup() content from %s", manifestFile)
+		setupContent = text[setupStart:]
+	} else if setupContent != "" {
+		log.Printf("Debug: Extracted setup() content, %d bytes", len(setupContent))
+	}
+
+	// List-valued keys: each is searched for in setupContent, while the offset
+	// of its first "key=" occurrence in the whole file is passed along as the
+	// starting point for locating the dependency positions.
+	for _, key := range []string{"install_requires", "setup_requires", "tests_require"} {
+		keyPosInText := strings.Index(text, key+"=")
+		deps := extractDependencies(setupContent, key, text, keyPosInText)
+		if len(deps) == 0 {
+			log.Printf("Debug: No %s found in setup.py", key)
+		}
+		for _, dep := range deps {
+			packages = append(packages, models.Package{
+				PackageManager: "pypi",
+				PackageName:    dep.name,
+				Version:        dep.version,
+				FilePath:       manifestFile,
+				Locations: []models.Location{{
+					Line:       dep.lineNum,
+					StartIndex: dep.startIndex,
+					EndIndex:   dep.endIndex,
+				}},
+			})
+			log.Printf("Debug: Found dependency %s@%s at line %d in %s", dep.name, dep.version, dep.lineNum, key)
+		}
+	}
+
+	// extras_require is handled separately from the list-valued keys above: its
+	// value is located via the first '=' after the key name and passed straight
+	// to extractDepsFromListContent.
+	extrasStart := strings.Index(setupContent, "extras_require")
+	if extrasStart != -1 {
+		log.Printf("Debug: Found extras_require at position %d", extrasStart)
+		eqIndex := strings.Index(setupContent[extrasStart:], "=")
+		if eqIndex != -1 {
+			// Skip spaces, tabs and newlines between '=' and the value.
+			dictStartPos := extrasStart + eqIndex + 1
+			for dictStartPos < len(setupContent) && (setupContent[dictStartPos] == ' ' || setupContent[dictStartPos] == '\t' || setupContent[dictStartPos] == '\n') {
+				dictStartPos++
+			}
+			if dictStartPos < len(setupContent) {
+				dictContent := extractListContent(setupContent, dictStartPos)
+				if dictContent != "" {
+					log.Printf("Debug: Extracted %d characters from extras_require", len(dictContent))
+					// Positions are searched for from the first "extras_require" in the whole file.
+					extrasStartInText := strings.Index(text, "extras_require")
+					deps := extractDepsFromListContent(dictContent, text, extrasStartInText)
+					log.Printf("Debug: Found %d dependencies in extras_require", len(deps))
+					for _, dep := range deps {
+						packages = append(packages, models.Package{
+							PackageManager: "pypi",
+							PackageName:    dep.name,
+							Version:        dep.version,
+							FilePath:       manifestFile,
+							Locations: []models.Location{{
+								Line:       dep.lineNum,
+								StartIndex: dep.startIndex,
+								EndIndex:   dep.endIndex,
+							}},
+						})
+						log.Printf("Debug: Found dependency %s@%s at line %d in extras_require", dep.name, dep.version, dep.lineNum)
+					}
+				} else {
+					log.Printf("Warning: Could not extract dict content for extras_require")
+				}
+			} else {
+				log.Printf("Warning: No opening bracket found for extras_require")
+			}
+		} else {
+			log.Printf("Warning: No equals sign found after extras_require")
+		}
+	} else {
+		log.Printf("Debug: extras_require not found in setup() call")
+	}
+
+	log.Printf("Debug: Successfully parsed %s, found %d dependencies", manifestFile, len(packages))
+	return packages, nil
+}
diff --git a/internal/parsers/setuptools/setup_py_parser_test.go b/internal/parsers/setuptools/setup_py_parser_test.go
new file mode 100644
index 0000000..dc1e959
--- /dev/null
+++ b/internal/parsers/setuptools/setup_py_parser_test.go
@@ -0,0 +1,226 @@
+package setuptools
+
+import (
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/Checkmarx/manifest-parser/internal/testdata"
+ "github.com/Checkmarx/manifest-parser/pkg/parser/models"
+)
+
+// TestSetupPyParser_ParseSingleInstallRequire verifies that a single exact pin
+// in install_requires is reported as one pypi package with the pinned version.
+func TestSetupPyParser_ParseSingleInstallRequire(t *testing.T) {
+	content := "from setuptools import setup\n\nsetup(\n    install_requires=['flask==2.0.1'],\n)\n"
+	filePath := filepath.Join(t.TempDir(), "setup.py")
+	// Stop here if the fixture cannot be written, rather than letting Parse
+	// fail later with a less obvious read error.
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing %s: %v", filePath, err)
+	}
+
+	parser := &SetupPyParser{}
+	pkgs, err := parser.Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+
+	got := pkgs[0]
+	if got.PackageManager != "pypi" || got.PackageName != "flask" || got.Version != "2.0.1" {
+		t.Errorf("got package %s %s, want flask 2.0.1", got.PackageName, got.Version)
+	}
+}
+
+// TestSetupPyParser_ParseRangeInstallRequire verifies that a ">=" range in
+// install_requires is reported with the version "latest".
+func TestSetupPyParser_ParseRangeInstallRequire(t *testing.T) {
+	content := "from setuptools import setup\n\nsetup(\n    install_requires=['requests>=2.26.0'],\n)\n"
+	filePath := filepath.Join(t.TempDir(), "setup.py")
+	// Stop here if the fixture cannot be written, rather than letting Parse
+	// fail later with a less obvious read error.
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing %s: %v", filePath, err)
+	}
+
+	parser := &SetupPyParser{}
+	pkgs, err := parser.Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+
+	got := pkgs[0]
+	if got.PackageManager != "pypi" || got.PackageName != "requests" || got.Version != "latest" {
+		t.Errorf("got package %s %s, want requests latest", got.PackageName, got.Version)
+	}
+}
+
+// TestSetupPyParser_ParseMultipleDependencies verifies that every entry of a
+// multi-line install_requires list is reported: exact pins keep their version,
+// ranges and bare names become "latest".
+func TestSetupPyParser_ParseMultipleDependencies(t *testing.T) {
+	content := `from setuptools import setup
+
+setup(
+    install_requires=[
+        'requests>=2.26.0',
+        'flask==2.0.1',
+        'six',
+    ],
+)
+`
+	filePath := filepath.Join(t.TempDir(), "setup.py")
+	// Stop here if the fixture cannot be written, rather than letting Parse
+	// fail later with a less obvious read error.
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing %s: %v", filePath, err)
+	}
+
+	parser := &SetupPyParser{}
+	pkgs, err := parser.Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 3 {
+		t.Fatalf("expected 3 packages, got %d", len(pkgs))
+	}
+
+	versions := make(map[string]string, len(pkgs))
+	for _, pkg := range pkgs {
+		versions[pkg.PackageName] = pkg.Version
+	}
+
+	expectations := []struct{ name, version string }{
+		{"requests", "latest"},
+		{"flask", "2.0.1"},
+		{"six", "latest"},
+	}
+	for _, want := range expectations {
+		if got := versions[want.name]; got != want.version {
+			t.Errorf("%s: expected %s, got %s", want.name, want.version, got)
+		}
+	}
+}
+
+// TestSetupPyParser_ParseExtrasRequire verifies that dependencies listed inside
+// an extras_require dict are reported alongside those of install_requires.
+func TestSetupPyParser_ParseExtrasRequire(t *testing.T) {
+	content := `from setuptools import setup
+
+setup(
+    install_requires=['requests>=2.26.0'],
+    extras_require={
+        'dev': ['pytest>=6.0', 'black==22.3.0'],
+    },
+)
+`
+	filePath := filepath.Join(t.TempDir(), "setup.py")
+	// Stop here if the fixture cannot be written, rather than letting Parse
+	// fail later with a less obvious read error.
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing %s: %v", filePath, err)
+	}
+
+	parser := &SetupPyParser{}
+	pkgs, err := parser.Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) < 3 {
+		t.Fatalf("expected at least 3 packages, got %d", len(pkgs))
+	}
+
+	versions := make(map[string]string, len(pkgs))
+	for _, pkg := range pkgs {
+		versions[pkg.PackageName] = pkg.Version
+	}
+
+	expectations := []struct{ name, version string }{
+		{"requests", "latest"},
+		{"pytest", "latest"},
+		{"black", "22.3.0"},
+	}
+	for _, want := range expectations {
+		if got := versions[want.name]; got != want.version {
+			t.Errorf("%s: expected %s, got %s", want.name, want.version, got)
+		}
+	}
+}
+
+// TestSetupPyParser_ParseTestsRequire verifies that a dependency listed under
+// tests_require is reported as a pypi package.
+func TestSetupPyParser_ParseTestsRequire(t *testing.T) {
+	content := "from setuptools import setup\n\nsetup(\n    tests_require=['pytest'],\n)\n"
+	filePath := filepath.Join(t.TempDir(), "setup.py")
+	// Stop here if the fixture cannot be written, rather than letting Parse
+	// fail later with a less obvious read error.
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing %s: %v", filePath, err)
+	}
+
+	parser := &SetupPyParser{}
+	pkgs, err := parser.Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 1 {
+		t.Fatalf("expected 1 package, got %d", len(pkgs))
+	}
+
+	got := pkgs[0]
+	if got.PackageManager != "pypi" || got.PackageName != "pytest" {
+		t.Errorf("got package %s, want pytest", got.PackageName)
+	}
+}
+
+// TestSetupPyParser_ParseNoRequires verifies that a setup() call without any
+// dependency arguments yields no packages and no error.
+func TestSetupPyParser_ParseNoRequires(t *testing.T) {
+	content := "from setuptools import setup\n\nsetup(\n    name='my-package',\n)\n"
+	filePath := filepath.Join(t.TempDir(), "setup.py")
+	// Stop here if the fixture cannot be written, rather than letting Parse
+	// fail later with a less obvious read error.
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing %s: %v", filePath, err)
+	}
+
+	parser := &SetupPyParser{}
+	pkgs, err := parser.Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 0 {
+		t.Fatalf("expected 0 packages, got %d", len(pkgs))
+	}
+}
+
+// TestSetupPyParser_ParseWithDoubleQuotes verifies that double-quoted entries
+// in install_requires are parsed the same way as single-quoted ones.
+func TestSetupPyParser_ParseWithDoubleQuotes(t *testing.T) {
+	content := `from setuptools import setup
+
+setup(
+    install_requires=[
+        "requests>=2.26.0",
+        "flask==2.0.1",
+    ],
+)
+`
+	filePath := filepath.Join(t.TempDir(), "setup.py")
+	// Stop here if the fixture cannot be written, rather than letting Parse
+	// fail later with a less obvious read error.
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing %s: %v", filePath, err)
+	}
+
+	parser := &SetupPyParser{}
+	pkgs, err := parser.Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 2 {
+		t.Fatalf("expected 2 packages, got %d", len(pkgs))
+	}
+
+	versions := make(map[string]string, len(pkgs))
+	for _, pkg := range pkgs {
+		versions[pkg.PackageName] = pkg.Version
+	}
+
+	expectations := []struct{ name, version string }{
+		{"requests", "latest"},
+		{"flask", "2.0.1"},
+	}
+	for _, want := range expectations {
+		if got := versions[want.name]; got != want.version {
+			t.Errorf("%s: expected %s, got %s", want.name, want.version, got)
+		}
+	}
+}
+
+// TestSetupPyParser_Parse_RealFile parses the checked-in setup.py fixture and
+// compares every reported package, version and location with the expected list.
+func TestSetupPyParser_Parse_RealFile(t *testing.T) {
+	const fixture = "../../testdata/setup.py"
+
+	// pkg builds one expected pypi entry located at (line, start, end) in the fixture.
+	pkg := func(name, version string, line, start, end int) models.Package {
+		return models.Package{
+			PackageManager: "pypi",
+			PackageName:    name,
+			Version:        version,
+			FilePath:       fixture,
+			Locations:      []models.Location{{Line: line, StartIndex: start, EndIndex: end}},
+		}
+	}
+
+	got, err := (&SetupPyParser{}).Parse(fixture)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// The tests_require entry (line 19) is listed before the extras_require
+	// entries (lines 14-15), mirroring the order of the original expectation.
+	want := []models.Package{
+		pkg("requests", "latest", 6, 9, 25),
+		pkg("flask", "2.0.1", 7, 9, 21),
+		pkg("six", "latest", 8, 9, 12),
+		pkg("Pillow", "9.0.0", 9, 9, 22),
+		pkg("cryptography", "2.9.2", 10, 9, 28),
+		pkg("pytest", "latest", 19, 9, 15),
+		pkg("pytest", "latest", 14, 13, 24),
+		pkg("black", "22.3.0", 15, 13, 26),
+	}
+
+	testdata.ValidatePackages(t, got, want)
+}
diff --git a/internal/testdata/pyproject.toml b/internal/testdata/pyproject.toml
new file mode 100644
index 0000000..6f74403
--- /dev/null
+++ b/internal/testdata/pyproject.toml
@@ -0,0 +1,19 @@
+[tool.poetry]
+name = "test-app"
+version = "1.0.0"
+
+[tool.poetry.dependencies]
+python = "^3.9"
+requests = "2.28.2"
+flask = "^2.3.0"
+Pillow = "9.0.0"
+cryptography = "2.9.2"
+
+[tool.poetry.dev-dependencies]
+pytest = "^7.4.0"
+
+[project]
+dependencies = [
+ "numpy>=1.20.0",
+ "pandas==2.0.0",
+]
diff --git a/internal/testdata/setup.cfg b/internal/testdata/setup.cfg
new file mode 100644
index 0000000..dc28049
--- /dev/null
+++ b/internal/testdata/setup.cfg
@@ -0,0 +1,19 @@
+[metadata]
+name = my-package
+version = 0.1.0
+
+[options]
+install_requires =
+ requests>=2.26.0
+ flask==2.0.1
+ six
+ Pillow==9.0.0
+ cryptography==2.9.2
+
+setup_requires =
+ setuptools>=42
+
+[options.extras_require]
+dev =
+ pytest>=6.0
+ black==22.3.0
diff --git a/internal/testdata/setup.py b/internal/testdata/setup.py
new file mode 100644
index 0000000..6fa77a1
--- /dev/null
+++ b/internal/testdata/setup.py
@@ -0,0 +1,22 @@
+from setuptools import setup, find_packages
+
+setup(
+ name='my-package',
+ version='0.1.0',
+ install_requires=[
+ 'requests>=2.26.0',
+ 'flask==2.0.1',
+ 'six',
+ 'Pillow==9.0.0',
+ 'cryptography==2.9.2',
+ ],
+ extras_require={
+ 'dev': [
+ 'pytest>=6.0',
+ 'black==22.3.0',
+ ],
+ },
+ tests_require=[
+ 'pytest',
+ ],
+)
diff --git a/pkg/parser/manifest-file-selector.go b/pkg/parser/manifest-file-selector.go
index e7aad0b..668169d 100644
--- a/pkg/parser/manifest-file-selector.go
+++ b/pkg/parser/manifest-file-selector.go
@@ -18,6 +18,9 @@ const (
GradleBuild
GradleVersionCatalog
SbtBuild
+ SetuptoolsSetupCfg
+ SetuptoolsSetupPy
+ PoetryPyproject
)
// selectManifestFile a method to select a manifest file type by its name
@@ -31,9 +34,10 @@ func selectManifestFile(manifest string) Manifest {
}
if manifestFileExtension == ".txt" {
- //check if file name starts with "requirement" or "packages"
+ // check if file name starts with "requirement", "packages", or "constraint"
if strings.HasPrefix(manifestFileName, "requirement") ||
- strings.HasPrefix(manifestFileName, "packages") {
+ strings.HasPrefix(manifestFileName, "packages") ||
+ strings.HasPrefix(manifestFileName, "constraint") {
return PypiRequirements
}
}
@@ -70,5 +74,17 @@ func selectManifestFile(manifest string) Manifest {
return GradleVersionCatalog
}
+ if manifestFileName == "setup.cfg" {
+ return SetuptoolsSetupCfg
+ }
+
+ if manifestFileName == "setup.py" {
+ return SetuptoolsSetupPy
+ }
+
+ if manifestFileName == "pyproject.toml" {
+ return PoetryPyproject
+ }
+
return -1
}
diff --git a/pkg/parser/manifest-file-selector_test.go b/pkg/parser/manifest-file-selector_test.go
index d5e7188..222a59d 100644
--- a/pkg/parser/manifest-file-selector_test.go
+++ b/pkg/parser/manifest-file-selector_test.go
@@ -93,3 +93,102 @@ func TestManifestFileSelector_ExpectSbtCustom(t *testing.T) {
t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
}
}
+
+// TestManifestFileSelector_ExpectPypiRequirementsTxt checks that
+// "requirements.txt" is selected as PypiRequirements.
+func TestManifestFileSelector_ExpectPypiRequirementsTxt(t *testing.T) {
+	const manifest = "requirements.txt"
+	if got, want := selectManifestFile(manifest), PypiRequirements; got != want {
+		t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+	}
+}
+
+// TestManifestFileSelector_ExpectPypiRequirementsDev checks that a suffixed
+// name, "requirements-dev.txt", is selected as PypiRequirements.
+func TestManifestFileSelector_ExpectPypiRequirementsDev(t *testing.T) {
+	const manifest = "requirements-dev.txt"
+	if got, want := selectManifestFile(manifest), PypiRequirements; got != want {
+		t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+	}
+}
+
+// TestManifestFileSelector_ExpectPypiRequirementSingular checks that the
+// singular spelling "requirement.txt" is selected as PypiRequirements.
+func TestManifestFileSelector_ExpectPypiRequirementSingular(t *testing.T) {
+	const manifest = "requirement.txt"
+	if got, want := selectManifestFile(manifest), PypiRequirements; got != want {
+		t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+	}
+}
+
+// TestManifestFileSelector_ExpectPypiRequirementSingularDev checks that
+// "requirement-dev.txt" is selected as PypiRequirements.
+func TestManifestFileSelector_ExpectPypiRequirementSingularDev(t *testing.T) {
+	const manifest = "requirement-dev.txt"
+	if got, want := selectManifestFile(manifest), PypiRequirements; got != want {
+		t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+	}
+}
+
+// TestManifestFileSelector_ExpectPypiRequirementsWithPath checks that a
+// requirements file given with a directory path is selected as PypiRequirements.
+func TestManifestFileSelector_ExpectPypiRequirementsWithPath(t *testing.T) {
+	const manifest = "/some/path/to/requirements-prod.txt"
+	if got, want := selectManifestFile(manifest), PypiRequirements; got != want {
+		t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+	}
+}
+
+// TestManifestFileSelector_ExpectPypiConstraints checks that "constraints.txt"
+// is selected as PypiRequirements.
+func TestManifestFileSelector_ExpectPypiConstraints(t *testing.T) {
+	const manifest = "constraints.txt"
+	if got, want := selectManifestFile(manifest), PypiRequirements; got != want {
+		t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+	}
+}
+
+// TestManifestFileSelector_ExpectPypiConstraintsDev checks that
+// "constraints-dev.txt" is selected as PypiRequirements.
+func TestManifestFileSelector_ExpectPypiConstraintsDev(t *testing.T) {
+	const manifest = "constraints-dev.txt"
+	if got, want := selectManifestFile(manifest), PypiRequirements; got != want {
+		t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+	}
+}
+
+// TestManifestFileSelector_ExpectPypiConstraintsWithPath checks that a
+// constraints file given with a directory path is selected as PypiRequirements.
+func TestManifestFileSelector_ExpectPypiConstraintsWithPath(t *testing.T) {
+	const manifest = "/some/path/to/constraints-prod.txt"
+	if got, want := selectManifestFile(manifest), PypiRequirements; got != want {
+		t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+	}
+}
+
+// TestManifestFileSelector_ExpectSetuptoolsSetupCfg checks that "setup.cfg" is
+// selected as SetuptoolsSetupCfg.
+func TestManifestFileSelector_ExpectSetuptoolsSetupCfg(t *testing.T) {
+	const manifest = "setup.cfg"
+	if got, want := selectManifestFile(manifest), SetuptoolsSetupCfg; got != want {
+		t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+	}
+}
+
+// TestManifestFileSelector_ExpectSetuptoolsSetupPy checks that "setup.py" is
+// selected as SetuptoolsSetupPy.
+func TestManifestFileSelector_ExpectSetuptoolsSetupPy(t *testing.T) {
+	const manifest = "setup.py"
+	if got, want := selectManifestFile(manifest), SetuptoolsSetupPy; got != want {
+		t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+	}
+}
+
+// TestManifestFileSelector_ExpectPoetryPyproject checks that "pyproject.toml"
+// is selected as PoetryPyproject.
+func TestManifestFileSelector_ExpectPoetryPyproject(t *testing.T) {
+	const manifest = "pyproject.toml"
+	if got, want := selectManifestFile(manifest), PoetryPyproject; got != want {
+		t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want)
+	}
+}
diff --git a/pkg/parser/parser_factory.go b/pkg/parser/parser_factory.go
index 28c9e0a..03db997 100644
--- a/pkg/parser/parser_factory.go
+++ b/pkg/parser/parser_factory.go
@@ -6,8 +6,10 @@ import (
"github.com/Checkmarx/manifest-parser/internal/parsers/gradle"
"github.com/Checkmarx/manifest-parser/internal/parsers/maven"
"github.com/Checkmarx/manifest-parser/internal/parsers/npm"
+ "github.com/Checkmarx/manifest-parser/internal/parsers/poetry"
"github.com/Checkmarx/manifest-parser/internal/parsers/pypi"
"github.com/Checkmarx/manifest-parser/internal/parsers/sbt"
+ "github.com/Checkmarx/manifest-parser/internal/parsers/setuptools"
)
func ParsersFactory(manifest string) Parser {
@@ -34,6 +36,12 @@ func ParsersFactory(manifest string) Parser {
return &gradle.VersionCatalogParser{}
case SbtBuild:
return &sbt.SbtParser{}
+ case SetuptoolsSetupCfg:
+ return &setuptools.SetupCfgParser{}
+ case SetuptoolsSetupPy:
+ return &setuptools.SetupPyParser{}
+ case PoetryPyproject:
+ return &poetry.PoetryPyprojectParser{}
default:
return nil
}
diff --git a/test/resources/build.sbt b/test/resources/build.sbt
new file mode 100644
index 0000000..986bb8b
--- /dev/null
+++ b/test/resources/build.sbt
@@ -0,0 +1,11 @@
+name := "demo"
+scalaVersion := "2.13.12"
+
+val akkaVersion = "2.8.5"
+
+libraryDependencies ++= Seq(
+ "org.scala-lang" % "scala-library" % "2.13.12",
+ "com.typesafe.akka" %% "akka-actor" % akkaVersion,
+ "org.scalatest" %% "scalatest" % "3.2.18" % Test,
+ "ch.qos.logback" % "logback-classic" % "1.4.14"
+)
From badc164a422c783faf0b8cf68977b651f3c494d5 Mon Sep 17 00:00:00 2001
From: atishj99 <141334503+cx-atish-jadhav@users.noreply.github.com>
Date: Mon, 8 Jun 2026 14:30:04 +0530
Subject: [PATCH 11/14] gradle and sbt wildcard support
---
internal/parsers/gradle/gradle_parser.go | 25 +++------
internal/parsers/gradle/gradle_parser_test.go | 52 ++++++++++++++++++
internal/parsers/sbt/sbt-parser.go | 6 ++-
internal/parsers/sbt/sbt-parser_test.go | 54 +++++++++++++++++++
4 files changed, 118 insertions(+), 19 deletions(-)
diff --git a/internal/parsers/gradle/gradle_parser.go b/internal/parsers/gradle/gradle_parser.go
index 5ae2abc..421c09f 100644
--- a/internal/parsers/gradle/gradle_parser.go
+++ b/internal/parsers/gradle/gradle_parser.go
@@ -260,8 +260,8 @@ func stripInlineComment(line string) string {
func dependencyStatementComplete(statement string) bool {
kw := configKeywords
patterns := []*regexp.Regexp{
- regexp.MustCompile(`(?i)\b(` + kw + `)\s*['"]([^'"\)]+)['"]`),
- regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*['"]([^'"\)]+)['"]\s*\)`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*['"]([^'"]+)['"]\s*\)`),
regexp.MustCompile(`(?i)\b(` + kw + `)\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]`),
regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]\s*\)`),
regexp.MustCompile(`(?i)group\s*[:=]\s*['"]([^'"]+)['"].*name\s*[:=]\s*['"]([^'"]+)['"].*version\s*[:=]\s*['"]([^'"]+)['"]`),
@@ -282,8 +282,8 @@ func parseDependencyStatement(statement string, variables map[string]string) []m
kw := configKeywords
patterns := []*regexp.Regexp{
- regexp.MustCompile(`(?i)\b(` + kw + `)\s*['"]([^'"\)]+)['"]`),
- regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*['"]([^'"\)]+)['"]\s*\)`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*['"]([^'"]+)['"]`),
+ regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*['"]([^'"]+)['"]\s*\)`),
regexp.MustCompile(`(?i)\b(` + kw + `)\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]`),
regexp.MustCompile(`(?i)\b(` + kw + `)\s*\(\s*group\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*name\s*[:=]\s*['"]([^'"]+)['"]\s*,\s*version\s*[:=]\s*['"]([^'"]+)['"]\s*\)`),
}
@@ -390,20 +390,9 @@ func cleanVersion(version string) string {
if version == "" {
return "latest"
}
- // Remove brackets for ranges, take the lower bound
- if strings.HasPrefix(version, "[") && strings.HasSuffix(version, "]") {
- version = strings.Trim(version, "[]")
- parts := strings.Split(version, ",")
- if len(parts) > 0 {
- version = strings.TrimSpace(parts[0])
- }
- }
- if strings.HasPrefix(version, "(") && strings.HasSuffix(version, ")") {
- version = strings.Trim(version, "()")
- parts := strings.Split(version, ",")
- if len(parts) > 0 {
- version = strings.TrimSpace(parts[0])
- }
+ // Check for any range or wildcard patterns
+ if strings.ContainsAny(version, "[]()^~*><") || strings.Contains(version, "+") {
+ return "latest"
}
// For now, keep classifiers as is
return version
diff --git a/internal/parsers/gradle/gradle_parser_test.go b/internal/parsers/gradle/gradle_parser_test.go
index 71d948e..20a4c1a 100644
--- a/internal/parsers/gradle/gradle_parser_test.go
+++ b/internal/parsers/gradle/gradle_parser_test.go
@@ -189,6 +189,58 @@ dependencies {
},
expectedError: false,
},
+ {
+ name: "gradle with version ranges",
+ content: `dependencies {
+ implementation 'org.springframework:spring-core:[1.0.0,2.0.0)'
+ implementation 'org.junit:junit:(1.0,2.0]'
+}`,
+ expectedPkgs: []models.Package{
+ {
+ PackageManager: "gradle",
+ PackageName: "org.springframework:spring-core",
+ Version: "latest",
+ Locations: []models.Location{
+ {Line: 1},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "org.junit:junit",
+ Version: "latest",
+ Locations: []models.Location{
+ {Line: 2},
+ },
+ },
+ },
+ expectedError: false,
+ },
+ {
+ name: "gradle with prefix wildcards",
+ content: `dependencies {
+ implementation 'org.springframework:spring-core:1.0.+'
+ implementation 'org.junit:junit:4.12.*'
+}`,
+ expectedPkgs: []models.Package{
+ {
+ PackageManager: "gradle",
+ PackageName: "org.springframework:spring-core",
+ Version: "latest",
+ Locations: []models.Location{
+ {Line: 1},
+ },
+ },
+ {
+ PackageManager: "gradle",
+ PackageName: "org.junit:junit",
+ Version: "latest",
+ Locations: []models.Location{
+ {Line: 2},
+ },
+ },
+ },
+ expectedError: false,
+ },
}
for _, tt := range tests {
diff --git a/internal/parsers/sbt/sbt-parser.go b/internal/parsers/sbt/sbt-parser.go
index 0946913..954e729 100644
--- a/internal/parsers/sbt/sbt-parser.go
+++ b/internal/parsers/sbt/sbt-parser.go
@@ -51,6 +51,10 @@ func resolveVersion(version string, vars map[string]string) string {
if version == "" {
return "latest"
}
+ // Check for any range or wildcard patterns
+ if strings.ContainsAny(version, "[]()^~*><") || strings.Contains(version, "+") {
+ return "latest"
+ }
// If it looks like a literal version (starts with digit or contains dots/hyphens typical of versions), return as-is
if len(version) > 0 && (version[0] >= '0' && version[0] <= '9') {
return version
@@ -214,7 +218,7 @@ func (p *SbtParser) Parse(manifestFile string) ([]models.Package, error) {
var version string
if quotedVersion != "" {
- version = quotedVersion
+ version = resolveVersion(quotedVersion, vars)
} else if bareVersion != "" {
version = resolveVersion(bareVersion, vars)
} else {
diff --git a/internal/parsers/sbt/sbt-parser_test.go b/internal/parsers/sbt/sbt-parser_test.go
index 9ce41fd..f0d07ce 100644
--- a/internal/parsers/sbt/sbt-parser_test.go
+++ b/internal/parsers/sbt/sbt-parser_test.go
@@ -417,6 +417,60 @@ func TestResolveVersion(t *testing.T) {
}
}
+// TestParseWithVersionRanges verifies that interval-style version ranges in
+// build.sbt ("[1.0.0,2.0.0)", "(1.0,2.0]") are reported as "latest".
+func TestParseWithVersionRanges(t *testing.T) {
+	content := `libraryDependencies ++= Seq(
+  "org.springframework" % "spring-core" % "[1.0.0,2.0.0)",
+  "org.junit" % "junit" % "(1.0,2.0]"
+)
+`
+	filePath := filepath.Join(t.TempDir(), "build.sbt")
+	// Stop here if the fixture cannot be written, rather than letting Parse
+	// fail later with a less obvious read error.
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing %s: %v", filePath, err)
+	}
+
+	parser := &SbtParser{}
+	pkgs, err := parser.Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 2 {
+		t.Fatalf("expected 2 packages, got %d", len(pkgs))
+	}
+
+	for _, pkg := range pkgs {
+		if pkg.Version != "latest" {
+			t.Errorf("expected version 'latest' for range, got %q", pkg.Version)
+		}
+	}
+}
+
+// TestParseWithPrefixWildcards verifies that prefix wildcard versions in
+// build.sbt ("1.0.+", "4.12.*") are reported as "latest".
+func TestParseWithPrefixWildcards(t *testing.T) {
+	content := `libraryDependencies ++= Seq(
+  "org.springframework" % "spring-core" % "1.0.+",
+  "org.junit" % "junit" % "4.12.*"
+)
+`
+	filePath := filepath.Join(t.TempDir(), "build.sbt")
+	// Stop here if the fixture cannot be written, rather than letting Parse
+	// fail later with a less obvious read error.
+	if err := os.WriteFile(filePath, []byte(content), 0644); err != nil {
+		t.Fatalf("writing %s: %v", filePath, err)
+	}
+
+	parser := &SbtParser{}
+	pkgs, err := parser.Parse(filePath)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(pkgs) != 2 {
+		t.Fatalf("expected 2 packages, got %d", len(pkgs))
+	}
+
+	for _, pkg := range pkgs {
+		if pkg.Version != "latest" {
+			t.Errorf("expected version 'latest' for wildcard, got %q", pkg.Version)
+		}
+	}
+}
+
func TestStripComments(t *testing.T) {
tests := []struct {
name string
From aafaf4cbca9d1cc3ab37d94d8caa9326cdb4dc58 Mon Sep 17 00:00:00 2001
From: atishj99 <141334503+cx-atish-jadhav@users.noreply.github.com>
Date: Mon, 8 Jun 2026 15:02:06 +0530
Subject: [PATCH 12/14] adding sha for checkout actions
---
.github/workflows/release.yml | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 299461f..372757b 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -16,9 +16,11 @@ on:
jobs:
tag-and-release:
runs-on: ubuntu-latest
+ permissions:
+ contents: write
steps:
- name: Checkout repository
- uses: actions/checkout@v4
+ uses: actions/checkout@a5ac7e51b41094c7bf3b3610b3f7ca9a3f1aead5 # v4
with:
fetch-depth: 0 # need full history for tags
@@ -59,7 +61,7 @@ jobs:
git push origin "${{ env.new_version }}"
- name: Create GitHub Release
- uses: actions/create-release@v1
+ uses: actions/create-release@e1c7b3e6cb9a8b5a88d8b756ade9127e60a26433
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
From 6b5ebcc42e9faae91296ff8f541ed18c37615acf Mon Sep 17 00:00:00 2001
From: atishj99 <141334503+cx-atish-jadhav@users.noreply.github.com>
Date: Mon, 8 Jun 2026 15:15:02 +0530
Subject: [PATCH 13/14] Using correct sha for checkout actions in release.yml
---
.github/workflows/release.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 372757b..da79c78 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -20,7 +20,7 @@ jobs:
contents: write
steps:
- name: Checkout repository
- uses: actions/checkout@a5ac7e51b41094c7bf3b3610b3f7ca9a3f1aead5 # v4
+ uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
fetch-depth: 0 # need full history for tags
@@ -61,7 +61,7 @@ jobs:
git push origin "${{ env.new_version }}"
- name: Create GitHub Release
- uses: actions/create-release@e1c7b3e6cb9a8b5a88d8b756ade9127e60a26433
+ uses: actions/create-release@0cb9c9b65d5d1901c1f53e5e66eaf4afd303e70e # v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
From a264fda49be3920c87dbed1b20491181e86a103a Mon Sep 17 00:00:00 2001
From: atishj99 <141334503+cx-atish-jadhav@users.noreply.github.com>
Date: Thu, 11 Jun 2026 15:29:22 +0530
Subject: [PATCH 14/14] log removal and minor fixes
---
.../parsers/poetry/poetry-pyproject-parser.go | 3 +-
.../parsers/setuptools/setup_cfg_parser.go | 10 -----
.../parsers/setuptools/setup_py_parser.go | 39 -------------------
3 files changed, 2 insertions(+), 50 deletions(-)
diff --git a/internal/parsers/poetry/poetry-pyproject-parser.go b/internal/parsers/poetry/poetry-pyproject-parser.go
index 96458e9..8032363 100644
--- a/internal/parsers/poetry/poetry-pyproject-parser.go
+++ b/internal/parsers/poetry/poetry-pyproject-parser.go
@@ -115,6 +115,7 @@ func parseLockFile(manifestDir string) map[string]string {
if strings.HasPrefix(trimmed, "name = ") {
currentPackageName = strings.TrimSpace(strings.TrimPrefix(trimmed, "name = "))
currentPackageName = strings.Trim(currentPackageName, "\"")
+ currentPackageName = strings.ToLower(currentPackageName)
continue
}
@@ -140,7 +141,7 @@ func resolveVersionWithLock(pkgName, version string, lockVersions map[string]str
return strings.TrimSpace(version[2:])
}
- if lockVersion, found := lockVersions[pkgName]; found {
+ if lockVersion, found := lockVersions[strings.ToLower(pkgName)]; found {
return lockVersion
}
diff --git a/internal/parsers/setuptools/setup_cfg_parser.go b/internal/parsers/setuptools/setup_cfg_parser.go
index 2e2f08c..737d5a8 100644
--- a/internal/parsers/setuptools/setup_cfg_parser.go
+++ b/internal/parsers/setuptools/setup_cfg_parser.go
@@ -2,7 +2,6 @@ package setuptools
import (
"bufio"
- "log"
"os"
"regexp"
"strings"
@@ -61,13 +60,10 @@ func computeIndices(raw, pkgName string) (int, int) {
func (p *SetupCfgParser) Parse(manifestFile string) ([]models.Package, error) {
file, err := os.Open(manifestFile)
if err != nil {
- log.Printf("Error: Failed to open %s: %v", manifestFile, err)
return nil, err
}
defer file.Close()
- log.Printf("Debug: Parsing setup.cfg file: %s", manifestFile)
-
var packages []models.Package
scanner := bufio.NewScanner(file)
lineNum := 0
@@ -90,7 +86,6 @@ func (p *SetupCfgParser) Parse(manifestFile string) ([]models.Package, error) {
if strings.HasPrefix(line, "[") && strings.HasSuffix(line, "]") {
currentSection = line[1 : len(line)-1]
currentKey = ""
- log.Printf("Debug: Found section [%s] at line %d", currentSection, lineNum)
lineNum++
continue
}
@@ -114,7 +109,6 @@ func (p *SetupCfgParser) Parse(manifestFile string) ([]models.Package, error) {
if ok {
version := extractVersion(depLine)
startCol, endCol := computeIndices(raw, pkgName)
- log.Printf("Debug: Found dependency %s@%s at line %d in section [%s]", pkgName, version, lineNum, currentSection)
packages = append(packages, models.Package{
PackageManager: "pypi",
PackageName: pkgName,
@@ -126,8 +120,6 @@ func (p *SetupCfgParser) Parse(manifestFile string) ([]models.Package, error) {
EndIndex: endCol,
}},
})
- } else {
- log.Printf("Warning: Could not parse package name from line %d: %s", lineNum, depLine)
}
}
lineNum++
@@ -174,10 +166,8 @@ func (p *SetupCfgParser) Parse(manifestFile string) ([]models.Package, error) {
}
if err := scanner.Err(); err != nil {
- log.Printf("Error: Scanner error while reading %s: %v", manifestFile, err)
return nil, err
}
- log.Printf("Debug: Successfully parsed %s, found %d dependencies", manifestFile, len(packages))
return packages, nil
}
diff --git a/internal/parsers/setuptools/setup_py_parser.go b/internal/parsers/setuptools/setup_py_parser.go
index 5da630c..32ba437 100644
--- a/internal/parsers/setuptools/setup_py_parser.go
+++ b/internal/parsers/setuptools/setup_py_parser.go
@@ -1,7 +1,6 @@
package setuptools
import (
- "log"
"os"
"regexp"
"strings"
@@ -49,7 +48,6 @@ func extractPackageNamePy(line string, re *regexp.Regexp) (string, bool) {
func findPositionInFile(fullText string, depString string, searchStartPos int) (lineNum, startIndex, endIndex int) {
searchPos := strings.Index(fullText[searchStartPos:], depString)
if searchPos == -1 {
- log.Printf("Warning: Could not locate '%s' in file after position %d", depString, searchStartPos)
return 0, 0, 0
}
@@ -101,7 +99,6 @@ func extractDepsFromListContent(content string, fullText string, searchStartPos
afterQuote++
}
if afterQuote < len(content) && content[afterQuote] == ':' {
- log.Printf("Skipping dict key: %s", dep)
continue
}
}
@@ -126,7 +123,6 @@ func extractDepsFromListContent(content string, fullText string, searchStartPos
afterQuote++
}
if afterQuote < len(content) && content[afterQuote] == ':' {
- log.Printf("Skipping dict key: %s", dep)
continue
}
}
@@ -149,7 +145,6 @@ func extractDepsFromListContent(content string, fullText string, searchStartPos
pkgName, ok := extractPackageNamePy(depLine, pkgNameRe)
if !ok {
- log.Printf("Warning: Could not extract package name from: %s", depLine)
continue
}
@@ -241,70 +236,51 @@ func extractDependencies(setupText string, key string, fullText string, searchSt
keyPattern := key + "="
keyIndex := strings.Index(setupText, keyPattern)
if keyIndex == -1 {
- log.Printf("Debug: Key '%s' not found in setup() call", key)
return deps
}
- log.Printf("Debug: Found %s at position %d", key, keyIndex)
-
startPos := keyIndex + len(keyPattern)
for startPos < len(setupText) && (setupText[startPos] == ' ' || setupText[startPos] == '\t') {
startPos++
}
if startPos >= len(setupText) {
- log.Printf("Warning: No bracket found after %s", key)
return deps
}
content := extractListContent(setupText, startPos)
if content == "" {
- log.Printf("Warning: Could not extract list content for %s", key)
return deps
}
- log.Printf("Debug: Extracted %d characters from %s content", len(content), key)
-
deps = extractDepsFromListContent(content, fullText, searchStartPos)
- log.Printf("Debug: Found %d dependencies in %s", len(deps), key)
return deps
}
func (p *SetupPyParser) Parse(manifestFile string) ([]models.Package, error) {
data, err := os.ReadFile(manifestFile)
if err != nil {
- log.Printf("Error: Failed to read %s: %v", manifestFile, err)
return nil, err
}
- log.Printf("Debug: Parsing setup.py file: %s (%d bytes)", manifestFile, len(data))
-
text := string(data)
var packages []models.Package
setupStart := strings.Index(text, "setup(")
if setupStart == -1 {
- log.Printf("Warning: setup() call not found in %s", manifestFile)
setupStart = 0
} else {
- log.Printf("Debug: Found setup() call at position %d", setupStart)
setupStart += len("setup")
}
setupContent := extractListContent(text, setupStart)
if setupContent == "" && setupStart > 0 {
- log.Printf("Warning: Could not extract setup() content from %s", manifestFile)
setupContent = text[setupStart:]
- } else if setupContent != "" {
- log.Printf("Debug: Extracted setup() content, %d bytes", len(setupContent))
}
for _, key := range []string{"install_requires", "setup_requires", "tests_require"} {
keyPosInText := strings.Index(text, key+"=")
deps := extractDependencies(setupContent, key, text, keyPosInText)
- if len(deps) == 0 {
- log.Printf("Debug: No %s found in setup.py", key)
- }
for _, dep := range deps {
packages = append(packages, models.Package{
PackageManager: "pypi",
@@ -317,13 +293,11 @@ func (p *SetupPyParser) Parse(manifestFile string) ([]models.Package, error) {
EndIndex: dep.endIndex,
}},
})
- log.Printf("Debug: Found dependency %s@%s at line %d in %s", dep.name, dep.version, dep.lineNum, key)
}
}
extrasStart := strings.Index(setupContent, "extras_require")
if extrasStart != -1 {
- log.Printf("Debug: Found extras_require at position %d", extrasStart)
eqIndex := strings.Index(setupContent[extrasStart:], "=")
if eqIndex != -1 {
dictStartPos := extrasStart + eqIndex + 1
@@ -333,10 +307,8 @@ func (p *SetupPyParser) Parse(manifestFile string) ([]models.Package, error) {
if dictStartPos < len(setupContent) {
dictContent := extractListContent(setupContent, dictStartPos)
if dictContent != "" {
- log.Printf("Debug: Extracted %d characters from extras_require", len(dictContent))
extrasStartInText := strings.Index(text, "extras_require")
deps := extractDepsFromListContent(dictContent, text, extrasStartInText)
- log.Printf("Debug: Found %d dependencies in extras_require", len(deps))
for _, dep := range deps {
packages = append(packages, models.Package{
PackageManager: "pypi",
@@ -349,21 +321,10 @@ func (p *SetupPyParser) Parse(manifestFile string) ([]models.Package, error) {
EndIndex: dep.endIndex,
}},
})
- log.Printf("Debug: Found dependency %s@%s at line %d in extras_require", dep.name, dep.version, dep.lineNum)
}
- } else {
- log.Printf("Warning: Could not extract dict content for extras_require")
}
- } else {
- log.Printf("Warning: No opening bracket found for extras_require")
}
- } else {
- log.Printf("Warning: No equals sign found after extras_require")
}
- } else {
- log.Printf("Debug: extras_require not found in setup() call")
}
-
- log.Printf("Debug: Successfully parsed %s, found %d dependencies", manifestFile, len(packages))
return packages, nil
}