diff --git a/internal/parsers/pypi/pypi-parser.go b/internal/parsers/pypi/pypi-parser.go index f984480..4e421ea 100644 --- a/internal/parsers/pypi/pypi-parser.go +++ b/internal/parsers/pypi/pypi-parser.go @@ -10,9 +10,114 @@ import ( "github.com/Checkmarx/manifest-parser/pkg/parser/models" ) -// PypiParser implements parsing of requirements.txt +// PypiParser implements parsing of requirements.txt and related Python dependency files. +// Supports formats generated by pip freeze, pip-compile, pip-tools, uv export, and Poetry export. type PypiParser struct{} +// logicalLine represents a single dependency entry that may span multiple physical lines +// when line continuations (\) are used. +type logicalLine struct { + content string // joined and hash-stripped content + firstLine int // 0-indexed line number of the first physical line + rawFirst string // raw text of the first physical line (for index computation) +} + +// pipOptionPrefixes lists prefixes of pip CLI option lines that should be skipped. +var pipOptionPrefixes = []string{ + "-i ", "--index-url", "--extra-index-url", + "-r ", "--requirement", + "-c ", "--constraint", + "-e ", "--editable", + "-f ", "--find-links", + "--no-binary", "--only-binary", + "--pre", "--trusted-host", + "--hash=", +} + +// isPipOptionLine returns true if the trimmed line is a pip CLI option rather than a package spec. +func isPipOptionLine(trimmed string) bool { + for _, prefix := range pipOptionPrefixes { + if strings.HasPrefix(trimmed, prefix) { + return true + } + } + return false +} + +// stripHashOptions removes --hash= tokens from a line. +func stripHashOptions(line string) string { + tokens := strings.Fields(line) + var filtered []string + for _, tok := range tokens { + if !strings.HasPrefix(tok, "--hash=") { + filtered = append(filtered, tok) + } + } + return strings.Join(filtered, " ") +} + +// preprocessLines joins physical lines connected by trailing backslashes into logical lines, +// and strips --hash= options from the result. +func preprocessLines(lines []string) []logicalLine { + var result []logicalLine + var accumulator []string + firstLine := -1 + rawFirst := "" + + for i, raw := range lines { + trimmed := strings.TrimSpace(raw) + + if firstLine == -1 { + firstLine = i + rawFirst = raw + } + + if strings.HasSuffix(trimmed, "\\") { + // Strip the trailing backslash and accumulate + trimmed = strings.TrimSuffix(trimmed, "\\") + trimmed = strings.TrimSpace(trimmed) + if trimmed != "" { + accumulator = append(accumulator, trimmed) + } + continue + } + + // Line does not end with \, so this completes the logical line + if trimmed != "" { + accumulator = append(accumulator, trimmed) + } + + joined := strings.Join(accumulator, " ") + joined = stripHashOptions(joined) + joined = strings.TrimSpace(joined) + + result = append(result, logicalLine{ + content: joined, + firstLine: firstLine, + rawFirst: rawFirst, + }) + + // Reset for next logical line + accumulator = nil + firstLine = -1 + rawFirst = "" + } + + // Handle any remaining accumulated content (file ended with \) + if len(accumulator) > 0 { + joined := strings.Join(accumulator, " ") + joined = stripHashOptions(joined) + joined = strings.TrimSpace(joined) + result = append(result, logicalLine{ + content: joined, + firstLine: firstLine, + rawFirst: rawFirst, + }) + } + + return result +} + func extractPackageName(line string, re *regexp.Regexp, lineNum int, manifestFile string) (string, bool) { if match := re.FindStringSubmatch(line); match != nil { return match[1], true @@ -24,6 +129,13 @@ func extractPackageName(line string, re *regexp.Regexp, lineNum int, manifestFil func extractVersion(line string) string { var version string switch { + case strings.Contains(line, "==="): + parts := strings.SplitN(line, "===", 2) + if len(parts) == 2 { + version = strings.TrimSpace(parts[1]) + } else { + version = "latest" + } case strings.Contains(line, "=="): parts := strings.SplitN(line, "==", 2) if len(parts) == 2 { @@ -40,6 +152,52 @@ func extractVersion(line string) string { return version } +// vcsSchemes lists VCS prefixes used in pip requirements. +var vcsSchemes = []string{"git+", "hg+", "svn+", "bzr+"} + +// isVCSRequirement returns true if the line is a VCS-based requirement. +func isVCSRequirement(line string) bool { + for _, scheme := range vcsSchemes { + if strings.HasPrefix(line, scheme) { + return true + } + } + return false +} + +// extractVCSPackageName extracts the package name from a VCS requirement line +// using the #egg= fragment. Returns empty string if not found. +func extractVCSPackageName(line string) string { + if idx := strings.Index(line, "#egg="); idx >= 0 { + egg := line[idx+5:] + // egg name may be followed by & or whitespace + if ampIdx := strings.IndexAny(egg, "& \t"); ampIdx >= 0 { + egg = egg[:ampIdx] + } + return strings.TrimSpace(egg) + } + return "" +} + +// isURLRequirement returns true if the line contains a PEP 508 URL requirement (pkg @ URL). +func isURLRequirement(line string) bool { + return strings.Contains(line, " @ ") +} + +// extractURLPackageName extracts the package name from a URL requirement (pkg @ https://...). +func extractURLPackageName(line string) string { + parts := strings.SplitN(line, " @ ", 2) + if len(parts) == 2 { + name := strings.TrimSpace(parts[0]) + // Strip extras like pkg[extra] → pkg + if bracketIdx := strings.Index(name, "["); bracketIdx >= 0 { + name = name[:bracketIdx] + } + return name + } + return "" +} + func computeIndices(raw, pkgName string) (int, int) { // Find the start index of the package name startIdx := strings.Index(raw, pkgName) @@ -74,19 +232,55 @@ func (p *PypiParser) Parse(manifestFile string) ([]models.Package, error) { } defer file.Close() - var packages []models.Package + // Read all lines into a slice + var lines []string scanner := bufio.NewScanner(file) - lineNum := 0 + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + if err := scanner.Err(); err != nil { + return nil, err + } + // Preprocess: join continuation lines and strip hash options + logicalLines := preprocessLines(lines) + + var packages []models.Package re := regexp.MustCompile(`^([a-zA-Z0-9_\-\.]+)(?:\[.*\])?(?:[>==4.2,<6.0\nmylib===1.0.dev5\n-r other-requirements.txt\n--index-url https://pypi.org/simple\n" + tmpDir := t.TempDir() + filePath := filepath.Join(tmpDir, "requirements.txt") + os.WriteFile(filePath, []byte(content), 0644) + + parser := &PypiParser{} + pkgs, err := parser.Parse(filePath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(pkgs) != 5 { + t.Fatalf("expected 5 packages, got %d", len(pkgs)) + } + + // flask==3.1.0 + if pkgs[0].PackageName != "flask" || pkgs[0].Version != "3.1.0" { + t.Errorf("pkg 0: got %q==%q, want flask==3.1.0", pkgs[0].PackageName, pkgs[0].Version) + } + // requests @ URL + if pkgs[1].PackageName != "requests" || pkgs[1].Version != "latest" { + t.Errorf("pkg 1: got %q==%q, want requests==latest", pkgs[1].PackageName, pkgs[1].Version) + } + // git+...#egg=custom-pkg + if pkgs[2].PackageName != "custom-pkg" || pkgs[2].Version != "latest" { + t.Errorf("pkg 2: got %q==%q, want custom-pkg==latest", pkgs[2].PackageName, pkgs[2].Version) + } + // django>=3.2,<4.0 + if pkgs[3].PackageName != "django" || pkgs[3].Version != "latest" { + t.Errorf("pkg 3: got %q==%q, want django==latest", pkgs[3].PackageName, pkgs[3].Version) + } + // mylib===1.0.dev5 + if pkgs[4].PackageName != "mylib" || pkgs[4].Version != "1.0.dev5" { + t.Errorf("pkg 4: got %q==%q, want mylib==1.0.dev5", pkgs[4].PackageName, pkgs[4].Version) + } +} diff --git a/pkg/parser/manifest-file-selector.go b/pkg/parser/manifest-file-selector.go index 2710f99..98e5dab 100644 --- a/pkg/parser/manifest-file-selector.go +++ b/pkg/parser/manifest-file-selector.go @@ -28,9 +28,10 @@ func selectManifestFile(manifest string) Manifest { } if manifestFileExtension == ".txt" { - //check if file name starts with "requirement" or "packages" + //check if file name starts with "requirement", "packages", or "constraint" if strings.HasPrefix(manifestFileName, "requirement") || - strings.HasPrefix(manifestFileName, "packages") { + strings.HasPrefix(manifestFileName, "packages") || + strings.HasPrefix(manifestFileName, "constraint") { return PypiRequirements } } diff --git a/pkg/parser/manifest-file-selector_test.go b/pkg/parser/manifest-file-selector_test.go index 8d4d91c..fbdf1aa 100644 --- a/pkg/parser/manifest-file-selector_test.go +++ b/pkg/parser/manifest-file-selector_test.go @@ -66,3 +66,75 @@ func TestManifestFileSelector_ExpectGoMod(t *testing.T) { t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) } } + +func TestManifestFileSelector_ExpectPypiRequirementsTxt(t *testing.T) { + manifest := "requirements.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiRequirementsDev(t *testing.T) { + manifest := "requirements-dev.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiRequirementSingular(t *testing.T) { + manifest := "requirement.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiRequirementSingularDev(t *testing.T) { + manifest := "requirement-dev.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiRequirementsWithPath(t *testing.T) { + manifest := "/some/path/to/requirements-prod.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiConstraints(t *testing.T) { + manifest := "constraints.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiConstraintsDev(t *testing.T) { + manifest := "constraints-dev.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} + +func TestManifestFileSelector_ExpectPypiConstraintsWithPath(t *testing.T) { + manifest := "/some/path/to/constraints-prod.txt" + got := selectManifestFile(manifest) + want := PypiRequirements + if got != want { + t.Errorf("selectManifestFile(%q) = %v; want %v", manifest, got, want) + } +} diff --git a/test/resources/requirements-pip-compile.txt b/test/resources/requirements-pip-compile.txt new file mode 100644 index 0000000..4a12b22 --- /dev/null +++ b/test/resources/requirements-pip-compile.txt @@ -0,0 +1,14 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile requirements.in +# +asgiref==3.8.1 + # via django +django==5.2.13 + # via -r requirements.in +sqlparse==0.5.5 + # via django +tzdata==2025.3 + # via django diff --git a/test/resources/requirements-pip-freeze.txt b/test/resources/requirements-pip-freeze.txt new file mode 100644 index 0000000..77210e9 --- /dev/null +++ b/test/resources/requirements-pip-freeze.txt @@ -0,0 +1,4 @@ +asgiref==3.8.1 +Django==5.2.13 +sqlparse==0.5.5 +tzdata==2025.3 diff --git a/test/resources/requirements-uv-export.txt b/test/resources/requirements-uv-export.txt new file mode 100644 index 0000000..281b691 --- /dev/null +++ b/test/resources/requirements-uv-export.txt @@ -0,0 +1,33 @@ +# This file was autogenerated by uv via the following command: +# uv export --no-dev --output-file requirements.txt +asgiref==3.8.1 \ + --hash=sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47 \ + --hash=sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590 + # via + # django + # sample-app +django==5.2.13 \ + --hash=sha256:a5cc92645b8eb50e38cdd2f9e6a12db171c61e3e6172a1a51b85e8ebc2291b42 \ + --hash=sha256:b5bb1d13cfe3b22e8a31d7a0bae2777a9c019a81d59ef4f72c8581f0d3e35f0e + # via sample-app +pycryptodome==3.21.0 \ + --hash=sha256:12ce0e6d32c4a63433cf26e9f5be9fd3a1c2cbe2bce1c3a834e3b5a43e8e82e0 \ + --hash=sha256:4d2cd4a5c4b939f2b5e2f8611a8b5c7f8c5a2de1f75c3e7c5e1c8f5a3c2b1e0a \ + --hash=sha256:7e3c5c2f1a4b8d9e0f1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a + # via sample-app +sqlparse==0.5.5 \ + --hash=sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca \ + --hash=sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946e4a7cf1a8b6e26cdc4b4 + # via + # django + # sample-app +typing-extensions==4.12.2 \ + --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ + --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 + # via + # asgiref + # sample-app +tzdata==2025.3 ; sys_platform == 'win32' \ + --hash=sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1 \ + --hash=sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7 + # via django