Skip to content
7 changes: 6 additions & 1 deletion cli/docs/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ const (
RunNative = "run-native"

// Unique git flags
gitPrefix = "git-"
InputFile = "input-file"
ScmType = "scm-type"
ScmApiUrl = "scm-api-url"
Expand All @@ -174,6 +175,8 @@ const (
RepoName = "repo-name"
Months = "months"
DetailedSummary = "detailed-summary"
CacheValidity = "cache-validity"
GitThreads = gitPrefix + Threads
)

// Mapping between security commands (key) and their flags (key).
Expand Down Expand Up @@ -225,7 +228,7 @@ var commandFlags = map[string][]string{
CurationOutput, WorkingDirs, Threads, RequirementsFile, InsecureTls, useWrapperAudit, UseIncludedBuilds, SolutionPath, DockerImageName, IncludeCachedPackages, LegacyPeerDeps, RunNative,
},
GitCountContributors: {
InputFile, ScmType, ScmApiUrl, Token, Owner, RepoName, Months, DetailedSummary, InsecureTls,
InputFile, ScmType, ScmApiUrl, Token, Owner, RepoName, Months, DetailedSummary, InsecureTls, GitThreads, CacheValidity,
},
SastServer: {
Port,
Expand Down Expand Up @@ -373,6 +376,8 @@ var flagsMap = map[string]components.Flag{
RepoName: components.NewStringFlag(RepoName, "List of semicolon-separated(;) repositories names to analyze, If not provided all repositories related to the provided owner will be analyzed."),
Months: components.NewStringFlag(Months, "Number of months to analyze.", components.WithIntDefaultValue(contributors.DefaultContContributorsMonths)),
DetailedSummary: components.NewBoolFlag(DetailedSummary, "Set to true to get a contributors detailed summary."),
CacheValidity: components.NewStringFlag(CacheValidity, "Number of days a cached repository result remains valid. Set to 0 to ignore cache and force a full re-scan.", components.WithIntDefaultValue(contributors.DefaultCacheValidity)),
GitThreads: components.NewStringFlag(Threads, "Number of parallel threads for scanning repositories.", components.WithIntDefaultValue(contributors.DefaultThreads)),
}

func GetCommandFlags(cmdKey string) []components.Flag {
Expand Down
18 changes: 18 additions & 0 deletions cli/gitcommands.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,24 @@ func GetCountContributorsParams(c *components.Context) (*contributors.CountContr
}
// DetailedSummery
params.DetailedSummery = c.GetBoolFlagValue(flags.DetailedSummary)
// CacheValidity
cacheValidity, err := c.WithDefaultIntFlagValue(flags.CacheValidity, contributors.DefaultCacheValidity)
if err != nil {
return nil, err
}
if cacheValidity < 0 {
return nil, errorutils.CheckErrorf("Invalid value for '--%s=%d'. Must be 0 (skip cache) or a positive number of days.", flags.CacheValidity, cacheValidity)
}
params.CacheValidity = cacheValidity
// Threads
threads, err := c.GetIntFlagValue(flags.Threads)
if err != nil {
return nil, err
}
if threads <= 0 {
return nil, errorutils.CheckErrorf("Invalid value for '--%s=%d'. If set, should be a positive number.", flags.Threads, threads)
}
params.Threads = threads
return &params, nil
}

Expand Down
140 changes: 140 additions & 0 deletions commands/git/contributors/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package contributors

import (
	"crypto/sha256"
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"time"

	secutils "github.com/jfrog/jfrog-cli-security/utils"
	"github.com/jfrog/jfrog-client-go/utils/log"
)

// cacheContributorEntry serializes one entry of the uniqueContributors map.
// Go's encoding/json does not support struct keys in maps, so we flatten the
// map into a slice of key/value pairs before persisting and rebuild it on read.
type cacheContributorEntry struct {
	// Key is the map key (the contributor/repo identity pair).
	Key BasicContributor `json:"key"`
	// Value is the full contributor record stored under that key.
	Value Contributor `json:"value"`
}

const (
	// DefaultCacheValidity is the default number of days a cached repository
	// scan result remains valid before a full re-scan is forced.
	DefaultCacheValidity = 3 // days
)

// repoCacheFile holds the data persisted to disk for one fully-scanned repository.
// It is the JSON on-disk form of a repoScanResult, plus the metadata (scan time,
// analyzed window) needed to decide whether the entry is still valid.
type repoCacheFile struct {
	Repo string `json:"repo"`
	// ScannedAt is the scan completion time in RFC 3339 format (UTC); used for expiry checks.
	ScannedAt string `json:"scanned_at"`
	// Months records the analysis window the result was produced with.
	Months int `json:"last_months_analyzed"`
	UniqueContributors []cacheContributorEntry `json:"unique_contributors"`
	DetailedContributors map[string]map[string]ContributorDetailedSummary `json:"detailed_contributors,omitempty"`
	DetailedRepos map[string]map[string]RepositoryDetailedSummary `json:"detailed_repos,omitempty"`
	TotalCommits int `json:"total_commits"`
	// Skipped marks a repository that was intentionally not scanned.
	Skipped bool `json:"skipped,omitempty"`
}

// getRepoCacheDir returns (and creates) the cache directory for one specific
// scm-type / scm-api-url / owner / months configuration. The directory name is
// a SHA-256 hash of that tuple, so different configurations can never collide.
func getRepoCacheDir(params BasicGitServerParams, months int) (string, error) {
	base, err := secutils.GetContributorsCacheDir()
	if err != nil {
		return "", fmt.Errorf("failed to determine contributors cache directory: %w", err)
	}
	// Hash the full configuration tuple so each one maps to its own subdirectory.
	configKey := fmt.Sprintf("%d|%s|%s|%d", params.ScmType, params.ScmApiUrl, params.Owner, months)
	cacheDir := filepath.Join(base, fmt.Sprintf("%x", sha256.Sum256([]byte(configKey))))
	if mkErr := os.MkdirAll(cacheDir, 0700); mkErr != nil {
		return "", fmt.Errorf("failed to create cache directory %s: %w", cacheDir, mkErr)
	}
	return cacheDir, nil
}

// readRepoCache reads the cache entry for repo from cacheDir and returns it as a
// repoScanResult, or nil when the cache cannot be used: caching is disabled
// (maxAge <= 0), the file is missing or unparsable, or the entry is older than maxAge.
//
// Fix: the original treated every os.ReadFile error as "file doesn't exist", which
// silently hid real failures (e.g. permission errors). Only the not-exist case is
// now silent; any other read error is logged as a warning.
func readRepoCache(cacheDir, repo string, maxAge time.Duration) *repoScanResult {
	if maxAge <= 0 {
		// maxAge == 0 means "skip cache": short-circuit before any file I/O.
		return nil
	}
	path := filepath.Join(cacheDir, sanitizeFilename(repo)+".json")
	data, err := os.ReadFile(path)
	if err != nil {
		// A missing file is the normal cold-cache case and not worth logging;
		// anything else (permissions, I/O) is unexpected and should be surfaced.
		if !errors.Is(err, os.ErrNotExist) {
			log.Warn(fmt.Sprintf("Contributors cache: failed to read cache file %s: %v", path, err))
		}
		return nil
	}
	var entry repoCacheFile
	if err = json.Unmarshal(data, &entry); err != nil {
		log.Warn(fmt.Sprintf("Contributors cache: failed to parse cache file %s: %v", path, err))
		return nil
	}
	scannedAt, err := time.Parse(time.RFC3339, entry.ScannedAt)
	if err != nil {
		log.Warn(fmt.Sprintf("Contributors cache: invalid scanned_at in %s: %v", path, err))
		return nil
	}
	if time.Since(scannedAt) > maxAge {
		log.Debug(fmt.Sprintf("Contributors cache: entry for %q expired (scanned %s ago)", repo, time.Since(scannedAt).Round(time.Second)))
		return nil
	}
	log.Debug(fmt.Sprintf("Contributors cache: using cached data for repo %q (scanned at %s)", repo, entry.ScannedAt))
	// Rebuild the struct-keyed map from the flattened []cacheContributorEntry form.
	uniqueContributors := make(map[BasicContributor]Contributor, len(entry.UniqueContributors))
	for _, e := range entry.UniqueContributors {
		uniqueContributors[e.Key] = e.Value
	}
	return &repoScanResult{
		repo:                 entry.Repo,
		uniqueContributors:   uniqueContributors,
		detailedContributors: entry.DetailedContributors,
		detailedRepos:        entry.DetailedRepos,
		totalCommits:         entry.TotalCommits,
		skipped:              entry.Skipped,
	}
}

// writeRepoCache persists the scan result for repo under cacheDir. The write is
// atomic (temp file followed by rename) so a concurrent reader never observes a
// half-written entry. Failures are logged as warnings, never returned: the cache
// is a best-effort optimization and must not fail the scan itself.
func writeRepoCache(cacheDir, repo string, result repoScanResult, months int) {
	// Flatten the struct-keyed map into a JSON-friendly slice of pairs.
	entries := make([]cacheContributorEntry, 0, len(result.uniqueContributors))
	for key, val := range result.uniqueContributors {
		entries = append(entries, cacheContributorEntry{Key: key, Value: val})
	}
	payload, err := json.Marshal(repoCacheFile{
		Repo:                 result.repo,
		ScannedAt:            time.Now().UTC().Format(time.RFC3339),
		Months:               months,
		UniqueContributors:   entries,
		DetailedContributors: result.detailedContributors,
		DetailedRepos:        result.detailedRepos,
		TotalCommits:         result.totalCommits,
		Skipped:              result.skipped,
	})
	if err != nil {
		log.Warn(fmt.Sprintf("Contributors cache: failed to marshal cache for repo %q: %v", repo, err))
		return
	}
	finalPath := filepath.Join(cacheDir, sanitizeFilename(repo)+".json")
	tmpPath := finalPath + ".tmp"
	if err = os.WriteFile(tmpPath, payload, 0600); err != nil {
		log.Warn(fmt.Sprintf("Contributors cache: failed to write tmp file %s: %v", tmpPath, err))
		return
	}
	if err = os.Rename(tmpPath, finalPath); err != nil {
		log.Warn(fmt.Sprintf("Contributors cache: failed to rename %s → %s: %v", tmpPath, finalPath, err))
		// Best effort: don't leave the stale temp file behind.
		_ = os.Remove(tmpPath)
	}
}

// sanitizeFilename replaces characters that are unsafe in file names (e.g. '/'
// in "owner/repo" paths) with '_' so the repo name can be used as a cache file
// name on any platform. All replaced characters are ASCII, so operating on the
// raw bytes leaves multi-byte UTF-8 sequences intact.
func sanitizeFilename(name string) string {
	out := []byte(name)
	for i, c := range out {
		switch c {
		case '/', '\\', ':', '*', '?', '"', '<', '>', '|':
			out[i] = '_'
		}
	}
	return string(out)
}
63 changes: 63 additions & 0 deletions commands/git/contributors/cache_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package contributors

import (
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestWriteAndReadRepoCache verifies a full write→read round trip, including the
// map ↔ slice conversion of the struct-keyed uniqueContributors map.
func TestWriteAndReadRepoCache(t *testing.T) {
	dir := t.TempDir()
	const repo = "my-org/my-repo"
	want := repoScanResult{
		repo:         repo,
		totalCommits: 42,
		uniqueContributors: map[BasicContributor]Contributor{
			{Email: "alice@example.com", Repo: repo}: {
				Email: "alice@example.com",
				Name:  "Alice",
				RepoLastCommit: RepoLastCommit{
					Repo:       repo,
					LastCommit: LastCommit{Date: "2024-01-01T00:00:00Z", Hash: "abc123"},
				},
			},
		},
	}

	writeRepoCache(dir, repo, want, 3)

	got := readRepoCache(dir, repo, 24*time.Hour)
	require.NotNil(t, got)
	assert.Equal(t, want.repo, got.repo)
	assert.Equal(t, want.totalCommits, got.totalCommits)
	// uniqueContributors round-trips through []cacheContributorEntry; make sure
	// every key/value pair survived the conversion.
	for key, val := range want.uniqueContributors {
		cached, ok := got.uniqueContributors[key]
		assert.True(t, ok, "expected key %v in cached result", key)
		assert.Equal(t, val, cached)
	}
}

func TestReadRepoCache_Expired(t *testing.T) {
dir := t.TempDir()
result := repoScanResult{repo: "repo", totalCommits: 1}
writeRepoCache(dir, "repo", result, 1)

// maxAge of 1 nanosecond is guaranteed to be exceeded by the time we read.
got := readRepoCache(dir, "repo", 1*time.Nanosecond)
assert.Nil(t, got)
}

func TestReadRepoCache_ZeroMaxAge(t *testing.T) {
dir := t.TempDir()
// maxAge == 0 short-circuits before any file I/O.
got := readRepoCache(dir, "any-repo", 0)
assert.Nil(t, got)
}

func TestReadRepoCache_Missing(t *testing.T) {
dir := t.TempDir()
got := readRepoCache(dir, "nonexistent-repo", 24*time.Hour)
assert.Nil(t, got)
}
Loading
Loading