From 9d0a921a13f42581ff2908ad3e59c487c1b51d38 Mon Sep 17 00:00:00 2001
From: RoseSecurity
Date: Wed, 25 Feb 2026 15:48:58 -0500
Subject: [PATCH] perf: parallelize terraform module analysis

- Replace sequential directory walking with worker pool pattern
- Merge FindTFDirs and FindFiles into single ScanRepository function
- Use runtime.NumCPU() to limit concurrent goroutines
- Add .terragrunt-cache and node_modules to skip list
- Reduce filesystem traversals from two passes to one
- Use log.Warnf/log.Errorf for printf-style log messages
- Never skip the scan root itself when its name is in the skip list
---
 internal/analyze.go | 105 ++++++++++++++++++++++++++------------------
 pkg/utils/dirs.go   |  94 +++++++++++++++++++--------------------
 2 files changed, 110 insertions(+), 89 deletions(-)

diff --git a/internal/analyze.go b/internal/analyze.go
index 68f0af4..617de6d 100644
--- a/internal/analyze.go
+++ b/internal/analyze.go
@@ -1,6 +1,9 @@
 package internal
 
 import (
+	"runtime"
+	"sync"
+
 	"github.com/RoseSecurity/terrafetch/pkg/utils"
 	log "github.com/charmbracelet/log"
 	"github.com/hashicorp/terraform-config-inspect/tfconfig"
@@ -20,66 +23,84 @@ type Analytics struct {
 }
 
 func AnalyzeRepository(rootDir string) ([]Analytics, error) {
-	dirs, err := utils.FindTFDirs(rootDir)
+	scan, err := utils.ScanRepository(rootDir)
 	if err != nil {
 		return nil, ErrFailedToFindDir
 	}
 
-	if len(dirs) == 0 {
+	if len(scan.TFDirs) == 0 {
 		return nil, ErrNoTerraformFiles
 	}
 
-	var totalVars, totalResources, totalOutputs, totalDataSources, totalModules, totalProviders, totalSensitiveVars, totalSensitiveOutputs int
+	// Analyze modules concurrently; sem caps live workers at runtime.NumCPU()
+	var wg sync.WaitGroup
+	results := make(chan Analytics, len(scan.TFDirs))
+	sem := make(chan struct{}, runtime.NumCPU())
+
+	for dir := range scan.TFDirs {
+		sem <- struct{}{}
+		wg.Add(1)
+		go func(d string) {
+			defer wg.Done()
+			defer func() { <-sem }()
 
-	for dir := range dirs {
-		if !isTerraformDirectory(dir) {
-			continue
-		}
+			if !isTerraformDirectory(d) {
+				return
+			}
 
-		repo, diags := tfconfig.LoadModule(dir)
-		if diags.HasErrors() {
-			log.Warn("could not load %v", dir)
-		}
+			repo, diags := tfconfig.LoadModule(d)
+			if diags.HasErrors() {
+				log.Warnf("could not load %v", d)
+				return
+			}
 
-		totalVars += len(repo.Variables)
-		totalOutputs += len(repo.Outputs)
-		totalResources += len(repo.ManagedResources)
-		totalDataSources += len(repo.DataResources)
-		totalModules += len(repo.ModuleCalls)
-		totalProviders += len(repo.RequiredProviders)
+			var a Analytics
+			a.VariableCount = len(repo.Variables)
+			a.OutputCount = len(repo.Outputs)
+			a.ResourceCount = len(repo.ManagedResources)
+			a.DataSourceCount = len(repo.DataResources)
+			a.ModuleCount = len(repo.ModuleCalls)
+			a.ProviderCount = len(repo.RequiredProviders)
 
-		for _, v := range repo.Variables {
-			if v.Sensitive {
-				totalSensitiveVars++
+			for _, v := range repo.Variables {
+				if v.Sensitive {
+					a.SensitiveVariableCount++
+				}
 			}
-		}
 
-		for _, v := range repo.Outputs {
-			if v.Sensitive {
-				totalSensitiveOutputs++
+			for _, v := range repo.Outputs {
+				if v.Sensitive {
+					a.SensitiveOutputCount++
+				}
 			}
-		}
+
+			results <- a
+		}(dir)
 	}
 
-	totalTfFiles, totalDocFiles, err := utils.FindFiles(rootDir)
-	if err != nil {
-		log.Error("could not count terraform files %v", err)
+	// Close results channel after all workers finish
+	go func() {
+		wg.Wait()
+		close(results)
+	}()
+
+	// Aggregate results from all workers
+	var total Analytics
+	for a := range results {
+		total.VariableCount += a.VariableCount
+		total.SensitiveVariableCount += a.SensitiveVariableCount
+		total.ResourceCount += a.ResourceCount
+		total.OutputCount += a.OutputCount
+		total.SensitiveOutputCount += a.SensitiveOutputCount
+		total.DataSourceCount += a.DataSourceCount
+		total.ProviderCount += a.ProviderCount
+		total.ModuleCount += a.ModuleCount
 	}
 
-	return []Analytics{
-		{
-			VariableCount:          totalVars,
-			SensitiveVariableCount: totalSensitiveVars,
-			ResourceCount:          totalResources,
-			OutputCount:            totalOutputs,
-			SensitiveOutputCount:   totalSensitiveOutputs,
-			DataSourceCount:        totalDataSources,
-			ProviderCount:          totalProviders,
-			ModuleCount:            totalModules,
-			FileCount:              totalTfFiles,
-			DocCount:               totalDocFiles,
-		},
-	}, nil
+	total.FileCount = scan.TFCount
+	total.DocCount = scan.DocCount
+
+	return []Analytics{total}, nil
 }
 
 // isTerraformDirectory returns if a directory contains Terraform code
diff --git a/pkg/utils/dirs.go b/pkg/utils/dirs.go
index 696da42..1232788 100644
--- a/pkg/utils/dirs.go
+++ b/pkg/utils/dirs.go
@@ -8,72 +8,72 @@ import (
 	log "github.com/charmbracelet/log"
 )
 
-// FindTFDirs returns an array of directories containing Terraform code
-func FindTFDirs(dir string) (map[string]struct{}, error) {
-	tfDirs := make(map[string]struct{})
-
-	err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
-		if err != nil {
-			log.Error("error accessing path %q: %v", path, err)
-			return nil
-		}
-
-		if d.IsDir() && d.Name() == ".terraform" {
-			return filepath.SkipDir
-		}
-
-		if !d.IsDir() && filepath.Ext(d.Name()) == ".tf" {
-			parent := filepath.Dir(path)
-			tfDirs[parent] = struct{}{}
-		}
-
-		return nil
-	})
-	if err != nil {
-		log.Error("error walking the path %q: %v", dir, err)
-		return nil, err
-	}
-
-	return tfDirs, nil
+// ScanResult holds the results of a single filesystem walk.
+type ScanResult struct {
+	TFDirs   map[string]struct{}
+	TFCount  int
+	DocCount int
 }
 
-func FindFiles(root string) (int, int, error) {
-	var tfCount, docCount int
+// skipDirs contains directories that should never be descended into.
+var skipDirs = map[string]bool{
+	".terraform":        true,
+	".terragrunt-cache": true,
+	".git":              true,
+	"vendor":            true,
+	"test":              true,
+	"node_modules":      true,
+}
 
-	skipDirs := map[string]bool{
-		".terraform": true,
-		".git":       true,
-		"vendor":     true,
-		"test":       true,
+// ScanRepository walks the directory tree once, collecting Terraform module
+// directories, file counts, and documentation file counts in a single pass.
+func ScanRepository(root string) (*ScanResult, error) {
+	result := &ScanResult{
+		TFDirs: make(map[string]struct{}),
 	}
 
+	sep := string(filepath.Separator)
+
 	err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
 		if err != nil {
-			return err
+			log.Errorf("error accessing path %q: %v", path, err)
+			return nil
 		}
 
-		// Skip ignored directories
+		// Skip ignored directories, but never the root the caller asked to scan
 		if d.IsDir() && path != root && skipDirs[d.Name()] {
-			return fs.SkipDir
+			return filepath.SkipDir
+		}
+
+		if d.IsDir() {
+			return nil
 		}
 
 		name := d.Name()
+		ext := filepath.Ext(name)
 
-		if !d.IsDir() && (strings.HasSuffix(name, ".tf") || strings.HasSuffix(name, ".tofu")) {
-			tfCount++
+		// Count Terraform files and track their parent directories
+		if ext == ".tf" || ext == ".tofu" {
+			result.TFCount++
+			parent := filepath.Dir(path)
+			result.TFDirs[parent] = struct{}{}
 		}
 
-		// Match documentation files
-		if !d.IsDir() &&
-			(strings.HasPrefix(strings.ToLower(name), "readme") ||
-				strings.HasPrefix(strings.ToLower(name), "contributing") ||
-				strings.Contains(path, string(filepath.Separator)+"docs"+string(filepath.Separator)) ||
-				strings.Contains(path, string(filepath.Separator)+"examples"+string(filepath.Separator))) {
-			docCount++
+		// Count documentation files
+		lower := strings.ToLower(name)
+		if strings.HasPrefix(lower, "readme") ||
+			strings.HasPrefix(lower, "contributing") ||
+			strings.Contains(path, sep+"docs"+sep) ||
+			strings.Contains(path, sep+"examples"+sep) {
+			result.DocCount++
 		}
 
 		return nil
 	})
+	if err != nil {
+		log.Errorf("error walking the path %q: %v", root, err)
+		return nil, err
+	}
 
-	return tfCount, docCount, err
+	return result, nil
 }