diff --git a/Makefile b/Makefile index f1a5350..a4c5f8b 100644 --- a/Makefile +++ b/Makefile @@ -57,7 +57,7 @@ install: # Ensures gotestsum is installed before running tests. -# Depends on install. +# Does not depend on install; gotestsum is installed on demand. # ------------------------------------------------------------------------------ -test: install +test: @if ! command -v gotestsum >/dev/null 2>&1; then \ echo "Installing gotestsum..."; \ go install gotest.tools/gotestsum@latest; \ diff --git a/cmd/scrapeycli/main.go b/cmd/scrapeycli/main.go index 3cd3f9e..e456373 100644 --- a/cmd/scrapeycli/main.go +++ b/cmd/scrapeycli/main.go @@ -46,17 +46,22 @@ func init() { flag.BoolVar(&verbose, "v", false, "Enable verbose output (shorthand)") } +// Helper functions to create pointers for literal values. +func ptrString(s string) *string { return &s } +func ptrInt(i int) *int { return &i } +func ptrFloat64(f float64) *float64 { return &f } + /* main is the entry point of Scrapey CLI. It parses command-line flags, prints a welcome message, loads the configuration, -handles overrides, and prints confirmation messages for each step. +applies CLI overrides using a ConfigOverride object, and prints confirmation messages. */ func main() { // Parse CLI flags. flag.Parse() - // Store the verbose flag in global state + // Store the verbose flag in global state. config.Verbose = verbose // Print a welcome message in cyan using our PrintColored utility. @@ -75,26 +80,48 @@ func main() { os.Exit(1) } - // Construct a partial Config struct for CLI overrides. - cliOverrides := config.Config{} + // Construct a partial ConfigOverride struct for CLI overrides. + cliOverrides := config.ConfigOverride{} // Apply URL override if provided. if url != "" { - cliOverrides.URL.Base = url + cliOverrides.URL = &struct { + Base *string `json:"base"` + Routes *[]string `json:"routes"` + IncludeBase *bool `json:"includeBase"` + }{ + Base: ptrString(url), + } } // Apply maxDepth override if provided. 
if maxDepth > 0 { - cliOverrides.ScrapingOptions.MaxDepth = maxDepth + if cliOverrides.ScrapingOptions == nil { + cliOverrides.ScrapingOptions = &struct { + MaxDepth *int `json:"maxDepth"` + RateLimit *float64 `json:"rateLimit"` + RetryAttempts *int `json:"retryAttempts"` + UserAgent *string `json:"userAgent"` + }{} + } + cliOverrides.ScrapingOptions.MaxDepth = ptrInt(maxDepth) } // Apply rateLimit override if provided. if rateLimit > 0 { - cliOverrides.ScrapingOptions.RateLimit = rateLimit + if cliOverrides.ScrapingOptions == nil { + cliOverrides.ScrapingOptions = &struct { + MaxDepth *int `json:"maxDepth"` + RateLimit *float64 `json:"rateLimit"` + RetryAttempts *int `json:"retryAttempts"` + UserAgent *string `json:"userAgent"` + }{} + } + cliOverrides.ScrapingOptions.RateLimit = ptrFloat64(rateLimit) } // Apply all CLI overrides dynamically. - cfg.OverrideWithCLI(cliOverrides) + cfg.OverrideConfig(cliOverrides) // Print confirmation of loaded config. utils.PrintColored("Scrapey CLI initialization complete.", "", color.FgGreen) diff --git a/configs/default.json b/configs/default.json index 7b23a3d..f1298dc 100644 --- a/configs/default.json +++ b/configs/default.json @@ -1,4 +1,5 @@ { + "version": "1.0", "url": { "base": "https://example.com", "routes": ["/route1", "/route2", "*"], diff --git a/go.mod b/go.mod index d0924e3..100f748 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,10 @@ module github.com/heinrichb/scrapey-cli go 1.23.4 -require github.com/fatih/color v1.18.0 +require ( + bou.ke/monkey v1.0.2 + github.com/fatih/color v1.18.0 +) require ( github.com/mattn/go-colorable v0.1.13 // indirect diff --git a/go.sum b/go.sum index 33148a4..a1b828d 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +bou.ke/monkey v1.0.2 h1:kWcnsrCNUatbxncxR/ThdYqbytgOIArtYWqcQLQzKLI= +bou.ke/monkey v1.0.2/go.mod h1:OqickVX3tNx6t33n1xvtTtu85YN5s6cKwVug+oHMaIA= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= github.com/fatih/color v1.18.0/go.mod 
h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= diff --git a/pkg/config/config.go b/pkg/config/config.go index c093d7a..6ee0820 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -1,12 +1,9 @@ -// File: pkg/config/config.go - package config import ( "encoding/json" "fmt" "os" - "reflect" "github.com/fatih/color" "github.com/heinrichb/scrapey-cli/pkg/utils" @@ -35,7 +32,8 @@ Usage: The configuration is loaded from a JSON file to guide the crawler and parser. */ type Config struct { - URL struct { + Version string `json:"version"` + URL struct { Base string `json:"base"` Routes []string `json:"routes"` IncludeBase bool `json:"includeBase"` @@ -64,6 +62,42 @@ type Config struct { } `json:"dataFormatting"` } +/* +ConfigOverride represents a partial configuration used for overriding values. +All fields are pointers, so that nil indicates "no override" while a non-nil value, +even if zero, is used to override the corresponding Config field. 
+*/ +type ConfigOverride struct { + Version *string `json:"version"` + URL *struct { + Base *string `json:"base"` + Routes *[]string `json:"routes"` + IncludeBase *bool `json:"includeBase"` + } `json:"url"` + ParseRules *struct { + Title *string `json:"title,omitempty"` + MetaDescription *string `json:"metaDescription,omitempty"` + ArticleContent *string `json:"articleContent,omitempty"` + Author *string `json:"author,omitempty"` + DatePublished *string `json:"datePublished,omitempty"` + } `json:"parseRules"` + Storage *struct { + OutputFormats *[]string `json:"outputFormats"` + SavePath *string `json:"savePath"` + FileName *string `json:"fileName"` + } `json:"storage"` + ScrapingOptions *struct { + MaxDepth *int `json:"maxDepth"` + RateLimit *float64 `json:"rateLimit"` + RetryAttempts *int `json:"retryAttempts"` + UserAgent *string `json:"userAgent"` + } `json:"scrapingOptions"` + DataFormatting *struct { + CleanWhitespace *bool `json:"cleanWhitespace"` + RemoveHTML *bool `json:"removeHTML"` + } `json:"dataFormatting"` +} + /* ApplyDefaults populates missing fields in the Config struct with default values. @@ -153,71 +187,130 @@ func Load(filePath string) (*Config, error) { } /* -OverrideWithCLI dynamically overrides config values based on the provided `overrides` struct. +OverrideConfig applies values from the provided `overrides` object to the existing configuration. +Only fields with non-nil pointers in the overrides object are applied; all other fields remain unchanged. Parameters: - - overrides: A partial Config struct containing only the fields to override. + - overrides: A ConfigOverride struct containing only the fields to override. + A nil pointer indicates that no override should occur for that field. 
Usage: - cfg.OverrideWithCLI(Config{ - URL: struct { - Base string `json:"base"` - Routes []string `json:"routes"` - IncludeBase bool `json:"includeBase"` + cfg.OverrideConfig(ConfigOverride{ + URL: &struct { + Base *string `json:"base"` + Routes *[]string `json:"routes"` + IncludeBase *bool `json:"includeBase"` }{ - Base: "https://example.org", + Base: ptrString("https://example.org"), }, - ScrapingOptions: struct { - MaxDepth int `json:"maxDepth"` - RateLimit float64 `json:"rateLimit"` - RetryAttempts int `json:"retryAttempts"` - UserAgent string `json:"userAgent"` + ScrapingOptions: &struct { + MaxDepth *int `json:"maxDepth"` + RateLimit *float64 `json:"rateLimit"` + RetryAttempts *int `json:"retryAttempts"` + UserAgent *string `json:"userAgent"` }{ - MaxDepth: 5, + MaxDepth: ptrInt(5), }, }) Notes: - - Only **non-zero** values in `overrides` are applied. - - Uses **reflection** to dynamically override values while maintaining type safety. - - Since every top‑level field in Config is a struct, only that branch is executed. + - Only fields with non-nil pointers in `overrides` are applied. + - This allows partial configuration overrides without unintentionally overwriting existing values. + - Both struct and non-struct fields are overridden if provided. */ -func (cfg *Config) OverrideWithCLI(overrides Config) { - cfgValue := reflect.ValueOf(cfg).Elem() - overridesValue := reflect.ValueOf(overrides) - - for i := 0; i < overridesValue.NumField(); i++ { - field := overridesValue.Type().Field(i) - overrideField := overridesValue.Field(i) - configField := cfgValue.FieldByName(field.Name) - - if !configField.IsValid() || !configField.CanSet() { - continue - } - - // Since all fields in Config are structs, we only need to handle that branch. 
- if overrideField.Kind() == reflect.Struct { - for j := 0; j < overrideField.NumField(); j++ { - subField := overrideField.Type().Field(j) - overrideSubField := overrideField.Field(j) - configSubField := configField.FieldByName(subField.Name) - - if !configSubField.IsValid() || !configSubField.CanSet() { - continue - } - - // Skip empty slices. - if overrideSubField.Kind() == reflect.Slice && overrideSubField.Len() == 0 { - continue - } - - if !overrideSubField.IsZero() { - utils.PrintColored(fmt.Sprintf("Overriding %s.%s: ", field.Name, subField.Name), - fmt.Sprint(overrideSubField.Interface()), color.FgHiMagenta) - configSubField.Set(overrideSubField) - } - } +func (cfg *Config) OverrideConfig(overrides ConfigOverride) { + // Override non-struct field: Version. + if overrides.Version != nil { + utils.PrintColored("Overriding Version: ", *overrides.Version, color.FgHiMagenta) + cfg.Version = *overrides.Version + } + + // Override URL fields. + if overrides.URL != nil { + if overrides.URL.Base != nil { + utils.PrintColored("Overriding URL.Base: ", *overrides.URL.Base, color.FgHiMagenta) + cfg.URL.Base = *overrides.URL.Base + } + if overrides.URL.Routes != nil { + utils.PrintColored("Overriding URL.Routes: ", fmt.Sprint(*overrides.URL.Routes), color.FgHiMagenta) + cfg.URL.Routes = *overrides.URL.Routes + } + if overrides.URL.IncludeBase != nil { + utils.PrintColored("Overriding URL.IncludeBase: ", fmt.Sprint(*overrides.URL.IncludeBase), color.FgHiMagenta) + cfg.URL.IncludeBase = *overrides.URL.IncludeBase + } + } + + // Override ParseRules fields. 
+ if overrides.ParseRules != nil { + if overrides.ParseRules.Title != nil { + utils.PrintColored("Overriding ParseRules.Title: ", *overrides.ParseRules.Title, color.FgHiMagenta) + cfg.ParseRules.Title = *overrides.ParseRules.Title + } + if overrides.ParseRules.MetaDescription != nil { + utils.PrintColored("Overriding ParseRules.MetaDescription: ", *overrides.ParseRules.MetaDescription, color.FgHiMagenta) + cfg.ParseRules.MetaDescription = *overrides.ParseRules.MetaDescription + } + if overrides.ParseRules.ArticleContent != nil { + utils.PrintColored("Overriding ParseRules.ArticleContent: ", *overrides.ParseRules.ArticleContent, color.FgHiMagenta) + cfg.ParseRules.ArticleContent = *overrides.ParseRules.ArticleContent + } + if overrides.ParseRules.Author != nil { + utils.PrintColored("Overriding ParseRules.Author: ", *overrides.ParseRules.Author, color.FgHiMagenta) + cfg.ParseRules.Author = *overrides.ParseRules.Author + } + if overrides.ParseRules.DatePublished != nil { + utils.PrintColored("Overriding ParseRules.DatePublished: ", *overrides.ParseRules.DatePublished, color.FgHiMagenta) + cfg.ParseRules.DatePublished = *overrides.ParseRules.DatePublished + } + } + + // Override Storage fields. + if overrides.Storage != nil { + if overrides.Storage.OutputFormats != nil { + utils.PrintColored("Overriding Storage.OutputFormats: ", fmt.Sprint(*overrides.Storage.OutputFormats), color.FgHiMagenta) + cfg.Storage.OutputFormats = *overrides.Storage.OutputFormats + } + if overrides.Storage.SavePath != nil { + utils.PrintColored("Overriding Storage.SavePath: ", *overrides.Storage.SavePath, color.FgHiMagenta) + cfg.Storage.SavePath = *overrides.Storage.SavePath + } + if overrides.Storage.FileName != nil { + utils.PrintColored("Overriding Storage.FileName: ", *overrides.Storage.FileName, color.FgHiMagenta) + cfg.Storage.FileName = *overrides.Storage.FileName + } + } + + // Override ScrapingOptions fields. 
+ if overrides.ScrapingOptions != nil { + if overrides.ScrapingOptions.MaxDepth != nil { + utils.PrintColored("Overriding ScrapingOptions.MaxDepth: ", fmt.Sprint(*overrides.ScrapingOptions.MaxDepth), color.FgHiMagenta) + cfg.ScrapingOptions.MaxDepth = *overrides.ScrapingOptions.MaxDepth + } + if overrides.ScrapingOptions.RateLimit != nil { + utils.PrintColored("Overriding ScrapingOptions.RateLimit: ", fmt.Sprint(*overrides.ScrapingOptions.RateLimit), color.FgHiMagenta) + cfg.ScrapingOptions.RateLimit = *overrides.ScrapingOptions.RateLimit + } + if overrides.ScrapingOptions.RetryAttempts != nil { + utils.PrintColored("Overriding ScrapingOptions.RetryAttempts: ", fmt.Sprint(*overrides.ScrapingOptions.RetryAttempts), color.FgHiMagenta) + cfg.ScrapingOptions.RetryAttempts = *overrides.ScrapingOptions.RetryAttempts + } + if overrides.ScrapingOptions.UserAgent != nil { + utils.PrintColored("Overriding ScrapingOptions.UserAgent: ", *overrides.ScrapingOptions.UserAgent, color.FgHiMagenta) + cfg.ScrapingOptions.UserAgent = *overrides.ScrapingOptions.UserAgent + } + } + + // Override DataFormatting fields. 
+ if overrides.DataFormatting != nil { + if overrides.DataFormatting.CleanWhitespace != nil { + utils.PrintColored("Overriding DataFormatting.CleanWhitespace: ", fmt.Sprint(*overrides.DataFormatting.CleanWhitespace), color.FgHiMagenta) + cfg.DataFormatting.CleanWhitespace = *overrides.DataFormatting.CleanWhitespace + } + if overrides.DataFormatting.RemoveHTML != nil { + utils.PrintColored("Overriding DataFormatting.RemoveHTML: ", fmt.Sprint(*overrides.DataFormatting.RemoveHTML), color.FgHiMagenta) + cfg.DataFormatting.RemoveHTML = *overrides.DataFormatting.RemoveHTML } } } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 1e8627e..35124d1 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -1,150 +1,486 @@ +// File: pkg/config/config_test.go + package config import ( - "io" + "fmt" "os" "reflect" "strings" "testing" + + "bou.ke/monkey" + "github.com/heinrichb/scrapey-cli/pkg/utils" ) -func captureOutput(f func()) string { - oldStdout := os.Stdout - r, w, _ := os.Pipe() - os.Stdout = w +// Helper functions to easily create pointer values. +func ptrString(s string) *string { return &s } +func ptrInt(i int) *int { return &i } +func ptrFloat64(f float64) *float64 { return &f } +func ptrBool(b bool) *bool { return &b } - f() +// TestApplyDefaults tests the ApplyDefaults function to ensure that missing fields are set to default values. 
+func TestApplyDefaults(t *testing.T) { + cases := []struct { + desc string + setup func(cfg *Config) + validate func(t *testing.T, cfg *Config) + }{ + { + desc: "All fields missing should be set to defaults", + setup: func(cfg *Config) {}, + validate: func(t *testing.T, cfg *Config) { + if cfg.URL.Base != "https://example.com" { + t.Errorf("Expected URL.Base to be 'https://example.com', got '%s'", cfg.URL.Base) + } + if len(cfg.URL.Routes) != 1 || cfg.URL.Routes[0] != "/" { + t.Errorf("Expected URL.Routes to be ['/'], got %v", cfg.URL.Routes) + } + if cfg.ScrapingOptions.MaxDepth != 2 { + t.Errorf("Expected ScrapingOptions.MaxDepth to be 2, got %d", cfg.ScrapingOptions.MaxDepth) + } + if cfg.ScrapingOptions.RateLimit != 1.5 { + t.Errorf("Expected ScrapingOptions.RateLimit to be 1.5, got %f", cfg.ScrapingOptions.RateLimit) + } + if cfg.ScrapingOptions.RetryAttempts != 3 { + t.Errorf("Expected ScrapingOptions.RetryAttempts to be 3, got %d", cfg.ScrapingOptions.RetryAttempts) + } + expectedUA := "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + if cfg.ScrapingOptions.UserAgent != expectedUA { + t.Errorf("Expected ScrapingOptions.UserAgent to be '%s', got '%s'", expectedUA, cfg.ScrapingOptions.UserAgent) + } + if len(cfg.Storage.OutputFormats) != 1 || cfg.Storage.OutputFormats[0] != "json" { + t.Errorf("Expected Storage.OutputFormats to be ['json'], got %v", cfg.Storage.OutputFormats) + } + if cfg.Storage.SavePath != "output/" { + t.Errorf("Expected Storage.SavePath to be 'output/', got '%s'", cfg.Storage.SavePath) + } + if cfg.Storage.FileName != "scraped_data" { + t.Errorf("Expected Storage.FileName to be 'scraped_data', got '%s'", cfg.Storage.FileName) + } + }, + }, + { + desc: "Pre-set fields remain unchanged and missing fields get defaults", + setup: func(cfg *Config) { + cfg.URL.Base = "https://preset.com" + cfg.Storage.SavePath = "custom_output/" + }, + validate: func(t *testing.T, cfg *Config) { + if cfg.URL.Base != "https://preset.com" { + 
t.Errorf("Expected URL.Base to be 'https://preset.com', got '%s'", cfg.URL.Base) + } + if cfg.Storage.SavePath != "custom_output/" { + t.Errorf("Expected Storage.SavePath to be 'custom_output/', got '%s'", cfg.Storage.SavePath) + } + if len(cfg.URL.Routes) != 1 || cfg.URL.Routes[0] != "/" { + t.Errorf("Expected URL.Routes to be ['/'], got %v", cfg.URL.Routes) + } + if cfg.ScrapingOptions.MaxDepth != 2 { + t.Errorf("Expected ScrapingOptions.MaxDepth to be 2, got %d", cfg.ScrapingOptions.MaxDepth) + } + if len(cfg.Storage.OutputFormats) != 1 || cfg.Storage.OutputFormats[0] != "json" { + t.Errorf("Expected Storage.OutputFormats to be ['json'], got %v", cfg.Storage.OutputFormats) + } + if cfg.Storage.FileName != "scraped_data" { + t.Errorf("Expected Storage.FileName to be 'scraped_data', got '%s'", cfg.Storage.FileName) + } + }, + }, + { + desc: "No change if all fields are pre-set", + setup: func(cfg *Config) { + cfg.URL.Base = "https://preset.com" + cfg.URL.Routes = []string{"/preset"} + cfg.ScrapingOptions.MaxDepth = 10 + cfg.ScrapingOptions.RateLimit = 3.0 + cfg.ScrapingOptions.RetryAttempts = 5 + cfg.ScrapingOptions.UserAgent = "CustomAgent" + cfg.Storage.OutputFormats = []string{"xml"} + cfg.Storage.SavePath = "preset_output/" + cfg.Storage.FileName = "preset_data" + }, + validate: func(t *testing.T, cfg *Config) { + if cfg.URL.Base != "https://preset.com" { + t.Errorf("Expected URL.Base to be 'https://preset.com', got '%s'", cfg.URL.Base) + } + if !reflect.DeepEqual(cfg.URL.Routes, []string{"/preset"}) { + t.Errorf("Expected URL.Routes to be ['/preset'], got %v", cfg.URL.Routes) + } + if cfg.ScrapingOptions.MaxDepth != 10 { + t.Errorf("Expected ScrapingOptions.MaxDepth to be 10, got %d", cfg.ScrapingOptions.MaxDepth) + } + if cfg.ScrapingOptions.RateLimit != 3.0 { + t.Errorf("Expected ScrapingOptions.RateLimit to be 3.0, got %f", cfg.ScrapingOptions.RateLimit) + } + if cfg.ScrapingOptions.RetryAttempts != 5 { + t.Errorf("Expected ScrapingOptions.RetryAttempts to 
be 5, got %d", cfg.ScrapingOptions.RetryAttempts) + } + if cfg.ScrapingOptions.UserAgent != "CustomAgent" { + t.Errorf("Expected ScrapingOptions.UserAgent to be 'CustomAgent', got '%s'", cfg.ScrapingOptions.UserAgent) + } + if !reflect.DeepEqual(cfg.Storage.OutputFormats, []string{"xml"}) { + t.Errorf("Expected Storage.OutputFormats to be ['xml'], got %v", cfg.Storage.OutputFormats) + } + if cfg.Storage.SavePath != "preset_output/" { + t.Errorf("Expected Storage.SavePath to be 'preset_output/', got '%s'", cfg.Storage.SavePath) + } + if cfg.Storage.FileName != "preset_data" { + t.Errorf("Expected Storage.FileName to be 'preset_data', got '%s'", cfg.Storage.FileName) + } + }, + }, + } - w.Close() - var buf strings.Builder - io.Copy(&buf, r) - os.Stdout = oldStdout - return buf.String() + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + cfg := &Config{} + if tc.setup != nil { + tc.setup(cfg) + } + cfg.ApplyDefaults() + tc.validate(t, cfg) + }) + } } -func TestLoadConfig(t *testing.T) { +// TestLoad tests the Load function with various file conditions. +func TestLoad(t *testing.T) { + var capturedColored string + patchColored := monkey.Patch(utils.PrintColored, func(a ...interface{}) { + capturedColored += fmt.Sprint(a...) 
+ }) + defer patchColored.Unpatch() + + var capturedNonEmpty string + patchNonEmpty := monkey.Patch(utils.PrintNonEmptyFields, func(prefix string, cfg interface{}) { + capturedNonEmpty += "nonEmptyFieldsCalled" + }) + defer patchNonEmpty.Unpatch() + cases := []struct { desc string - filename string - expectedErr bool - setup func(string) + fileSetup func(fileName string) + verbose bool + expectErr bool + checkOutput func(t *testing.T, colored, nonEmpty string) }{ { - "Missing config file", - "nonexistent.json", - true, - nil, + desc: "Missing config file", + fileSetup: nil, + verbose: false, + expectErr: true, + checkOutput: func(t *testing.T, colored, nonEmpty string) { + if colored != "" { + t.Errorf("Expected no colored output for missing file, got: %s", colored) + } + }, }, { - "Unreadable config file", - "unreadable_config.json", - true, - func(name string) { os.Chmod(name, 0000); defer os.Chmod(name, 0644) }, + desc: "Unreadable config file", + fileSetup: func(name string) { + if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"}}`), 0644); err != nil { + t.Fatalf("Failed to write file: %v", err) + } + }, + verbose: false, + expectErr: true, + checkOutput: func(t *testing.T, colored, nonEmpty string) { + if !strings.Contains(colored, "Loaded config from: ") { + t.Errorf("Expected colored output, got: %s", colored) + } + }, }, { - "Invalid JSON format", - "invalid_config.json", - true, - func(name string) { os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"`), 0644) }, + desc: "Invalid JSON format", + fileSetup: func(name string) { + if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"`), 0644); err != nil { + t.Fatalf("Failed to write file: %v", err) + } + }, + verbose: false, + expectErr: true, + checkOutput: func(t *testing.T, colored, nonEmpty string) { + if !strings.Contains(colored, "Loaded config from: ") { + t.Errorf("Expected colored output, got: %s", colored) + } + }, }, { - "Valid JSON with 
verbose mode", - "valid_config.json", - false, - func(name string) { os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"}}`), 0644) }, + desc: "Valid JSON without verbose mode", + fileSetup: func(name string) { + if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"}}`), 0644); err != nil { + t.Fatalf("Failed to write file: %v", err) + } + }, + verbose: false, + expectErr: false, + checkOutput: func(t *testing.T, colored, nonEmpty string) { + if !strings.Contains(colored, "Loaded config from: ") { + t.Errorf("Expected colored output, got: %s", colored) + } + if nonEmpty != "" { + t.Errorf("Expected no non-empty output when verbose is false, got: %s", nonEmpty) + } + }, + }, + { + desc: "Valid JSON with verbose mode", + fileSetup: func(name string) { + if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"}}`), 0644); err != nil { + t.Fatalf("Failed to write file: %v", err) + } + }, + verbose: true, + expectErr: false, + checkOutput: func(t *testing.T, colored, nonEmpty string) { + if !strings.Contains(colored, "Loaded config from: ") { + t.Errorf("Expected colored output, got: %s", colored) + } + if nonEmpty != "nonEmptyFieldsCalled" { + t.Errorf("Expected non-empty output when verbose is true, got: %s", nonEmpty) + } + }, }, } - for _, c := range cases { - t.Run(c.desc, func(t *testing.T) { - if c.setup != nil { - tmpFile, _ := os.CreateTemp("", c.filename) - defer os.Remove(tmpFile.Name()) - c.setup(tmpFile.Name()) - c.filename = tmpFile.Name() + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + capturedColored = "" + patchNonEmpty.Unpatch() + patchNonEmpty = monkey.Patch(utils.PrintNonEmptyFields, func(prefix string, cfg interface{}) { + capturedNonEmpty += "nonEmptyFieldsCalled" + }) + defer patchNonEmpty.Unpatch() + Verbose = tc.verbose + + var fileName string + if tc.fileSetup != nil { + tmpFile, err := os.CreateTemp("", "config_*.json") + if err != nil { + t.Fatalf("Failed to create temp 
file: %v", err) + } + fileName = tmpFile.Name() + tmpFile.Close() + tc.fileSetup(fileName) + os.Chmod(fileName, 0644) + defer os.Remove(fileName) + } else { + fileName = "nonexistent_config.json" } - _, err := Load(c.filename) - if (err != nil) != c.expectedErr { - t.Fatalf("Unexpected error state: %v", err) + if tc.desc == "Unreadable config file" { + patchReadFile := monkey.Patch(os.ReadFile, func(name string) ([]byte, error) { + return nil, fmt.Errorf("simulated read error") + }) + defer patchReadFile.Unpatch() } + + cfg, err := Load(fileName) + if tc.expectErr { + if err == nil { + t.Errorf("Expected error but got nil") + } + return + } else { + if err != nil { + t.Errorf("Unexpected error: %v", err) + return + } + } + if cfg.URL.Base == "" { + t.Errorf("Expected URL.Base to be set, got empty") + } + tc.checkOutput(t, capturedColored, capturedNonEmpty) }) } } -func TestOverrideWithCLI(t *testing.T) { - cfg := &Config{} - cfg.ApplyDefaults() - +// TestOverrideConfig combines the previous TestOverrideConfigFull and TestOverrideConfigNil into a single test. +// It verifies that a full override updates all fields and that a nil override leaves the config unchanged. 
+func TestOverrideConfig(t *testing.T) { cases := []struct { - desc string - override Config - expectFunc func(*Config) bool - expectOutput string + desc string + overrideSetup func() ConfigOverride + validate func(t *testing.T, base *Config, captured string) }{ { - "Override URL.Base", - Config{URL: struct { - Base string `json:"base"` - Routes []string `json:"routes"` - IncludeBase bool `json:"includeBase"` - }{Base: "https://override.com"}}, - func(c *Config) bool { return c.URL.Base == "https://override.com" }, - "Overriding URL.Base: ", - }, - { - "Override non-empty slice", - Config{Storage: struct { - OutputFormats []string `json:"outputFormats"` - SavePath string `json:"savePath"` - FileName string `json:"fileName"` - }{OutputFormats: []string{"csv"}}}, - func(c *Config) bool { return reflect.DeepEqual(c.Storage.OutputFormats, []string{"csv"}) }, - "Overriding Storage.OutputFormats: ", - }, - { - "Override boolean", - Config{URL: struct { - Base string `json:"base"` - Routes []string `json:"routes"` - IncludeBase bool `json:"includeBase"` - }{IncludeBase: true}}, - func(c *Config) bool { return c.URL.IncludeBase }, - "Overriding URL.IncludeBase: ", + desc: "Full override applies all changes", + overrideSetup: func() ConfigOverride { + return ConfigOverride{ + Version: ptrString("v2.0"), + URL: &struct { + Base *string `json:"base"` + Routes *[]string `json:"routes"` + IncludeBase *bool `json:"includeBase"` + }{ + Base: ptrString("https://override.com"), + Routes: &[]string{"/new", "/extra"}, + IncludeBase: ptrBool(true), + }, + ParseRules: &struct { + Title *string `json:"title,omitempty"` + MetaDescription *string `json:"metaDescription,omitempty"` + ArticleContent *string `json:"articleContent,omitempty"` + Author *string `json:"author,omitempty"` + DatePublished *string `json:"datePublished,omitempty"` + }{ + Title: ptrString("New Title"), + MetaDescription: ptrString("New Meta"), + ArticleContent: ptrString("New Content"), + Author: ptrString("New 
Author"), + DatePublished: ptrString("2022-01-01"), + }, + Storage: &struct { + OutputFormats *[]string `json:"outputFormats"` + SavePath *string `json:"savePath"` + FileName *string `json:"fileName"` + }{ + OutputFormats: &[]string{"csv"}, + SavePath: ptrString("new_output/"), + FileName: ptrString("new_data"), + }, + ScrapingOptions: &struct { + MaxDepth *int `json:"maxDepth"` + RateLimit *float64 `json:"rateLimit"` + RetryAttempts *int `json:"retryAttempts"` + UserAgent *string `json:"userAgent"` + }{ + MaxDepth: ptrInt(5), + RateLimit: ptrFloat64(2.0), + RetryAttempts: ptrInt(4), + UserAgent: ptrString("OverrideAgent"), + }, + DataFormatting: &struct { + CleanWhitespace *bool `json:"cleanWhitespace"` + RemoveHTML *bool `json:"removeHTML"` + }{ + CleanWhitespace: ptrBool(true), + RemoveHTML: ptrBool(true), + }, + } + }, + validate: func(t *testing.T, base *Config, captured string) { + if base.Version != "v2.0" { + t.Errorf("Expected Version to be 'v2.0', got '%s'", base.Version) + } + if base.URL.Base != "https://override.com" { + t.Errorf("Expected URL.Base to be 'https://override.com', got '%s'", base.URL.Base) + } + if !reflect.DeepEqual(base.URL.Routes, []string{"/new", "/extra"}) { + t.Errorf("Expected URL.Routes to be ['/new', '/extra'], got %v", base.URL.Routes) + } + if !base.URL.IncludeBase { + t.Errorf("Expected URL.IncludeBase to be true") + } + if base.ParseRules.Title != "New Title" { + t.Errorf("Expected ParseRules.Title to be 'New Title', got '%s'", base.ParseRules.Title) + } + if base.ParseRules.MetaDescription != "New Meta" { + t.Errorf("Expected ParseRules.MetaDescription to be 'New Meta', got '%s'", base.ParseRules.MetaDescription) + } + if base.ParseRules.ArticleContent != "New Content" { + t.Errorf("Expected ParseRules.ArticleContent to be 'New Content', got '%s'", base.ParseRules.ArticleContent) + } + if base.ParseRules.Author != "New Author" { + t.Errorf("Expected ParseRules.Author to be 'New Author', got '%s'", base.ParseRules.Author) + } 
+ if base.ParseRules.DatePublished != "2022-01-01" { + t.Errorf("Expected ParseRules.DatePublished to be '2022-01-01', got '%s'", base.ParseRules.DatePublished) + } + if !reflect.DeepEqual(base.Storage.OutputFormats, []string{"csv"}) { + t.Errorf("Expected Storage.OutputFormats to be ['csv'], got %v", base.Storage.OutputFormats) + } + if base.Storage.SavePath != "new_output/" { + t.Errorf("Expected Storage.SavePath to be 'new_output/', got '%s'", base.Storage.SavePath) + } + if base.Storage.FileName != "new_data" { + t.Errorf("Expected Storage.FileName to be 'new_data', got '%s'", base.Storage.FileName) + } + if base.ScrapingOptions.MaxDepth != 5 { + t.Errorf("Expected ScrapingOptions.MaxDepth to be 5, got %d", base.ScrapingOptions.MaxDepth) + } + if base.ScrapingOptions.RateLimit != 2.0 { + t.Errorf("Expected ScrapingOptions.RateLimit to be 2.0, got %f", base.ScrapingOptions.RateLimit) + } + if base.ScrapingOptions.RetryAttempts != 4 { + t.Errorf("Expected ScrapingOptions.RetryAttempts to be 4, got %d", base.ScrapingOptions.RetryAttempts) + } + if base.ScrapingOptions.UserAgent != "OverrideAgent" { + t.Errorf("Expected ScrapingOptions.UserAgent to be 'OverrideAgent', got '%s'", base.ScrapingOptions.UserAgent) + } + if !base.DataFormatting.CleanWhitespace { + t.Errorf("Expected DataFormatting.CleanWhitespace to be true") + } + if !base.DataFormatting.RemoveHTML { + t.Errorf("Expected DataFormatting.RemoveHTML to be true") + } + + // Verify that PrintColored was called for each overridden field. 
+ expectedSubstrs := []string{ + "Overriding Version: v2.0", + "Overriding URL.Base: https://override.com", + "Overriding URL.Routes: [", + "Overriding URL.IncludeBase: true", + "Overriding ParseRules.Title: New Title", + "Overriding ParseRules.MetaDescription: New Meta", + "Overriding ParseRules.ArticleContent: New Content", + "Overriding ParseRules.Author: New Author", + "Overriding ParseRules.DatePublished: 2022-01-01", + "Overriding Storage.OutputFormats: [", + "Overriding Storage.SavePath: new_output/", + "Overriding Storage.FileName: new_data", + "Overriding ScrapingOptions.MaxDepth: 5", + "Overriding ScrapingOptions.RateLimit: 2", + "Overriding ScrapingOptions.RetryAttempts: 4", + "Overriding ScrapingOptions.UserAgent: OverrideAgent", + "Overriding DataFormatting.CleanWhitespace: true", + "Overriding DataFormatting.RemoveHTML: true", + } + for _, substr := range expectedSubstrs { + if !strings.Contains(captured, substr) { + t.Errorf("Expected output to contain '%s', got '%s'", substr, captured) + } + } + }, }, { - "Override multiple values", - Config{ - URL: struct { - Base string `json:"base"` - Routes []string `json:"routes"` - IncludeBase bool `json:"includeBase"` - }{ - Base: "https://multiple.com", - Routes: []string{"/new"}, - IncludeBase: true, - }, - ScrapingOptions: struct { - MaxDepth int `json:"maxDepth"` - RateLimit float64 `json:"rateLimit"` - RetryAttempts int `json:"retryAttempts"` - UserAgent string `json:"userAgent"` - }{MaxDepth: 5}, - }, - func(c *Config) bool { return c.URL.Base == "https://multiple.com" && c.ScrapingOptions.MaxDepth == 5 }, - "Overriding URL.Base: ", + desc: "Nil override leaves config unchanged", + overrideSetup: func() ConfigOverride { + return ConfigOverride{} + }, + validate: func(t *testing.T, base *Config, captured string) { + // Build a default config to compare. 
+ defaultConfig := &Config{} + defaultConfig.ApplyDefaults() + if !reflect.DeepEqual(base, defaultConfig) { + t.Errorf("Expected config to remain unchanged when overrides are nil. Got %+v, expected %+v", base, defaultConfig) + } + // No PrintColored calls should be made. + if captured != "" { + t.Errorf("Expected no output from PrintColored when no overrides are applied, got '%s'", captured) + } + }, }, } - for _, c := range cases { - t.Run(c.desc, func(t *testing.T) { - output := captureOutput(func() { cfg.OverrideWithCLI(c.override) }) - if !c.expectFunc(cfg) { - t.Errorf("Expected override not applied") - } - if !strings.Contains(output, c.expectOutput) { - t.Errorf("Expected output to contain '%s', got '%s'", c.expectOutput, output) - } + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + var captured string + patchColored := monkey.Patch(utils.PrintColored, func(a ...interface{}) { + captured += fmt.Sprint(a...) + }) + defer patchColored.Unpatch() + + // Create a base config with defaults applied. + base := &Config{} + base.ApplyDefaults() + + // Apply the override from this test case. + override := tc.overrideSetup() + base.OverrideConfig(override) + + tc.validate(t, base, captured) }) } }