From 8048f37b6c17bf08885a93ed534412953890671a Mon Sep 17 00:00:00 2001 From: Brennen Heinrich Date: Sun, 16 Feb 2025 15:03:19 -0600 Subject: [PATCH 1/5] Load covered --- Makefile | 2 +- go.mod | 5 +- go.sum | 2 + pkg/config/config_test.go | 376 +++++++++++++++++++++++++++++--------- 4 files changed, 301 insertions(+), 84 deletions(-) diff --git a/Makefile b/Makefile index f1a5350..a4c5f8b 100644 --- a/Makefile +++ b/Makefile @@ -57,7 +57,7 @@ install: # Ensures gotestsum is installed before running tests. # Depends on install. # ------------------------------------------------------------------------------ -test: install +test: @if ! command -v gotestsum >/dev/null 2>&1; then \ echo "Installing gotestsum..."; \ go install gotest.tools/gotestsum@latest; \ diff --git a/go.mod b/go.mod index d0924e3..100f748 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,10 @@ module github.com/heinrichb/scrapey-cli go 1.23.4 -require github.com/fatih/color v1.18.0 +require ( + bou.ke/monkey v1.0.2 + github.com/fatih/color v1.18.0 +) require ( github.com/mattn/go-colorable v0.1.13 // indirect diff --git a/go.sum b/go.sum index 33148a4..a1b828d 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +bou.ke/monkey v1.0.2 h1:kWcnsrCNUatbxncxR/ThdYqbytgOIArtYWqcQLQzKLI= +bou.ke/monkey v1.0.2/go.mod h1:OqickVX3tNx6t33n1xvtTtu85YN5s6cKwVug+oHMaIA= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 1e8627e..ac35ed0 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -1,120 +1,261 @@ +// File: pkg/config/config_test.go + package config import ( - "io" + "fmt" "os" "reflect" "strings" "testing" -) -func captureOutput(f func()) string { - oldStdout := os.Stdout - r, w, _ := os.Pipe() - os.Stdout = w + "bou.ke/monkey" + "github.com/heinrichb/scrapey-cli/pkg/utils" +) - f() +// TestLoad tests the Load function in a single function with multiple cases. +// We cover scenarios like missing file, unreadable file, invalid JSON, +// and valid JSON with verbose mode on/off. +func TestLoad(t *testing.T) { + // Patch utils.PrintColored and utils.PrintNonEmptyFields. + var capturedColored string + patchColored := monkey.Patch(utils.PrintColored, func(a ...interface{}) { + capturedColored += fmt.Sprint(a...) + }) + defer patchColored.Unpatch() - w.Close() - var buf strings.Builder - io.Copy(&buf, r) - os.Stdout = oldStdout - return buf.String() -} + var capturedNonEmpty string + patchNonEmpty := monkey.Patch(utils.PrintNonEmptyFields, func(prefix string, cfg interface{}) { + capturedNonEmpty += "nonEmptyFieldsCalled" + }) + defer patchNonEmpty.Unpatch() -func TestLoadConfig(t *testing.T) { + // Define table test cases for Load. cases := []struct { desc string - filename string - expectedErr bool - setup func(string) + fileSetup func(fileName string) // Setup the file (write contents, change permissions) + verbose bool // Set global Verbose before calling Load. + expectErr bool // Expect Load() to return an error. + checkOutput func(t *testing.T, colored, nonEmpty string) }{ { - "Missing config file", - "nonexistent.json", - true, - nil, + desc: "Missing config file", + fileSetup: nil, // Do not create the file so that it is missing. + verbose: false, + expectErr: true, + checkOutput: func(t *testing.T, colored, nonEmpty string) { + // For a missing file, no printing should occur. + if colored != "" { + t.Errorf("Expected no colored output for missing file, got: %s", colored) + } + }, + }, + { + desc: "Unreadable config file", + fileSetup: func(name string) { + // Create a file with valid JSON. + if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"}}`), 0644); err != nil { + t.Fatalf("Failed to write file: %v", err) + } + // We'll patch os.ReadFile below to simulate a read error. + }, + verbose: false, + expectErr: true, + checkOutput: func(t *testing.T, colored, nonEmpty string) { + // Expect that PrintColored is called. + if !strings.Contains(colored, "Loaded config from: ") { + t.Errorf("Expected colored output, got: %s", colored) + } + }, }, { - "Unreadable config file", - "unreadable_config.json", - true, - func(name string) { os.Chmod(name, 0000); defer os.Chmod(name, 0644) }, + desc: "Invalid JSON format", + fileSetup: func(name string) { + // Write invalid JSON. + if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"`), 0644); err != nil { + t.Fatalf("Failed to write file: %v", err) + } + }, + verbose: false, + expectErr: true, + checkOutput: func(t *testing.T, colored, nonEmpty string) { + // Even with invalid JSON, colored output should be produced. + if !strings.Contains(colored, "Loaded config from: ") { + t.Errorf("Expected colored output, got: %s", colored) + } + }, }, { - "Invalid JSON format", - "invalid_config.json", - true, - func(name string) { os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"`), 0644) }, + desc: "Valid JSON without verbose mode", + fileSetup: func(name string) { + // Write valid minimal JSON. + if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"}}`), 0644); err != nil { + t.Fatalf("Failed to write file: %v", err) + } + }, + verbose: false, + expectErr: false, + checkOutput: func(t *testing.T, colored, nonEmpty string) { + // When verbose is false, only colored output is expected. + if !strings.Contains(colored, "Loaded config from: ") { + t.Errorf("Expected colored output, got: %s", colored) + } + if nonEmpty != "" { + t.Errorf("Expected no non-empty output when verbose is false, got: %s", nonEmpty) + } + }, }, { - "Valid JSON with verbose mode", - "valid_config.json", - false, - func(name string) { os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"}}`), 0644) }, + desc: "Valid JSON with verbose mode", + fileSetup: func(name string) { + // Write valid minimal JSON. + if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"}}`), 0644); err != nil { + t.Fatalf("Failed to write file: %v", err) + } + }, + verbose: true, + expectErr: false, + checkOutput: func(t *testing.T, colored, nonEmpty string) { + // With verbose mode on, both colored and non-empty outputs should be present. + if !strings.Contains(colored, "Loaded config from: ") { + t.Errorf("Expected colored output, got: %s", colored) + } + if nonEmpty != "nonEmptyFieldsCalled" { + t.Errorf("Expected non-empty output when verbose is true, got: %s", nonEmpty) + } + }, }, } - for _, c := range cases { - t.Run(c.desc, func(t *testing.T) { - if c.setup != nil { - tmpFile, _ := os.CreateTemp("", c.filename) - defer os.Remove(tmpFile.Name()) - c.setup(tmpFile.Name()) - c.filename = tmpFile.Name() + // Run test cases. + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + // Reset captured outputs. + capturedColored = "" + // Reset capturedNonEmpty inside the patch by re-patching. + patchNonEmpty.Unpatch() + patchNonEmpty = monkey.Patch(utils.PrintNonEmptyFields, func(prefix string, cfg interface{}) { + capturedNonEmpty += "nonEmptyFieldsCalled" + }) + defer patchNonEmpty.Unpatch() + + // Set the global Verbose flag as needed. + Verbose = tc.verbose + + // Prepare file. If no setup, use a name that does not exist. + var fileName string + if tc.fileSetup != nil { + tmpFile, err := os.CreateTemp("", "config_*.json") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + fileName = tmpFile.Name() + tmpFile.Close() // Close so that file can be manipulated. + tc.fileSetup(fileName) + // For cleanup and permission safety. + os.Chmod(fileName, 0644) + defer os.Remove(fileName) + } else { + fileName = "nonexistent_config.json" + } + + // For the unreadable file test, patch os.ReadFile to simulate a read error. + if tc.desc == "Unreadable config file" { + patchReadFile := monkey.Patch(os.ReadFile, func(name string) ([]byte, error) { + return nil, fmt.Errorf("simulated read error") + }) + defer patchReadFile.Unpatch() + } + + cfg, err := Load(fileName) + if tc.expectErr { + if err == nil { + t.Errorf("Expected error but got nil") + } + // Skip further checks if error was expected. + return + } else { + if err != nil { + t.Errorf("Unexpected error: %v", err) + return + } } - _, err := Load(c.filename) - if (err != nil) != c.expectedErr { - t.Fatalf("Unexpected error state: %v", err) + // Ensure ApplyDefaults populated required fields (e.g. URL.Base). + if cfg.URL.Base == "" { + t.Errorf("Expected URL.Base to be set, got empty") } + // Validate captured output. + tc.checkOutput(t, capturedColored, capturedNonEmpty) }) } } +// TestOverrideWithCLI tests the OverrideWithCLI function in a single function with multiple cases. +// We patch utils.PrintColored to capture its output in a global variable. +// In addition to our previous override cases, we add tests for empty-slice overrides +// and for when no override is applied. func TestOverrideWithCLI(t *testing.T) { - cfg := &Config{} - cfg.ApplyDefaults() + // Patch utils.PrintColored. + var captured string + patchColored := monkey.Patch(utils.PrintColored, func(a ...interface{}) { + captured += fmt.Sprint(a...) + }) + defer patchColored.Unpatch() + // Define table test cases for OverrideWithCLI. cases := []struct { desc string override Config - expectFunc func(*Config) bool - expectOutput string + preSetup func(*Config) // Optionally modify the initial config. + expectFunc func(*Config) bool // Checks that the override was applied. + expectOutput string // Expected substring in the printed output. }{ { - "Override URL.Base", - Config{URL: struct { - Base string `json:"base"` - Routes []string `json:"routes"` - IncludeBase bool `json:"includeBase"` - }{Base: "https://override.com"}}, - func(c *Config) bool { return c.URL.Base == "https://override.com" }, - "Overriding URL.Base: ", + desc: "Override URL.Base", + override: Config{ + URL: struct { + Base string `json:"base"` + Routes []string `json:"routes"` + IncludeBase bool `json:"includeBase"` + }{Base: "https://override.com"}, + }, + expectFunc: func(c *Config) bool { return c.URL.Base == "https://override.com" }, + expectOutput: "Overriding URL.Base: ", }, { - "Override non-empty slice", - Config{Storage: struct { - OutputFormats []string `json:"outputFormats"` - SavePath string `json:"savePath"` - FileName string `json:"fileName"` - }{OutputFormats: []string{"csv"}}}, - func(c *Config) bool { return reflect.DeepEqual(c.Storage.OutputFormats, []string{"csv"}) }, - "Overriding Storage.OutputFormats: ", + desc: "Override non-empty slice", + override: Config{ + Storage: struct { + OutputFormats []string `json:"outputFormats"` + SavePath string `json:"savePath"` + FileName string `json:"fileName"` + }{OutputFormats: []string{"csv"}}, + }, + preSetup: func(c *Config) { + // Pre-populate with a different slice. + c.Storage.OutputFormats = []string{"json"} + }, + expectFunc: func(c *Config) bool { + return reflect.DeepEqual(c.Storage.OutputFormats, []string{"csv"}) + }, + expectOutput: "Overriding Storage.OutputFormats: ", }, { - "Override boolean", - Config{URL: struct { - Base string `json:"base"` - Routes []string `json:"routes"` - IncludeBase bool `json:"includeBase"` - }{IncludeBase: true}}, - func(c *Config) bool { return c.URL.IncludeBase }, - "Overriding URL.IncludeBase: ", + desc: "Override boolean", + override: Config{ + URL: struct { + Base string `json:"base"` + Routes []string `json:"routes"` + IncludeBase bool `json:"includeBase"` + }{IncludeBase: true}, + }, + expectFunc: func(c *Config) bool { return c.URL.IncludeBase }, + expectOutput: "Overriding URL.IncludeBase: ", }, { - "Override multiple values", - Config{ + desc: "Override multiple values", + override: Config{ URL: struct { Base string `json:"base"` Routes []string `json:"routes"` @@ -131,19 +272,90 @@ func TestOverrideWithCLI(t *testing.T) { UserAgent string `json:"userAgent"` }{MaxDepth: 5}, }, - func(c *Config) bool { return c.URL.Base == "https://multiple.com" && c.ScrapingOptions.MaxDepth == 5 }, - "Overriding URL.Base: ", + expectFunc: func(c *Config) bool { + return c.URL.Base == "https://multiple.com" && c.ScrapingOptions.MaxDepth == 5 + }, + expectOutput: "Overriding URL.Base: ", + }, + { + desc: "Empty slice override does not trigger override", + override: Config{ + Storage: struct { + OutputFormats []string `json:"outputFormats"` + SavePath string `json:"savePath"` + FileName string `json:"fileName"` + }{OutputFormats: []string{}}, // Empty slice; should be skipped. + }, + preSetup: func(c *Config) { + // Set a non-empty default to confirm it is not overridden. + c.Storage.OutputFormats = []string{"json"} + }, + expectFunc: func(c *Config) bool { + // Expect no change. + return reflect.DeepEqual(c.Storage.OutputFormats, []string{"json"}) + }, + expectOutput: "", // No override message expected. + }, + { + desc: "No override applied when all fields are zero", + override: Config{ + URL: struct { + Base string `json:"base"` + Routes []string `json:"routes"` + IncludeBase bool `json:"includeBase"` + }{}, // all zero values + Storage: struct { + OutputFormats []string `json:"outputFormats"` + SavePath string `json:"savePath"` + FileName string `json:"fileName"` + }{}, + ParseRules: struct { + Title string `json:"title,omitempty"` + MetaDescription string `json:"metaDescription,omitempty"` + ArticleContent string `json:"articleContent,omitempty"` + Author string `json:"author,omitempty"` + DatePublished string `json:"datePublished,omitempty"` + }{}, + ScrapingOptions: struct { + MaxDepth int `json:"maxDepth"` + RateLimit float64 `json:"rateLimit"` + RetryAttempts int `json:"retryAttempts"` + UserAgent string `json:"userAgent"` + }{}, + DataFormatting: struct { + CleanWhitespace bool `json:"cleanWhitespace"` + RemoveHTML bool `json:"removeHTML"` + }{}, + }, + expectFunc: func(c *Config) bool { + // Expect no changes: the defaults remain. + return c.URL.Base != "" && len(c.Storage.OutputFormats) > 0 + }, + expectOutput: "", // No output expected. }, } - for _, c := range cases { - t.Run(c.desc, func(t *testing.T) { - output := captureOutput(func() { cfg.OverrideWithCLI(c.override) }) - if !c.expectFunc(cfg) { - t.Errorf("Expected override not applied") + // Run test cases. + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + captured = "" // Reset captured output. + // Create a fresh config with defaults applied. + testCfg := &Config{} + testCfg.ApplyDefaults() + // If any pre-setup is needed, run it. + if tc.preSetup != nil { + tc.preSetup(testCfg) + } + + // Directly call OverrideWithCLI. + testCfg.OverrideWithCLI(tc.override) + // Verify that the override was applied (or not applied) as expected. + if !tc.expectFunc(testCfg) { + t.Errorf("Expected override condition not met. Got %+v", testCfg) } - if !strings.Contains(output, c.expectOutput) { - t.Errorf("Expected output to contain '%s', got '%s'", c.expectOutput, output) + // Verify that the patched PrintColored was called with the expected substring. + if !strings.Contains(captured, tc.expectOutput) { + t.Errorf("Expected output to contain '%s', got '%s'", tc.expectOutput, captured) } }) } From c1c368f88eb288ea063761da648abee4526ecb8b Mon Sep 17 00:00:00 2001 From: Brennen Heinrich Date: Sun, 16 Feb 2025 16:19:54 -0600 Subject: [PATCH 2/5] Refactoring override config --- cmd/scrapeycli/main.go | 2 +- configs/default.json | 1 + pkg/config/config.go | 26 ++--- pkg/config/config_test.go | 218 +++++++++++++++++++++++++++++--------- 4 files changed, 183 insertions(+), 64 deletions(-) diff --git a/cmd/scrapeycli/main.go b/cmd/scrapeycli/main.go index 3cd3f9e..e84bbb6 100644 --- a/cmd/scrapeycli/main.go +++ b/cmd/scrapeycli/main.go @@ -94,7 +94,7 @@ func main() { } // Apply all CLI overrides dynamically. - cfg.OverrideWithCLI(cliOverrides) + cfg.OverrideConfig(cliOverrides) // Print confirmation of loaded config. utils.PrintColored("Scrapey CLI initialization complete.", "", color.FgGreen) diff --git a/configs/default.json b/configs/default.json index 7b23a3d..f1298dc 100644 --- a/configs/default.json +++ b/configs/default.json @@ -1,4 +1,5 @@ { + "version": "1.0", "url": { "base": "https://example.com", "routes": ["/route1", "/route2", "*"], diff --git a/pkg/config/config.go b/pkg/config/config.go index c093d7a..ec32b61 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -35,7 +35,8 @@ Usage: The configuration is loaded from a JSON file to guide the crawler and parser. */ type Config struct { - URL struct { + Version string `json:"version"` + URL struct { Base string `json:"base"` Routes []string `json:"routes"` IncludeBase bool `json:"includeBase"` @@ -183,7 +184,7 @@ Notes: - Uses **reflection** to dynamically override values while maintaining type safety. - Since every top‑level field in Config is a struct, only that branch is executed. */ -func (cfg *Config) OverrideWithCLI(overrides Config) { +func (cfg *Config) OverrideConfig(overrides Config) { cfgValue := reflect.ValueOf(cfg).Elem() overridesValue := reflect.ValueOf(overrides) @@ -196,7 +197,7 @@ func (cfg *Config) OverrideWithCLI(overrides Config) { continue } - // Since all fields in Config are structs, we only need to handle that branch. + // If the override field is a struct, iterate over its subfields and apply every value (even if zero or empty). if overrideField.Kind() == reflect.Struct { for j := 0; j < overrideField.NumField(); j++ { subField := overrideField.Type().Field(j) @@ -207,17 +208,16 @@ func (cfg *Config) OverrideWithCLI(overrides Config) { continue } - // Skip empty slices. - if overrideSubField.Kind() == reflect.Slice && overrideSubField.Len() == 0 { - continue - } - - if !overrideSubField.IsZero() { - utils.PrintColored(fmt.Sprintf("Overriding %s.%s: ", field.Name, subField.Name), - fmt.Sprint(overrideSubField.Interface()), color.FgHiMagenta) - configSubField.Set(overrideSubField) - } + // Always override the subfield value, regardless of its value. + utils.PrintColored(fmt.Sprintf("Overriding %s.%s: ", field.Name, subField.Name), + fmt.Sprint(overrideSubField.Interface()), color.FgHiMagenta) + configSubField.Set(overrideSubField) } + } else { + // For non-struct fields, override unconditionally. + utils.PrintColored(fmt.Sprintf("Overriding %s: ", field.Name), + fmt.Sprint(overrideField.Interface()), color.FgHiMagenta) + configField.Set(overrideField) } } } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index ac35ed0..1be7404 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -13,6 +13,145 @@ import ( "github.com/heinrichb/scrapey-cli/pkg/utils" ) +// TestApplyDefaults tests the ApplyDefaults function to ensure that missing fields are set to default values. +// This test function uses multiple cases to verify that defaults are correctly applied. +func TestApplyDefaults(t *testing.T) { + cases := []struct { + desc string + setup func(cfg *Config) // Optionally pre-set some fields. + validate func(t *testing.T, cfg *Config) + }{ + { + desc: "All fields missing should be set to defaults", + setup: func(cfg *Config) { + // Leave all fields at their zero values. + }, + validate: func(t *testing.T, cfg *Config) { + // Check URL defaults. + if cfg.URL.Base != "https://example.com" { + t.Errorf("Expected URL.Base to be 'https://example.com', got '%s'", cfg.URL.Base) + } + if len(cfg.URL.Routes) != 1 || cfg.URL.Routes[0] != "/" { + t.Errorf("Expected URL.Routes to be ['/'], got %v", cfg.URL.Routes) + } + // Check ScrapingOptions defaults. + if cfg.ScrapingOptions.MaxDepth != 2 { + t.Errorf("Expected ScrapingOptions.MaxDepth to be 2, got %d", cfg.ScrapingOptions.MaxDepth) + } + if cfg.ScrapingOptions.RateLimit != 1.5 { + t.Errorf("Expected ScrapingOptions.RateLimit to be 1.5, got %f", cfg.ScrapingOptions.RateLimit) + } + if cfg.ScrapingOptions.RetryAttempts != 3 { + t.Errorf("Expected ScrapingOptions.RetryAttempts to be 3, got %d", cfg.ScrapingOptions.RetryAttempts) + } + expectedUA := "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + if cfg.ScrapingOptions.UserAgent != expectedUA { + t.Errorf("Expected ScrapingOptions.UserAgent to be '%s', got '%s'", expectedUA, cfg.ScrapingOptions.UserAgent) + } + // Check Storage defaults. + if len(cfg.Storage.OutputFormats) != 1 || cfg.Storage.OutputFormats[0] != "json" { + t.Errorf("Expected Storage.OutputFormats to be ['json'], got %v", cfg.Storage.OutputFormats) + } + if cfg.Storage.SavePath != "output/" { + t.Errorf("Expected Storage.SavePath to be 'output/', got '%s'", cfg.Storage.SavePath) + } + if cfg.Storage.FileName != "scraped_data" { + t.Errorf("Expected Storage.FileName to be 'scraped_data', got '%s'", cfg.Storage.FileName) + } + }, + }, + { + desc: "Pre-set fields remain unchanged and missing fields get defaults", + setup: func(cfg *Config) { + // Pre-set some fields. + cfg.URL.Base = "https://preset.com" + cfg.Storage.SavePath = "custom_output/" + }, + validate: func(t *testing.T, cfg *Config) { + // Pre-set values should be retained. + if cfg.URL.Base != "https://preset.com" { + t.Errorf("Expected URL.Base to be 'https://preset.com', got '%s'", cfg.URL.Base) + } + if cfg.Storage.SavePath != "custom_output/" { + t.Errorf("Expected Storage.SavePath to be 'custom_output/', got '%s'", cfg.Storage.SavePath) + } + // Other fields should be set to defaults. + if len(cfg.URL.Routes) != 1 || cfg.URL.Routes[0] != "/" { + t.Errorf("Expected URL.Routes to be ['/'], got %v", cfg.URL.Routes) + } + if cfg.ScrapingOptions.MaxDepth != 2 { + t.Errorf("Expected ScrapingOptions.MaxDepth to be 2, got %d", cfg.ScrapingOptions.MaxDepth) + } + if len(cfg.Storage.OutputFormats) != 1 || cfg.Storage.OutputFormats[0] != "json" { + t.Errorf("Expected Storage.OutputFormats to be ['json'], got %v", cfg.Storage.OutputFormats) + } + if cfg.Storage.FileName != "scraped_data" { + t.Errorf("Expected Storage.FileName to be 'scraped_data', got '%s'", cfg.Storage.FileName) + } + }, + }, + { + desc: "No change if all fields are pre-set", + setup: func(cfg *Config) { + // Set all fields explicitly. + cfg.URL.Base = "https://preset.com" + cfg.URL.Routes = []string{"/preset"} + cfg.ScrapingOptions.MaxDepth = 10 + cfg.ScrapingOptions.RateLimit = 3.0 + cfg.ScrapingOptions.RetryAttempts = 5 + cfg.ScrapingOptions.UserAgent = "CustomAgent" + cfg.Storage.OutputFormats = []string{"xml"} + cfg.Storage.SavePath = "preset_output/" + cfg.Storage.FileName = "preset_data" + }, + validate: func(t *testing.T, cfg *Config) { + // Expect all pre-set fields to remain unchanged. + if cfg.URL.Base != "https://preset.com" { + t.Errorf("Expected URL.Base to be 'https://preset.com', got '%s'", cfg.URL.Base) + } + if !reflect.DeepEqual(cfg.URL.Routes, []string{"/preset"}) { + t.Errorf("Expected URL.Routes to be ['/preset'], got %v", cfg.URL.Routes) + } + if cfg.ScrapingOptions.MaxDepth != 10 { + t.Errorf("Expected ScrapingOptions.MaxDepth to be 10, got %d", cfg.ScrapingOptions.MaxDepth) + } + if cfg.ScrapingOptions.RateLimit != 3.0 { + t.Errorf("Expected ScrapingOptions.RateLimit to be 3.0, got %f", cfg.ScrapingOptions.RateLimit) + } + if cfg.ScrapingOptions.RetryAttempts != 5 { + t.Errorf("Expected ScrapingOptions.RetryAttempts to be 5, got %d", cfg.ScrapingOptions.RetryAttempts) + } + if cfg.ScrapingOptions.UserAgent != "CustomAgent" { + t.Errorf("Expected ScrapingOptions.UserAgent to be 'CustomAgent', got '%s'", cfg.ScrapingOptions.UserAgent) + } + if !reflect.DeepEqual(cfg.Storage.OutputFormats, []string{"xml"}) { + t.Errorf("Expected Storage.OutputFormats to be ['xml'], got %v", cfg.Storage.OutputFormats) + } + if cfg.Storage.SavePath != "preset_output/" { + t.Errorf("Expected Storage.SavePath to be 'preset_output/', got '%s'", cfg.Storage.SavePath) + } + if cfg.Storage.FileName != "preset_data" { + t.Errorf("Expected Storage.FileName to be 'preset_data', got '%s'", cfg.Storage.FileName) + } + }, + }, + } + + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + cfg := &Config{} + // Allow test-specific pre-setup. + if tc.setup != nil { + tc.setup(cfg) + } + // Call ApplyDefaults. + cfg.ApplyDefaults() + // Validate that defaults have been applied as expected. + tc.validate(t, cfg) + }) + } +} + // TestLoad tests the Load function in a single function with multiple cases. // We cover scenarios like missing file, unreadable file, invalid JSON, // and valid JSON with verbose mode on/off. @@ -191,19 +330,19 @@ func TestLoad(t *testing.T) { } } -// TestOverrideWithCLI tests the OverrideWithCLI function in a single function with multiple cases. +// TestOverrideConfig tests the OverrideConfig function in a single function with multiple cases. // We patch utils.PrintColored to capture its output in a global variable. -// In addition to our previous override cases, we add tests for empty-slice overrides -// and for when no override is applied. -func TestOverrideWithCLI(t *testing.T) { - // Patch utils.PrintColored. +// In addition to our previous override cases, we add tests for empty-slice overrides, +// for when no override is applied, and for non-struct fields (like the new Version field). +func TestOverrideConfig(t *testing.T) { + // Patch utils.PrintColored to capture printed messages. var captured string patchColored := monkey.Patch(utils.PrintColored, func(a ...interface{}) { captured += fmt.Sprint(a...) }) defer patchColored.Unpatch() - // Define table test cases for OverrideWithCLI. + // Define table test cases for OverrideConfig. cases := []struct { desc string override Config @@ -220,7 +359,10 @@ func TestOverrideWithCLI(t *testing.T) { IncludeBase bool `json:"includeBase"` }{Base: "https://override.com"}, }, - expectFunc: func(c *Config) bool { return c.URL.Base == "https://override.com" }, + preSetup: nil, + expectFunc: func(c *Config) bool { + return c.URL.Base == "https://override.com" + }, expectOutput: "Overriding URL.Base: ", }, { @@ -233,7 +375,6 @@ func TestOverrideWithCLI(t *testing.T) { }{OutputFormats: []string{"csv"}}, }, preSetup: func(c *Config) { - // Pre-populate with a different slice. c.Storage.OutputFormats = []string{"json"} }, expectFunc: func(c *Config) bool { @@ -250,6 +391,7 @@ func TestOverrideWithCLI(t *testing.T) { IncludeBase bool `json:"includeBase"` }{IncludeBase: true}, }, + preSetup: nil, expectFunc: func(c *Config) bool { return c.URL.IncludeBase }, expectOutput: "Overriding URL.IncludeBase: ", }, @@ -272,66 +414,43 @@ func TestOverrideWithCLI(t *testing.T) { UserAgent string `json:"userAgent"` }{MaxDepth: 5}, }, + preSetup: nil, expectFunc: func(c *Config) bool { return c.URL.Base == "https://multiple.com" && c.ScrapingOptions.MaxDepth == 5 }, expectOutput: "Overriding URL.Base: ", }, { - desc: "Empty slice override does not trigger override", + desc: "Override empty slice (applies override)", override: Config{ Storage: struct { OutputFormats []string `json:"outputFormats"` SavePath string `json:"savePath"` FileName string `json:"fileName"` - }{OutputFormats: []string{}}, // Empty slice; should be skipped. + }{OutputFormats: []string{}}, // Even empty slice should override. }, preSetup: func(c *Config) { - // Set a non-empty default to confirm it is not overridden. c.Storage.OutputFormats = []string{"json"} }, expectFunc: func(c *Config) bool { - // Expect no change. - return reflect.DeepEqual(c.Storage.OutputFormats, []string{"json"}) + // Expect the override to apply, resulting in an empty slice. + return reflect.DeepEqual(c.Storage.OutputFormats, []string{}) }, - expectOutput: "", // No override message expected. + expectOutput: "Overriding Storage.OutputFormats: ", }, { - desc: "No override applied when all fields are zero", + desc: "Override non-struct field (Version)", override: Config{ - URL: struct { - Base string `json:"base"` - Routes []string `json:"routes"` - IncludeBase bool `json:"includeBase"` - }{}, // all zero values - Storage: struct { - OutputFormats []string `json:"outputFormats"` - SavePath string `json:"savePath"` - FileName string `json:"fileName"` - }{}, - ParseRules: struct { - Title string `json:"title,omitempty"` - MetaDescription string `json:"metaDescription,omitempty"` - ArticleContent string `json:"articleContent,omitempty"` - Author string `json:"author,omitempty"` - DatePublished string `json:"datePublished,omitempty"` - }{}, - ScrapingOptions: struct { - MaxDepth int `json:"maxDepth"` - RateLimit float64 `json:"rateLimit"` - RetryAttempts int `json:"retryAttempts"` - UserAgent string `json:"userAgent"` - }{}, - DataFormatting: struct { - CleanWhitespace bool `json:"cleanWhitespace"` - RemoveHTML bool `json:"removeHTML"` - }{}, + Version: "v2.0", + }, + preSetup: func(c *Config) { + c.Version = "v1.0" }, expectFunc: func(c *Config) bool { - // Expect no changes: the defaults remain. - return c.URL.Base != "" && len(c.Storage.OutputFormats) > 0 + // Expect the version to be overridden to "v2.0". + return c.Version == "v2.0" }, - expectOutput: "", // No output expected. + expectOutput: "Overriding Version: ", }, } @@ -342,18 +461,17 @@ func TestOverrideWithCLI(t *testing.T) { // Create a fresh config with defaults applied. testCfg := &Config{} testCfg.ApplyDefaults() - // If any pre-setup is needed, run it. if tc.preSetup != nil { tc.preSetup(testCfg) } - // Directly call OverrideWithCLI. - testCfg.OverrideWithCLI(tc.override) - // Verify that the override was applied (or not applied) as expected. + // Apply the override. + testCfg.OverrideConfig(tc.override) + // Verify that the override was applied. if !tc.expectFunc(testCfg) { t.Errorf("Expected override condition not met. Got %+v", testCfg) } - // Verify that the patched PrintColored was called with the expected substring. + // Verify that PrintColored was called with the expected message. if !strings.Contains(captured, tc.expectOutput) { t.Errorf("Expected output to contain '%s', got '%s'", tc.expectOutput, captured) } From a5388daa901fc4966790c613dcd86fb918c8b901 Mon Sep 17 00:00:00 2001 From: Brennen Heinrich Date: Sun, 16 Feb 2025 16:28:18 -0600 Subject: [PATCH 3/5] 100% coverage, bug with empty values --- pkg/config/config.go | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index ec32b61..fe4a583 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -154,14 +154,16 @@ func Load(filePath string) (*Config, error) { } /* -OverrideWithCLI dynamically overrides config values based on the provided `overrides` struct. +OverrideConfig applies all values from the provided `overrides` struct to the existing configuration. Parameters: - - overrides: A partial Config struct containing only the fields to override. + - overrides: A partial Config struct containing the fields to override. All values provided—including zero values, + empty slices, and non-struct fields (e.g., Version)—are applied exactly as given. Usage: - cfg.OverrideWithCLI(Config{ + cfg.OverrideConfig(Config{ + Version: "v2.0", URL: struct { Base string `json:"base"` Routes []string `json:"routes"` @@ -180,35 +182,27 @@ Usage: }) Notes: - - Only **non-zero** values in `overrides` are applied. + - All values provided in `overrides` are applied, regardless of whether they are non-zero. - Uses **reflection** to dynamically override values while maintaining type safety. - - Since every top‑level field in Config is a struct, only that branch is executed. + - Both struct and non-struct fields are overridden. */ func (cfg *Config) OverrideConfig(overrides Config) { cfgValue := reflect.ValueOf(cfg).Elem() overridesValue := reflect.ValueOf(overrides) + // Since every top-level field in Config is exported and settable, + // we do not check for validity or settable status. for i := 0; i < overridesValue.NumField(); i++ { field := overridesValue.Type().Field(i) overrideField := overridesValue.Field(i) configField := cfgValue.FieldByName(field.Name) - if !configField.IsValid() || !configField.CanSet() { - continue - } - - // If the override field is a struct, iterate over its subfields and apply every value (even if zero or empty). if overrideField.Kind() == reflect.Struct { + // For struct fields, override every subfield. for j := 0; j < overrideField.NumField(); j++ { subField := overrideField.Type().Field(j) overrideSubField := overrideField.Field(j) configSubField := configField.FieldByName(subField.Name) - - if !configSubField.IsValid() || !configSubField.CanSet() { - continue - } - - // Always override the subfield value, regardless of its value. utils.PrintColored(fmt.Sprintf("Overriding %s.%s: ", field.Name, subField.Name), fmt.Sprint(overrideSubField.Interface()), color.FgHiMagenta) configSubField.Set(overrideSubField) From 364c20fb86907914bb50dcface40af3f97cb5b09 Mon Sep 17 00:00:00 2001 From: Brennen Heinrich Date: Sun, 16 Feb 2025 16:46:46 -0600 Subject: [PATCH 4/5] 100% coverage, no bugs --- cmd/scrapeycli/main.go | 41 ++++- pkg/config/config.go | 195 +++++++++++++++----- pkg/config/config_test.go | 365 +++++++++++++++++++------------------- 3 files changed, 365 insertions(+), 236 deletions(-) diff --git a/cmd/scrapeycli/main.go b/cmd/scrapeycli/main.go index e84bbb6..e456373 100644 --- a/cmd/scrapeycli/main.go +++ b/cmd/scrapeycli/main.go @@ -46,17 +46,22 @@ func init() { flag.BoolVar(&verbose, "v", false, "Enable verbose output (shorthand)") } +// Helper functions to create pointers for literal values. +func ptrString(s string) *string { return &s } +func ptrInt(i int) *int { return &i } +func ptrFloat64(f float64) *float64 { return &f } + /* main is the entry point of Scrapey CLI. It parses command-line flags, prints a welcome message, loads the configuration, -handles overrides, and prints confirmation messages for each step. +applies CLI overrides using a ConfigOverride object, and prints confirmation messages. */ func main() { // Parse CLI flags. flag.Parse() - // Store the verbose flag in global state + // Store the verbose flag in global state. config.Verbose = verbose // Print a welcome message in cyan using our PrintColored utility. @@ -75,22 +80,44 @@ func main() { os.Exit(1) } - // Construct a partial Config struct for CLI overrides. - cliOverrides := config.Config{} + // Construct a partial ConfigOverride struct for CLI overrides. + cliOverrides := config.ConfigOverride{} // Apply URL override if provided. if url != "" { - cliOverrides.URL.Base = url + cliOverrides.URL = &struct { + Base *string `json:"base"` + Routes *[]string `json:"routes"` + IncludeBase *bool `json:"includeBase"` + }{ + Base: ptrString(url), + } } // Apply maxDepth override if provided. if maxDepth > 0 { - cliOverrides.ScrapingOptions.MaxDepth = maxDepth + if cliOverrides.ScrapingOptions == nil { + cliOverrides.ScrapingOptions = &struct { + MaxDepth *int `json:"maxDepth"` + RateLimit *float64 `json:"rateLimit"` + RetryAttempts *int `json:"retryAttempts"` + UserAgent *string `json:"userAgent"` + }{} + } + cliOverrides.ScrapingOptions.MaxDepth = ptrInt(maxDepth) } // Apply rateLimit override if provided. if rateLimit > 0 { - cliOverrides.ScrapingOptions.RateLimit = rateLimit + if cliOverrides.ScrapingOptions == nil { + cliOverrides.ScrapingOptions = &struct { + MaxDepth *int `json:"maxDepth"` + RateLimit *float64 `json:"rateLimit"` + RetryAttempts *int `json:"retryAttempts"` + UserAgent *string `json:"userAgent"` + }{} + } + cliOverrides.ScrapingOptions.RateLimit = ptrFloat64(rateLimit) } // Apply all CLI overrides dynamically. diff --git a/pkg/config/config.go b/pkg/config/config.go index fe4a583..6ee0820 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -1,12 +1,9 @@ -// File: pkg/config/config.go - package config import ( "encoding/json" "fmt" "os" - "reflect" "github.com/fatih/color" "github.com/heinrichb/scrapey-cli/pkg/utils" @@ -65,6 +62,42 @@ type Config struct { } `json:"dataFormatting"` } +/* +ConfigOverride represents a partial configuration used for overriding values. +All fields are pointers, so that nil indicates "no override" while a non-nil value, +even if zero, is used to override the corresponding Config field. +*/ +type ConfigOverride struct { + Version *string `json:"version"` + URL *struct { + Base *string `json:"base"` + Routes *[]string `json:"routes"` + IncludeBase *bool `json:"includeBase"` + } `json:"url"` + ParseRules *struct { + Title *string `json:"title,omitempty"` + MetaDescription *string `json:"metaDescription,omitempty"` + ArticleContent *string `json:"articleContent,omitempty"` + Author *string `json:"author,omitempty"` + DatePublished *string `json:"datePublished,omitempty"` + } `json:"parseRules"` + Storage *struct { + OutputFormats *[]string `json:"outputFormats"` + SavePath *string `json:"savePath"` + FileName *string `json:"fileName"` + } `json:"storage"` + ScrapingOptions *struct { + MaxDepth *int `json:"maxDepth"` + RateLimit *float64 `json:"rateLimit"` + RetryAttempts *int `json:"retryAttempts"` + UserAgent *string `json:"userAgent"` + } `json:"scrapingOptions"` + DataFormatting *struct { + CleanWhitespace *bool `json:"cleanWhitespace"` + RemoveHTML *bool `json:"removeHTML"` + } `json:"dataFormatting"` +} + /* ApplyDefaults populates missing fields in the Config struct with default values. @@ -154,64 +187,130 @@ func Load(filePath string) (*Config, error) { } /* -OverrideConfig applies all values from the provided `overrides` struct to the existing configuration. +OverrideConfig applies values from the provided `overrides` object to the existing configuration. +Only fields with non-nil pointers in the overrides object are applied; all other fields remain unchanged. Parameters: - - overrides: A partial Config struct containing the fields to override. All values provided—including zero values, - empty slices, and non-struct fields (e.g., Version)—are applied exactly as given. + - overrides: A ConfigOverride struct containing only the fields to override. + A nil pointer indicates that no override should occur for that field. Usage: - cfg.OverrideConfig(Config{ - Version: "v2.0", - URL: struct { - Base string `json:"base"` - Routes []string `json:"routes"` - IncludeBase bool `json:"includeBase"` + cfg.OverrideConfig(ConfigOverride{ + URL: &struct { + Base *string `json:"base"` + Routes *[]string `json:"routes"` + IncludeBase *bool `json:"includeBase"` }{ - Base: "https://example.org", + Base: ptrString("https://example.org"), }, - ScrapingOptions: struct { - MaxDepth int `json:"maxDepth"` - RateLimit float64 `json:"rateLimit"` - RetryAttempts int `json:"retryAttempts"` - UserAgent string `json:"userAgent"` + ScrapingOptions: &struct { + MaxDepth *int `json:"maxDepth"` + RateLimit *float64 `json:"rateLimit"` + RetryAttempts *int `json:"retryAttempts"` + UserAgent *string `json:"userAgent"` }{ - MaxDepth: 5, + MaxDepth: ptrInt(5), }, }) Notes: - - All values provided in `overrides` are applied, regardless of whether they are non-zero. - - Uses **reflection** to dynamically override values while maintaining type safety. - - Both struct and non-struct fields are overridden. + - Only fields with non-nil pointers in `overrides` are applied. + - This allows partial configuration overrides without unintentionally overwriting existing values. + - Both struct and non-struct fields are overridden if provided. */ -func (cfg *Config) OverrideConfig(overrides Config) { - cfgValue := reflect.ValueOf(cfg).Elem() - overridesValue := reflect.ValueOf(overrides) - - // Since every top-level field in Config is exported and settable, - // we do not check for validity or settable status. - for i := 0; i < overridesValue.NumField(); i++ { - field := overridesValue.Type().Field(i) - overrideField := overridesValue.Field(i) - configField := cfgValue.FieldByName(field.Name) - - if overrideField.Kind() == reflect.Struct { - // For struct fields, override every subfield. - for j := 0; j < overrideField.NumField(); j++ { - subField := overrideField.Type().Field(j) - overrideSubField := overrideField.Field(j) - configSubField := configField.FieldByName(subField.Name) - utils.PrintColored(fmt.Sprintf("Overriding %s.%s: ", field.Name, subField.Name), - fmt.Sprint(overrideSubField.Interface()), color.FgHiMagenta) - configSubField.Set(overrideSubField) - } - } else { - // For non-struct fields, override unconditionally. - utils.PrintColored(fmt.Sprintf("Overriding %s: ", field.Name), - fmt.Sprint(overrideField.Interface()), color.FgHiMagenta) - configField.Set(overrideField) +func (cfg *Config) OverrideConfig(overrides ConfigOverride) { + // Override non-struct field: Version. + if overrides.Version != nil { + utils.PrintColored("Overriding Version: ", *overrides.Version, color.FgHiMagenta) + cfg.Version = *overrides.Version + } + + // Override URL fields. + if overrides.URL != nil { + if overrides.URL.Base != nil { + utils.PrintColored("Overriding URL.Base: ", *overrides.URL.Base, color.FgHiMagenta) + cfg.URL.Base = *overrides.URL.Base + } + if overrides.URL.Routes != nil { + utils.PrintColored("Overriding URL.Routes: ", fmt.Sprint(*overrides.URL.Routes), color.FgHiMagenta) + cfg.URL.Routes = *overrides.URL.Routes + } + if overrides.URL.IncludeBase != nil { + utils.PrintColored("Overriding URL.IncludeBase: ", fmt.Sprint(*overrides.URL.IncludeBase), color.FgHiMagenta) + cfg.URL.IncludeBase = *overrides.URL.IncludeBase + } + } + + // Override ParseRules fields. + if overrides.ParseRules != nil { + if overrides.ParseRules.Title != nil { + utils.PrintColored("Overriding ParseRules.Title: ", *overrides.ParseRules.Title, color.FgHiMagenta) + cfg.ParseRules.Title = *overrides.ParseRules.Title + } + if overrides.ParseRules.MetaDescription != nil { + utils.PrintColored("Overriding ParseRules.MetaDescription: ", *overrides.ParseRules.MetaDescription, color.FgHiMagenta) + cfg.ParseRules.MetaDescription = *overrides.ParseRules.MetaDescription + } + if overrides.ParseRules.ArticleContent != nil { + utils.PrintColored("Overriding ParseRules.ArticleContent: ", *overrides.ParseRules.ArticleContent, color.FgHiMagenta) + cfg.ParseRules.ArticleContent = *overrides.ParseRules.ArticleContent + } + if overrides.ParseRules.Author != nil { + utils.PrintColored("Overriding ParseRules.Author: ", *overrides.ParseRules.Author, color.FgHiMagenta) + cfg.ParseRules.Author = *overrides.ParseRules.Author + } + if overrides.ParseRules.DatePublished != nil { + utils.PrintColored("Overriding ParseRules.DatePublished: ", *overrides.ParseRules.DatePublished, color.FgHiMagenta) + cfg.ParseRules.DatePublished = *overrides.ParseRules.DatePublished + } + } + + // Override Storage fields. + if overrides.Storage != nil { + if overrides.Storage.OutputFormats != nil { + utils.PrintColored("Overriding Storage.OutputFormats: ", fmt.Sprint(*overrides.Storage.OutputFormats), color.FgHiMagenta) + cfg.Storage.OutputFormats = *overrides.Storage.OutputFormats + } + if overrides.Storage.SavePath != nil { + utils.PrintColored("Overriding Storage.SavePath: ", *overrides.Storage.SavePath, color.FgHiMagenta) + cfg.Storage.SavePath = *overrides.Storage.SavePath + } + if overrides.Storage.FileName != nil { + utils.PrintColored("Overriding Storage.FileName: ", *overrides.Storage.FileName, color.FgHiMagenta) + cfg.Storage.FileName = *overrides.Storage.FileName + } + } + + // Override ScrapingOptions fields. + if overrides.ScrapingOptions != nil { + if overrides.ScrapingOptions.MaxDepth != nil { + utils.PrintColored("Overriding ScrapingOptions.MaxDepth: ", fmt.Sprint(*overrides.ScrapingOptions.MaxDepth), color.FgHiMagenta) + cfg.ScrapingOptions.MaxDepth = *overrides.ScrapingOptions.MaxDepth + } + if overrides.ScrapingOptions.RateLimit != nil { + utils.PrintColored("Overriding ScrapingOptions.RateLimit: ", fmt.Sprint(*overrides.ScrapingOptions.RateLimit), color.FgHiMagenta) + cfg.ScrapingOptions.RateLimit = *overrides.ScrapingOptions.RateLimit + } + if overrides.ScrapingOptions.RetryAttempts != nil { + utils.PrintColored("Overriding ScrapingOptions.RetryAttempts: ", fmt.Sprint(*overrides.ScrapingOptions.RetryAttempts), color.FgHiMagenta) + cfg.ScrapingOptions.RetryAttempts = *overrides.ScrapingOptions.RetryAttempts + } + if overrides.ScrapingOptions.UserAgent != nil { + utils.PrintColored("Overriding ScrapingOptions.UserAgent: ", *overrides.ScrapingOptions.UserAgent, color.FgHiMagenta) + cfg.ScrapingOptions.UserAgent = *overrides.ScrapingOptions.UserAgent + } + } + + // Override DataFormatting fields. + if overrides.DataFormatting != nil { + if overrides.DataFormatting.CleanWhitespace != nil { + utils.PrintColored("Overriding DataFormatting.CleanWhitespace: ", fmt.Sprint(*overrides.DataFormatting.CleanWhitespace), color.FgHiMagenta) + cfg.DataFormatting.CleanWhitespace = *overrides.DataFormatting.CleanWhitespace + } + if overrides.DataFormatting.RemoveHTML != nil { + utils.PrintColored("Overriding DataFormatting.RemoveHTML: ", fmt.Sprint(*overrides.DataFormatting.RemoveHTML), color.FgHiMagenta) + cfg.DataFormatting.RemoveHTML = *overrides.DataFormatting.RemoveHTML } } } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 1be7404..457a4af 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -13,28 +13,29 @@ import ( "github.com/heinrichb/scrapey-cli/pkg/utils" ) +// Helper functions to easily create pointer values. +func ptrString(s string) *string { return &s } +func ptrInt(i int) *int { return &i } +func ptrFloat64(f float64) *float64 { return &f } +func ptrBool(b bool) *bool { return &b } + // TestApplyDefaults tests the ApplyDefaults function to ensure that missing fields are set to default values. -// This test function uses multiple cases to verify that defaults are correctly applied. func TestApplyDefaults(t *testing.T) { cases := []struct { desc string - setup func(cfg *Config) // Optionally pre-set some fields. + setup func(cfg *Config) validate func(t *testing.T, cfg *Config) }{ { - desc: "All fields missing should be set to defaults", - setup: func(cfg *Config) { - // Leave all fields at their zero values. - }, + desc: "All fields missing should be set to defaults", + setup: func(cfg *Config) {}, validate: func(t *testing.T, cfg *Config) { - // Check URL defaults. if cfg.URL.Base != "https://example.com" { t.Errorf("Expected URL.Base to be 'https://example.com', got '%s'", cfg.URL.Base) } if len(cfg.URL.Routes) != 1 || cfg.URL.Routes[0] != "/" { t.Errorf("Expected URL.Routes to be ['/'], got %v", cfg.URL.Routes) } - // Check ScrapingOptions defaults. if cfg.ScrapingOptions.MaxDepth != 2 { t.Errorf("Expected ScrapingOptions.MaxDepth to be 2, got %d", cfg.ScrapingOptions.MaxDepth) } @@ -48,7 +49,6 @@ func TestApplyDefaults(t *testing.T) { if cfg.ScrapingOptions.UserAgent != expectedUA { t.Errorf("Expected ScrapingOptions.UserAgent to be '%s', got '%s'", expectedUA, cfg.ScrapingOptions.UserAgent) } - // Check Storage defaults. if len(cfg.Storage.OutputFormats) != 1 || cfg.Storage.OutputFormats[0] != "json" { t.Errorf("Expected Storage.OutputFormats to be ['json'], got %v", cfg.Storage.OutputFormats) } @@ -63,19 +63,16 @@ func TestApplyDefaults(t *testing.T) { { desc: "Pre-set fields remain unchanged and missing fields get defaults", setup: func(cfg *Config) { - // Pre-set some fields. cfg.URL.Base = "https://preset.com" cfg.Storage.SavePath = "custom_output/" }, validate: func(t *testing.T, cfg *Config) { - // Pre-set values should be retained. if cfg.URL.Base != "https://preset.com" { t.Errorf("Expected URL.Base to be 'https://preset.com', got '%s'", cfg.URL.Base) } if cfg.Storage.SavePath != "custom_output/" { t.Errorf("Expected Storage.SavePath to be 'custom_output/', got '%s'", cfg.Storage.SavePath) } - // Other fields should be set to defaults. if len(cfg.URL.Routes) != 1 || cfg.URL.Routes[0] != "/" { t.Errorf("Expected URL.Routes to be ['/'], got %v", cfg.URL.Routes) } @@ -93,7 +90,6 @@ func TestApplyDefaults(t *testing.T) { { desc: "No change if all fields are pre-set", setup: func(cfg *Config) { - // Set all fields explicitly. cfg.URL.Base = "https://preset.com" cfg.URL.Routes = []string{"/preset"} cfg.ScrapingOptions.MaxDepth = 10 @@ -105,7 +101,6 @@ func TestApplyDefaults(t *testing.T) { cfg.Storage.FileName = "preset_data" }, validate: func(t *testing.T, cfg *Config) { - // Expect all pre-set fields to remain unchanged. if cfg.URL.Base != "https://preset.com" { t.Errorf("Expected URL.Base to be 'https://preset.com', got '%s'", cfg.URL.Base) } @@ -140,23 +135,17 @@ func TestApplyDefaults(t *testing.T) { for _, tc := range cases { t.Run(tc.desc, func(t *testing.T) { cfg := &Config{} - // Allow test-specific pre-setup. if tc.setup != nil { tc.setup(cfg) } - // Call ApplyDefaults. cfg.ApplyDefaults() - // Validate that defaults have been applied as expected. tc.validate(t, cfg) }) } } -// TestLoad tests the Load function in a single function with multiple cases. -// We cover scenarios like missing file, unreadable file, invalid JSON, -// and valid JSON with verbose mode on/off. +// TestLoad tests the Load function with various file conditions. func TestLoad(t *testing.T) { - // Patch utils.PrintColored and utils.PrintNonEmptyFields. var capturedColored string patchColored := monkey.Patch(utils.PrintColored, func(a ...interface{}) { capturedColored += fmt.Sprint(a...) @@ -169,21 +158,19 @@ func TestLoad(t *testing.T) { }) defer patchNonEmpty.Unpatch() - // Define table test cases for Load. cases := []struct { desc string - fileSetup func(fileName string) // Setup the file (write contents, change permissions) - verbose bool // Set global Verbose before calling Load. - expectErr bool // Expect Load() to return an error. + fileSetup func(fileName string) + verbose bool + expectErr bool checkOutput func(t *testing.T, colored, nonEmpty string) }{ { desc: "Missing config file", - fileSetup: nil, // Do not create the file so that it is missing. + fileSetup: nil, verbose: false, expectErr: true, checkOutput: func(t *testing.T, colored, nonEmpty string) { - // For a missing file, no printing should occur. if colored != "" { t.Errorf("Expected no colored output for missing file, got: %s", colored) } @@ -192,16 +179,13 @@ func TestLoad(t *testing.T) { { desc: "Unreadable config file", fileSetup: func(name string) { - // Create a file with valid JSON. if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"}}`), 0644); err != nil { t.Fatalf("Failed to write file: %v", err) } - // We'll patch os.ReadFile below to simulate a read error. }, verbose: false, expectErr: true, checkOutput: func(t *testing.T, colored, nonEmpty string) { - // Expect that PrintColored is called. if !strings.Contains(colored, "Loaded config from: ") { t.Errorf("Expected colored output, got: %s", colored) } @@ -210,7 +194,6 @@ func TestLoad(t *testing.T) { { desc: "Invalid JSON format", fileSetup: func(name string) { - // Write invalid JSON. if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"`), 0644); err != nil { t.Fatalf("Failed to write file: %v", err) } @@ -218,7 +201,6 @@ func TestLoad(t *testing.T) { verbose: false, expectErr: true, checkOutput: func(t *testing.T, colored, nonEmpty string) { - // Even with invalid JSON, colored output should be produced. if !strings.Contains(colored, "Loaded config from: ") { t.Errorf("Expected colored output, got: %s", colored) } @@ -227,7 +209,6 @@ func TestLoad(t *testing.T) { { desc: "Valid JSON without verbose mode", fileSetup: func(name string) { - // Write valid minimal JSON. if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"}}`), 0644); err != nil { t.Fatalf("Failed to write file: %v", err) } @@ -235,7 +216,6 @@ func TestLoad(t *testing.T) { verbose: false, expectErr: false, checkOutput: func(t *testing.T, colored, nonEmpty string) { - // When verbose is false, only colored output is expected. if !strings.Contains(colored, "Loaded config from: ") { t.Errorf("Expected colored output, got: %s", colored) } @@ -247,7 +227,6 @@ func TestLoad(t *testing.T) { { desc: "Valid JSON with verbose mode", fileSetup: func(name string) { - // Write valid minimal JSON. if err := os.WriteFile(name, []byte(`{"url": {"base": "http://example.org"}}`), 0644); err != nil { t.Fatalf("Failed to write file: %v", err) } @@ -255,7 +234,6 @@ func TestLoad(t *testing.T) { verbose: true, expectErr: false, checkOutput: func(t *testing.T, colored, nonEmpty string) { - // With verbose mode on, both colored and non-empty outputs should be present. if !strings.Contains(colored, "Loaded config from: ") { t.Errorf("Expected colored output, got: %s", colored) } @@ -266,22 +244,16 @@ func TestLoad(t *testing.T) { }, } - // Run test cases. for _, tc := range cases { t.Run(tc.desc, func(t *testing.T) { - // Reset captured outputs. capturedColored = "" - // Reset capturedNonEmpty inside the patch by re-patching. patchNonEmpty.Unpatch() patchNonEmpty = monkey.Patch(utils.PrintNonEmptyFields, func(prefix string, cfg interface{}) { capturedNonEmpty += "nonEmptyFieldsCalled" }) defer patchNonEmpty.Unpatch() - - // Set the global Verbose flag as needed. Verbose = tc.verbose - // Prepare file. If no setup, use a name that does not exist. var fileName string if tc.fileSetup != nil { tmpFile, err := os.CreateTemp("", "config_*.json") @@ -289,16 +261,14 @@ func TestLoad(t *testing.T) { t.Fatalf("Failed to create temp file: %v", err) } fileName = tmpFile.Name() - tmpFile.Close() // Close so that file can be manipulated. + tmpFile.Close() tc.fileSetup(fileName) - // For cleanup and permission safety. os.Chmod(fileName, 0644) defer os.Remove(fileName) } else { fileName = "nonexistent_config.json" } - // For the unreadable file test, patch os.ReadFile to simulate a read error. if tc.desc == "Unreadable config file" { patchReadFile := monkey.Patch(os.ReadFile, func(name string) ([]byte, error) { return nil, fmt.Errorf("simulated read error") @@ -311,7 +281,6 @@ func TestLoad(t *testing.T) { if err == nil { t.Errorf("Expected error but got nil") } - // Skip further checks if error was expected. return } else { if err != nil { @@ -319,162 +288,196 @@ func TestLoad(t *testing.T) { return } } - - // Ensure ApplyDefaults populated required fields (e.g. URL.Base). if cfg.URL.Base == "" { t.Errorf("Expected URL.Base to be set, got empty") } - // Validate captured output. tc.checkOutput(t, capturedColored, capturedNonEmpty) }) } } -// TestOverrideConfig tests the OverrideConfig function in a single function with multiple cases. -// We patch utils.PrintColored to capture its output in a global variable. -// In addition to our previous override cases, we add tests for empty-slice overrides, -// for when no override is applied, and for non-struct fields (like the new Version field). -func TestOverrideConfig(t *testing.T) { - // Patch utils.PrintColored to capture printed messages. +// TestOverrideConfigFull tests the new OverrideConfig function using the ConfigOverride type. +// It creates a base config, applies a full override and verifies that all fields have been updated accordingly. +func TestOverrideConfigFull(t *testing.T) { var captured string patchColored := monkey.Patch(utils.PrintColored, func(a ...interface{}) { captured += fmt.Sprint(a...) }) defer patchColored.Unpatch() - // Define table test cases for OverrideConfig. - cases := []struct { - desc string - override Config - preSetup func(*Config) // Optionally modify the initial config. - expectFunc func(*Config) bool // Checks that the override was applied. - expectOutput string // Expected substring in the printed output. - }{ - { - desc: "Override URL.Base", - override: Config{ - URL: struct { - Base string `json:"base"` - Routes []string `json:"routes"` - IncludeBase bool `json:"includeBase"` - }{Base: "https://override.com"}, - }, - preSetup: nil, - expectFunc: func(c *Config) bool { - return c.URL.Base == "https://override.com" - }, - expectOutput: "Overriding URL.Base: ", + // Create a base config with default values. + base := &Config{} + base.ApplyDefaults() + + // Create an override with non-nil pointers for every field. + overrides := ConfigOverride{ + Version: ptrString("v2.0"), + URL: &struct { + Base *string `json:"base"` + Routes *[]string `json:"routes"` + IncludeBase *bool `json:"includeBase"` + }{ + Base: ptrString("https://override.com"), + Routes: &[]string{"/new", "/extra"}, + IncludeBase: ptrBool(true), }, - { - desc: "Override non-empty slice", - override: Config{ - Storage: struct { - OutputFormats []string `json:"outputFormats"` - SavePath string `json:"savePath"` - FileName string `json:"fileName"` - }{OutputFormats: []string{"csv"}}, - }, - preSetup: func(c *Config) { - c.Storage.OutputFormats = []string{"json"} - }, - expectFunc: func(c *Config) bool { - return reflect.DeepEqual(c.Storage.OutputFormats, []string{"csv"}) - }, - expectOutput: "Overriding Storage.OutputFormats: ", + ParseRules: &struct { + Title *string `json:"title,omitempty"` + MetaDescription *string `json:"metaDescription,omitempty"` + ArticleContent *string `json:"articleContent,omitempty"` + Author *string `json:"author,omitempty"` + DatePublished *string `json:"datePublished,omitempty"` + }{ + Title: ptrString("New Title"), + MetaDescription: ptrString("New Meta"), + ArticleContent: ptrString("New Content"), + Author: ptrString("New Author"), + DatePublished: ptrString("2022-01-01"), }, - { - desc: "Override boolean", - override: Config{ - URL: struct { - Base string `json:"base"` - Routes []string `json:"routes"` - IncludeBase bool `json:"includeBase"` - }{IncludeBase: true}, - }, - preSetup: nil, - expectFunc: func(c *Config) bool { return c.URL.IncludeBase }, - expectOutput: "Overriding URL.IncludeBase: ", + Storage: &struct { + OutputFormats *[]string `json:"outputFormats"` + SavePath *string `json:"savePath"` + FileName *string `json:"fileName"` + }{ + OutputFormats: &[]string{"csv"}, + SavePath: ptrString("new_output/"), + FileName: ptrString("new_data"), }, - { - desc: "Override multiple values", - override: Config{ - URL: struct { - Base string `json:"base"` - Routes []string `json:"routes"` - IncludeBase bool `json:"includeBase"` - }{ - Base: "https://multiple.com", - Routes: []string{"/new"}, - IncludeBase: true, - }, - ScrapingOptions: struct { - MaxDepth int `json:"maxDepth"` - RateLimit float64 `json:"rateLimit"` - RetryAttempts int `json:"retryAttempts"` - UserAgent string `json:"userAgent"` - }{MaxDepth: 5}, - }, - preSetup: nil, - expectFunc: func(c *Config) bool { - return c.URL.Base == "https://multiple.com" && c.ScrapingOptions.MaxDepth == 5 - }, - expectOutput: "Overriding URL.Base: ", + ScrapingOptions: &struct { + MaxDepth *int `json:"maxDepth"` + RateLimit *float64 `json:"rateLimit"` + RetryAttempts *int `json:"retryAttempts"` + UserAgent *string `json:"userAgent"` + }{ + MaxDepth: ptrInt(5), + RateLimit: ptrFloat64(2.0), + RetryAttempts: ptrInt(4), + UserAgent: ptrString("OverrideAgent"), }, - { - desc: "Override empty slice (applies override)", - override: Config{ - Storage: struct { - OutputFormats []string `json:"outputFormats"` - SavePath string `json:"savePath"` - FileName string `json:"fileName"` - }{OutputFormats: []string{}}, // Even empty slice should override. - }, - preSetup: func(c *Config) { - c.Storage.OutputFormats = []string{"json"} - }, - expectFunc: func(c *Config) bool { - // Expect the override to apply, resulting in an empty slice. - return reflect.DeepEqual(c.Storage.OutputFormats, []string{}) - }, - expectOutput: "Overriding Storage.OutputFormats: ", - }, - { - desc: "Override non-struct field (Version)", - override: Config{ - Version: "v2.0", - }, - preSetup: func(c *Config) { - c.Version = "v1.0" - }, - expectFunc: func(c *Config) bool { - // Expect the version to be overridden to "v2.0". - return c.Version == "v2.0" - }, - expectOutput: "Overriding Version: ", + DataFormatting: &struct { + CleanWhitespace *bool `json:"cleanWhitespace"` + RemoveHTML *bool `json:"removeHTML"` + }{ + CleanWhitespace: ptrBool(true), + RemoveHTML: ptrBool(true), }, } - // Run test cases. - for _, tc := range cases { - t.Run(tc.desc, func(t *testing.T) { - captured = "" // Reset captured output. - // Create a fresh config with defaults applied. - testCfg := &Config{} - testCfg.ApplyDefaults() - if tc.preSetup != nil { - tc.preSetup(testCfg) - } + // Apply the override. + base.OverrideConfig(overrides) - // Apply the override. - testCfg.OverrideConfig(tc.override) - // Verify that the override was applied. - if !tc.expectFunc(testCfg) { - t.Errorf("Expected override condition not met. Got %+v", testCfg) - } - // Verify that PrintColored was called with the expected message. - if !strings.Contains(captured, tc.expectOutput) { - t.Errorf("Expected output to contain '%s', got '%s'", tc.expectOutput, captured) - } - }) + // Verify that each field has been updated. + if base.Version != "v2.0" { + t.Errorf("Expected Version to be 'v2.0', got '%s'", base.Version) + } + if base.URL.Base != "https://override.com" { + t.Errorf("Expected URL.Base to be 'https://override.com', got '%s'", base.URL.Base) + } + if !reflect.DeepEqual(base.URL.Routes, []string{"/new", "/extra"}) { + t.Errorf("Expected URL.Routes to be ['/new', '/extra'], got %v", base.URL.Routes) + } + if !base.URL.IncludeBase { + t.Errorf("Expected URL.IncludeBase to be true") + } + if base.ParseRules.Title != "New Title" { + t.Errorf("Expected ParseRules.Title to be 'New Title', got '%s'", base.ParseRules.Title) + } + if base.ParseRules.MetaDescription != "New Meta" { + t.Errorf("Expected ParseRules.MetaDescription to be 'New Meta', got '%s'", base.ParseRules.MetaDescription) + } + if base.ParseRules.ArticleContent != "New Content" { + t.Errorf("Expected ParseRules.ArticleContent to be 'New Content', got '%s'", base.ParseRules.ArticleContent) + } + if base.ParseRules.Author != "New Author" { + t.Errorf("Expected ParseRules.Author to be 'New Author', got '%s'", base.ParseRules.Author) + } + if base.ParseRules.DatePublished != "2022-01-01" { + t.Errorf("Expected ParseRules.DatePublished to be '2022-01-01', got '%s'", base.ParseRules.DatePublished) + } + if !reflect.DeepEqual(base.Storage.OutputFormats, []string{"csv"}) { + t.Errorf("Expected Storage.OutputFormats to be ['csv'], got %v", base.Storage.OutputFormats) + } + if base.Storage.SavePath != "new_output/" { + t.Errorf("Expected Storage.SavePath to be 'new_output/', got '%s'", base.Storage.SavePath) + } + if base.Storage.FileName != "new_data" { + t.Errorf("Expected Storage.FileName to be 'new_data', got '%s'", base.Storage.FileName) + } + if base.ScrapingOptions.MaxDepth != 5 { + t.Errorf("Expected ScrapingOptions.MaxDepth to be 5, got %d", base.ScrapingOptions.MaxDepth) + } + if base.ScrapingOptions.RateLimit != 2.0 { + t.Errorf("Expected ScrapingOptions.RateLimit to be 2.0, got %f", base.ScrapingOptions.RateLimit) + } + if base.ScrapingOptions.RetryAttempts != 4 { + t.Errorf("Expected ScrapingOptions.RetryAttempts to be 4, got %d", base.ScrapingOptions.RetryAttempts) + } + if base.ScrapingOptions.UserAgent != "OverrideAgent" { + t.Errorf("Expected ScrapingOptions.UserAgent to be 'OverrideAgent', got '%s'", base.ScrapingOptions.UserAgent) + } + if !base.DataFormatting.CleanWhitespace { + t.Errorf("Expected DataFormatting.CleanWhitespace to be true") + } + if !base.DataFormatting.RemoveHTML { + t.Errorf("Expected DataFormatting.RemoveHTML to be true") + } + + // Optionally, you can verify that PrintColored was called for each overridden field. + expectedSubstrs := []string{ + "Overriding Version: v2.0", + "Overriding URL.Base: https://override.com", + "Overriding URL.Routes: [", + "Overriding URL.IncludeBase: true", + "Overriding ParseRules.Title: New Title", + "Overriding ParseRules.MetaDescription: New Meta", + "Overriding ParseRules.ArticleContent: New Content", + "Overriding ParseRules.Author: New Author", + "Overriding ParseRules.DatePublished: 2022-01-01", + "Overriding Storage.OutputFormats: [", + "Overriding Storage.SavePath: new_output/", + "Overriding Storage.FileName: new_data", + "Overriding ScrapingOptions.MaxDepth: 5", + "Overriding ScrapingOptions.RateLimit: 2", + "Overriding ScrapingOptions.RetryAttempts: 4", + "Overriding ScrapingOptions.UserAgent: OverrideAgent", + "Overriding DataFormatting.CleanWhitespace: true", + "Overriding DataFormatting.RemoveHTML: true", + } + for _, substr := range expectedSubstrs { + if !strings.Contains(captured, substr) { + t.Errorf("Expected output to contain '%s', got '%s'", substr, captured) + } + } +} + +// TestOverrideConfigNil tests that passing a ConfigOverride with all nil values does not change the config. +func TestOverrideConfigNil(t *testing.T) { + var captured string + patchColored := monkey.Patch(utils.PrintColored, func(a ...interface{}) { + captured += fmt.Sprint(a...) + }) + defer patchColored.Unpatch() + + // Create a base config with default values. + base := &Config{} + base.ApplyDefaults() + + // Create an override with all nil pointers. + overrides := ConfigOverride{} + + // Apply the override. + base.OverrideConfig(overrides) + + // Verify that no fields have changed (i.e. remain equal to their defaults). + defaultConfig := &Config{} + defaultConfig.ApplyDefaults() + + if !reflect.DeepEqual(base, defaultConfig) { + t.Errorf("Expected config to remain unchanged when overrides are nil. Got %+v, expected %+v", base, defaultConfig) + } + + // Since nothing is overridden, captured output should be empty. + if captured != "" { + t.Errorf("Expected no output from PrintColored when no overrides are applied, got '%s'", captured) } } From 1be941ba3a56f41e01d3f6019376d57bb9c3f727 Mon Sep 17 00:00:00 2001 From: Brennen Heinrich Date: Sun, 16 Feb 2025 16:53:39 -0600 Subject: [PATCH 5/5] Combining tests --- pkg/config/config_test.go | 353 +++++++++++++++++++------------------- 1 file changed, 178 insertions(+), 175 deletions(-) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 457a4af..35124d1 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -296,188 +296,191 @@ func TestLoad(t *testing.T) { } } -// TestOverrideConfigFull tests the new OverrideConfig function using the ConfigOverride type. -// It creates a base config, applies a full override and verifies that all fields have been updated accordingly. -func TestOverrideConfigFull(t *testing.T) { - var captured string - patchColored := monkey.Patch(utils.PrintColored, func(a ...interface{}) { - captured += fmt.Sprint(a...) - }) - defer patchColored.Unpatch() - - // Create a base config with default values. - base := &Config{} - base.ApplyDefaults() +// TestOverrideConfig combines the previous TestOverrideConfigFull and TestOverrideConfigNil into a single test. +// It verifies that a full override updates all fields and that a nil override leaves the config unchanged. +func TestOverrideConfig(t *testing.T) { + cases := []struct { + desc string + overrideSetup func() ConfigOverride + validate func(t *testing.T, base *Config, captured string) + }{ + { + desc: "Full override applies all changes", + overrideSetup: func() ConfigOverride { + return ConfigOverride{ + Version: ptrString("v2.0"), + URL: &struct { + Base *string `json:"base"` + Routes *[]string `json:"routes"` + IncludeBase *bool `json:"includeBase"` + }{ + Base: ptrString("https://override.com"), + Routes: &[]string{"/new", "/extra"}, + IncludeBase: ptrBool(true), + }, + ParseRules: &struct { + Title *string `json:"title,omitempty"` + MetaDescription *string `json:"metaDescription,omitempty"` + ArticleContent *string `json:"articleContent,omitempty"` + Author *string `json:"author,omitempty"` + DatePublished *string `json:"datePublished,omitempty"` + }{ + Title: ptrString("New Title"), + MetaDescription: ptrString("New Meta"), + ArticleContent: ptrString("New Content"), + Author: ptrString("New Author"), + DatePublished: ptrString("2022-01-01"), + }, + Storage: &struct { + OutputFormats *[]string `json:"outputFormats"` + SavePath *string `json:"savePath"` + FileName *string `json:"fileName"` + }{ + OutputFormats: &[]string{"csv"}, + SavePath: ptrString("new_output/"), + FileName: ptrString("new_data"), + }, + ScrapingOptions: &struct { + MaxDepth *int `json:"maxDepth"` + RateLimit *float64 `json:"rateLimit"` + RetryAttempts *int `json:"retryAttempts"` + UserAgent *string `json:"userAgent"` + }{ + MaxDepth: ptrInt(5), + RateLimit: ptrFloat64(2.0), + RetryAttempts: ptrInt(4), + UserAgent: ptrString("OverrideAgent"), + }, + DataFormatting: &struct { + CleanWhitespace *bool `json:"cleanWhitespace"` + RemoveHTML *bool `json:"removeHTML"` + }{ + CleanWhitespace: ptrBool(true), + RemoveHTML: ptrBool(true), + }, + } + }, + validate: func(t *testing.T, base *Config, captured string) { + if base.Version != "v2.0" { + t.Errorf("Expected Version to be 'v2.0', got '%s'", base.Version) + } + if base.URL.Base != "https://override.com" { + t.Errorf("Expected URL.Base to be 'https://override.com', got '%s'", base.URL.Base) + } + if !reflect.DeepEqual(base.URL.Routes, []string{"/new", "/extra"}) { + t.Errorf("Expected URL.Routes to be ['/new', '/extra'], got %v", base.URL.Routes) + } + if !base.URL.IncludeBase { + t.Errorf("Expected URL.IncludeBase to be true") + } + if base.ParseRules.Title != "New Title" { + t.Errorf("Expected ParseRules.Title to be 'New Title', got '%s'", base.ParseRules.Title) + } + if base.ParseRules.MetaDescription != "New Meta" { + t.Errorf("Expected ParseRules.MetaDescription to be 'New Meta', got '%s'", base.ParseRules.MetaDescription) + } + if base.ParseRules.ArticleContent != "New Content" { + t.Errorf("Expected ParseRules.ArticleContent to be 'New Content', got '%s'", base.ParseRules.ArticleContent) + } + if base.ParseRules.Author != "New Author" { + t.Errorf("Expected ParseRules.Author to be 'New Author', got '%s'", base.ParseRules.Author) + } + if base.ParseRules.DatePublished != "2022-01-01" { + t.Errorf("Expected ParseRules.DatePublished to be '2022-01-01', got '%s'", base.ParseRules.DatePublished) + } + if !reflect.DeepEqual(base.Storage.OutputFormats, []string{"csv"}) { + t.Errorf("Expected Storage.OutputFormats to be ['csv'], got %v", base.Storage.OutputFormats) + } + if base.Storage.SavePath != "new_output/" { + t.Errorf("Expected Storage.SavePath to be 'new_output/', got '%s'", base.Storage.SavePath) + } + if base.Storage.FileName != "new_data" { + t.Errorf("Expected Storage.FileName to be 'new_data', got '%s'", base.Storage.FileName) + } + if base.ScrapingOptions.MaxDepth != 5 { + t.Errorf("Expected ScrapingOptions.MaxDepth to be 5, got %d", base.ScrapingOptions.MaxDepth) + } + if base.ScrapingOptions.RateLimit != 2.0 { + t.Errorf("Expected ScrapingOptions.RateLimit to be 2.0, got %f", base.ScrapingOptions.RateLimit) + } + if base.ScrapingOptions.RetryAttempts != 4 { + t.Errorf("Expected ScrapingOptions.RetryAttempts to be 4, got %d", base.ScrapingOptions.RetryAttempts) + } + if base.ScrapingOptions.UserAgent != "OverrideAgent" { + t.Errorf("Expected ScrapingOptions.UserAgent to be 'OverrideAgent', got '%s'", base.ScrapingOptions.UserAgent) + } + if !base.DataFormatting.CleanWhitespace { + t.Errorf("Expected DataFormatting.CleanWhitespace to be true") + } + if !base.DataFormatting.RemoveHTML { + t.Errorf("Expected DataFormatting.RemoveHTML to be true") + } - // Create an override with non-nil pointers for every field. - overrides := ConfigOverride{ - Version: ptrString("v2.0"), - URL: &struct { - Base *string `json:"base"` - Routes *[]string `json:"routes"` - IncludeBase *bool `json:"includeBase"` - }{ - Base: ptrString("https://override.com"), - Routes: &[]string{"/new", "/extra"}, - IncludeBase: ptrBool(true), - }, - ParseRules: &struct { - Title *string `json:"title,omitempty"` - MetaDescription *string `json:"metaDescription,omitempty"` - ArticleContent *string `json:"articleContent,omitempty"` - Author *string `json:"author,omitempty"` - DatePublished *string `json:"datePublished,omitempty"` - }{ - Title: ptrString("New Title"), - MetaDescription: ptrString("New Meta"), - ArticleContent: ptrString("New Content"), - Author: ptrString("New Author"), - DatePublished: ptrString("2022-01-01"), - }, - Storage: &struct { - OutputFormats *[]string `json:"outputFormats"` - SavePath *string `json:"savePath"` - FileName *string `json:"fileName"` - }{ - OutputFormats: &[]string{"csv"}, - SavePath: ptrString("new_output/"), - FileName: ptrString("new_data"), - }, - ScrapingOptions: &struct { - MaxDepth *int `json:"maxDepth"` - RateLimit *float64 `json:"rateLimit"` - RetryAttempts *int `json:"retryAttempts"` - UserAgent *string `json:"userAgent"` - }{ - MaxDepth: ptrInt(5), - RateLimit: ptrFloat64(2.0), - RetryAttempts: ptrInt(4), - UserAgent: ptrString("OverrideAgent"), + // Verify that PrintColored was called for each overridden field. + expectedSubstrs := []string{ + "Overriding Version: v2.0", + "Overriding URL.Base: https://override.com", + "Overriding URL.Routes: [", + "Overriding URL.IncludeBase: true", + "Overriding ParseRules.Title: New Title", + "Overriding ParseRules.MetaDescription: New Meta", + "Overriding ParseRules.ArticleContent: New Content", + "Overriding ParseRules.Author: New Author", + "Overriding ParseRules.DatePublished: 2022-01-01", + "Overriding Storage.OutputFormats: [", + "Overriding Storage.SavePath: new_output/", + "Overriding Storage.FileName: new_data", + "Overriding ScrapingOptions.MaxDepth: 5", + "Overriding ScrapingOptions.RateLimit: 2", + "Overriding ScrapingOptions.RetryAttempts: 4", + "Overriding ScrapingOptions.UserAgent: OverrideAgent", + "Overriding DataFormatting.CleanWhitespace: true", + "Overriding DataFormatting.RemoveHTML: true", + } + for _, substr := range expectedSubstrs { + if !strings.Contains(captured, substr) { + t.Errorf("Expected output to contain '%s', got '%s'", substr, captured) + } + } + }, }, - DataFormatting: &struct { - CleanWhitespace *bool `json:"cleanWhitespace"` - RemoveHTML *bool `json:"removeHTML"` - }{ - CleanWhitespace: ptrBool(true), - RemoveHTML: ptrBool(true), + { + desc: "Nil override leaves config unchanged", + overrideSetup: func() ConfigOverride { + return ConfigOverride{} + }, + validate: func(t *testing.T, base *Config, captured string) { + // Build a default config to compare. + defaultConfig := &Config{} + defaultConfig.ApplyDefaults() + if !reflect.DeepEqual(base, defaultConfig) { + t.Errorf("Expected config to remain unchanged when overrides are nil. Got %+v, expected %+v", base, defaultConfig) + } + // No PrintColored calls should be made. + if captured != "" { + t.Errorf("Expected no output from PrintColored when no overrides are applied, got '%s'", captured) + } + }, }, } - // Apply the override. - base.OverrideConfig(overrides) - - // Verify that each field has been updated. - if base.Version != "v2.0" { - t.Errorf("Expected Version to be 'v2.0', got '%s'", base.Version) - } - if base.URL.Base != "https://override.com" { - t.Errorf("Expected URL.Base to be 'https://override.com', got '%s'", base.URL.Base) - } - if !reflect.DeepEqual(base.URL.Routes, []string{"/new", "/extra"}) { - t.Errorf("Expected URL.Routes to be ['/new', '/extra'], got %v", base.URL.Routes) - } - if !base.URL.IncludeBase { - t.Errorf("Expected URL.IncludeBase to be true") - } - if base.ParseRules.Title != "New Title" { - t.Errorf("Expected ParseRules.Title to be 'New Title', got '%s'", base.ParseRules.Title) - } - if base.ParseRules.MetaDescription != "New Meta" { - t.Errorf("Expected ParseRules.MetaDescription to be 'New Meta', got '%s'", base.ParseRules.MetaDescription) - } - if base.ParseRules.ArticleContent != "New Content" { - t.Errorf("Expected ParseRules.ArticleContent to be 'New Content', got '%s'", base.ParseRules.ArticleContent) - } - if base.ParseRules.Author != "New Author" { - t.Errorf("Expected ParseRules.Author to be 'New Author', got '%s'", base.ParseRules.Author) - } - if base.ParseRules.DatePublished != "2022-01-01" { - t.Errorf("Expected ParseRules.DatePublished to be '2022-01-01', got '%s'", base.ParseRules.DatePublished) - } - if !reflect.DeepEqual(base.Storage.OutputFormats, []string{"csv"}) { - t.Errorf("Expected Storage.OutputFormats to be ['csv'], got %v", base.Storage.OutputFormats) - } - if base.Storage.SavePath != "new_output/" { - t.Errorf("Expected Storage.SavePath to be 'new_output/', got '%s'", base.Storage.SavePath) - } - if base.Storage.FileName != "new_data" { - t.Errorf("Expected Storage.FileName to be 'new_data', got '%s'", base.Storage.FileName) - } - if base.ScrapingOptions.MaxDepth != 5 { - t.Errorf("Expected ScrapingOptions.MaxDepth to be 5, got %d", base.ScrapingOptions.MaxDepth) - } - if base.ScrapingOptions.RateLimit != 2.0 { - t.Errorf("Expected ScrapingOptions.RateLimit to be 2.0, got %f", base.ScrapingOptions.RateLimit) - } - if base.ScrapingOptions.RetryAttempts != 4 { - t.Errorf("Expected ScrapingOptions.RetryAttempts to be 4, got %d", base.ScrapingOptions.RetryAttempts) - } - if base.ScrapingOptions.UserAgent != "OverrideAgent" { - t.Errorf("Expected ScrapingOptions.UserAgent to be 'OverrideAgent', got '%s'", base.ScrapingOptions.UserAgent) - } - if !base.DataFormatting.CleanWhitespace { - t.Errorf("Expected DataFormatting.CleanWhitespace to be true") - } - if !base.DataFormatting.RemoveHTML { - t.Errorf("Expected DataFormatting.RemoveHTML to be true") - } - - // Optionally, you can verify that PrintColored was called for each overridden field. - expectedSubstrs := []string{ - "Overriding Version: v2.0", - "Overriding URL.Base: https://override.com", - "Overriding URL.Routes: [", - "Overriding URL.IncludeBase: true", - "Overriding ParseRules.Title: New Title", - "Overriding ParseRules.MetaDescription: New Meta", - "Overriding ParseRules.ArticleContent: New Content", - "Overriding ParseRules.Author: New Author", - "Overriding ParseRules.DatePublished: 2022-01-01", - "Overriding Storage.OutputFormats: [", - "Overriding Storage.SavePath: new_output/", - "Overriding Storage.FileName: new_data", - "Overriding ScrapingOptions.MaxDepth: 5", - "Overriding ScrapingOptions.RateLimit: 2", - "Overriding ScrapingOptions.RetryAttempts: 4", - "Overriding ScrapingOptions.UserAgent: OverrideAgent", - "Overriding DataFormatting.CleanWhitespace: true", - "Overriding DataFormatting.RemoveHTML: true", - } - for _, substr := range expectedSubstrs { - if !strings.Contains(captured, substr) { - t.Errorf("Expected output to contain '%s', got '%s'", substr, captured) - } - } -} - -// TestOverrideConfigNil tests that passing a ConfigOverride with all nil values does not change the config. -func TestOverrideConfigNil(t *testing.T) { - var captured string - patchColored := monkey.Patch(utils.PrintColored, func(a ...interface{}) { - captured += fmt.Sprint(a...) - }) - defer patchColored.Unpatch() - - // Create a base config with default values. - base := &Config{} - base.ApplyDefaults() - - // Create an override with all nil pointers. - overrides := ConfigOverride{} - - // Apply the override. - base.OverrideConfig(overrides) + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + var captured string + patchColored := monkey.Patch(utils.PrintColored, func(a ...interface{}) { + captured += fmt.Sprint(a...) + }) + defer patchColored.Unpatch() - // Verify that no fields have changed (i.e. remain equal to their defaults). - defaultConfig := &Config{} - defaultConfig.ApplyDefaults() + // Create a base config with defaults applied. + base := &Config{} + base.ApplyDefaults() - if !reflect.DeepEqual(base, defaultConfig) { - t.Errorf("Expected config to remain unchanged when overrides are nil. Got %+v, expected %+v", base, defaultConfig) - } + // Apply the override from this test case. + override := tc.overrideSetup() + base.OverrideConfig(override) - // Since nothing is overridden, captured output should be empty. - if captured != "" { - t.Errorf("Expected no output from PrintColored when no overrides are applied, got '%s'", captured) + tc.validate(t, base, captured) + }) } }