Skip to content

Commit bcc3bcb

Browse files
committed
Merge remote-tracking branch 'origin/develop' into develop
2 parents ee6a343 + db05fd5 commit bcc3bcb

3 files changed

Lines changed: 80 additions & 22 deletions

File tree

cmd/scrapeycli/main.go

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
package main
22

33
import (
4-
"flag"
5-
"os"
6-
7-
"github.com/fatih/color"
8-
"github.com/heinrichb/scrapey-cli/pkg/config"
9-
"github.com/heinrichb/scrapey-cli/pkg/utils"
4+
"flag"
5+
"fmt"
6+
"os"
7+
"path"
8+
9+
"github.com/fatih/color"
10+
"github.com/heinrichb/scrapey-cli/pkg/config"
11+
"github.com/heinrichb/scrapey-cli/pkg/crawler"
12+
"github.com/heinrichb/scrapey-cli/pkg/utils"
1013
)
1114

1215
/*
@@ -58,27 +61,27 @@ It parses command-line flags, prints a welcome message, loads the configuration,
5861
applies CLI overrides using a ConfigOverride object, and prints confirmation messages.
5962
*/
6063
func main() {
61-
// Parse CLI flags.
62-
flag.Parse()
64+
// Parse CLI flags.
65+
flag.Parse()
6366

6467
// Store the verbose flag in global state.
6568
config.Verbose = verbose
6669

6770
// Print a welcome message in cyan using our PrintColored utility.
6871
utils.PrintColored("Welcome to Scrapey CLI!", "", color.FgCyan)
6972

70-
// Default to "configs/default.json" if no config path is provided.
71-
if configPath == "" {
72-
configPath = "configs/default.json"
73-
}
73+
// Default to "configs/default.json" if no config path is provided.
74+
if configPath == "" {
75+
configPath = "configs/default.json"
76+
}
7477

75-
// Attempt to load the configuration from the specified file.
76-
cfg, err := config.Load(configPath)
77-
if err != nil {
78-
// If loading fails, print an error message in red and exit.
79-
utils.PrintColored("Failed to load config: ", err.Error(), color.FgRed)
80-
os.Exit(1)
81-
}
78+
// Attempt to load the configuration from the specified file.
79+
cfg, err := config.Load(configPath)
80+
if err != nil {
81+
// If loading fails, print an error message in red and exit.
82+
utils.PrintColored("Failed to load config: ", err.Error(), color.FgRed)
83+
os.Exit(1)
84+
}
8285

8386
// Construct a partial ConfigOverride struct for CLI overrides.
8487
cliOverrides := config.ConfigOverride{}
@@ -128,10 +131,29 @@ func main() {
128131

129132
// Print which routes will be scraped.
130133
utils.PrintColored("Base URL: ", cfg.URL.Base, color.FgYellow)
131-
if cfg.URL.IncludeBase {
134+
// Create a new Crawler instance
135+
c := crawler.New()
136+
137+
if cfg.URL.IncludeBase {
132138
utils.PrintColored("Including base URL in scraping.", "", color.FgGreen)
139+
content, err := c.FetchURL(cfg.URL.Base)
140+
if err != nil {
141+
utils.PrintColored("Failed to fetch URL: ", err.Error(), color.FgRed)
142+
os.Exit(1) // To-Do Handle error without exiting program
143+
}
144+
// Print the fetched content
145+
fmt.Println("Fetched Content:")
146+
fmt.Println(content)
133147
}
134148
for _, route := range cfg.URL.Routes {
135149
utils.PrintColored("Scraping route: ", route, color.FgHiBlue)
150+
content, err := c.FetchURL(path.Join(cfg.URL.Base,route))
151+
if err != nil {
152+
utils.PrintColored("Failed to fetch URL: ", err.Error(), color.FgRed)
153+
os.Exit(1) // To-Do Handle error without exiting program
154+
}
155+
// Print the fetched content
156+
fmt.Println("Fetched Content:")
157+
fmt.Println(content)
136158
}
137159
}

configs/default.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"url": {
44
"base": "https://example.com",
55
"routes": ["/route1", "/route2", "*"],
6-
"includeBase": false
6+
"includeBase": true
77
},
88
"parseRules": {
99
"title": "title",

pkg/crawler/crawler.go

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22

33
package crawler
44

5+
import (
6+
"fmt"
7+
"io/ioutil"
8+
"net/http"
9+
"time"
10+
)
11+
512
/*
613
Crawler is responsible for fetching HTML content from URLs.
714
@@ -53,5 +60,34 @@ Notes:
5360
*/
5461
func (c *Crawler) FetchURL(url string) (string, error) {
5562
// Stub: return placeholder HTML or empty string for now.
56-
return "", nil
63+
client := &http.Client{
64+
Timeout: 10 * time.Second, // Set timeout
65+
}
66+
67+
// jsonData := `{"key":"value"}`
68+
69+
// Create a custom request
70+
// req, err := http.NewRequest("Post", url, bytes.NewBuffer([]byte(jsonData)))
71+
req, err := http.NewRequest("GET", url, nil)
72+
if err != nil {
73+
fmt.Printf("Error creating request: %v\n", err)
74+
return "", err
75+
}
76+
// req.Header.Set("Content-Type", "application/json") // Set headers
77+
78+
// Send the request
79+
resp, err := client.Do(req)
80+
if err != nil {
81+
fmt.Printf("Error sending request: %v\n", err)
82+
return "", err
83+
}
84+
defer resp.Body.Close()
85+
86+
// Read and print the response body
87+
body, err := ioutil.ReadAll(resp.Body)
88+
if err != nil {
89+
fmt.Printf("Error reading response body: %v\n", err)
90+
return "", err
91+
}
92+
return string(body), nil
5793
}

0 commit comments

Comments
 (0)