-
Notifications
You must be signed in to change notification settings - Fork 0
Initial implementation #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
84ccafa
90c9ae4
29280dd
d536662
7cc0d98
d2a8fe1
af752e3
9d5548e
49cdd39
6c37960
93e7ca7
8f01515
0521b22
972c22b
16515dc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| language: en | ||
|
|
||
| reviews: | ||
| profile: assertive | ||
| high_level_summary: true | ||
| poem: false | ||
| auto_review: | ||
| enabled: true | ||
| drafts: true | ||
|
|
||
| tools: | ||
| golangci-lint: | ||
| enabled: true | ||
| gitleaks: | ||
| enabled: true | ||
| yamllint: | ||
| enabled: true | ||
| actionlint: | ||
| enabled: true | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| shellcheck: | ||
| enabled: true | ||
| markdownlint: | ||
| enabled: true | ||
| osvScanner: | ||
| enabled: true | ||
| github-checks: | ||
| enabled: true | ||
| timeout_ms: 300000 | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,96 @@ | ||||||||||||||||||||||||||||||||||
| name: CI | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| on: | ||||||||||||||||||||||||||||||||||
| push: | ||||||||||||||||||||||||||||||||||
| pull_request: | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| permissions: | ||||||||||||||||||||||||||||||||||
| contents: read | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| concurrency: | ||||||||||||||||||||||||||||||||||
| group: ci-${{ github.workflow }}-${{ github.ref }} | ||||||||||||||||||||||||||||||||||
| cancel-in-progress: true | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| jobs: | ||||||||||||||||||||||||||||||||||
| build-test: | ||||||||||||||||||||||||||||||||||
| runs-on: ubuntu-latest | ||||||||||||||||||||||||||||||||||
|
coderabbitai[bot] marked this conversation as resolved.
|
||||||||||||||||||||||||||||||||||
| timeout-minutes: 20 | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| # Run from the repo root now that go.mod is at top level | ||||||||||||||||||||||||||||||||||
| defaults: | ||||||||||||||||||||||||||||||||||
| run: | ||||||||||||||||||||||||||||||||||
| shell: bash | ||||||||||||||||||||||||||||||||||
| working-directory: . | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| steps: | ||||||||||||||||||||||||||||||||||
| - name: Checkout | ||||||||||||||||||||||||||||||||||
| uses: actions/checkout@v4 | ||||||||||||||||||||||||||||||||||
|
Comment on lines
+26
to
+27
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Pin GitHub Actions by commit SHA for supply‑chain security. Avoid mutable tags; pin to immutable SHAs (update via Dependabot). - - name: Checkout
- uses: actions/checkout@v4
+ - name: Checkout
+ uses: actions/checkout@<commit-sha>
- - name: Set up Go
- uses: actions/setup-go@v5
+ - name: Set up Go
+ uses: actions/setup-go@<commit-sha>
- - name: Upload coverage (artifact)
- if: always()
- uses: actions/upload-artifact@v4
+ - name: Upload coverage (artifact)
+ if: always()
+ uses: actions/upload-artifact@<commit-sha>Tip: enable Dependabot for GitHub Actions to auto-bump SHAs. Also applies to: 29-29, 91-91 🤖 Prompt for AI Agents
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be already addressed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: Length of output: 2584 @b4prog, I can see that the structure for pinning GitHub Actions is in place, but the actual commit SHAs haven't been filled in yet. The workflow currently uses placeholder values:
To complete the security improvement, these placeholders need to be replaced with actual commit SHAs. Here are the current stable commit SHAs for these actions: - name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Go
uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0
- name: Upload coverage (artifact)
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3Would you like me to create an issue to track completing this security improvement with the actual commit SHAs? |
||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
|
b4prog marked this conversation as resolved.
|
||||||||||||||||||||||||||||||||||
| - name: Set up Go | ||||||||||||||||||||||||||||||||||
| uses: actions/setup-go@v5 | ||||||||||||||||||||||||||||||||||
| with: | ||||||||||||||||||||||||||||||||||
| go-version: "1.25.x" | ||||||||||||||||||||||||||||||||||
| check-latest: true | ||||||||||||||||||||||||||||||||||
| cache: true | ||||||||||||||||||||||||||||||||||
| cache-dependency-path: go.sum | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
|
Comment on lines
+29
to
+36
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick (assertive) Drive toolchain version from go.mod for single source of truth. Let setup-go read the version from go.mod; fewer drift issues. - - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version: "1.25.x"
- check-latest: true
- cache: true
- cache-dependency-path: go.sum
+ - name: Set up Go
+ uses: actions/setup-go@<commit-sha>
+ with:
+ go-version-file: go.mod
+ check-latest: true
+ cache: true
+ cache-dependency-path: go.sumIf you intentionally target Go 1.25.x regardless of go.mod, keep the current config. 📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||||
| - name: Verify formatting (gofmt) | ||||||||||||||||||||||||||||||||||
| run: | | ||||||||||||||||||||||||||||||||||
| unformatted=$(gofmt -l .) | ||||||||||||||||||||||||||||||||||
| if [[ -n "$unformatted" ]]; then | ||||||||||||||||||||||||||||||||||
| echo "These files are not gofmt-formatted:" | ||||||||||||||||||||||||||||||||||
| echo "$unformatted" | ||||||||||||||||||||||||||||||||||
| echo "Run: gofmt -w ." | ||||||||||||||||||||||||||||||||||
| echo "Diff:" | ||||||||||||||||||||||||||||||||||
| for f in $unformatted; do | ||||||||||||||||||||||||||||||||||
| echo "---- $f" | ||||||||||||||||||||||||||||||||||
| diff -u "$f" <(gofmt "$f") || true | ||||||||||||||||||||||||||||||||||
| done | ||||||||||||||||||||||||||||||||||
| exit 1 | ||||||||||||||||||||||||||||||||||
| fi | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| - name: Tidy (verify no changes) | ||||||||||||||||||||||||||||||||||
| run: | | ||||||||||||||||||||||||||||||||||
| set -euo pipefail | ||||||||||||||||||||||||||||||||||
| cp go.mod go.mod.orig | ||||||||||||||||||||||||||||||||||
| had_go_sum=0 | ||||||||||||||||||||||||||||||||||
| if [[ -f go.sum ]]; then | ||||||||||||||||||||||||||||||||||
| cp go.sum go.sum.orig | ||||||||||||||||||||||||||||||||||
| had_go_sum=1 | ||||||||||||||||||||||||||||||||||
| fi | ||||||||||||||||||||||||||||||||||
| if ! go mod tidy; then | ||||||||||||||||||||||||||||||||||
| echo "'go mod tidy' failed" | ||||||||||||||||||||||||||||||||||
| mv -f go.mod.orig go.mod | ||||||||||||||||||||||||||||||||||
| if [[ $had_go_sum -eq 1 ]]; then mv -f go.sum.orig go.sum; else rm -f go.sum || true; fi | ||||||||||||||||||||||||||||||||||
| exit 1 | ||||||||||||||||||||||||||||||||||
| fi | ||||||||||||||||||||||||||||||||||
| tidy_ok=0 | ||||||||||||||||||||||||||||||||||
| diff -u go.mod.orig go.mod || tidy_ok=1 | ||||||||||||||||||||||||||||||||||
| if [[ $had_go_sum -eq 1 ]]; then | ||||||||||||||||||||||||||||||||||
| diff -u go.sum.orig go.sum || tidy_ok=1 | ||||||||||||||||||||||||||||||||||
| else | ||||||||||||||||||||||||||||||||||
| [[ -f go.sum ]] && tidy_ok=1 | ||||||||||||||||||||||||||||||||||
| fi | ||||||||||||||||||||||||||||||||||
| mv -f go.mod.orig go.mod | ||||||||||||||||||||||||||||||||||
| if [[ $had_go_sum -eq 1 ]]; then mv -f go.sum.orig go.sum; else rm -f go.sum || true; fi | ||||||||||||||||||||||||||||||||||
| if [[ $tidy_ok -ne 0 ]]; then | ||||||||||||||||||||||||||||||||||
| echo "go.mod/go.sum would change. Run 'go mod tidy' locally and commit." | ||||||||||||||||||||||||||||||||||
| exit 1 | ||||||||||||||||||||||||||||||||||
| fi | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| - name: Vet | ||||||||||||||||||||||||||||||||||
| run: go vet -mod=readonly ./... | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
| - name: Build | ||||||||||||||||||||||||||||||||||
| run: go build -mod=readonly ./... | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
|
Comment on lines
+81
to
+86
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Add golangci-lint to catch issues beyond vet. Staticcheck, style, and common bug patterns won’t be covered by vet alone. - name: Vet
run: go vet -mod=readonly ./...
+ - name: Lint (golangci-lint)
+ uses: golangci/golangci-lint-action@<commit-sha>
+ with:
+ version: latest
+ args: --timeout=5m📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||||
| - name: Test (race, coverage) | ||||||||||||||||||||||||||||||||||
| run: go test -mod=readonly -race -covermode=atomic -coverprofile=coverage.out ./... | ||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||
|
Comment on lines
+87
to
+89
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick (assertive) Shuffle tests to surface order dependencies. Helps catch hidden global state issues. - - name: Test (race, coverage)
- run: go test -mod=readonly -race -covermode=atomic -coverprofile=coverage.out ./...
+ - name: Test (race, coverage)
+ run: go test -mod=readonly -race -shuffle=on -covermode=atomic -coverprofile=coverage.out ./...📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||||
| - name: Upload coverage (artifact) | ||||||||||||||||||||||||||||||||||
| if: always() | ||||||||||||||||||||||||||||||||||
| uses: actions/upload-artifact@v4 | ||||||||||||||||||||||||||||||||||
| with: | ||||||||||||||||||||||||||||||||||
| name: coverage | ||||||||||||||||||||||||||||||||||
| path: coverage.out | ||||||||||||||||||||||||||||||||||
| if-no-files-found: ignore | ||||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,2 +1,46 @@ | ||
| # Log2CSV | ||
| Log2CSV is a command-line tool that transforms any log into a CSV file using a regular expression. | ||
|
|
||
| `Log2CSV` is a command-line tool written in Go that transforms raw log files into CSV. | ||
| It extracts structured data from logs using a regular expression with named capture groups and writes CSV to STDOUT. | ||
|
|
||
| --- | ||
|
|
||
| ## Features | ||
|
|
||
| - Reads log lines from **STDIN** and writes CSV to **STDOUT**. | ||
| - Extracts fields using **named capture groups** (`(?P<Name>...)`). | ||
| - The **CSV header row** is automatically generated from group names. | ||
| - Preserves the input's line endings (LF/CRLF). | ||
| - Optional **unmatched mode** (`-unmatched`) to print **unique non-matching lines** instead of CSV. | ||
|
Comment on lines
+10
to
+14
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick (assertive) Docs/code mismatch for -unmatched named-groups requirement. Current code requires named groups even with -unmatched; README doesn’t say so. Either relax the code (preferred; see code review) or document the requirement. Also applies to: 18-22 🤖 Prompt for AI Agents |
||
|
|
||
| ## Usage | ||
|
|
||
| Log2CSV reads from STDIN and, by default, converts matching lines to CSV written to STDOUT using the provided regular expression. | ||
|
|
||
| - **CSV mode:** provide `-regexp` with named capture groups; matching lines become CSV rows (header generated automatically). | ||
| - **Unmatched mode:** add `-unmatched` to print each **unique** input line that **does not** match the pattern (one per line), to STDOUT. No CSV is produced in this mode. | ||
|
|
||
| ### Example - convert UFW log to CSV | ||
|
|
||
| ```sh | ||
| log2csv -regexp '^(?P<Timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P<Hostname>\S+)\s+(?P<Facility>\S+):\s+\[\s*(?P<Kernel_Time>\d+(?:\.\d+)?)\]\s+\[(?P<Action>UFW\s+\S+)\]\s+IN=(?P<IN>\S*)\s+OUT=(?P<OUT>\S*)\s+MAC=(?P<MAC>\S*)\s+SRC=(?P<SRC>\S+)\s+DST=(?P<DST>\S+)\s+LEN=(?P<LEN>\d+)(?:(?:\s+(?:TOS=(?P<TOS>0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P<PREC>0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P<TTL>\d+)\s+)?ID=(?P<ID>\d+)(?:\s+(?P<DF>DF))?)|\s+TC=(?P<TC>\d+)\s+HOPLIMIT=(?P<HOPLIMIT>\d+)\s+FLOWLBL=(?P<FLOWLBL>[0-9A-Fa-fx]+))?\s+PROTO=(?P<PROTO>[A-Za-z0-9]+)(?:\s+(?:SPT|SP)=(?P<SPT>\d+))?(?:\s+(?:DPT|DP)=(?P<DPT>\d+))?(?:\s+WINDOW=(?P<WINDOW>\d+))?(?:\s+RES=(?P<RES>0x[0-9A-Fa-f]{2}))?(?:\s+(?P<TCP_Flags>(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P<URGP>\d+))?(?:\s+TYPE=(?P<ICMP_TYPE>\d+))?(?:\s+CODE=(?P<ICMP_CODE>\d+))?(?:\s+SEQ=(?P<ICMP_SEQ>\d+))?(?:\s+LEN=(?P<L4_LEN>\d+))?\s*$' < /var/log/ufw.log | ||
| ``` | ||
|
|
||
| On Windows (PowerShell): | ||
|
|
||
| ```powershell | ||
| Get-Content C:\path\ufw.log | log2csv -regexp "^(?P<Timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P<Hostname>\S+)\s+(?P<Facility>\S+):\s+\[\s*(?P<Kernel_Time>\d+(?:\.\d+)?)\]\s+\[(?P<Action>UFW\s+\S+)\]\s+IN=(?P<IN>\S*)\s+OUT=(?P<OUT>\S*)\s+MAC=(?P<MAC>\S*)\s+SRC=(?P<SRC>\S+)\s+DST=(?P<DST>\S+)\s+LEN=(?P<LEN>\d+)(?:(?:\s+(?:TOS=(?P<TOS>0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P<PREC>0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P<TTL>\d+)\s+)?ID=(?P<ID>\d+)(?:\s+(?P<DF>DF))?)|\s+TC=(?P<TC>\d+)\s+HOPLIMIT=(?P<HOPLIMIT>\d+)\s+FLOWLBL=(?P<FLOWLBL>[0-9A-Fa-fx]+))?\s+PROTO=(?P<PROTO>[A-Za-z0-9]+)(?:\s+(?:SPT|SP)=(?P<SPT>\d+))?(?:\s+(?:DPT|DP)=(?P<DPT>\d+))?(?:\s+WINDOW=(?P<WINDOW>\d+))?(?:\s+RES=(?P<RES>0x[0-9A-Fa-f]{2}))?(?:\s+(?P<TCP_Flags>(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P<URGP>\d+))?(?:\s+TYPE=(?P<ICMP_TYPE>\d+))?(?:\s+CODE=(?P<ICMP_CODE>\d+))?(?:\s+SEQ=(?P<ICMP_SEQ>\d+))?(?:\s+LEN=(?P<L4_LEN>\d+))?\s*$" | ||
| ``` | ||
|
|
||
| ## Install | ||
|
|
||
| ```sh | ||
| go install github.com/b4prog/Log2CSV@latest | ||
| ``` | ||
|
|
||
|
Comment on lines
+35
to
+40
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick (assertive) Mention minimum Go version. Improves install clarity. ## Install
@@
```sh
go install github.com/b4prog/Log2CSV@latest+Requires Go 1.25+. README.md around lines 35 to 40: the Install section calls "go install |
||
| ## Build from source | ||
|
|
||
| ```sh | ||
| task build | ||
| ./build/log2csv -help | ||
| ``` | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,201 @@ | ||
| package main | ||
|
|
||
| import ( | ||
| "bufio" | ||
| "bytes" | ||
| "encoding/csv" | ||
| "errors" | ||
| "flag" | ||
| "fmt" | ||
| "io" | ||
| "os" | ||
| "regexp" | ||
| "strings" | ||
| ) | ||
|
|
||
| const ( | ||
| csvSeparator = ',' | ||
| logLineSizeMax = 64 * 1024 | ||
| bufferSizeMax = 10 * 1024 * 1024 | ||
| ) | ||
|
|
||
| var ( | ||
| // ErrInvalidRegexp is returned when the provided regular expression | ||
| // cannot be compiled due to invalid syntax. | ||
| ErrInvalidRegexp = errors.New("invalid regular expression syntax") | ||
| // ErrNoNamedCaptureGroups is returned when the provided regular expression | ||
| // does not contain any named capture groups (e.g. (?P<name>...)). | ||
| ErrNoNamedCaptureGroups = errors.New("the regular expression must contain at least one named capture group") | ||
| ) | ||
|
b4prog marked this conversation as resolved.
|
||
|
|
||
| func usage() { | ||
| msg := `Usage: | ||
| log2csv -regexp '<pattern with (?P<name>...) groups>' [-unmatched] | ||
|
|
||
| Description: | ||
| Reads log lines from STDIN, extracts named capture groups using the provided regular expression, | ||
| and writes a CSV to STDOUT. | ||
|
|
||
| If -unmatched is provided, the tool instead prints the unique non-matching lines (one per line) to STDOUT. | ||
|
|
||
| Examples: | ||
| # CSV mode (default) | ||
| log2csv -regexp '^(?P<Timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P<Hostname>\S+)\s+(?P<Facility>\S+):\s+\[\s*(?P<Kernel_Time>\d+(?:\.\d+)?)\]\s+\[(?P<Action>UFW\s+\S+)\]\s+IN=(?P<IN>\S*)\s+OUT=(?P<OUT>\S*)\s+MAC=(?P<MAC>\S*)\s+SRC=(?P<SRC>\S+)\s+DST=(?P<DST>\S+)\s+LEN=(?P<LEN>\d+)(?:(?:\s+(?:TOS=(?P<TOS>0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P<PREC>0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P<TTL>\d+)\s+)?ID=(?P<ID>\d+)(?:\s+(?P<DF>DF))?)|\s+TC=(?P<TC>\d+)\s+HOPLIMIT=(?P<HOPLIMIT>\d+)\s+FLOWLBL=(?P<FLOWLBL>[0-9A-Fa-fx]+))?\s+PROTO=(?P<PROTO>[A-Za-z0-9]+)(?:\s+(?:SPT|SP)=(?P<SPT>\d+))?(?:\s+(?:DPT|DP)=(?P<DPT>\d+))?(?:\s+WINDOW=(?P<WINDOW>\d+))?(?:\s+RES=(?P<RES>0x[0-9A-Fa-f]{2}))?(?:\s+(?P<TCP_Flags>(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P<URGP>\d+))?(?:\s+TYPE=(?P<ICMP_TYPE>\d+))?(?:\s+CODE=(?P<ICMP_CODE>\d+))?(?:\s+SEQ=(?P<ICMP_SEQ>\d+))?(?:\s+LEN=(?P<L4_LEN>\d+))?\s*$' < /var/log/ufw.log | ||
|
|
||
| # List unique non-matching lines | ||
| log2csv -regexp '<your-regex>' -unmatched < /var/log/ufw.log | ||
| ` | ||
| fmt.Fprint(os.Stderr, msg) | ||
| } | ||
|
Comment on lines
+48
to
+49
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick (assertive) Silence errcheck for informational prints. Main/usage can’t return errors; explicitly ignore the result. Apply: - fmt.Fprint(os.Stderr, msg)
+ _, _ = fmt.Fprint(os.Stderr, msg)
@@
- fmt.Fprintf(os.Stderr, "error: %v\n", err)
+ _, _ = fmt.Fprintf(os.Stderr, "error: %v\n", err)Also applies to: 53-54 🤖 Prompt for AI Agents |
||
|
|
||
| func main() { | ||
| if err := run(); err != nil { | ||
| fmt.Fprintf(os.Stderr, "error: %v\n", err) | ||
| os.Exit(1) | ||
| } | ||
| } | ||
|
|
||
| func run() error { | ||
| pattern := flag.String("regexp", "", "regular expression with named capture groups, e.g. '(?P<ts>...) (?P<level>...)'") | ||
| listUnmatched := flag.Bool("unmatched", false, "only list unique non-matching lines from STDIN") | ||
| flag.Usage = usage | ||
| flag.Parse() | ||
| if strings.TrimSpace(*pattern) == "" { | ||
| usage() | ||
| return fmt.Errorf("flag -regexp is required") | ||
| } | ||
| re, err := regexp.Compile(*pattern) | ||
| if err != nil { | ||
| return fmt.Errorf("%w: %v", ErrInvalidRegexp, err) | ||
| } | ||
| groupNames := extractGroupNames(re) | ||
| if len(groupNames) == 0 { | ||
| return ErrNoNamedCaptureGroups | ||
| } | ||
| out := bufio.NewWriter(os.Stdout) | ||
| err = processInput(os.Stdin, re, groupNames, out, *listUnmatched) | ||
| if flushErr := out.Flush(); err == nil && flushErr != nil { | ||
| err = flushErr | ||
| } | ||
|
Comment on lines
+71
to
+79
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Avoid per-line SubexpNames; precompute indices. Also don’t require named groups in -unmatched mode.
Apply: @@
- groupNames := extractGroupNames(re)
- if len(groupNames) == 0 {
- return ErrNoNamedCaptureGroups
- }
- out := bufio.NewWriter(os.Stdout)
- err = processInput(os.Stdin, re, groupNames, out, *listUnmatched)
+ // Precompute indices for named groups once.
+ groupIndices := namedGroupIndices(re)
+ // Only require/compute names when emitting CSV.
+ var groupNames []string
+ if !*listUnmatched {
+ groupNames = extractGroupNames(re)
+ if len(groupNames) == 0 {
+ return ErrNoNamedCaptureGroups
+ }
+ }
+ out := bufio.NewWriter(os.Stdout)
+ err = processInput(os.Stdin, re, groupIndices, groupNames, out, *listUnmatched)
@@
func extractGroupNames(re *regexp.Regexp) []string {
@@
}
+// namedGroupIndices returns the capture indices (into FindStringSubmatch)
+// for all named groups, in declaration order. Duplicates are preserved.
+func namedGroupIndices(re *regexp.Regexp) []int {
+ names := re.SubexpNames()
+ idxs := make([]int, 0, len(names))
+ for i := 1; i < len(names); i++ {
+ if names[i] != "" {
+ idxs = append(idxs, i)
+ }
+ }
+ return idxs
+}
@@
-func processInput(input io.Reader, re *regexp.Regexp, groupNames []string, output io.Writer, listUnmatched bool) error {
+func processInput(input io.Reader, re *regexp.Regexp, groupIndices []int, groupNames []string, output io.Writer, listUnmatched bool) error {
@@
- values, ok := processLine(line, re, groupNames)
+ values, ok := processLine(line, re, groupIndices)
@@
-// Process a log line and returns CSV values + true if the line is valid, or nil + false if the line should be ignored.
-func processLine(line string, re *regexp.Regexp, groupNames []string) ([]string, bool) {
- submatches := re.FindStringSubmatch(line)
- if submatches == nil {
- return nil, false
- }
- subNames := re.SubexpNames()
- values := make([]string, 0, len(groupNames))
- allEmpty := true
- for idxSubmatch := 1; idxSubmatch < len(submatches); idxSubmatch++ {
- name := subNames[idxSubmatch]
- if name == "" {
- continue
- }
- val := submatches[idxSubmatch]
- if val != "" {
- allEmpty = false
- }
- values = append(values, val)
- }
- if allEmpty {
- return nil, false
- }
- return values, true
-}
+// Process a log line and return CSV values + true if valid, or nil + false if ignored.
+func processLine(line string, re *regexp.Regexp, groupIndices []int) ([]string, bool) {
+ submatches := re.FindStringSubmatch(line)
+ if submatches == nil {
+ return nil, false
+ }
+ values := make([]string, len(groupIndices))
+ allEmpty := true
+ for i, idx := range groupIndices {
+ v := submatches[idx]
+ if v != "" {
+ allEmpty = false
+ }
+ values[i] = v
+ }
+ if allEmpty {
+ return nil, false
+ }
+ return values, true
+}Note: This preserves duplicate header names as-is (per your stated flexibility goal). Also applies to: 83-93, 94-94, 110-110, 175-199 🤖 Prompt for AI Agents |
||
| return err | ||
| } | ||
|
|
||
// extractGroupNames returns the names of the named capture groups of re, in
// the order they appear in the pattern. Unnamed groups are skipped. The result
// is empty (but non-nil) when the pattern has no named group.
func extractGroupNames(re *regexp.Regexp) []string {
	all := re.SubexpNames()
	// Entry 0 stands for the whole match and never carries a name.
	named := make([]string, 0, len(all)-1)
	for i := 1; i < len(all); i++ {
		if all[i] == "" {
			continue
		}
		named = append(named, all[i])
	}
	return named
}
|
|
||
| func processInput(input io.Reader, re *regexp.Regexp, groupNames []string, output io.Writer, listUnmatched bool) error { | ||
| inputReader, lineEnding, err := peekForLineEnding(input, logLineSizeMax) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| sc := openInput(inputReader) | ||
| csvWriter := csv.NewWriter(output) | ||
| csvWriter.Comma = csvSeparator | ||
| csvWriter.UseCRLF = lineEnding == "\r\n" | ||
| firstLine := true | ||
|
Comment on lines
+94
to
+103
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick (assertive) Avoid creating csv.Writer in -unmatched mode. Skip allocation/flush when not needed. - csvWriter := csv.NewWriter(output)
- csvWriter.Comma = csvSeparator
- csvWriter.UseCRLF = lineEnding == "\r\n"
+ var csvWriter *csv.Writer
+ if !listUnmatched {
+ csvWriter = csv.NewWriter(output)
+ csvWriter.Comma = csvSeparator
+ csvWriter.UseCRLF = lineEnding == "\r\n"
+ }
@@
- csvWriter.Flush()
- return csvWriter.Error()
+ if csvWriter != nil {
+ csvWriter.Flush()
+ return csvWriter.Error()
+ }
+ return nilAlso applies to: 151-153 🤖 Prompt for AI Agents |
||
| ignoredLines := 0 | ||
| var firstIgnoredLine string | ||
| // Track unique non-matching lines when -unmatched is set. | ||
| seenUnmatched := make(map[string]struct{}) | ||
| for sc.Scan() { | ||
|
Comment on lines
+106
to
+108
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick (assertive) Unbounded memory in -unmatched dedupe. seenUnmatched grows with unique lines; huge logs could exhaust memory. Consider a flag to disable dedupe (-unmatched-all), a size cap, or a Bloom filter if approximate uniqueness is acceptable. Would you like a patch adding -unmatched-all and an optional -unmatched-max= cap? 🤖 Prompt for AI Agents |
||
| line := sc.Text() | ||
| values, ok := processLine(line, re, groupNames) | ||
| if !ok { | ||
| if strings.TrimSpace(line) != "" { | ||
| if listUnmatched { | ||
| if _, exists := seenUnmatched[line]; !exists { | ||
| seenUnmatched[line] = struct{}{} | ||
| // In unmatched mode, print each unique non-matching line once, preserving line endings. | ||
| if _, err := io.WriteString(output, line+lineEnding); err != nil { | ||
| return err | ||
| } | ||
| } | ||
| } else { | ||
| ignoredLines++ | ||
| if firstIgnoredLine == "" { | ||
| firstIgnoredLine = line | ||
| } | ||
| } | ||
| } | ||
| continue | ||
| } | ||
| // In unmatched mode, we skip matched lines entirely. | ||
| if listUnmatched { | ||
| continue | ||
| } | ||
| if firstLine { | ||
| firstLine = false | ||
| if err := csvWriter.Write(groupNames); err != nil { | ||
| return err | ||
| } | ||
| } | ||
| if err := csvWriter.Write(values); err != nil { | ||
| return err | ||
| } | ||
| } | ||
| // Only warn about ignored lines in CSV mode. | ||
| if !listUnmatched && ignoredLines > 0 { | ||
| fmt.Fprintf(os.Stderr, "\nwarning: %d log line(s) did not match the pattern and were ignored\nfirst ignored line: %q\n", ignoredLines, firstIgnoredLine) | ||
| } | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| if err := sc.Err(); err != nil { | ||
| return err | ||
| } | ||
| csvWriter.Flush() | ||
| return csvWriter.Error() | ||
| } | ||
|
|
||
// peekForLineEnding detects the line-ending convention of input without
// consuming any of it.
//
// It wraps input in a buffered reader, peeks at up to sizeMaxPeek bytes and
// looks at the first '\n' in that sample: when it is preceded by '\r' the
// ending is "\r\n", otherwise "\n". When the sample holds no newline
// (including empty input) the ending defaults to "\n".
//
// The reader's buffer is sized to sizeMaxPeek because Peek can never return
// more than the buffer holds. With the default 4096-byte buffer, a first line
// longer than 4 KiB hid its terminator and CRLF input was reported as LF.
//
// The returned reader must be used instead of input, since the peeked bytes
// are held in its buffer. A read error other than io.EOF or
// bufio.ErrBufferFull is returned with a nil reader and an empty ending.
func peekForLineEnding(input io.Reader, sizeMaxPeek int) (io.Reader, string, error) {
	inputBuffer := bufio.NewReaderSize(input, sizeMaxPeek)
	sample, err := inputBuffer.Peek(sizeMaxPeek)
	// A short sample (EOF, or a buffer smaller than the request) is still usable.
	if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, bufio.ErrBufferFull) {
		return nil, "", err
	}
	if idx := bytes.IndexByte(sample, '\n'); idx > 0 && sample[idx-1] == '\r' {
		return inputBuffer, "\r\n", nil
	}
	return inputBuffer, "\n", nil
}
|
b4prog marked this conversation as resolved.
|
||
|
|
||
| func openInput(input io.Reader) *bufio.Scanner { | ||
| inputScanner := bufio.NewScanner(input) | ||
| buf := make([]byte, 0, logLineSizeMax) | ||
| inputScanner.Buffer(buf, bufferSizeMax) | ||
| return inputScanner | ||
| } | ||
|
b4prog marked this conversation as resolved.
|
||
|
|
||
// processLine matches line against re and collects the text captured by each
// named group, in the order the groups appear in the pattern.
//
// It returns the captured values and true when the line matches and at least
// one named group captured a non-empty string. It returns nil and false when
// the line does not match or when every named group is empty, meaning the line
// should be ignored. groupNames is only used to pre-size the result.
func processLine(line string, re *regexp.Regexp, groupNames []string) ([]string, bool) {
	match := re.FindStringSubmatch(line)
	if match == nil {
		return nil, false
	}
	row := make([]string, 0, len(groupNames))
	hasValue := false
	for i, name := range re.SubexpNames() {
		// Index 0 is the whole match; unnamed groups are not exported.
		if i == 0 || name == "" {
			continue
		}
		captured := match[i]
		row = append(row, captured)
		if captured != "" {
			hasValue = true
		}
	}
	if !hasValue {
		return nil, false
	}
	return row, true
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| module github.com/b4prog/Log2CSV | ||
|
|
||
| go 1.25 |
Uh oh!
There was an error while loading. Please reload this page.