From 84ccafabbed11b438f3504fc3769044196629e5f Mon Sep 17 00:00:00 2001 From: b4prog Date: Sun, 7 Sep 2025 23:51:09 +0200 Subject: [PATCH 01/15] Log2CSV initial implementation --- .coderabbit.yaml | 14 ++++ .github/workflows/ci.yaml | 102 +++++++++++++++++++++++ .gitignore | 14 +++- README.md | 44 +++++++++- cmd/log2csv/log2csv.go | 168 ++++++++++++++++++++++++++++++++++++++ go.mod | 3 + taskfile.yaml | 102 +++++++++++++++++++++++ 7 files changed, 444 insertions(+), 3 deletions(-) create mode 100644 .coderabbit.yaml create mode 100644 .github/workflows/ci.yaml create mode 100644 cmd/log2csv/log2csv.go create mode 100644 go.mod create mode 100644 taskfile.yaml diff --git a/.coderabbit.yaml b/.coderabbit.yaml new file mode 100644 index 0000000..d976448 --- /dev/null +++ b/.coderabbit.yaml @@ -0,0 +1,14 @@ +language: en + +reviews: + profile: assertive + high_level_summary: true + auto_review: + enabled: true + drafts: false + + tools: + golangci-lint: + enabled: true + gitleaks: + enabled: true diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..dfc4acd --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,102 @@ +name: CI + +on: + push: + pull_request: + +permissions: + contents: read + +concurrency: + group: ci-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-test: + runs-on: ubuntu-latest + + # Run from the repo root now that go.mod is at top level + defaults: + run: + shell: bash + working-directory: . + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.25.x" + check-latest: true + cache: true + cache-dependency-path: go.sum + + - name: Verify formatting (gofmt) + run: | + unformatted=$(gofmt -l .) + if [[ -n "$unformatted" ]]; then + echo "These files are not gofmt-formatted:" + echo "$unformatted" + echo "Run: gofmt -w ." 
+ echo "Diff:" + for f in $unformatted; do + echo "---- $f" + diff -u "$f" <(gofmt "$f") || true + done + exit 1 + fi + + - name: Tidy (verify no changes) + run: | + set -euo pipefail + cp go.mod go.mod.orig + had_go_sum=0 + if [[ -f go.sum ]]; then + cp go.sum go.sum.orig + had_go_sum=1 + fi + if ! go mod tidy; then + echo "'go mod tidy' failed" + mv -f go.mod.orig go.mod + if [[ $had_go_sum -eq 1 ]]; then mv -f go.sum.orig go.sum; else rm -f go.sum || true; fi + exit 1 + fi + tidy_ok=0 + diff -u go.mod.orig go.mod || tidy_ok=1 + if [[ $had_go_sum -eq 1 ]]; then + diff -u go.sum.orig go.sum || tidy_ok=1 + else + [[ -f go.sum ]] && tidy_ok=1 + fi + mv -f go.mod.orig go.mod + if [[ $had_go_sum -eq 1 ]]; then mv -f go.sum.orig go.sum; else rm -f go.sum || true; fi + if [[ $tidy_ok -ne 0 ]]; then + echo "go.mod/go.sum would change. Run 'go mod tidy' locally and commit." + exit 1 + fi + + - name: Vet + run: go vet -mod=readonly ./... + + - name: Lint (golangci-lint) + uses: golangci/golangci-lint-action@v8 + with: + version: latest + args: --timeout=5m + install-mode: binary + + - name: Build + run: go build -mod=readonly ./... + + - name: Test (race, coverage) + run: go test -mod=readonly -race -covermode=atomic -coverprofile=coverage.out ./... 
+ + - name: Upload coverage (artifact) + if: always() + uses: actions/upload-artifact@v4 + with: + name: coverage + path: coverage.out + if-no-files-found: ignore diff --git a/.gitignore b/.gitignore index aaadf73..4114b79 100644 --- a/.gitignore +++ b/.gitignore @@ -28,5 +28,15 @@ go.work.sum .env # Editor/IDE -# .idea/ -# .vscode/ +.idea/ +.vscode/* +!.vscode/extensions.json +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json + +# Build artifacts +/build/ + +# Task cache directory +.task/ \ No newline at end of file diff --git a/README.md b/README.md index 70abc12..c4c7923 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,44 @@ # Log2CSV -Log2CSV is a command-line tool that transforms any log into a CSV file using a regular expression. + +`Log2CSV` is a command-line tool written in Go that transforms raw log files into CSV. +It extracts structured data from logs using a regular expression with named capture groups and writes CSV to STDOUT. + +--- + +## Features + +- Reads log lines from **STDIN** and writes CSV to **STDOUT**. +- Extracts fields using **named capture groups** (`(?P...)`). +- The **CSV header row** is automatically generated from group names. +- Ignores lines that do not match the expression. +- Skips matched lines where every named group is empty. +- Preserves the input's line endings (LF/CRLF). + +## Usage + +Log2CSV reads from STDIN and converts matching lines to CSV, writing the result to STDOUT, using the provided regular expression. 
+ +### Example - convert UFW log to CSV + +```sh +log2csv -regexp '^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?PROTO=(?P[A-Z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TC=(?P\d+))?(?:\s+HOPLIMIT=(?P\d+))?(?:\s+FLOWLBL=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$' < /var/log/ufw.log +``` + +On Windows + +```powershell +Get-Content C:\path\ufw.log | log2csv -regexp "^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?PROTO=(?P[A-Z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TC=(?P\d+))?(?:\s+HOPLIMIT=(?P\d+))?(?:\s+FLOWLBL=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$". 
+``` + +## Install + +```sh +go install github.com/b4prog/Log2CSV/cmd/log2csv@latest +``` + +## Build from source + +```sh +task build +./build/log2csv -help +``` diff --git a/cmd/log2csv/log2csv.go b/cmd/log2csv/log2csv.go new file mode 100644 index 0000000..02a44c8 --- /dev/null +++ b/cmd/log2csv/log2csv.go @@ -0,0 +1,168 @@ +package main + +import ( + "bufio" + "bytes" + "errors" + "flag" + "fmt" + "io" + "os" + "regexp" + "strings" +) + +const ( + csvSeparator = ',' + logLineSizeMax = 64 * 1024 + bufferSizeMax = 10 * 1024 * 1024 +) + +var ( + // ErrInvalidRegexp is returned when the provided regular expression + // cannot be compiled due to invalid syntax. + ErrInvalidRegexp = errors.New("invalid regular expression syntax") + // ErrNoNamedCaptureGroups is returned when the provided regular expression + // does not contain any named capture groups (e.g. (?P...)). + ErrNoNamedCaptureGroups = errors.New("the regular expression must contain at least one named capture group") +) + +func usage() { + msg := `Usage: + log2csv -regexp '...) groups>' + +Description: + Reads log lines from STDIN, extracts named capture groups using the provided regular expression, + and writes a CSV to STDOUT. 
+ +Examples: + cat /var/log/ufw.log | log2csv -regexp '^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?PROTO=(?P[A-Z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TC=(?P\d+))?(?:\s+HOPLIMIT=(?P\d+))?(?:\s+FLOWLBL=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$' +` + fmt.Fprint(os.Stderr, msg) +} + +func main() { + if err := run(); err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } +} + +func run() error { + pattern := flag.String("regexp", "", "regular expression with named capture groups, e.g. '(?P...) (?P...)'") + flag.Usage = usage + flag.Parse() + if strings.TrimSpace(*pattern) == "" { + usage() + return fmt.Errorf("flag -regexp is required") + } + re, err := regexp.Compile(*pattern) + if err != nil { + return fmt.Errorf("%w: %v", ErrInvalidRegexp, err) + } + groupNames := extractGroupNames(re) + if len(groupNames) == 0 { + return ErrNoNamedCaptureGroups + } + out := bufio.NewWriter(os.Stdout) + err = processInput(os.Stdin, re, groupNames, out) + if flushErr := out.Flush(); err == nil && flushErr != nil { + err = flushErr + } + return err +} + +func extractGroupNames(re *regexp.Regexp) []string { + names := re.SubexpNames()[1:] + ordered := make([]string, 0, len(names)) + for _, name := range names { + if name != "" { + ordered = append(ordered, name) + } + } + return ordered +} + +func processInput(input io.Reader, re *regexp.Regexp, groupNames []string, output io.Writer) error { + inputReader, lineEnding, err := peekForLineEnding(input, logLineSizeMax) + if err != nil { + return err + } + sc := 
openInput(inputReader) + firstLine := true + for sc.Scan() { + line := sc.Text() + values, ok := processLine(line, re, groupNames) + if !ok { + continue + } + if firstLine { + firstLine = false + if err := writeCSVRow(output, groupNames, lineEnding); err != nil { + return err + } + } + if err := writeCSVRow(output, values, lineEnding); err != nil { + return err + } + } + return sc.Err() +} + +func peekForLineEnding(input io.Reader, sizeMaxPeek int) (io.Reader, string, error) { + inputBuffer := bufio.NewReader(input) + sample, err := inputBuffer.Peek(sizeMaxPeek) + if err != nil && err != io.EOF && !errors.Is(err, bufio.ErrBufferFull) { + return nil, "", err + } + if idx := bytes.IndexByte(sample, '\n'); idx >= 0 { + if idx > 0 && sample[idx-1] == '\r' { + return inputBuffer, "\r\n", nil + } + return inputBuffer, "\n", nil + } + return inputBuffer, "\n", nil +} + +func openInput(input io.Reader) *bufio.Scanner { + inputScanner := bufio.NewScanner(input) + buf := make([]byte, 0, logLineSizeMax) + inputScanner.Buffer(buf, bufferSizeMax) + return inputScanner +} + +// Process a log line and returns CSV values + true if the line is valid, or nil + false if the line should be ignored. 
+func processLine(line string, re *regexp.Regexp, groupNames []string) ([]string, bool) { + submatches := re.FindStringSubmatch(line) + if submatches == nil { + return nil, false + } + subNames := re.SubexpNames() + values := make([]string, 0, len(groupNames)) + allEmpty := true + for idxSubmatch := 1; idxSubmatch < len(submatches); idxSubmatch++ { + name := subNames[idxSubmatch] + if name == "" { + continue + } + val := submatches[idxSubmatch] + if val != "" { + allEmpty = false + } + values = append(values, val) + } + if allEmpty { + return nil, false + } + return values, true +} + +func writeCSVRow(output io.Writer, values []string, lineEnding string) error { + csvValues := make([]string, len(values)) + for idxValue, value := range values { + csvValues[idxValue] = `"` + strings.ReplaceAll(value, `"`, `""`) + `"` + } + csvLine := strings.Join(csvValues, string(csvSeparator)) + lineEnding + _, err := io.WriteString(output, csvLine) + return err +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..ad0cc13 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/b4prog/Log2CSV + +go 1.25 diff --git a/taskfile.yaml b/taskfile.yaml new file mode 100644 index 0000000..e212ea6 --- /dev/null +++ b/taskfile.yaml @@ -0,0 +1,102 @@ +version: "3" + +vars: + APP_NAME: Log2CSV + SRC_DIR: cmd/log2csv + BUILD_DIR: build + BIN_NAME: log2csv{{if eq .OS "windows"}}.exe{{end}} + CGO_ENABLED: "0" + LDFLAGS: "" + +tasks: + default: + desc: Build the project + cmds: + - task: build + + help: + desc: Show available tasks + silent: true + cmds: + - | + echo "Tasks:" + echo " task build # Compile to ./build/{{.BIN_NAME}}" + echo " task run -- [args] # Run the compiled binary with optional CLI args" + echo " task clean # Remove ./build" + echo " task fmt # go fmt ./..." + echo " task vet # go vet ./..." + echo " task tidy # go mod tidy" + echo " task deps # go mod download" + echo " task test # go test -race ./..." 
+ + tidy: + desc: go mod tidy in SRC_DIR + dir: "{{.SRC_DIR}}" + cmds: + - go mod tidy + + deps: + desc: Download modules + dir: "{{.SRC_DIR}}" + cmds: + - go mod download + + fmt: + desc: go fmt + dir: "{{.SRC_DIR}}" + cmds: + - go fmt ./... + + vet: + desc: go vet + dir: "{{.SRC_DIR}}" + cmds: + - go vet ./... + + build: + desc: Build {{.APP_NAME}} to ./{{.BUILD_DIR}}/{{.BIN_NAME}} + deps: [deps, tidy, vet] + env: + CGO_ENABLED: "{{.CGO_ENABLED}}" + cmds: + - > + {{if eq .OS "windows"}} + powershell -NoProfile -Command "New-Item -ItemType Directory -Force {{.BUILD_DIR}}" + {{else}} + mkdir -p {{.BUILD_DIR}} + {{end}} + - go build -trimpath -ldflags "{{.LDFLAGS}}" -o {{.BUILD_DIR}}/{{.BIN_NAME}} ./{{.SRC_DIR}} + sources: + - "**/*.go" + - "go.mod" + - "go.sum" + generates: + - "{{.BUILD_DIR}}/{{.BIN_NAME}}" + + run: + desc: Build then run the binary (pass CLI args after --) + deps: [build] + cmds: + - > + {{if eq .OS "windows"}} + & ".\{{.BUILD_DIR}}\{{.BIN_NAME}}" {{.CLI_ARGS}} + {{else}} + "./{{.BUILD_DIR}}/{{.BIN_NAME}}" {{.CLI_ARGS}} + {{end}} + vars: + CLI_ARGS: "{{.CLI_ARGS}}" + test: + desc: Run unit tests + dir: "{{.SRC_DIR}}" + cmds: + - go test -race ./... 
+ + clean: + desc: Remove build artifacts + cmds: + - > + {{if eq .OS "windows"}} + powershell -NoProfile -Command "if (Test-Path '{{.BUILD_DIR}}') { Remove-Item -Recurse -Force '{{.BUILD_DIR}}' }" + {{else}} + rm -rf "{{.BUILD_DIR}}" + {{end}} From 90c9ae402daac1d60db15e491897af96bf9560e1 Mon Sep 17 00:00:00 2001 From: b4prog Date: Sun, 7 Sep 2025 23:56:09 +0200 Subject: [PATCH 02/15] improves CodeRabbit config --- .coderabbit.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.coderabbit.yaml b/.coderabbit.yaml index d976448..8514c98 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -12,3 +12,5 @@ reviews: enabled: true gitleaks: enabled: true + yamllint: + enabled: true From 29280dd1cd195bd42c26d3d2557e4786dfe47021 Mon Sep 17 00:00:00 2001 From: b4prog Date: Mon, 8 Sep 2025 00:26:27 +0200 Subject: [PATCH 03/15] =?UTF-8?q?Improves=20sample=20regular=20expression?= =?UTF-8?q?=20for=20UFW=20logs=20-=20IPv4=20(TOS/PREC/TTL/ID[/DF])=20or=20?= =?UTF-8?q?IPv6=20(TC/HOPLIMIT/FLOWLBL)=20through=20alternation=20?= =?UTF-8?q?=E2=80=93=20TCP/UDP/ICMP(-v6)=20with=20optional=20ports=20and?= =?UTF-8?q?=20TCP=20flags=20=E2=80=93=20the=20second=20LEN=20(L4=20length)?= =?UTF-8?q?=20present=20on=20UDP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 ++-- cmd/log2csv/log2csv.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c4c7923..0d3b270 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,13 @@ Log2CSV reads from STDIN and converts matching lines to CSV, writing the result ### Example - convert UFW log to CSV ```sh -log2csv -regexp 
'^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?PROTO=(?P[A-Z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TC=(?P\d+))?(?:\s+HOPLIMIT=(?P\d+))?(?:\s+FLOWLBL=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$' < /var/log/ufw.log +log2csv -regexp '^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?|(?:TC=(?P\d+)\s+)?(?:HOPLIMIT=(?P\d+)\s+)?(?:FLOWLBL=(?P[0-9A-Fa-fx]+)\s+)? 
)PROTO=(?P[A-Za-z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TYPE=(?P\d+))?(?:\s+CODE=(?P\d+))?(?:\s+SEQ=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$' < /var/log/ufw.log ``` On Windows ```powershell -Get-Content C:\path\ufw.log | log2csv -regexp "^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?PROTO=(?P[A-Z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TC=(?P\d+))?(?:\s+HOPLIMIT=(?P\d+))?(?:\s+FLOWLBL=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$". +Get-Content C:\path\ufw.log | log2csv -regexp "^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?|(?:TC=(?P\d+)\s+)?(?:HOPLIMIT=(?P\d+)\s+)?(?:FLOWLBL=(?P[0-9A-Fa-fx]+)\s+)? )PROTO=(?P[A-Za-z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TYPE=(?P\d+))?(?:\s+CODE=(?P\d+))?(?:\s+SEQ=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$". 
``` ## Install diff --git a/cmd/log2csv/log2csv.go b/cmd/log2csv/log2csv.go index 02a44c8..3951882 100644 --- a/cmd/log2csv/log2csv.go +++ b/cmd/log2csv/log2csv.go @@ -36,7 +36,7 @@ Description: and writes a CSV to STDOUT. Examples: - cat /var/log/ufw.log | log2csv -regexp '^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?PROTO=(?P[A-Z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TC=(?P\d+))?(?:\s+HOPLIMIT=(?P\d+))?(?:\s+FLOWLBL=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$' + log2csv -regexp '^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?|(?:TC=(?P\d+)\s+)?(?:HOPLIMIT=(?P\d+)\s+)?(?:FLOWLBL=(?P[0-9A-Fa-fx]+)\s+)? 
)PROTO=(?P[A-Za-z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TYPE=(?P\d+))?(?:\s+CODE=(?P\d+))?(?:\s+SEQ=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$' < /var/log/ufw.log ` fmt.Fprint(os.Stderr, msg) } From d53666294bf3033470e5028868d0d953ffb053ac Mon Sep 17 00:00:00 2001 From: b4prog Date: Mon, 8 Sep 2025 00:47:43 +0200 Subject: [PATCH 04/15] Write CSV using 'encoding/csv' --- cmd/log2csv/log2csv.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/cmd/log2csv/log2csv.go b/cmd/log2csv/log2csv.go index 3951882..4b8eeef 100644 --- a/cmd/log2csv/log2csv.go +++ b/cmd/log2csv/log2csv.go @@ -3,6 +3,7 @@ package main import ( "bufio" "bytes" + "encoding/csv" "errors" "flag" "fmt" @@ -158,11 +159,12 @@ func processLine(line string, re *regexp.Regexp, groupNames []string) ([]string, } func writeCSVRow(output io.Writer, values []string, lineEnding string) error { - csvValues := make([]string, len(values)) - for idxValue, value := range values { - csvValues[idxValue] = `"` + strings.ReplaceAll(value, `"`, `""`) + `"` + csvWriter := csv.NewWriter(output) + csvWriter.Comma = csvSeparator + csvWriter.UseCRLF = lineEnding == "\r\n" + if err := csvWriter.Write(values); err != nil { + return err } - csvLine := strings.Join(csvValues, string(csvSeparator)) + lineEnding - _, err := io.WriteString(output, csvLine) - return err + csvWriter.Flush() + return csvWriter.Error() } From 7cc0d981145c53e49d667af935c160d71a1c9611 Mon Sep 17 00:00:00 2001 From: b4prog Date: Mon, 8 Sep 2025 02:02:03 +0200 Subject: [PATCH 05/15] Changes the behavior to mage an error when line don't match. 
-unmatched to list lines that do not match --- README.md | 16 +++++++++++----- cmd/log2csv/log2csv.go | 41 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0d3b270..91b0145 100644 --- a/README.md +++ b/README.md @@ -10,24 +10,26 @@ It extracts structured data from logs using a regular expression with named capt - Reads log lines from **STDIN** and writes CSV to **STDOUT**. - Extracts fields using **named capture groups** (`(?P...)`). - The **CSV header row** is automatically generated from group names. -- Ignores lines that do not match the expression. -- Skips matched lines where every named group is empty. - Preserves the input's line endings (LF/CRLF). +- Optional **unmatched mode** (`-unmatched`) to print **unique non-matching lines** instead of CSV. ## Usage -Log2CSV reads from STDIN and converts matching lines to CSV, writing the result to STDOUT, using the provided regular expression. +Log2CSV reads from STDIN and, by default, converts matching lines to CSV written to STDOUT using the provided regular expression. + +- **CSV mode :** provide `-regexp` with named capture groups; matching lines become CSV rows (header generated automatically). +- **Unmatched mode:** add `-unmatched` to print each **unique** input line that **does not** match the pattern (one per line), to STDOUT. No CSV is produced in this mode. ### Example - convert UFW log to CSV ```sh -log2csv -regexp '^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?|(?:TC=(?P\d+)\s+)?(?:HOPLIMIT=(?P\d+)\s+)?(?:FLOWLBL=(?P[0-9A-Fa-fx]+)\s+)? 
)PROTO=(?P[A-Za-z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TYPE=(?P\d+))?(?:\s+CODE=(?P\d+))?(?:\s+SEQ=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$' < /var/log/ufw.log +log2csv -regexp '^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[\s*(?P\d+(?:\.\d+)?)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S*)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)(?:(?:\s+(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)(?:\s+(?PDF))?)|\s+TC=(?P\d+)\s+HOPLIMIT=(?P\d+)\s+FLOWLBL=(?P[0-9A-Fa-fx]+))?\s+PROTO=(?P[A-Za-z0-9]+)(?:\s+(?:SPT|SP)=(?P\d+))?(?:\s+(?:DPT|DP)=(?P\d+))?(?:\s+WINDOW=(?P\d+))?(?:\s+RES=(?P0x[0-9A-Fa-f]{2}))?(?:\s+(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TYPE=(?P\d+))?(?:\s+CODE=(?P\d+))?(?:\s+SEQ=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$' < /var/log/ufw.log ``` On Windows ```powershell -Get-Content C:\path\ufw.log | log2csv -regexp "^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?|(?:TC=(?P\d+)\s+)?(?:HOPLIMIT=(?P\d+)\s+)?(?:FLOWLBL=(?P[0-9A-Fa-fx]+)\s+)? )PROTO=(?P[A-Za-z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TYPE=(?P\d+))?(?:\s+CODE=(?P\d+))?(?:\s+SEQ=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$". 
+Get-Content C:\path\ufw.log | log2csv -regexp "^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[\s*(?P\d+(?:\.\d+)?)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S*)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)(?:(?:\s+(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)(?:\s+(?PDF))?)|\s+TC=(?P\d+)\s+HOPLIMIT=(?P\d+)\s+FLOWLBL=(?P[0-9A-Fa-fx]+))?\s+PROTO=(?P[A-Za-z0-9]+)(?:\s+(?:SPT|SP)=(?P\d+))?(?:\s+(?:DPT|DP)=(?P\d+))?(?:\s+WINDOW=(?P\d+))?(?:\s+RES=(?P0x[0-9A-Fa-f]{2}))?(?:\s+(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TYPE=(?P\d+))?(?:\s+CODE=(?P\d+))?(?:\s+SEQ=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$". ``` ## Install @@ -42,3 +44,7 @@ go install github.com/b4prog/Log2CSV@latest task build ./build/log2csv -help ``` + +``` + +``` diff --git a/cmd/log2csv/log2csv.go b/cmd/log2csv/log2csv.go index 4b8eeef..aa49763 100644 --- a/cmd/log2csv/log2csv.go +++ b/cmd/log2csv/log2csv.go @@ -30,14 +30,20 @@ var ( func usage() { msg := `Usage: - log2csv -regexp '...) groups>' + log2csv -regexp '...) groups>' [-unmatched] Description: Reads log lines from STDIN, extracts named capture groups using the provided regular expression, and writes a CSV to STDOUT. + If -unmatched is provided, the tool instead prints the unique non-matching lines (one per line) to STDOUT. + Examples: - log2csv -regexp '^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[(?P[\d\.]+)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S+)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)\s+(?:(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)\s+(?:(?PDF)\s+)?|(?:TC=(?P\d+)\s+)?(?:HOPLIMIT=(?P\d+)\s+)?(?:FLOWLBL=(?P[0-9A-Fa-fx]+)\s+)? 
)PROTO=(?P[A-Za-z0-9]+)\s+(?:(?:SPT|SP)=(?P\d+)\s+)?(?:(?:DPT|DP)=(?P\d+)\s+)?(?:WINDOW=(?P\d+)\s+)?(?:RES=(?P0x[0-9A-Fa-f]{2})\s+)?(?:(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TYPE=(?P\d+))?(?:\s+CODE=(?P\d+))?(?:\s+SEQ=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$' < /var/log/ufw.log + # CSV mode (default) + log2csv -regexp '^(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?\+\d{2}:\d{2})\s+(?P\S+)\s+(?P\S+):\s+\[\s*(?P\d+(?:\.\d+)?)\]\s+\[(?PUFW\s+\S+)\]\s+IN=(?P\S*)\s+OUT=(?P\S*)\s+MAC=(?P\S*)\s+SRC=(?P\S+)\s+DST=(?P\S+)\s+LEN=(?P\d+)(?:(?:\s+(?:TOS=(?P0x[0-9A-Fa-f]{2})\s+)?(?:PREC=(?P0x[0-9A-Fa-f]{2})\s+)?(?:TTL=(?P\d+)\s+)?ID=(?P\d+)(?:\s+(?PDF))?)|\s+TC=(?P\d+)\s+HOPLIMIT=(?P\d+)\s+FLOWLBL=(?P[0-9A-Fa-fx]+))?\s+PROTO=(?P[A-Za-z0-9]+)(?:\s+(?:SPT|SP)=(?P\d+))?(?:\s+(?:DPT|DP)=(?P\d+))?(?:\s+WINDOW=(?P\d+))?(?:\s+RES=(?P0x[0-9A-Fa-f]{2}))?(?:\s+(?P(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE)(?:\s+(?:SYN|ACK|FIN|RST|PSH|URG|CWR|ECE))*))?(?:\s+URGP=(?P\d+))?(?:\s+TYPE=(?P\d+))?(?:\s+CODE=(?P\d+))?(?:\s+SEQ=(?P\d+))?(?:\s+LEN=(?P\d+))?\s*$' < /var/log/ufw.log + + # List unique non-matching lines + log2csv -regexp '' -unmatched < /var/log/ufw.log ` fmt.Fprint(os.Stderr, msg) } @@ -51,6 +57,7 @@ func main() { func run() error { pattern := flag.String("regexp", "", "regular expression with named capture groups, e.g. '(?P...) 
(?P...)'") + listUnmatched := flag.Bool("unmatched", false, "only list unique non-matching lines from STDIN") flag.Usage = usage flag.Parse() if strings.TrimSpace(*pattern) == "" { @@ -66,7 +73,7 @@ func run() error { return ErrNoNamedCaptureGroups } out := bufio.NewWriter(os.Stdout) - err = processInput(os.Stdin, re, groupNames, out) + err = processInput(os.Stdin, re, groupNames, out, *listUnmatched) if flushErr := out.Flush(); err == nil && flushErr != nil { err = flushErr } @@ -84,17 +91,39 @@ func extractGroupNames(re *regexp.Regexp) []string { return ordered } -func processInput(input io.Reader, re *regexp.Regexp, groupNames []string, output io.Writer) error { +func processInput(input io.Reader, re *regexp.Regexp, groupNames []string, output io.Writer, listUnmatched bool) error { inputReader, lineEnding, err := peekForLineEnding(input, logLineSizeMax) if err != nil { return err } sc := openInput(inputReader) firstLine := true + ignoredLines := 0 + var firstIgnoredLine string + // Track unique non-matching lines when -unmatched is set. + seenUnmatched := make(map[string]struct{}) for sc.Scan() { line := sc.Text() values, ok := processLine(line, re, groupNames) if !ok { + if strings.TrimSpace(line) != "" { + if listUnmatched { + if _, exists := seenUnmatched[line]; !exists { + seenUnmatched[line] = struct{}{} + // In unmatched mode, print each unique non-matching line once. + fmt.Fprintln(output, line) + } + } else { + ignoredLines++ + if firstIgnoredLine == "" { + firstIgnoredLine = line + } + } + } + continue + } + // In unmatched mode, we skip matched lines entirely. + if listUnmatched { continue } if firstLine { @@ -107,6 +136,10 @@ func processInput(input io.Reader, re *regexp.Regexp, groupNames []string, outpu return err } } + // Only warn about ignored lines in CSV mode. 
+ if !listUnmatched && ignoredLines > 0 { + fmt.Fprintf(os.Stderr, "\nwarning: %d log line(s) did not match the pattern and were ignored\nfirst ignored line: %q\n", ignoredLines, firstIgnoredLine) + } return sc.Err() } From d2a8fe1701acd2114c4238356e5934bb5bb0e2a4 Mon Sep 17 00:00:00 2001 From: b4prog Date: Mon, 8 Sep 2025 02:05:40 +0200 Subject: [PATCH 06/15] removes lint that breaks way too often --- .github/workflows/ci.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dfc4acd..614f807 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -80,13 +80,6 @@ jobs: - name: Vet run: go vet -mod=readonly ./... - - name: Lint (golangci-lint) - uses: golangci/golangci-lint-action@v8 - with: - version: latest - args: --timeout=5m - install-mode: binary - - name: Build run: go build -mod=readonly ./... From af752e3d75b047ce590749fd661dc0654a58cc5c Mon Sep 17 00:00:00 2001 From: b4prog Date: Mon, 8 Sep 2025 06:53:00 +0200 Subject: [PATCH 07/15] Reuse a single csv.Writer; avoid per-row allocation/flush. 
--- cmd/log2csv/log2csv.go | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/cmd/log2csv/log2csv.go b/cmd/log2csv/log2csv.go index aa49763..1d2cdfb 100644 --- a/cmd/log2csv/log2csv.go +++ b/cmd/log2csv/log2csv.go @@ -97,6 +97,9 @@ func processInput(input io.Reader, re *regexp.Regexp, groupNames []string, outpu return err } sc := openInput(inputReader) + csvWriter := csv.NewWriter(output) + csvWriter.Comma = csvSeparator + csvWriter.UseCRLF = lineEnding == "\r\n" firstLine := true ignoredLines := 0 var firstIgnoredLine string @@ -128,11 +131,11 @@ func processInput(input io.Reader, re *regexp.Regexp, groupNames []string, outpu } if firstLine { firstLine = false - if err := writeCSVRow(output, groupNames, lineEnding); err != nil { + if err := csvWriter.Write(groupNames); err != nil { return err } } - if err := writeCSVRow(output, values, lineEnding); err != nil { + if err := csvWriter.Write(values); err != nil { return err } } @@ -140,7 +143,11 @@ func processInput(input io.Reader, re *regexp.Regexp, groupNames []string, outpu if !listUnmatched && ignoredLines > 0 { fmt.Fprintf(os.Stderr, "\nwarning: %d log line(s) did not match the pattern and were ignored\nfirst ignored line: %q\n", ignoredLines, firstIgnoredLine) } - return sc.Err() + if err := sc.Err(); err != nil { + return err + } + csvWriter.Flush() + return csvWriter.Error() } func peekForLineEnding(input io.Reader, sizeMaxPeek int) (io.Reader, string, error) { @@ -190,14 +197,3 @@ func processLine(line string, re *regexp.Regexp, groupNames []string) ([]string, } return values, true } - -func writeCSVRow(output io.Writer, values []string, lineEnding string) error { - csvWriter := csv.NewWriter(output) - csvWriter.Comma = csvSeparator - csvWriter.UseCRLF = lineEnding == "\r\n" - if err := csvWriter.Write(values); err != nil { - return err - } - csvWriter.Flush() - return csvWriter.Error() -} From 9d5548edc6bd9ad0edf3b1e908eab38af333da74 Mon Sep 17 00:00:00 2001 From: 
b4prog Date: Tue, 9 Sep 2025 06:13:38 +0200 Subject: [PATCH 08/15] update CodeRabbit configuration --- .coderabbit.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 8514c98..7998b04 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -3,9 +3,10 @@ language: en reviews: profile: assertive high_level_summary: true + poem: false auto_review: enabled: true - drafts: false + drafts: true tools: golangci-lint: @@ -14,3 +15,5 @@ reviews: enabled: true yamllint: enabled: true + actionlint: + enabled: true From 49cdd393c78c8774e44ce094658aa9360fd4273d Mon Sep 17 00:00:00 2001 From: b4prog Date: Tue, 9 Sep 2025 06:18:06 +0200 Subject: [PATCH 09/15] Fix unchecked write and preserve CRLF in -unmatched mode. --- cmd/log2csv/log2csv.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cmd/log2csv/log2csv.go b/cmd/log2csv/log2csv.go index 1d2cdfb..f46f941 100644 --- a/cmd/log2csv/log2csv.go +++ b/cmd/log2csv/log2csv.go @@ -113,8 +113,10 @@ func processInput(input io.Reader, re *regexp.Regexp, groupNames []string, outpu if listUnmatched { if _, exists := seenUnmatched[line]; !exists { seenUnmatched[line] = struct{}{} - // In unmatched mode, print each unique non-matching line once. - fmt.Fprintln(output, line) + // In unmatched mode, print each unique non-matching line once, preserving line endings. 
+ if _, err := io.WriteString(output, line+lineEnding); err != nil { + return err + } } } else { ignoredLines++ From 6c379605f54f5de49a2964af58f9ff48d40850ac Mon Sep 17 00:00:00 2001 From: b4prog Date: Tue, 9 Sep 2025 06:19:01 +0200 Subject: [PATCH 10/15] fix readme.md empty quote --- README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.md b/README.md index 91b0145..f42ba48 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,3 @@ go install github.com/b4prog/Log2CSV@latest task build ./build/log2csv -help ``` - -``` - -``` From 93e7ca7c93002caa19adbda53b0def2103914c52 Mon Sep 17 00:00:00 2001 From: b4prog Date: Tue, 9 Sep 2025 06:21:30 +0200 Subject: [PATCH 11/15] Prevents infinite waits on stuck builds/tests. --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 614f807..73ef2a2 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,6 +14,7 @@ concurrency: jobs: build-test: runs-on: ubuntu-latest + timeout-minutes: 20 # Run from the repo root now that go.mod is at top level defaults: From 8f0151585c8e1835f5424a809f4d677ae71643a8 Mon Sep 17 00:00:00 2001 From: b4prog Date: Tue, 9 Sep 2025 06:26:44 +0200 Subject: [PATCH 12/15] Avoid mutable tags; pin to immutable SHAs (update via Dependabot). 
--- .github/workflows/ci.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 73ef2a2..61f5907 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -24,10 +24,10 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@ - name: Set up Go - uses: actions/setup-go@v5 + uses: actions/setup-go@ with: go-version: "1.25.x" check-latest: true @@ -89,7 +89,7 @@ jobs: - name: Upload coverage (artifact) if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ with: name: coverage path: coverage.out From 0521b222b98725615450cdfab1d7ecfdce6baf5a Mon Sep 17 00:00:00 2001 From: b4prog Date: Tue, 9 Sep 2025 06:30:04 +0200 Subject: [PATCH 13/15] update CodeRabbit config --- .coderabbit.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 7998b04..2e99f3f 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -17,3 +17,9 @@ reviews: enabled: true actionlint: enabled: true + shellcheck: + enabled: true + markdownlint: + enabled: true + osvScanner: + enabled: true From 972c22b16be9ff1d27239ad7eb34d9fa4f0e81db Mon Sep 17 00:00:00 2001 From: b4prog Date: Tue, 9 Sep 2025 06:33:07 +0200 Subject: [PATCH 14/15] update CodeRabbit config: Explicitly set GitHub Checks timeout to reduce flakiness on slow runs. --- .coderabbit.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 2e99f3f..77316b3 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -23,3 +23,6 @@ reviews: enabled: true osvScanner: enabled: true + github-checks: + enabled: true + timeout_ms: 300000 From 16515dcb253d486eaa7e61820476492063d060aa Mon Sep 17 00:00:00 2001 From: b4prog Date: Tue, 9 Sep 2025 06:37:08 +0200 Subject: [PATCH 15/15] Revert "Avoid mutable tags; pin to immutable SHAs (update via Dependabot)." This reverts commit 8f0151585c8e1835f5424a809f4d677ae71643a8. 
--- .github/workflows/ci.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 61f5907..73ef2a2 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -24,10 +24,10 @@ jobs: steps: - name: Checkout - uses: actions/checkout@ + uses: actions/checkout@v4 - name: Set up Go - uses: actions/setup-go@ + uses: actions/setup-go@v5 with: go-version: "1.25.x" check-latest: true @@ -89,7 +89,7 @@ jobs: - name: Upload coverage (artifact) if: always() - uses: actions/upload-artifact@ + uses: actions/upload-artifact@v4 with: name: coverage path: coverage.out