From b1e0b17742af55f53d97b26d491cce4b56fc05fd Mon Sep 17 00:00:00 2001 From: Andrei-Eugen IOSIFESCU Date: Tue, 31 Mar 2026 01:23:22 +0300 Subject: [PATCH] made SelectRows public + modernized syntax --- CHANGELOG.md | 2 +- README.md | 5 ++-- csv.go | 14 +++++----- csv_test.go | 6 ++--- df.go | 21 +++++++-------- df_test.go | 68 ++++++++++++++++++++++++------------------------- example_test.go | 52 ++++++++++++++++++------------------- ops.go | 62 ++++++++++++++++++++++---------------------- ops_test.go | 14 +++++----- stats.go | 30 +++++++++++----------- stats_test.go | 28 ++++++++++---------- type.go | 54 ++++++++++++++++++++++++--------------- type_test.go | 2 +- 13 files changed, 184 insertions(+), 174 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88d0e10..ca1dfd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ All notable changes to this project will be documented in this file. result := df.Filter("bad_column", "==", 1) ``` -- **`Head` and `Tail` panic on time columns** — The internal `slice()` function had no `TimeType` case, causing it to fall through to the error path on any DataFrame that contained a time column. Added the missing `case TimeType:` branch, consistent with the pattern already used in `selectRows()`. +- **`Head` and `Tail` panic on time columns** — The internal `slice()` function had no `TimeType` case, causing it to fall through to the error path on any DataFrame that contained a time column. Added the missing `case TimeType:` branch, consistent with the pattern already used in `SelectRows()`. - **GroupBy key collisions on pipe characters** — Group keys were built by joining column values with `"|"`, meaning a value containing that character (e.g. `"a|b"`) would be indistinguishable from two separate values (`"a"` and `"b"`) in a multi-column group. Keys are now length-prefixed (`"3:foo"`) and separated by a null byte, making them unambiguous for all string content. The original values are stored directly on the group struct so no parsing of the key is needed when building the result. diff --git a/README.md b/README.md index dc390ac..4da091f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,8 @@ _Smooth, intelligent data processing for Go._ -Otters is a high-performance DataFrame library for Go, inspired by Pandas but designed for Go's strengths: type safety, performance, and simplicity. +Otters is a high-performance DataFrame library for Go, inspired by Pandas but designed for Go's +strengths: type safety, performance, and simplicity. [![Go Version](https://img.shields.io/badge/go-1.19+-blue.svg)](https://golang.org) [![Go Report Card](https://goreportcard.com/badge/github.com/datumbrain/otters)](https://goreportcard.com/report/github.com/datumbrain/otters) @@ -180,7 +181,7 @@ df, err := otters.ReadCSVWithOptions("data.csv", otters.CSVOptions{ }) // From data -df, err := otters.NewDataFrameFromMap(map[string]interface{}{ +df, err := otters.NewDataFrameFromMap(map[string]any{ "name": []string{"Alice", "Bob", "Carol"}, "age": []int64{25, 30, 35}, "salary": []float64{50000, 60000, 70000}, diff --git a/csv.go b/csv.go index ce3e871..6a1bc09 100644 --- a/csv.go +++ b/csv.go @@ -45,7 +45,7 @@ func ReadCSVWithOptions(filename string, options CSVOptions) (*DataFrame, error) } func skipRows(reader *csv.Reader, skipCount int, operation string) error { - for i := 0; i < skipCount; i++ { + for range skipCount { if _, err := reader.Read(); err != nil { if err == io.EOF { return nil @@ -112,7 +112,7 @@ func readCSVWithoutHeaders(reader *csv.Reader, options CSVOptions, operation str func generateHeaders(count int) []string { headers := make([]string, count) - for i := 0; i < count; i++ { + for i := range count { headers[i] = fmt.Sprintf("Column_%d", i) } return headers @@ -137,7 +137,7 @@ func readDataRows(reader *csv.Reader, headers []string, options CSVOptions, oper } rows = append(rows, row) - rowCount++ + rowCount += 1 if options.MaxRows > 0 && rowCount >= options.MaxRows { break @@ -181,7 +181,7 @@ func (df *DataFrame) WriteCSVWithOptions(filename string, options CSVOptions) er } // Write data rows - for i := 0; i < df.length; i++ { + for i := range df.length { var row []string for _, colName := range df.order { value, err := df.columns[colName].Get(i) @@ -289,7 +289,7 @@ func buildDataFrameFromRows(headers []string, rows [][]string) (*DataFrame, erro } // convertStringSliceToType converts a slice of strings to the specified type -func convertStringSliceToType(values []string, targetType ColumnType) (interface{}, error) { +func convertStringSliceToType(values []string, targetType ColumnType) (any, error) { switch targetType { case StringType: // Return a copy to avoid external modification @@ -367,7 +367,7 @@ func cleanHeader(header string) string { } // formatValueForCSV formats a value for CSV output -func formatValueForCSV(value interface{}) string { +func formatValueForCSV(value any) string { switch v := value.(type) { case string: return v @@ -479,7 +479,7 @@ func ValidateCSV(filename string) (*CSVInfo, error) { info.Rows+1, info.Columns, len(row))) } - info.Rows++ + info.Rows += 1 } return info, nil diff --git a/csv_test.go b/csv_test.go index bf3c8b7..ffbba81 100644 --- a/csv_test.go +++ b/csv_test.go @@ -99,7 +99,7 @@ Bob,30` } func TestWriteCSVEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, "col2": []string{"a", "b", "c"}, } @@ -398,7 +398,7 @@ func TestCSV_WriteCSV_PropagatesDataFrameError(t *testing.T) { } func TestCSV_WriteCSVWithOptions_WritesFile(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, "col2": []float64{1.1, 2.2, 3.3}, "col3": []bool{true, false, true}, @@ -420,7 +420,7 @@ func TestCSV_WriteCSVWithOptions_WritesFile(t *testing.T) { func TestCSV_WriteCSV_TimeColumn_WritesFile(t *testing.T) { tm := time.Date(2023, 1, 1, 12, 30, 0, 0, time.UTC) - data := map[string]interface{}{ + data := map[string]any{ "col1": []time.Time{tm, tm}, } df, _ := NewDataFrameFromMap(data) diff --git a/df.go b/df.go index 7c6bfeb..de3d5b5 100644 --- a/df.go +++ b/df.go @@ -31,7 +31,7 @@ func NewDataFrameFromSeries(series ...*Series) (*DataFrame, error) { } // NewDataFrameFromMap creates a DataFrame from a map of column data -func NewDataFrameFromMap(data map[string]interface{}) (*DataFrame, error) { +func NewDataFrameFromMap(data map[string]any) (*DataFrame, error) { if len(data) == 0 { return NewDataFrame(), nil } @@ -157,7 +157,7 @@ func (df *DataFrame) Tail(n int) *DataFrame { } // Get returns the value at the specified row and column -func (df *DataFrame) Get(row int, column string) (interface{}, error) { +func (df *DataFrame) Get(row int, column string) (any, error) { if df.err != nil { return nil, df.err } @@ -174,7 +174,7 @@ func (df *DataFrame) Get(row int, column string) (interface{}, error) { } // Set updates the value at the specified row and column -func (df *DataFrame) Set(row int, column string, value interface{}) error { +func (df *DataFrame) Set(row int, column string, value any) error { if df.err != nil { return df.err } @@ -332,12 +332,9 @@ func (df *DataFrame) String() string { sb.WriteString("\n") // Write data (show first 10 rows max for display) - maxRows := df.length - if maxRows > 10 { - maxRows = 10 - } + maxRows := min(df.length, 10) - for i := 0; i < maxRows; i++ { + for i := range maxRows { var row []string for _, colName := range df.order { value, _ := df.columns[colName].Get(i) @@ -348,7 +345,7 @@ func (df *DataFrame) String() string { } if df.length > 10 { - sb.WriteString(fmt.Sprintf("... (%d more rows)\n", df.length-10)) + fmt.Fprintf(&sb, "... (%d more rows)\n", df.length-10) } return sb.String() @@ -362,12 +359,12 @@ func (df *DataFrame) Info() string { var sb strings.Builder sb.WriteString("DataFrame Info:\n") - sb.WriteString(fmt.Sprintf(" Shape: (%d, %d)\n", df.length, len(df.columns))) + fmt.Fprintf(&sb, " Shape: (%d, %d)\n", df.length, len(df.columns)) sb.WriteString(" Columns:\n") for _, colName := range df.order { series := df.columns[colName] - sb.WriteString(fmt.Sprintf(" %s: %s\n", colName, series.Type.String())) + fmt.Fprintf(&sb, " %s: %s\n", colName, series.Type.String()) } return sb.String() @@ -394,7 +391,7 @@ func (df *DataFrame) slice(start, end int, operation string) *DataFrame { for _, colName := range df.order { series := df.columns[colName] - var newData interface{} + var newData any // Slice the appropriate data type switch series.Type { diff --git a/df_test.go b/df_test.go index a0757ea..3d5c5a4 100644 --- a/df_test.go +++ b/df_test.go @@ -8,7 +8,7 @@ import ( ) func TestDataFrame_Len(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, } df, _ := NewDataFrameFromMap(data) @@ -24,7 +24,7 @@ func TestDataFrame_Len(t *testing.T) { } func TestDataFrame_Width(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, "col2": []string{"a", "b", "c"}, } @@ -41,7 +41,7 @@ func TestDataFrame_Width(t *testing.T) { } func TestDataFrame_String(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "name": []string{"Alice", "Bob"}, "age": []int64{25, 30}, } @@ -57,7 +57,7 @@ func TestDataFrame_String(t *testing.T) { } func TestDataFrame_Info(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "name": []string{"Alice", "Bob", "Carol"}, "age": []int64{25, 30, 35}, "salary": []float64{50000, 60000, 70000}, @@ -74,7 +74,7 @@ func TestDataFrame_Info(t *testing.T) { } func TestDataFrame_Reset(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, } df, _ := NewDataFrameFromMap(data) @@ -87,7 +87,7 @@ func TestDataFrame_Reset(t *testing.T) { } func TestDataFrame_GetEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, } df, _ := NewDataFrameFromMap(data) @@ -109,7 +109,7 @@ func TestDataFrame_GetEdgeCases(t *testing.T) { } func TestDataFrame_SetEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, } df, _ := NewDataFrameFromMap(data) @@ -136,7 +136,7 @@ func TestDataFrame_SetEdgeCases(t *testing.T) { } func TestDataFrame_HeadEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, } df, _ := NewDataFrameFromMap(data) @@ -153,7 +153,7 @@ func TestDataFrame_HeadEdgeCases(t *testing.T) { } func TestDataFrame_TailEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3, 4, 5}, } df, _ := NewDataFrameFromMap(data) @@ -179,7 +179,7 @@ func TestDataFrame_TailEdgeCases(t *testing.T) { } func TestDataFrame_GetSeriesEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, } df, _ := NewDataFrameFromMap(data) @@ -196,7 +196,7 @@ func TestDataFrame_GetSeriesEdgeCases(t *testing.T) { } func TestDataFrame_AddColumnEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, "col2": []string{"a", "b", "c"}, } @@ -216,7 +216,7 @@ func TestDataFrame_AddColumnEdgeCases(t *testing.T) { } func TestDataFrame_DropColumnEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, "col2": []string{"a", "b", "c"}, } @@ -237,7 +237,7 @@ func TestDataFrame_DropColumnEdgeCases(t *testing.T) { } func TestDataFrame_RenameColumnEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, "col2": []string{"a", "b", "c"}, } @@ -263,7 +263,7 @@ func TestDataFrame_RenameColumnEdgeCases(t *testing.T) { } func TestDataFrame_GetColumnTypeEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, } df, _ := NewDataFrameFromMap(data) @@ -280,7 +280,7 @@ func TestDataFrame_GetColumnTypeEdgeCases(t *testing.T) { } func TestDataFrame_HasColumnEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, } df, _ := NewDataFrameFromMap(data) @@ -295,7 +295,7 @@ func TestDataFrame_HasColumnEdgeCases(t *testing.T) { } func TestDataFrame_CopyEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, } df, _ := NewDataFrameFromMap(data) @@ -331,7 +331,7 @@ func TestDF_Slice_AllTypes(t *testing.T) { t1 := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) t2 := time.Date(2023, 1, 2, 0, 0, 0, 0, time.UTC) t3 := time.Date(2023, 1, 3, 0, 0, 0, 0, time.UTC) - data := map[string]interface{}{ + data := map[string]any{ "s": []string{"a", "b", "c", "d"}, "i": []int64{1, 2, 3, 4}, "f": []float64{1.1, 2.2, 3.3, 4.4}, @@ -352,7 +352,7 @@ func TestDF_Slice_AllTypes(t *testing.T) { } func TestDF_Slice_InvalidRanges(t *testing.T) { - data := map[string]interface{}{"i": []int64{1, 2, 3}} + data := map[string]any{"i": []int64{1, 2, 3}} df, _ := NewDataFrameFromMap(data) if df.slice(-1, 1, "Slice").Error() == nil { t.Error("expected error for negative start") @@ -388,7 +388,7 @@ func TestDF_Copy_ErrorBranch(t *testing.T) { } func TestDF_Copy_DeepCopy(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, "col2": []string{"a", "b", "c"}, "col3": []float64{1.1, 2.2, 3.3}, @@ -406,7 +406,7 @@ func TestDF_Copy_DeepCopy(t *testing.T) { } func TestDF_String_SmallAndLarge(t *testing.T) { - df1, _ := NewDataFrameFromMap(map[string]interface{}{"col1": []int64{1, 2}}) + df1, _ := NewDataFrameFromMap(map[string]any{"col1": []int64{1, 2}}) if df1.String() == "" { t.Error("String() should return representation") } @@ -414,14 +414,14 @@ func TestDF_String_SmallAndLarge(t *testing.T) { for i := range largeData { largeData[i] = int64(i) } - df2, _ := NewDataFrameFromMap(map[string]interface{}{"col1": largeData}) + df2, _ := NewDataFrameFromMap(map[string]any{"col1": largeData}) if df2.String() == "" { t.Error("String() should return representation for large DataFrame") } } func TestDF_AddColumn_AllBranches(t *testing.T) { - data := map[string]interface{}{"existing": []int64{1, 2, 3}} + data := map[string]any{"existing": []int64{1, 2, 3}} df, _ := NewDataFrameFromMap(data) s1, _ := NewSeries("new", []int64{4, 5, 6}) @@ -466,17 +466,17 @@ func TestDF_NewDataFrameFromSeries_AllBranches(t *testing.T) { } func TestDF_NewDataFrameFromMap_AllBranches(t *testing.T) { - df1, err1 := NewDataFrameFromMap(map[string]interface{}{}) + df1, err1 := NewDataFrameFromMap(map[string]any{}) if err1 != nil || df1 == nil { t.Error("NewDataFrameFromMap with empty map should succeed") } - df2, err2 := NewDataFrameFromMap(map[string]interface{}{"col1": []int64{1, 2, 3}}) + df2, err2 := NewDataFrameFromMap(map[string]any{"col1": []int64{1, 2, 3}}) if err2 != nil || df2.Width() != 1 { t.Error("NewDataFrameFromMap with single column should succeed") } - df3, err3 := NewDataFrameFromMap(map[string]interface{}{ + df3, err3 := NewDataFrameFromMap(map[string]any{ "col1": []int64{1, 2, 3}, "col2": []string{"a", "b", "c"}, }) @@ -484,7 +484,7 @@ func TestDF_NewDataFrameFromMap_AllBranches(t *testing.T) { t.Error("NewDataFrameFromMap with multiple columns should succeed") } - df4, err4 := NewDataFrameFromMap(map[string]interface{}{ + df4, err4 := NewDataFrameFromMap(map[string]any{ "col1": []int64{1, 2, 3}, "col2": []string{"a", "b"}, }) @@ -494,7 +494,7 @@ func TestDF_NewDataFrameFromMap_AllBranches(t *testing.T) { } func TestDF_HasColumn_GetColumnType_AllBranches(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "str": []string{"a", "b"}, "int": []int64{1, 2}, "float": []float64{1.1, 2.2}, @@ -531,7 +531,7 @@ func TestDF_HasColumn_GetColumnType_ErrorBranch(t *testing.T) { } func TestDF_Tail_EdgeCases(t *testing.T) { - data := map[string]interface{}{"col1": []int64{1, 2, 3, 4, 5}} + data := map[string]any{"col1": []int64{1, 2, 3, 4, 5}} df, _ := NewDataFrameFromMap(data) if df.Tail(0).Len() != 0 { t.Error("Tail(0) should return 0 rows") @@ -548,7 +548,7 @@ func TestDF_Tail_EdgeCases(t *testing.T) { } func TestDF_Get_Set_EdgeCases(t *testing.T) { - data := map[string]interface{}{"col1": []int64{1, 2, 3}} + data := map[string]any{"col1": []int64{1, 2, 3}} df, _ := NewDataFrameFromMap(data) val, err := df.Get(0, "col1") @@ -573,7 +573,7 @@ func TestDF_Get_Set_EdgeCases(t *testing.T) { } func TestDF_GetSeries_EdgeCases(t *testing.T) { - data := map[string]interface{}{"col1": []int64{1, 2, 3}} + data := map[string]any{"col1": []int64{1, 2, 3}} df, _ := NewDataFrameFromMap(data) if s, err := df.GetSeries("col1"); err != nil || s == nil { t.Error("GetSeries should succeed") @@ -607,7 +607,7 @@ func TestDF_Count_ResetIndex_ErrorBranch(t *testing.T) { } func TestDF_Count_ResetIndex_SuccessBranch(t *testing.T) { - df, _ := NewDataFrameFromMap(map[string]interface{}{"col1": []int64{1, 2, 3, 4}}) + df, _ := NewDataFrameFromMap(map[string]any{"col1": []int64{1, 2, 3, 4}}) if df.Count() != 4 { t.Fatalf("expected count 4, got %d", df.Count()) } @@ -618,7 +618,7 @@ func TestDF_Count_ResetIndex_SuccessBranch(t *testing.T) { } func TestDF_ValidationFunctions(t *testing.T) { - data := map[string]interface{}{"col1": []int64{1, 2, 3}, "col2": []string{"a", "b", "c"}} + data := map[string]any{"col1": []int64{1, 2, 3}, "col2": []string{"a", "b", "c"}} df, _ := NewDataFrameFromMap(data) if err := df.validateColumnExists("col1"); err != nil { @@ -675,7 +675,7 @@ func TestDF_AddColumnWithEmptyDF(t *testing.T) { } func TestDF_DropColumnLastColumn(t *testing.T) { - data := map[string]interface{}{"col1": []int64{1, 2, 3}} + data := map[string]any{"col1": []int64{1, 2, 3}} df, _ := NewDataFrameFromMap(data) result := df.DropColumn("col1") if result.Error() != nil || result.Width() != 0 { @@ -684,7 +684,7 @@ func TestDF_DropColumnLastColumn(t *testing.T) { } func TestDF_RenameColumnSuccess(t *testing.T) { - data := map[string]interface{}{"old": []int64{1, 2, 3}} + data := map[string]any{"old": []int64{1, 2, 3}} df, _ := NewDataFrameFromMap(data) result := df.RenameColumn("old", "new") if result.Error() != nil || !result.HasColumn("new") || result.HasColumn("old") { diff --git a/example_test.go b/example_test.go index 589dd6a..1905d48 100644 --- a/example_test.go +++ b/example_test.go @@ -12,7 +12,7 @@ import ( // Example_basicDataFrame demonstrates basic DataFrame operations func Example_basicDataFrame() { // Create sample data - data := map[string]interface{}{ + data := map[string]any{ "name": []string{"Alice", "Bob", "Carol"}, "age": []int64{25, 30, 35}, "salary": []float64{50000, 60000, 70000}, @@ -92,7 +92,7 @@ Frank,Sales,55000,1` // DemoStatistics demonstrates statistical analysis func DemoStatistics() { // Sales data - data := map[string]interface{}{ + data := map[string]any{ "region": []string{"North", "South", "East", "West", "North", "South"}, "sales": []float64{120000, 110000, 95000, 130000, 125000, 115000}, "quarter": []int64{1, 1, 1, 1, 2, 2}, @@ -312,7 +312,7 @@ Eve,North,Phone,720,0.03,2024-01-19` // Test basic DataFrame creation and operations func TestDataFrameBasics(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "numbers": []int64{1, 2, 3, 4, 5}, "names": []string{"a", "b", "c", "d", "e"}, } @@ -436,7 +436,7 @@ func TestTimeTypeHeadTail(t *testing.T) { // character do not cause key collisions (regression for GroupBy key bug). func TestGroupByKeyCollision(t *testing.T) { // "a|b" and "a" with "b" are distinct groups but produced the same "|"-joined key. - data := map[string]interface{}{ + data := map[string]any{ "category": []string{"a|b", "a|b", "a"}, "value": []float64{1, 2, 10}, } @@ -456,7 +456,7 @@ func TestGroupByKeyCollision(t *testing.T) { } // Find the "a|b" group and verify its sum is 3, not 13. - for i := 0; i < rows; i++ { + for i := range rows { cat, _ := result.Get(i, "category") val, _ := result.Get(i, "value") if cat.(string) == "a|b" { @@ -475,7 +475,7 @@ func TestGroupByKeyCollision(t *testing.T) { // TestSetErrorDoesNotMutateCaller verifies that a failed operation does not // corrupt the original DataFrame (regression for the setError mutation bug). func TestSetErrorDoesNotMutateCaller(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "a": []int64{1, 2, 3}, } df, err := NewDataFrameFromMap(data) @@ -516,13 +516,13 @@ func TestErrorHandling(t *testing.T) { func BenchmarkDataFrameOperations(b *testing.B) { // Create test data size := 10000 - data := map[string]interface{}{ + data := map[string]any{ "id": make([]int64, size), "value": make([]float64, size), "status": make([]string, size), } - for i := 0; i < size; i++ { + for i := range size { data["id"].([]int64)[i] = int64(i) data["value"].([]float64)[i] = float64(i) * 2.5 data["status"].([]string)[i] = fmt.Sprintf("status_%d", i%10) @@ -536,25 +536,25 @@ func BenchmarkDataFrameOperations(b *testing.B) { b.ResetTimer() b.Run("Filter", func(b *testing.B) { - for i := 0; i < b.N; i++ { + for range b.N { _ = df.Filter("value", ">", 5000.0) } }) b.Run("Sort", func(b *testing.B) { - for i := 0; i < b.N; i++ { + for range b.N { _ = df.Sort("value", false) } }) b.Run("GroupBy", func(b *testing.B) { - for i := 0; i < b.N; i++ { + for range b.N { _, _ = df.GroupBy("status").Sum() } }) b.Run("Statistics", func(b *testing.B) { - for i := 0; i < b.N; i++ { + for range b.N { _, _ = df.Mean("value") } }) @@ -617,13 +617,13 @@ func DemoRealWorldUsage() { // TestDeterministicFromMap verifies that NewDataFrameFromMap always produces // columns in alphabetical order, regardless of map iteration order. func TestDeterministicFromMap(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "zebra": []int64{1, 2, 3}, "apple": []int64{4, 5, 6}, "mango": []int64{7, 8, 9}, } expected := []string{"apple", "mango", "zebra"} - for i := 0; i < 20; i++ { + for i := range 20 { df, err := NewDataFrameFromMap(data) if err != nil { t.Fatalf("NewDataFrameFromMap failed on iteration %d: %v", i, err) @@ -643,7 +643,7 @@ func TestDeterministicFromMap(t *testing.T) { // TestDeterministicGroupBy verifies that GroupBy produces rows in the same // order across repeated calls. func TestDeterministicGroupBy(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "category": []string{"B", "A", "C", "A", "B", "C"}, "value": []float64{10, 20, 30, 40, 50, 60}, } @@ -653,21 +653,21 @@ func TestDeterministicGroupBy(t *testing.T) { } var orders [][]string - for i := 0; i < 10; i++ { + for i := range 10 { result, err := df.GroupBy("category").Sum() if err != nil { t.Fatalf("GroupBy.Sum failed on iteration %d: %v", i, err) } rows, _ := result.Shape() order := make([]string, rows) - for r := 0; r < rows; r++ { + for r := range rows { val, _ := result.Get(r, "category") order[r] = val.(string) } orders = append(orders, order) } - for i := 1; i < len(orders); i++ { + for i := range len(orders) { for j, cat := range orders[i] { if cat != orders[0][j] { t.Errorf("non-deterministic GroupBy: iteration %d row %d = %q, want %q", @@ -680,7 +680,7 @@ func TestDeterministicGroupBy(t *testing.T) { // TestDataFrameManipulation covers Tail, Set, GetSeries, AddColumn, DropColumn, // RenameColumn, IsEmpty, and HasColumn. func TestDataFrameManipulation(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "id": []int64{1, 2, 3, 4, 5}, "name": []string{"a", "b", "c", "d", "e"}, } @@ -774,7 +774,7 @@ func TestDataFrameManipulation(t *testing.T) { // TestOpsOperations covers Drop, SortBy, Unique, Query, Where, and ResetIndex. func TestOpsOperations(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "a": []int64{3, 1, 2, 1, 3}, "b": []int64{30, 10, 20, 15, 35}, "c": []string{"x", "y", "z", "w", "v"}, @@ -851,7 +851,7 @@ func TestOpsOperations(t *testing.T) { // TestGroupByMinMax covers GroupBy.Min() and GroupBy.Max(). func TestGroupByMinMax(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "dept": []string{"Eng", "Eng", "Sales", "Sales"}, "salary": []float64{70000, 80000, 50000, 60000}, } @@ -869,7 +869,7 @@ func TestGroupByMinMax(t *testing.T) { if rows != 2 { t.Fatalf("GroupBy.Min: expected 2 groups, got %d", rows) } - for i := 0; i < rows; i++ { + for i := range rows { dept, _ := minDf.Get(i, "dept") sal, _ := minDf.Get(i, "salary") switch dept.(string) { @@ -893,7 +893,7 @@ func TestGroupByMinMax(t *testing.T) { if rows != 2 { t.Fatalf("GroupBy.Max: expected 2 groups, got %d", rows) } - for i := 0; i < rows; i++ { + for i := range rows { dept, _ := maxDf.Get(i, "dept") sal, _ := maxDf.Get(i, "salary") switch dept.(string) { @@ -911,7 +911,7 @@ func TestGroupByMinMax(t *testing.T) { // TestStringOperators covers Filter with "contains", "startswith", and "endswith". func TestStringOperators(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "name": []string{"Alice", "Bob", "Albany", "Sara"}, } df, err := NewDataFrameFromMap(data) @@ -953,7 +953,7 @@ func TestStringOperators(t *testing.T) { // TestStatsOperations covers Median, Var, Quantile, Describe, ValueCounts, // Correlation, and NumericSummary. func TestStatsOperations(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "value": []float64{10, 20, 30, 40, 50}, "category": []string{"a", "b", "a", "b", "a"}, "x": []float64{1, 2, 3, 4, 5}, @@ -1046,7 +1046,7 @@ func TestStatsOperations(t *testing.T) { // TestCSVFileOperations covers file-based CSV I/O using os.CreateTemp. func TestCSVFileOperations(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "id": []int64{1, 2, 3}, "name": []string{"Alice", "Bob", "Carol"}, "age": []int64{25, 30, 35}, diff --git a/ops.go b/ops.go index 047b940..8e0154f 100644 --- a/ops.go +++ b/ops.go @@ -9,7 +9,7 @@ import ( ) // Filter creates a new DataFrame with rows that match the condition -func (df *DataFrame) Filter(column, operator string, value interface{}) *DataFrame { +func (df *DataFrame) Filter(column, operator string, value any) *DataFrame { if df.err != nil { return df } @@ -30,11 +30,11 @@ func (df *DataFrame) Filter(column, operator string, value interface{}) *DataFra return df.setError(wrapColumnError("Filter", column, err)) } - return df.selectRows(matchingIndices, "Filter") + return df.SelectRows(matchingIndices, "Filter") } // filterIndicesTyped returns matching indices using typed slice access to avoid boxing. -func filterIndicesTyped(series *Series, operator string, value interface{}) ([]int, error) { +func filterIndicesTyped(series *Series, operator string, value any) ([]int, error) { switch series.Type { case Int64Type: return filterInt64Indices(series.Data.([]int64), operator, value) @@ -50,7 +50,7 @@ func filterIndicesTyped(series *Series, operator string, value interface{}) ([]i return nil, nil } -func filterInt64Indices(data []int64, op string, value interface{}) ([]int, error) { +func filterInt64Indices(data []int64, op string, value any) ([]int, error) { cmp, ok := toInt64(value) if !ok { return nil, newOpError("Filter", fmt.Sprintf("cannot convert %T to int64", value)) @@ -64,7 +64,7 @@ func filterInt64Indices(data []int64, op string, value interface{}) ([]int, erro return indices, nil } -func filterFloat64Indices(data []float64, op string, value interface{}) ([]int, error) { +func filterFloat64Indices(data []float64, op string, value any) ([]int, error) { cmp, ok := toFloat64(value) if !ok { return nil, newOpError("Filter", fmt.Sprintf("cannot convert %T to float64", value)) @@ -78,7 +78,7 @@ func filterFloat64Indices(data []float64, op string, value interface{}) ([]int, return indices, nil } -func filterStringIndices(data []string, op string, value interface{}) ([]int, error) { +func filterStringIndices(data []string, op string, value any) ([]int, error) { cmp, ok := value.(string) if !ok { cmp = fmt.Sprintf("%v", value) @@ -92,7 +92,7 @@ func filterStringIndices(data []string, op string, value interface{}) ([]int, er return indices, nil } -func filterBoolIndices(data []bool, op string, value interface{}) ([]int, error) { +func filterBoolIndices(data []bool, op string, value any) ([]int, error) { cmp, ok := value.(bool) if !ok { return nil, newOpError("Filter", fmt.Sprintf("cannot convert %T to bool", value)) @@ -106,7 +106,7 @@ func filterBoolIndices(data []bool, op string, value interface{}) ([]int, error) return indices, nil } -func filterTimeIndices(data []time.Time, op string, value interface{}) ([]int, error) { +func filterTimeIndices(data []time.Time, op string, value any) ([]int, error) { cmp, ok := value.(time.Time) if !ok { return nil, newOpError("Filter", fmt.Sprintf("cannot convert %T to time.Time", value)) @@ -120,7 +120,7 @@ func filterTimeIndices(data []time.Time, op string, value interface{}) ([]int, e return indices, nil } -func toInt64(v interface{}) (int64, bool) { +func toInt64(v any) (int64, bool) { switch x := v.(type) { case int64: return x, true @@ -132,7 +132,7 @@ func toInt64(v interface{}) (int64, bool) { return 0, false } -func toFloat64(v interface{}) (float64, bool) { +func toFloat64(v any) (float64, bool) { switch x := v.(type) { case float64: return x, true @@ -359,11 +359,11 @@ func (df *DataFrame) SortBy(columns []string, ascending []bool) *DataFrame { }) // Create new DataFrame with sorted rows - return df.selectRows(indices, "SortBy") + return df.SelectRows(indices, "SortBy") } // uniqueFromSeries extracts unique values from a series. -func uniqueFromSeries(series *Series) []interface{} { +func uniqueFromSeries(series *Series) []any { switch series.Type { case StringType: return uniqueStrings(series.Data.([]string)) @@ -379,9 +379,9 @@ func uniqueFromSeries(series *Series) []interface{} { return nil } -func uniqueStrings(data []string) []interface{} { +func uniqueStrings(data []string) []any { seen := make(map[string]bool, len(data)/4) - unique := make([]interface{}, 0, len(data)/4) + unique := make([]any, 0, len(data)/4) for _, v := range data { if !seen[v] { seen[v] = true @@ -391,9 +391,9 @@ func uniqueStrings(data []string) []interface{} { return unique } -func uniqueInt64(data []int64) []interface{} { +func uniqueInt64(data []int64) []any { seen := make(map[string]bool, len(data)/4) - unique := make([]interface{}, 0, len(data)/4) + unique := make([]any, 0, len(data)/4) for _, v := range data { key := strconv.FormatInt(v, 10) if !seen[key] { @@ -404,9 +404,9 @@ func uniqueInt64(data []int64) []interface{} { return unique } -func uniqueFloat64(data []float64) []interface{} { +func uniqueFloat64(data []float64) []any { seen := make(map[string]bool, len(data)/4) - unique := make([]interface{}, 0, len(data)/4) + unique := make([]any, 0, len(data)/4) for _, v := range data { key := strconv.FormatFloat(v, 'g', -1, 64) if !seen[key] { @@ -417,9 +417,9 @@ func uniqueFloat64(data []float64) []interface{} { return unique } -func uniqueBool(data []bool) []interface{} { +func uniqueBool(data []bool) []any { seen := make(map[string]bool, 2) - unique := make([]interface{}, 0, 2) + unique := make([]any, 0, 2) for _, v := range data { key := "false" if v { @@ -433,9 +433,9 @@ func uniqueBool(data []bool) []interface{} { return unique } -func uniqueTime(data []time.Time) []interface{} { +func uniqueTime(data []time.Time) []any { seen := make(map[string]bool, len(data)/4) - unique := make([]interface{}, 0, len(data)/4) + unique := make([]any, 0, len(data)/4) for _, v := range data { key := v.String() if !seen[key] { @@ -447,7 +447,7 @@ func uniqueTime(data []time.Time) []interface{} { } // Unique returns unique values from a specified column -func (df *DataFrame) Unique(column string) ([]interface{}, error) { +func (df *DataFrame) Unique(column string) ([]any, error) { if df.err != nil { return nil, df.err } @@ -479,7 +479,7 @@ func (df *DataFrame) GroupBy(columns ...string) *GroupBy { } // Where is an alias for Filter (Pandas compatibility) -func (df *DataFrame) Where(column, operator string, value interface{}) *DataFrame { +func (df *DataFrame) Where(column, operator string, value any) *DataFrame { return df.Filter(column, operator, value) } @@ -564,7 +564,7 @@ func (gb *GroupBy) Max() (*DataFrame, error) { // Internal helper methods // selectSeriesRows extracts rows at indices from a series, returning new data slice. -func selectSeriesRows(series *Series, indices []int) interface{} { +func selectSeriesRows(series *Series, indices []int) any { switch series.Type { case StringType: return selectStringRows(series.Data.([]string), indices) @@ -622,7 +622,7 @@ func selectTimeRows(data []time.Time, indices []int) []time.Time { } // emptySliceForType returns an empty slice for the given column type. -func emptySliceForType(colType ColumnType) interface{} { +func emptySliceForType(colType ColumnType) any { switch colType { case StringType: return []string{} @@ -639,8 +639,8 @@ func emptySliceForType(colType ColumnType) interface{} { } } -// selectRows creates a new DataFrame with rows at the specified indices -func (df *DataFrame) selectRows(indices []int, operation string) *DataFrame { +// SelectRows creates a new DataFrame with rows at the specified indices +func (df *DataFrame) SelectRows(indices []int, operation string) *DataFrame { if len(indices) == 0 { newDf := NewDataFrame() for _, colName := range df.order { @@ -677,7 +677,7 @@ func (df *DataFrame) selectRows(indices []int, operation string) *DataFrame { } // compareValues compares two values of the same type, returns -1, 0, or 1 -func compareValues(a, b interface{}, columnType ColumnType) int { +func compareValues(a, b any, columnType ColumnType) int { switch columnType { case StringType: return compareStrings(a.(string), b.(string)) @@ -784,7 +784,7 @@ func (gb *GroupBy) buildGroups() map[string]*groupKey { var key strings.Builder key.Grow(64) - for i := 0; i < gb.df.length; i++ { + for i := range gb.df.length { key.Reset() values := make([]string, len(gb.columns)) for j, series := range groupSeries { @@ -906,7 +906,7 @@ func buildResultDataFrame(columns []string, groupColData [][]string, numericCols } // calculateAggregation calculates aggregation for a column and indices. -// Optimized to access typed slices directly, avoiding per-row interface{} boxing. +// Optimized to access typed slices directly, avoiding per-row any boxing. func (gb *GroupBy) calculateAggregation(column string, indices []int, operation string) (float64, error) { series := gb.df.columns[column] n := len(indices) diff --git a/ops_test.go b/ops_test.go index dbdd3af..86a11aa 100644 --- a/ops_test.go +++ b/ops_test.go @@ -6,7 +6,7 @@ import ( ) func TestSelectEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, "col2": []string{"a", "b", "c"}, "col3": []float64{1.1, 2.2, 3.3}, @@ -25,7 +25,7 @@ func TestSelectEdgeCases(t *testing.T) { } func TestDropEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, "col2": []string{"a", "b", "c"}, } @@ -43,7 +43,7 @@ func TestDropEdgeCases(t *testing.T) { } func TestSortByEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{3, 1, 2}, "col2": []string{"c", "a", "b"}, } @@ -67,7 +67,7 @@ func TestSortByEdgeCases(t *testing.T) { } func TestUniqueEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 1, 3, 2}, } df, _ := NewDataFrameFromMap(data) @@ -82,7 +82,7 @@ func TestUniqueEdgeCases(t *testing.T) { } func TestQueryEdgeCases(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "age": []int64{25, 30, 35, 40}, } df, _ := NewDataFrameFromMap(data) @@ -224,7 +224,7 @@ func TestFilterIndicesTypedEdgeCases(t *testing.T) { } func TestGroupBy_MeanCount(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "category": []string{"A", "B", "A", "B"}, "value": []int64{10, 20, 30, 40}, } @@ -415,7 +415,7 @@ func TestMatchTime(t *testing.T) { func TestEmptySliceForType(t *testing.T) { tests := []struct { ct ColumnType - want interface{} + want any }{ {StringType, []string{}}, {Int64Type, []int64{}}, diff --git a/stats.go b/stats.go index 27012b5..af4a368 100644 --- a/stats.go +++ b/stats.go @@ -33,7 +33,7 @@ func (df *DataFrame) Sum(column string) (float64, error) { } sum := 0.0 - for i := 0; i < series.Length; i++ { + for i := range series.Length { value, err := series.Get(i) if err != nil { return 0, wrapColumnError("Sum", column, err) @@ -69,7 +69,7 @@ func (df *DataFrame) Mean(column string) (float64, error) { } // Min finds the minimum value in a numeric column -func (df *DataFrame) Min(column string) (interface{}, error) { +func (df *DataFrame) Min(column string) (any, error) { if df.err != nil { return nil, df.err } @@ -93,7 +93,7 @@ func (df *DataFrame) Min(column string) (interface{}, error) { } min := convertToFloat64(firstValue) - for i := 1; i < series.Length; i++ { + for i := range series.Length { value, err := series.Get(i) if err != nil { return nil, wrapColumnError("Min", column, err) @@ -113,7 +113,7 @@ func (df *DataFrame) Min(column string) (interface{}, error) { } // Max finds the maximum value in a numeric column -func (df *DataFrame) Max(column string) (interface{}, error) { +func (df *DataFrame) Max(column string) (any, error) { if df.err != nil { return nil, df.err } @@ -137,7 +137,7 @@ func (df *DataFrame) Max(column string) (interface{}, error) { } max := convertToFloat64(firstValue) - for i := 1; i < series.Length; i++ { + for i := range series.Length { value, err := series.Get(i) if err != nil { return nil, wrapColumnError("Max", column, err) @@ -183,7 +183,7 @@ func (df *DataFrame) Std(column string) (float64, error) { // Calculate variance variance := 0.0 - for i := 0; i < series.Length; i++ { + for i := range series.Length { value, err := series.Get(i) if err != nil { return 0, wrapColumnError("Std", column, err) @@ -228,7 +228,7 @@ func (df *DataFrame) Median(column string) (float64, error) { // Extract and sort values values := make([]float64, series.Length) - for i := 0; i < series.Length; i++ { + for i := range series.Length { value, err := series.Get(i) if err != nil { return 0, wrapColumnError("Median", column, err) @@ -272,7 +272,7 @@ func (df *DataFrame) Quantile(column string, q float64) (float64, error) { // Extract and sort values values := make([]float64, series.Length) - for i := 0; i < series.Length; i++ { + for i := range series.Length { value, err := series.Get(i) if err != nil { return 0, wrapColumnError("Quantile", column, err) @@ -320,7 +320,7 @@ func (df *DataFrame) Describe() (*DataFrame, error) { stats := []string{"count", "mean", "std", "min", "25%", "50%", "75%", "max"} // Create result data - resultData := make(map[string]interface{}) + resultData := make(map[string]any) resultData["statistic"] = stats // Calculate statistics for each numeric column @@ -399,14 +399,14 @@ func (df *DataFrame) ValueCounts(column string) (*DataFrame, error) { counts := make(map[string]int) // Count occurrences - for i := 0; i < series.Length; i++ { + for i := range series.Length { value, err := series.Get(i) if err != nil { return nil, wrapColumnError("ValueCounts", column, err) } key := fmt.Sprintf("%v", value) - counts[key]++ + counts[key] += 1 } // Create result DataFrame @@ -433,7 +433,7 @@ func (df *DataFrame) ValueCounts(column string) (*DataFrame, error) { frequencies = append(frequencies, int64(pair.count)) } - resultData := map[string]interface{}{ + resultData := map[string]any{ column: values, "count": frequencies, } @@ -462,7 +462,7 @@ func (df *DataFrame) Correlation() (*DataFrame, error) { // Calculate correlation matrix n := len(numericColumns) - resultData := make(map[string]interface{}) + resultData := make(map[string]any) resultData["column"] = numericColumns for _, col1 := range numericColumns { @@ -485,7 +485,7 @@ func (df *DataFrame) Correlation() (*DataFrame, error) { // Helper functions // convertToFloat64 converts numeric values to float64 -func convertToFloat64(value interface{}) float64 { +func convertToFloat64(value any) float64 { switch v := value.(type) { case int64: return float64(v) @@ -525,7 +525,7 @@ func (df *DataFrame) calculateCorrelation(col1, col2 string) (float64, error) { // Calculate correlation var numerator, sumSq1, sumSq2 float64 - for i := 0; i < series1.Length; i++ { + for i := range series1.Length { val1, err := series1.Get(i) if err != nil { return 0, err diff --git a/stats_test.go b/stats_test.go index 09e4f47..17c603f 100644 --- a/stats_test.go +++ b/stats_test.go @@ -6,7 +6,7 @@ import ( ) func TestDataFrame_Count(t *testing.T) { - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3, 4, 5}, "col2": []float64{1.1, 2.2, 3.3, 4.4, 5.5}, } @@ -49,7 +49,7 @@ func TestDataFrame_SumEdgeCases(t *testing.T) { t.Error("Sum() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "text": []string{"a", "b", "c"}, } df, _ := NewDataFrameFromMap(data) @@ -66,7 +66,7 @@ func TestDataFrame_MeanEdgeCases(t *testing.T) { t.Error("Mean() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, } df, _ := NewDataFrameFromMap(data) @@ -86,7 +86,7 @@ func TestDataFrame_MinEdgeCases(t *testing.T) { t.Error("Min() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "col1": []float64{5.5, 2.2, 8.8, 1.1}, } df, _ := NewDataFrameFromMap(data) @@ -106,7 +106,7 @@ func TestDataFrame_MaxEdgeCases(t *testing.T) { t.Error("Max() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "col1": []float64{5.5, 2.2, 8.8, 1.1}, } df, _ := NewDataFrameFromMap(data) @@ -126,7 +126,7 @@ func TestDataFrame_StdEdgeCases(t *testing.T) { t.Error("Std() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3}, } df, _ := NewDataFrameFromMap(data) @@ -143,7 +143,7 @@ func TestDataFrame_VarEdgeCases(t *testing.T) { t.Error("Var() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "col1": []float64{1.0, 2.0, 3.0, 4.0, 5.0}, } df, _ := NewDataFrameFromMap(data) @@ -160,7 +160,7 @@ func TestDataFrame_MedianEdgeCases(t *testing.T) { t.Error("Median() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3, 4, 5}, } df, _ := NewDataFrameFromMap(data) @@ -180,7 +180,7 @@ func TestDataFrame_QuantileEdgeCases(t *testing.T) { t.Error("Quantile() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3, 4, 5}, } df, _ := NewDataFrameFromMap(data) @@ -208,7 +208,7 @@ func TestDataFrame_DescribeEdgeCases(t *testing.T) { t.Error("Describe() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3, 4, 5}, "col2": []float64{1.1, 2.2, 3.3, 4.4, 5.5}, } @@ -230,7 +230,7 @@ func TestDataFrame_ValueCountsEdgeCases(t *testing.T) { t.Error("ValueCounts() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "col1": []string{"a", "b", "a", "c", "b", "a"}, } df, _ := NewDataFrameFromMap(data) @@ -256,7 +256,7 @@ func TestDataFrame_CorrelationEdgeCases(t *testing.T) { t.Error("Correlation() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "text": []string{"a", "b", "c"}, } df, _ := NewDataFrameFromMap(data) @@ -265,7 +265,7 @@ func TestDataFrame_CorrelationEdgeCases(t *testing.T) { t.Error("Correlation() should error when no numeric columns") } - data2 := map[string]interface{}{ + data2 := map[string]any{ "col1": []int64{1, 2, 3, 4, 5}, "col2": []float64{2.0, 4.0, 6.0, 8.0, 10.0}, } @@ -283,7 +283,7 @@ func TestDataFrame_NumericSummaryEdgeCases(t *testing.T) { t.Error("NumericSummary() should error on empty DataFrame") } - data := map[string]interface{}{ + data := map[string]any{ "col1": []int64{1, 2, 3, 4, 5}, } df, _ := NewDataFrameFromMap(data) diff --git a/type.go b/type.go index 9ee599c..5125241 100644 --- a/type.go +++ b/type.go @@ -38,14 +38,14 @@ func (ct ColumnType) String() string { // Series represents a single column of data with a specific type type Series struct { - Name string // Column name - Type ColumnType // Data type - Data interface{} // Actual data: []string, []int64, []float64, []bool, []time.Time - Length int // Number of elements + Name string // Column name + Type ColumnType // Data type + Data any // Actual data: []string, []int64, []float64, []bool, []time.Time + Length int // Number of elements } // NewSeries creates a new Series with the given name and data -func NewSeries(name string, data interface{}) (*Series, error) { +func NewSeries(name string, data any) (*Series, error) { s := &Series{ Name: name, Data: data, @@ -79,7 +79,7 @@ func NewSeries(name string, data interface{}) (*Series, error) { } // Get returns the value at the specified index -func (s *Series) Get(index int) (interface{}, error) { +func (s *Series) Get(index int) (any, error) { if index < 0 || index >= s.Length { return nil, &OtterError{ Op: "Series.Get", @@ -112,12 +112,16 @@ func (s *Series) Get(index int) (interface{}, error) { // Returns 0 and error if index is out of range or type mismatch. func (s *Series) GetInt64(index int) (int64, error) { if index < 0 || index >= s.Length { - return 0, &OtterError{Op: "Series.GetInt64", Column: s.Name, - Message: fmt.Sprintf("index %d out of range [0:%d]", index, s.Length)} + return 0, &OtterError{ + Op: "Series.GetInt64", Column: s.Name, + Message: fmt.Sprintf("index %d out of range [0:%d]", index, s.Length), + } } if s.Type != Int64Type { - return 0, &OtterError{Op: "Series.GetInt64", Column: s.Name, - Message: fmt.Sprintf("type mismatch: expected int64, got %s", s.Type)} + return 0, &OtterError{ + Op: "Series.GetInt64", Column: s.Name, + Message: fmt.Sprintf("type mismatch: expected int64, got %s", s.Type), + } } return s.Data.([]int64)[index], nil } @@ -125,12 +129,16 @@ func (s *Series) GetInt64(index int) (int64, error) { // GetFloat64 returns the float64 value at the specified index without boxing. func (s *Series) GetFloat64(index int) (float64, error) { if index < 0 || index >= s.Length { - return 0, &OtterError{Op: "Series.GetFloat64", Column: s.Name, - Message: fmt.Sprintf("index %d out of range [0:%d]", index, s.Length)} + return 0, &OtterError{ + Op: "Series.GetFloat64", Column: s.Name, + Message: fmt.Sprintf("index %d out of range [0:%d]", index, s.Length), + } } if s.Type != Float64Type { - return 0, &OtterError{Op: "Series.GetFloat64", Column: s.Name, - Message: fmt.Sprintf("type mismatch: expected float64, got %s", s.Type)} + return 0, &OtterError{ + Op: "Series.GetFloat64", Column: s.Name, + Message: fmt.Sprintf("type mismatch: expected float64, got %s", s.Type), + } } return s.Data.([]float64)[index], nil } @@ -138,12 +146,16 @@ func (s *Series) GetFloat64(index int) (float64, error) { // GetString returns the string value at the specified index without boxing. func (s *Series) GetString(index int) (string, error) { if index < 0 || index >= s.Length { - return "", &OtterError{Op: "Series.GetString", Column: s.Name, - Message: fmt.Sprintf("index %d out of range [0:%d]", index, s.Length)} + return "", &OtterError{ + Op: "Series.GetString", Column: s.Name, + Message: fmt.Sprintf("index %d out of range [0:%d]", index, s.Length), + } } if s.Type != StringType { - return "", &OtterError{Op: "Series.GetString", Column: s.Name, - Message: fmt.Sprintf("type mismatch: expected string, got %s", s.Type)} + return "", &OtterError{ + Op: "Series.GetString", Column: s.Name, + Message: fmt.Sprintf("type mismatch: expected string, got %s", s.Type), + } } return s.Data.([]string)[index], nil } @@ -174,7 +186,7 @@ func (s *Series) StringSlice() []string { } // Set updates the value at the specified index -func (s *Series) Set(index int, value interface{}) error { +func (s *Series) Set(index int, value any) error { if index < 0 || index >= s.Length { return &OtterError{ Op: "Series.Set", @@ -378,7 +390,7 @@ func isTimeValue(value string) bool { } // ConvertValue converts a string value to the specified type -func ConvertValue(value string, targetType ColumnType) (interface{}, error) { +func ConvertValue(value string, targetType ColumnType) (any, error) { value = strings.TrimSpace(value) if value == "" { @@ -404,7 +416,7 @@ func ConvertValue(value string, targetType ColumnType) (interface{}, error) { } } -func getZeroValue(targetType ColumnType) interface{} { +func getZeroValue(targetType ColumnType) any { switch targetType { case StringType: return "" diff --git a/type_test.go b/type_test.go index 234474b..1584459 100644 --- a/type_test.go +++ b/type_test.go @@ -115,7 +115,7 @@ func TestSeries_StringSlice(t *testing.T) { func TestGetZeroValue(t *testing.T) { tests := []struct { ct ColumnType - want interface{} + want any }{ {StringType, ""}, {Int64Type, int64(0)},