From 705cc277882eb954d15096c3c11323dfffccc3eb Mon Sep 17 00:00:00 2001
From: Vlad Borovtsov <borovcov.vlad@gmail.com>
Date: Sat, 27 Sep 2025 14:21:25 +0200
Subject: [PATCH 1/9] Introduce optional default_column_type parameter to
 complement column_types logic

---
 cpp/src/arrow/csv/options.cc         |  1 +
 cpp/src/arrow/csv/options.h          |  2 ++
 cpp/src/arrow/csv/reader.cc          | 11 +++++--
 python/pyarrow/_csv.pyx              | 47 ++++++++++++++++++++++------
 python/pyarrow/includes/libarrow.pxd |  1 +
 5 files changed, 50 insertions(+), 12 deletions(-)
diff --git a/cpp/src/arrow/csv/options.cc b/cpp/src/arrow/csv/options.cc
index 365b5646b66..52daa9c5fc6 100644
--- a/cpp/src/arrow/csv/options.cc
+++ b/cpp/src/arrow/csv/options.cc
@@ -43,6 +43,7 @@ ConvertOptions ConvertOptions::Defaults() {
                          "NULL", "NaN",  "n/a",      "nan",     "null"};
   options.true_values = {"1", "True", "TRUE", "true"};
   options.false_values = {"0", "False", "FALSE", "false"};
+  options.default_column_type = nullptr;
   return options;
 }
 
diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h
index 10e55bf838c..839550c3f0c 100644
--- a/cpp/src/arrow/csv/options.h
+++ b/cpp/src/arrow/csv/options.h
@@ -76,6 +76,8 @@ struct ARROW_EXPORT ConvertOptions {
   bool check_utf8 = true;
   /// Optional per-column types (disabling type inference on those columns)
   std::unordered_map<std::string, std::shared_ptr<DataType>> column_types;
+  /// Optional type for columns not in `column_types` (nullptr means infer the type)
+  std::shared_ptr<DataType> default_column_type;
   /// Recognized spellings for null values
   std::vector<std::string> null_values;
   /// Recognized spellings for boolean true values
diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index 3c4e7e3da0c..4767626ae6c 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -674,8 +674,15 @@ class ReaderMixin {
       // Does the named column have a fixed type?
       auto it = convert_options_.column_types.find(col_name);
       if (it == convert_options_.column_types.end()) {
-        conversion_schema_.columns.push_back(
-            ConversionSchema::InferredColumn(std::move(col_name), col_index));
+        // If not explicitly typed, respect default_column_type when provided
+        if (convert_options_.default_column_type != nullptr) {
+          conversion_schema_.columns.push_back(ConversionSchema::TypedColumn(
+              std::move(col_name), col_index, convert_options_.default_column_type));
+        }
+        else {
+          conversion_schema_.columns.push_back(
+              ConversionSchema::InferredColumn(std::move(col_name), col_index));
+        }
       } else {
         conversion_schema_.columns.push_back(
             ConversionSchema::TypedColumn(std::move(col_name), col_index, it->second));
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index ed9d20beb6b..939fb7067a9 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -613,6 +613,9 @@ cdef class ConvertOptions(_Weakrefable):
     column_types : pyarrow.Schema or dict, optional
         Explicitly map column names to column types. Passing this argument
         disables type inference on the defined columns.
+    default_column_type : pyarrow.DataType, optional
+        Explicitly map columns not specified in column_types to a default type.
+        Passing this argument disables type inference on all columns.
     null_values : list, optional
         A sequence of strings that denote nulls in the data
         (defaults are appropriate in most cases). Note that by default,
@@ -816,7 +819,7 @@ cdef class ConvertOptions(_Weakrefable):
         self.options.reset(
             new CCSVConvertOptions(CCSVConvertOptions.Defaults()))
 
-    def __init__(self, *, check_utf8=None, column_types=None, null_values=None,
+    def __init__(self, *, check_utf8=None, column_types=None, default_column_type=None, null_values=None,
                  true_values=None, false_values=None, decimal_point=None,
                  strings_can_be_null=None, quoted_strings_can_be_null=None,
                  include_columns=None, include_missing_columns=None,
@@ -826,6 +829,8 @@ cdef class ConvertOptions(_Weakrefable):
             self.check_utf8 = check_utf8
         if column_types is not None:
             self.column_types = column_types
+        if default_column_type is not None:
+            self.default_column_type = default_column_type
         if null_values is not None:
             self.null_values = null_values
         if true_values is not None:
@@ -910,6 +915,27 @@ cdef class ConvertOptions(_Weakrefable):
             assert typ != NULL
             deref(self.options).column_types[tobytes(k)] = typ
 
+    @property
+    def default_column_type(self):
+        """
+        Explicitly map columns not specified in column_types to a default type.
+        """
+        if deref(self.options).default_column_type != NULL:
+            return pyarrow_wrap_data_type(deref(self.options).default_column_type)
+        else:
+            return None
+
+    @default_column_type.setter
+    def default_column_type(self, value):
+        cdef:
+            shared_ptr[CDataType] typ
+        if value is not None:
+            typ = pyarrow_unwrap_data_type(ensure_type(value))
+            assert typ != NULL
+            deref(self.options).default_column_type = typ
+        else:
+            deref(self.options).default_column_type.reset()
+
     @property
     def null_values(self):
         """
@@ -1071,6 +1097,7 @@ cdef class ConvertOptions(_Weakrefable):
         return (
             self.check_utf8 == other.check_utf8 and
             self.column_types == other.column_types and
+            self.default_column_type == other.default_column_type and
             self.null_values == other.null_values and
             self.true_values == other.true_values and
             self.false_values == other.false_values and
@@ -1087,17 +1114,17 @@ cdef class ConvertOptions(_Weakrefable):
         )
 
     def __getstate__(self):
-        return (self.check_utf8, self.column_types, self.null_values,
-                self.true_values, self.false_values, self.decimal_point,
-                self.timestamp_parsers, self.strings_can_be_null,
-                self.quoted_strings_can_be_null, self.auto_dict_encode,
-                self.auto_dict_max_cardinality, self.include_columns,
-                self.include_missing_columns)
+        return (self.check_utf8, self.column_types, self.default_column_type,
+                self.null_values, self.true_values, self.false_values,
+                self.decimal_point, self.timestamp_parsers,
+                self.strings_can_be_null, self.quoted_strings_can_be_null,
+                self.auto_dict_encode, self.auto_dict_max_cardinality,
+                self.include_columns, self.include_missing_columns)
 
     def __setstate__(self, state):
-        (self.check_utf8, self.column_types, self.null_values,
-         self.true_values, self.false_values, self.decimal_point,
-         self.timestamp_parsers, self.strings_can_be_null,
+        (self.check_utf8, self.column_types, self.default_column_type,
+         self.null_values, self.true_values, self.false_values,
+         self.decimal_point, self.timestamp_parsers, self.strings_can_be_null,
          self.quoted_strings_can_be_null, self.auto_dict_encode,
          self.auto_dict_max_cardinality, self.include_columns,
          self.include_missing_columns) = state
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index f294ee4d50b..fa479391211 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -2104,6 +2104,7 @@ cdef extern from "arrow/csv/api.h" namespace "arrow::csv" nogil:
     cdef cppclass CCSVConvertOptions" arrow::csv::ConvertOptions":
         c_bool check_utf8
         unordered_map[c_string, shared_ptr[CDataType]] column_types
+        shared_ptr[CDataType] default_column_type
         vector[c_string] null_values
         vector[c_string] true_values
         vector[c_string] false_values

From a6d7113d772cc0ebc201eb31a880327cf78add68 Mon Sep 17 00:00:00 2001
From: Vlad Borovtsov <borovcov.vlad@gmail.com>
Date: Sat, 27 Sep 2025 17:29:27 +0200
Subject: [PATCH 2/9] Implemented tests to cover default column type

---
 cpp/src/arrow/csv/reader_test.cc | 133 +++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)

diff --git a/cpp/src/arrow/csv/reader_test.cc b/cpp/src/arrow/csv/reader_test.cc
index 57cc7d8efa5..4035bf88b29 100644
--- a/cpp/src/arrow/csv/reader_test.cc
+++ b/cpp/src/arrow/csv/reader_test.cc
@@ -488,5 +488,138 @@ TEST(CountRowsAsync, Errors) {
                               internal::GetCpuThreadPool(), read_options, parse_options));
 }
 
+TEST(ReaderTests, DefaultColumnTypePartialDefault) {
+  // Input with header; force all unspecified columns to string, but override only `id` to int64
+  auto table_buffer = std::make_shared<Buffer>(
+      "id,name,value,date\n"
+      "0000101,apple,0003.1400,2024-01-15\n"
+      "00102,banana,001.6180,2024-02-20\n"
+      "0003,cherry,02.71800,2024-03-25\n");
+
+  auto input = std::make_shared<io::BufferReader>(table_buffer);
+  auto read_options = ReadOptions::Defaults();
+  auto parse_options = ParseOptions::Defaults();
+  auto convert_options = ConvertOptions::Defaults();
+  convert_options.column_types["id"] = int64();
+  convert_options.default_column_type = utf8();
+
+  ASSERT_OK_AND_ASSIGN(
+      auto reader,
+      TableReader::Make(io::default_io_context(), input, read_options, parse_options,
+                        convert_options));
+  ASSERT_OK_AND_ASSIGN(auto table, reader->Read());
+
+  auto expected_schema =
+      schema({
+        field("id", int64()),
+        field("name", utf8()),
+        field("value", utf8()),
+        field("date", utf8())
+      });
+  AssertSchemaEqual(expected_schema, table->schema());
+
+  auto expected_table = TableFromJSON(
+      expected_schema,
+      {R"([{"id":101, "name":"apple",  "value":"0003.1400", "date":"2024-01-15"},
+            {"id":102, "name":"banana", "value":"001.6180", "date":"2024-02-20"},
+            {"id":3,   "name":"cherry", "value":"02.71800", "date":"2024-03-25"}])"});
+  ASSERT_TRUE(table->Equals(*expected_table));
+}
+
+TEST(ReaderTests, DefaultColumnTypeAllStringsWithHeader) {
+  // Input with header; default all columns to strings
+  auto table_buffer = std::make_shared<Buffer>(
+      "Record_Type|ID|Code|Quantity_1|Quantity_2|Amount_1|Amount_2|Amount_3|Flag|Note|Total_Amount\n"
+      "AB|000388907|abc|0150|012|000045.67|000000.10|000001.25|Y|noteA|000045.6700\n");
+
+  auto input = std::make_shared<io::BufferReader>(table_buffer);
+  auto read_options = ReadOptions::Defaults();
+  auto parse_options = ParseOptions::Defaults();
+  parse_options.delimiter = '|';
+  auto convert_options = ConvertOptions::Defaults();
+  convert_options.default_column_type = utf8();
+
+  ASSERT_OK_AND_ASSIGN(
+      auto reader,
+      TableReader::Make(io::default_io_context(), input, read_options, parse_options,
+                        convert_options));
+  ASSERT_OK_AND_ASSIGN(auto table, reader->Read());
+
+  auto expected_schema = schema({
+    field("Record_Type", utf8()),
+    field("ID", utf8()),
+    field("Code", utf8()),
+    field("Quantity_1", utf8()),
+    field("Quantity_2", utf8()),
+    field("Amount_1", utf8()),
+    field("Amount_2", utf8()),
+    field("Amount_3", utf8()),
+    field("Flag", utf8()),
+    field("Note", utf8()),
+    field("Total_Amount", utf8())});
+  AssertSchemaEqual(expected_schema, table->schema());
+
+  auto expected_table = TableFromJSON(
+      expected_schema,
+      {R"([{
+        "Record_Type":"AB",
+        "ID":"000388907",
+        "Code":"abc",
+        "Quantity_1":"0150",
+        "Quantity_2":"012",
+        "Amount_1":"000045.67",
+        "Amount_2":"000000.10",
+        "Amount_3":"000001.25",
+        "Flag":"Y",
+        "Note":"noteA",
+        "Total_Amount":"000045.6700"
+      }])"});
+  ASSERT_TRUE(table->Equals(*expected_table));
+}
+
+TEST(ReaderTests, DefaultColumnTypeAllStringsNoHeader) {
+  // Input without header; autogenerate column names and default all to strings
+  auto table_buffer = std::make_shared<Buffer>(
+      "AB|000388907|abc|0150|012|000045.67|000000.10|000001.25|Y|noteA|000045.6700\n");
+
+  auto input = std::make_shared<io::BufferReader>(table_buffer);
+  auto read_options = ReadOptions::Defaults();
+  read_options.autogenerate_column_names = true;  // treat first row as data
+  auto parse_options = ParseOptions::Defaults();
+  parse_options.delimiter = '|';
+  auto convert_options = ConvertOptions::Defaults();
+  convert_options.default_column_type = utf8();
+
+  ASSERT_OK_AND_ASSIGN(
+      auto reader,
+      TableReader::Make(io::default_io_context(), input, read_options, parse_options,
+                        convert_options));
+  ASSERT_OK_AND_ASSIGN(auto table, reader->Read());
+
+  auto expected_schema = schema({
+      field("f0", utf8()),  field("f1", utf8()),  field("f2", utf8()),
+      field("f3", utf8()),  field("f4", utf8()),  field("f5", utf8()),
+      field("f6", utf8()),  field("f7", utf8()),  field("f8", utf8()),
+      field("f9", utf8()),  field("f10", utf8())});
+  AssertSchemaEqual(expected_schema, table->schema());
+
+  auto expected_table = TableFromJSON(
+      expected_schema,
+      {R"([{
+        "f0":"AB",
+        "f1":"000388907",
+        "f2":"abc",
+        "f3":"0150",
+        "f4":"012",
+        "f5":"000045.67",
+        "f6":"000000.10",
+        "f7":"000001.25",
+        "f8":"Y",
+        "f9":"noteA",
+        "f10":"000045.6700"
+      }])"});
+  ASSERT_TRUE(table->Equals(*expected_table));
+}
+
 }  // namespace csv
 }  // namespace arrow

From 2fb1c376f230398a6c2ce30f8a5f732a2886c3fe Mon Sep 17 00:00:00 2001
From: Vlad Borovtsov <borovcov.vlad@gmail.com>
Date: Sat, 27 Sep 2025 18:26:35 +0200
Subject: [PATCH 3/9] extend test_convert_options test to include
 default_column_type

---
 python/pyarrow/tests/test_csv.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index f510c6dbe23..8429a653cc0 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -297,7 +297,8 @@ def test_convert_options(pickle_module):
         include_columns=['def', 'abc'],
         include_missing_columns=False,
         auto_dict_encode=True,
-        timestamp_parsers=[ISO8601, '%y-%m'])
+        timestamp_parsers=[ISO8601, '%y-%m'],
+        default_column_type=pa.int16())
 
     with pytest.raises(ValueError):
         opts.decimal_point = '..'
@@ -325,6 +326,17 @@ def test_convert_options(pickle_module):
     with pytest.raises(TypeError):
         opts.column_types = 0
 
+    assert opts.default_column_type is None
+    opts.default_column_type = pa.string()
+    assert opts.default_column_type == pa.string()
+    opts.default_column_type = 'int32'
+    assert opts.default_column_type == pa.int32()
+    opts.default_column_type = None
+    assert opts.default_column_type is None
+
+    with pytest.raises(TypeError, match='DataType expected'):
+        opts.default_column_type = 123
+
     assert isinstance(opts.null_values, list)
     assert '' in opts.null_values
     assert 'N/A' in opts.null_values

From 7f518c9a9a01747b6e6d5e0778b8a504a4054a9f Mon Sep 17 00:00:00 2001
From: Vlad Borovtsov <borovcov.vlad@gmail.com>
Date: Sat, 27 Sep 2025 18:35:55 +0200
Subject: [PATCH 4/9] test how default_column_type affects schema

---
 python/pyarrow/tests/test_csv.py | 51 ++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 8429a653cc0..a4840bcb9f2 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -1343,6 +1343,57 @@ def test_column_types_with_column_names(self):
             'y': ['b', 'd', 'f'],
         }
 
+    def test_default_column_type(self):
+        rows = b"a,b,c,d\n001,2.5,hello,true\n4,3.14,world,false\n"
+
+        # Test with default_column_type only - all columns should use the specified type.
+        opts = ConvertOptions(default_column_type=pa.string())
+        table = self.read_bytes(rows, convert_options=opts)
+        schema = pa.schema([('a', pa.string()),
+                            ('b', pa.string()),
+                            ('c', pa.string()),
+                            ('d', pa.string())])
+        assert table.schema == schema
+        assert table.to_pydict() == {
+            'a': ["001", "4"],
+            'b': ["2.5", "3.14"],
+            'c': ["hello", "world"],
+            'd': ["true", "false"],
+        }
+
+        # Test with both column_types and default_column_type
+        # Columns specified in column_types should override default_column_type
+        opts = ConvertOptions(
+            column_types={'b': pa.float64(), 'd': pa.bool_()},
+            default_column_type=pa.string()
+        )
+        table = self.read_bytes(rows, convert_options=opts)
+        schema = pa.schema([('a', pa.string()),
+                            ('b', pa.float64()),
+                            ('c', pa.string()),
+                            ('d', pa.bool_())])
+        assert table.schema == schema
+        assert table.to_pydict() == {
+            'a': ["001", "4"],
+            'b': [2.5, 3.14],
+            'c': ["hello", "world"],
+            'd': [True, False],
+        }
+
+        # Test that default_column_type disables type inference
+        opts_no_default = ConvertOptions(column_types={'b': pa.float64()})
+        table_no_default = self.read_bytes(rows, convert_options=opts_no_default)
+
+        opts_with_default = ConvertOptions(
+            column_types={'b': pa.float64()},
+            default_column_type=pa.string()
+        )
+        table_with_default = self.read_bytes(rows, convert_options=opts_with_default)
+
+        # Column 'a' should be int64 without default, string with default
+        assert table_no_default.schema.field('a').type == pa.int64()
+        assert table_with_default.schema.field('a').type == pa.string()
+
     def test_no_ending_newline(self):
         # No \n after last line
         rows = b"a,b,c\n1,2,3\n4,5,6"

From bc234dde23435987f887d225edccb04c38645084 Mon Sep 17 00:00:00 2001
From: Vlad Borovtsov <borovcov.vlad@gmail.com>
Date: Sat, 27 Sep 2025 21:24:16 +0200
Subject: [PATCH 5/9] docs: add examples for default_column_type; add missing
 reference

---
 docs/source/python/csv.rst |  1 +
 python/pyarrow/_csv.pyx    | 53 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/docs/source/python/csv.rst b/docs/source/python/csv.rst
index 5eb68e9ccdc..27b740cdfd7 100644
--- a/docs/source/python/csv.rst
+++ b/docs/source/python/csv.rst
@@ -136,6 +136,7 @@ Available convert options are:
 
   ~ConvertOptions.check_utf8
   ~ConvertOptions.column_types
+  ~ConvertOptions.default_column_type
   ~ConvertOptions.null_values
   ~ConvertOptions.true_values
   ~ConvertOptions.false_values
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index 939fb7067a9..ef84078f134 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -810,6 +810,59 @@ cdef class ConvertOptions(_Weakrefable):
     fast: bool
     ----
     fast: [[true,true,false,false,null]]
+
+    Set a default column type for all columns (disables type inference):
+
+    >>> convert_options = csv.ConvertOptions(default_column_type=pa.string())
+    >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options)
+    pyarrow.Table
+    animals: string
+    n_legs: string
+    entry: string
+    fast: string
+    ----
+    animals: [["Flamingo","Horse","Brittle stars","Centipede",""]]
+    n_legs: [["2","4","5","100","6"]]
+    entry: [["01/03/2022","02/03/2022","03/03/2022","04/03/2022","05/03/2022"]]
+    fast: [["Yes","Yes","No","No",""]]
+
+    Combine default_column_type with column_types (specific column types override default):
+
+    >>> convert_options = csv.ConvertOptions(
+    ...                   column_types={"n_legs": pa.int64(), "fast": pa.bool_()},
+    ...                   default_column_type=pa.string(),
+    ...                   true_values=["Yes"],
+    ...                   false_values=["No"])
+    >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options)
+    pyarrow.Table
+    animals: string
+    n_legs: int64
+    entry: string
+    fast: bool
+    ----
+    animals: [["Flamingo","Horse","Brittle stars","Centipede",""]]
+    n_legs: [[2,4,5,100,6]]
+    entry: [["01/03/2022","02/03/2022","03/03/2022","04/03/2022","05/03/2022"]]
+    fast: [[true,true,false,false,null]]
+
+    Use default_column_type with selective column_types for mixed type conversion:
+
+    >>> convert_options = csv.ConvertOptions(
+    ...                   column_types={"animals": pa.string(),
+    ...                                 "entry": pa.timestamp('s')},
+    ...                   default_column_type=pa.string(),
+    ...                   timestamp_parsers=["%m/%d/%Y"])
+    >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options)
+    pyarrow.Table
+    animals: string
+    n_legs: string
+    entry: timestamp[s]
+    fast: string
+    ----
+    animals: [["Flamingo","Horse","Brittle stars","Centipede",""]]
+    n_legs: [["2","4","5","100","6"]]
+    entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]]
+    fast: [["Yes","Yes","No","No",""]]
     """
 
     # Avoid mistakingly creating attributes

From 94584c103c947497225307548e24a793a31f2fda Mon Sep 17 00:00:00 2001
From: Vlad Borovtsov <borovcov.vlad@gmail.com>
Date: Thu, 18 Dec 2025 22:22:16 +0100
Subject: [PATCH 6/9] docs: remove redundant example for default_column_type
 usage

---
 python/pyarrow/_csv.pyx | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index ef84078f134..b878266ef8b 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -844,25 +844,6 @@ cdef class ConvertOptions(_Weakrefable):
     n_legs: [[2,4,5,100,6]]
     entry: [["01/03/2022","02/03/2022","03/03/2022","04/03/2022","05/03/2022"]]
     fast: [[true,true,false,false,null]]
-
-    Use default_column_type with selective column_types for mixed type conversion:
-
-    >>> convert_options = csv.ConvertOptions(
-    ...                   column_types={"animals": pa.string(),
-    ...                                 "entry": pa.timestamp('s')},
-    ...                   default_column_type=pa.string(),
-    ...                   timestamp_parsers=["%m/%d/%Y"])
-    >>> csv.read_csv(io.BytesIO(s.encode()), convert_options=convert_options)
-    pyarrow.Table
-    animals: string
-    n_legs: string
-    entry: timestamp[s]
-    fast: string
-    ----
-    animals: [["Flamingo","Horse","Brittle stars","Centipede",""]]
-    n_legs: [["2","4","5","100","6"]]
-    entry: [[2022-01-03 00:00:00,2022-02-03 00:00:00,2022-03-03 00:00:00,2022-04-03 00:00:00,2022-05-03 00:00:00]]
-    fast: [["Yes","Yes","No","No",""]]
     """
 
     # Avoid mistakingly creating attributes

From c5546266db0b29c88eb34d8d8dd8bfa559ca0acf Mon Sep 17 00:00:00 2001
From: Vlad Borovtsov <borovcov.vlad@gmail.com>
Date: Sun, 21 Dec 2025 14:30:01 +0100
Subject: [PATCH 7/9] column builder: guard test to ensure default column type
 won't override inference.

---
 cpp/src/arrow/csv/column_builder_test.cc | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/cpp/src/arrow/csv/column_builder_test.cc b/cpp/src/arrow/csv/column_builder_test.cc
index cb178c1d2b3..14e485fec51 100644
--- a/cpp/src/arrow/csv/column_builder_test.cc
+++ b/cpp/src/arrow/csv/column_builder_test.cc
@@ -342,6 +342,25 @@ TEST_F(InferringColumnBuilderTest, SingleChunkInteger) {
                 {ArrayFromJSON(int64(), "[null, 123, 456]")});
 }
 
+TEST_F(InferringColumnBuilderTest, SingleChunkDefaultColumnTypeDoesNotOverrideInference) {
+  auto options = ConvertOptions::Defaults();
+  options.default_column_type = utf8();
+  auto tg = TaskGroup::MakeSerial();
+
+  CheckInferred(tg, {{"0000404", "0000505", "0000606"}}, options,
+                {ArrayFromJSON(int64(), "[404, 505, 606]")});
+}
+
+TEST_F(InferringColumnBuilderTest, MultipleChunkDefaultColumnTypeDoesNotOverrideInference) {
+  auto options = ConvertOptions::Defaults();
+  options.default_column_type = utf8();
+  auto tg = TaskGroup::MakeSerial();
+
+  CheckInferred(tg, {{"0000404"}, {"0000505", "0000606"}}, options,
+                {ArrayFromJSON(int64(), "[404]"),
+                 ArrayFromJSON(int64(), "[505, 606]")});
+}
+
 TEST_F(InferringColumnBuilderTest, MultipleChunkInteger) {
   auto options = ConvertOptions::Defaults();
   auto tg = TaskGroup::MakeSerial();

From 815520e4fd716dd51ecc273993070702d15e1b01 Mon Sep 17 00:00:00 2001
From: Vlad Borovtsov <borovcov.vlad@gmail.com>
Date: Sun, 21 Dec 2025 15:33:30 +0100
Subject: [PATCH 8/9] simplify reader tests to focus on default_column_type
 behavior

---
 cpp/src/arrow/csv/reader_test.cc | 58 +++++++-------------------------
 1 file changed, 12 insertions(+), 46 deletions(-)

diff --git a/cpp/src/arrow/csv/reader_test.cc b/cpp/src/arrow/csv/reader_test.cc
index 4035bf88b29..eaf47d1b390 100644
--- a/cpp/src/arrow/csv/reader_test.cc
+++ b/cpp/src/arrow/csv/reader_test.cc
@@ -489,7 +489,6 @@ TEST(CountRowsAsync, Errors) {
 }
 
 TEST(ReaderTests, DefaultColumnTypePartialDefault) {
-  // Input with header; force all unspecified columns to string, but override only `id` to int64
   auto table_buffer = std::make_shared<Buffer>(
       "id,name,value,date\n"
       "0000101,apple,0003.1400,2024-01-15\n"
@@ -526,16 +525,15 @@ TEST(ReaderTests, DefaultColumnTypePartialDefault) {
   ASSERT_TRUE(table->Equals(*expected_table));
 }
 
-TEST(ReaderTests, DefaultColumnTypeAllStringsWithHeader) {
-  // Input with header; default all columns to strings
+TEST(ReaderTests, DefaultColumnTypeForcesTypedColumns) {
   auto table_buffer = std::make_shared<Buffer>(
-      "Record_Type|ID|Code|Quantity_1|Quantity_2|Amount_1|Amount_2|Amount_3|Flag|Note|Total_Amount\n"
-      "AB|000388907|abc|0150|012|000045.67|000000.10|000001.25|Y|noteA|000045.6700\n");
+      "id,amount,code\n"
+      "0000404,000045.6700,001\n"
+      "0000505,000000.10,010\n");
 
   auto input = std::make_shared<io::BufferReader>(table_buffer);
   auto read_options = ReadOptions::Defaults();
   auto parse_options = ParseOptions::Defaults();
-  parse_options.delimiter = '|';
   auto convert_options = ConvertOptions::Defaults();
   convert_options.default_column_type = utf8();
 
@@ -545,42 +543,21 @@ TEST(ReaderTests, DefaultColumnTypeAllStringsWithHeader) {
                         convert_options));
   ASSERT_OK_AND_ASSIGN(auto table, reader->Read());
 
-  auto expected_schema = schema({
-    field("Record_Type", utf8()),
-    field("ID", utf8()),
-    field("Code", utf8()),
-    field("Quantity_1", utf8()),
-    field("Quantity_2", utf8()),
-    field("Amount_1", utf8()),
-    field("Amount_2", utf8()),
-    field("Amount_3", utf8()),
-    field("Flag", utf8()),
-    field("Note", utf8()),
-    field("Total_Amount", utf8())});
+  auto expected_schema = schema(
+      {field("id", utf8()), field("amount", utf8()), field("code", utf8())});
   AssertSchemaEqual(expected_schema, table->schema());
 
   auto expected_table = TableFromJSON(
       expected_schema,
-      {R"([{
-        "Record_Type":"AB",
-        "ID":"000388907",
-        "Code":"abc",
-        "Quantity_1":"0150",
-        "Quantity_2":"012",
-        "Amount_1":"000045.67",
-        "Amount_2":"000000.10",
-        "Amount_3":"000001.25",
-        "Flag":"Y",
-        "Note":"noteA",
-        "Total_Amount":"000045.6700"
-      }])"});
+      {R"([{"id":"0000404", "amount":"000045.6700", "code":"001"},
+            {"id":"0000505", "amount":"000000.10", "code":"010"}])"});
   ASSERT_TRUE(table->Equals(*expected_table));
 }
 
 TEST(ReaderTests, DefaultColumnTypeAllStringsNoHeader) {
   // Input without header; autogenerate column names and default all to strings
   auto table_buffer = std::make_shared<Buffer>(
-      "AB|000388907|abc|0150|012|000045.67|000000.10|000001.25|Y|noteA|000045.6700\n");
+      "AB|000388907|000045.6700\n");
 
   auto input = std::make_shared<io::BufferReader>(table_buffer);
   auto read_options = ReadOptions::Defaults();
@@ -596,11 +573,8 @@ TEST(ReaderTests, DefaultColumnTypeAllStringsNoHeader) {
                         convert_options));
   ASSERT_OK_AND_ASSIGN(auto table, reader->Read());
 
-  auto expected_schema = schema({
-      field("f0", utf8()),  field("f1", utf8()),  field("f2", utf8()),
-      field("f3", utf8()),  field("f4", utf8()),  field("f5", utf8()),
-      field("f6", utf8()),  field("f7", utf8()),  field("f8", utf8()),
-      field("f9", utf8()),  field("f10", utf8())});
+  auto expected_schema =
+      schema({field("f0", utf8()), field("f1", utf8()), field("f2", utf8())});
   AssertSchemaEqual(expected_schema, table->schema());
 
   auto expected_table = TableFromJSON(
@@ -608,15 +582,7 @@ TEST(ReaderTests, DefaultColumnTypeAllStringsNoHeader) {
       {R"([{
         "f0":"AB",
         "f1":"000388907",
-        "f2":"abc",
-        "f3":"0150",
-        "f4":"012",
-        "f5":"000045.67",
-        "f6":"000000.10",
-        "f7":"000001.25",
-        "f8":"Y",
-        "f9":"noteA",
-        "f10":"000045.6700"
+        "f2":"000045.6700"
       }])"});
   ASSERT_TRUE(table->Equals(*expected_table));
 }

From 1d1b24a3ed36c36800c1ed6cfeddd8542291c02a Mon Sep 17 00:00:00 2001
From: Vlad Borovtsov <borovcov.vlad@gmail.com>
Date: Tue, 23 Dec 2025 17:20:06 +0100
Subject: [PATCH 9/9] fix codestyle

---
 cpp/src/arrow/csv/column_builder_test.cc |  6 ++--
 cpp/src/arrow/csv/reader.cc              |  3 +-
 cpp/src/arrow/csv/reader_test.cc         | 44 +++++++++---------------
 3 files changed, 20 insertions(+), 33 deletions(-)

diff --git a/cpp/src/arrow/csv/column_builder_test.cc b/cpp/src/arrow/csv/column_builder_test.cc
index 14e485fec51..04c9cfe2482 100644
--- a/cpp/src/arrow/csv/column_builder_test.cc
+++ b/cpp/src/arrow/csv/column_builder_test.cc
@@ -351,14 +351,14 @@ TEST_F(InferringColumnBuilderTest, SingleChunkDefaultColumnTypeDoesNotOverrideIn
                 {ArrayFromJSON(int64(), "[404, 505, 606]")});
 }
 
-TEST_F(InferringColumnBuilderTest, MultipleChunkDefaultColumnTypeDoesNotOverrideInference) {
+TEST_F(InferringColumnBuilderTest,
+       MultipleChunkDefaultColumnTypeDoesNotOverrideInference) {
   auto options = ConvertOptions::Defaults();
   options.default_column_type = utf8();
   auto tg = TaskGroup::MakeSerial();
 
   CheckInferred(tg, {{"0000404"}, {"0000505", "0000606"}}, options,
-                {ArrayFromJSON(int64(), "[404]"),
-                 ArrayFromJSON(int64(), "[505, 606]")});
+                {ArrayFromJSON(int64(), "[404]"), ArrayFromJSON(int64(), "[505, 606]")});
 }
 
 TEST_F(InferringColumnBuilderTest, MultipleChunkInteger) {
diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc
index 4767626ae6c..b6412673ebf 100644
--- a/cpp/src/arrow/csv/reader.cc
+++ b/cpp/src/arrow/csv/reader.cc
@@ -678,8 +678,7 @@ class ReaderMixin {
         if (convert_options_.default_column_type != nullptr) {
           conversion_schema_.columns.push_back(ConversionSchema::TypedColumn(
               std::move(col_name), col_index, convert_options_.default_column_type));
-        }
-        else {
+        } else {
           conversion_schema_.columns.push_back(
               ConversionSchema::InferredColumn(std::move(col_name), col_index));
         }
diff --git a/cpp/src/arrow/csv/reader_test.cc b/cpp/src/arrow/csv/reader_test.cc
index eaf47d1b390..deb5c6dfbd5 100644
--- a/cpp/src/arrow/csv/reader_test.cc
+++ b/cpp/src/arrow/csv/reader_test.cc
@@ -502,19 +502,13 @@ TEST(ReaderTests, DefaultColumnTypePartialDefault) {
   convert_options.column_types["id"] = int64();
   convert_options.default_column_type = utf8();
 
-  ASSERT_OK_AND_ASSIGN(
-      auto reader,
-      TableReader::Make(io::default_io_context(), input, read_options, parse_options,
-                        convert_options));
+  ASSERT_OK_AND_ASSIGN(auto reader,
+                       TableReader::Make(io::default_io_context(), input, read_options,
+                                         parse_options, convert_options));
   ASSERT_OK_AND_ASSIGN(auto table, reader->Read());
 
-  auto expected_schema =
-      schema({
-        field("id", int64()),
-        field("name", utf8()),
-        field("value", utf8()),
-        field("date", utf8())
-      });
+  auto expected_schema = schema({field("id", int64()), field("name", utf8()),
+                                 field("value", utf8()), field("date", utf8())});
   AssertSchemaEqual(expected_schema, table->schema());
 
   auto expected_table = TableFromJSON(
@@ -537,27 +531,24 @@ TEST(ReaderTests, DefaultColumnTypeForcesTypedColumns) {
   auto convert_options = ConvertOptions::Defaults();
   convert_options.default_column_type = utf8();
 
-  ASSERT_OK_AND_ASSIGN(
-      auto reader,
-      TableReader::Make(io::default_io_context(), input, read_options, parse_options,
-                        convert_options));
+  ASSERT_OK_AND_ASSIGN(auto reader,
+                       TableReader::Make(io::default_io_context(), input, read_options,
+                                         parse_options, convert_options));
   ASSERT_OK_AND_ASSIGN(auto table, reader->Read());
 
-  auto expected_schema = schema(
-      {field("id", utf8()), field("amount", utf8()), field("code", utf8())});
+  auto expected_schema =
+      schema({field("id", utf8()), field("amount", utf8()), field("code", utf8())});
   AssertSchemaEqual(expected_schema, table->schema());
 
   auto expected_table = TableFromJSON(
-      expected_schema,
-      {R"([{"id":"0000404", "amount":"000045.6700", "code":"001"},
+      expected_schema, {R"([{"id":"0000404", "amount":"000045.6700", "code":"001"},
             {"id":"0000505", "amount":"000000.10", "code":"010"}])"});
   ASSERT_TRUE(table->Equals(*expected_table));
 }
 
 TEST(ReaderTests, DefaultColumnTypeAllStringsNoHeader) {
   // Input without header; autogenerate column names and default all to strings
-  auto table_buffer = std::make_shared<Buffer>(
-      "AB|000388907|000045.6700\n");
+  auto table_buffer = std::make_shared<Buffer>("AB|000388907|000045.6700\n");
 
   auto input = std::make_shared<io::BufferReader>(table_buffer);
   auto read_options = ReadOptions::Defaults();
@@ -567,19 +558,16 @@ TEST(ReaderTests, DefaultColumnTypeAllStringsNoHeader) {
   auto convert_options = ConvertOptions::Defaults();
   convert_options.default_column_type = utf8();
 
-  ASSERT_OK_AND_ASSIGN(
-      auto reader,
-      TableReader::Make(io::default_io_context(), input, read_options, parse_options,
-                        convert_options));
+  ASSERT_OK_AND_ASSIGN(auto reader,
+                       TableReader::Make(io::default_io_context(), input, read_options,
+                                         parse_options, convert_options));
   ASSERT_OK_AND_ASSIGN(auto table, reader->Read());
 
   auto expected_schema =
       schema({field("f0", utf8()), field("f1", utf8()), field("f2", utf8())});
   AssertSchemaEqual(expected_schema, table->schema());
 
-  auto expected_table = TableFromJSON(
-      expected_schema,
-      {R"([{
+  auto expected_table = TableFromJSON(expected_schema, {R"([{
         "f0":"AB",
         "f1":"000388907",
         "f2":"000045.6700"