Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ Fixed
:meth:`imod.mf6.Recharge.from_imod5_cap_data`,
:meth:`imod.mf6.LayeredWell.from_imod5_cap_data` now regrids the iMOD5 CAP
data to the MODFLOW6 target discretization.
- Fixed confusing warning about inconsistent IPF columns when loading GEN files.

Changed
~~~~~~~
Expand Down
30 changes: 30 additions & 0 deletions imod/formats/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import csv


def infer_delimwhitespace(line: str, ncol: int):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method does 2 things. From the name it isn't clear that it also does a column check.
You could make the name more generic and return a namedtuple.

The method signature then also contains the return type, which makes it clearer what it does:

class DelimiterInfo(NamedTuple):
    """Result of inspecting a single line of delimited text."""

    # True when comma-parsing the line yields exactly one field.
    is_whitespace: bool
    # True when the line is treated as whitespace-delimited, or when its
    # comma-separated field count equals the expected column count.
    has_expected_cols: bool


def infer_delimiter_info(line: str, ncol: int) -> DelimiterInfo:
    """
    Classify a line as whitespace- or comma-delimited and report whether its
    comma-separated column count agrees with the expected one.

    Parameters
    ----------
    line : str
        The text line to inspect.
    ncol : int
        Number of columns expected when the line is comma-delimited.

    Returns
    -------
    DelimiterInfo
        is_whitespace : bool
            True if parsing the line as CSV produces a single field, in which
            case the line is taken to be whitespace-delimited.
        has_expected_cols : bool
            True if the line is taken to be whitespace-delimited, or if the
            number of comma-separated fields equals ``ncol``.
    """
    comma_fields = next(csv.reader([line]))
    field_count = len(comma_fields)

    # A single CSV field means no comma split occurred; ncol is not consulted
    # in that case.
    whitespace_delimited = field_count == 1
    cols_as_expected = whitespace_delimited or field_count == ncol

    return DelimiterInfo(
        is_whitespace=whitespace_delimited,
        has_expected_cols=cols_as_expected,
    )

On the calling side you can then use the named tuple:

delimiter_info = infer_delimiter_info(line, ncol)
delimiter_info.is_whitespace
delimiter_info.has_expected_cols

or directly unpack it:
has_whitespace, has_expected_cols = infer_delimiter_info(line, ncol)

Copy link
Copy Markdown
Collaborator

@Manangka Manangka May 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

infer_line_delimiter_info could also be a good name with the named tuple then being LineDelimiterInfo

"""
Infer whether the line is delimited by whitespace or commas, based on the
number of columns. Also returns whether the line has the amount of expected
columns if delimited by commas.

Parameters
----------
line : str
The line to analyze.
ncol : int
The expected number of columns if line delimited by commas.

Returns
-------
has_whitespace : bool
Whether the line is delimited by whitespace.
has_expected_cols : bool
Whether the line has the expected number of columns if delimited by commas.
"""
n_elem = len(next(csv.reader([line])))
if n_elem == 1:
return True, True
elif n_elem == ncol:
return False, True
else:
return False, False
4 changes: 2 additions & 2 deletions imod/formats/gen/gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd
from scipy.io import FortranFile, FortranFormattingError

from imod.formats.ipf import _infer_delimwhitespace
from imod.formats.common import infer_delimwhitespace
from imod.util.imports import MissingOptionalModule

try:
Expand Down Expand Up @@ -52,7 +52,7 @@ def parse_ascii_segments(lines: List[str]):
indices = np.repeat(np.arange(n_feature), n_vertex)

first_coord = features[0][1:][0]
has_whitespace = _infer_delimwhitespace(first_coord, 2)
has_whitespace, _ = infer_delimwhitespace(first_coord, 3)
sep = r"\s+" if has_whitespace else ","

vertex_coords = []
Expand Down
21 changes: 12 additions & 9 deletions imod/formats/ipf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,23 @@
import pandas as pd

import imod
from imod.formats.common import infer_delimwhitespace
from imod.logging import LogLevel
from imod.util.time import to_pandas_datetime_series


def _infer_delimwhitespace(line, ncol):
n_elem = len(next(csv.reader([line])))
if n_elem == 1:
return True
elif n_elem == ncol:
return False
else:
warnings.warn(
f"Inconsistent IPF: header states {ncol} columns, first line contains {n_elem}"
infer_whitespace, has_expected_cols = infer_delimwhitespace(line, ncol)

if not has_expected_cols:
log_message = f"Inconsistent IPF: header states {ncol} columns, first line contains {len(line.split())} whitespace-delimited columns and {len(next(csv.reader([line])))} comma-delimited columns."
imod.logging.logger.log(
loglevel=LogLevel.WARNING,
message=log_message,
additional_depth=2,
)
return False
warnings.warn(log_message)
return infer_whitespace


def _read_ipf(path, kwargs=None) -> Tuple[pd.DataFrame, int, str]:
Expand Down
10 changes: 10 additions & 0 deletions imod/tests/test_formats/test_format_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from imod.formats.common import infer_delimwhitespace


def test_infer_delimwhitespace():
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test asserts many things.
Consider using test cases to make it clearer what you're testing

# Each case is (line, ncol, expected), where expected is the
# (is_whitespace, has_expected_cols) pair.
_DELIMITER_INFO_CASES = [
    # No commas: csv.reader sees a single element, so the whitespace branch
    # is taken and ncol is ignored.
    ("1 2 3", 3, (True, True)),
    ("1 2 3", 2, (True, True)),
    ("1\t2\t3", 3, (True, True)),
    # Comma-delimited (with or without spaces) and the expected column count.
    ("1,2,3", 3, (False, True)),
    ("1, 2, 3", 3, (False, True)),
    # Comma-delimited, wrong number of columns.
    ("1,2,3", 4, (False, False)),
    # Mixed delimiters, unexpected column count.
    ("1 2,3", 3, (False, False)),
]


@pytest.mark.parametrize("line, ncol, expected", _DELIMITER_INFO_CASES)
def test_infer_delimiter_info(line, ncol, expected):
    """Compare the inferred delimiter info with the expected pair."""
    actual = infer_delimiter_info(line, ncol)
    assert actual == expected

assert infer_delimwhitespace("1 2 3", 3) == (True, True)
assert infer_delimwhitespace("1,2,3", 3) == (False, True)
assert infer_delimwhitespace("1,2,3", 4) == (False, False)
assert infer_delimwhitespace("1, 2, 3", 3) == (False, True)
assert infer_delimwhitespace("1\t2\t3", 3) == (True, True)
assert infer_delimwhitespace("1 2,3", 3) == (False, False)
Loading