diff --git a/pyxform/errors.py b/pyxform/errors.py
index bcb118cc..66902a10 100644
--- a/pyxform/errors.py
+++ b/pyxform/errors.py
@@ -221,6 +221,13 @@ class ErrorCode(Enum):
             "or update pyxform."
         ),
     )
+    HEADER_006: Detail = Detail(
+        name="Headers - invalid translated non-translatable column",
+        msg=(
+            "[row : 1] On the '{sheet_name}' sheet, the column name '{column}' is invalid. "
+            "The column '{base}' is not translatable on this sheet."
+        ),
+    )
     INTERNAL_001: Detail = Detail(
         name="Internal error - incorrectly processed question trigger data",
         msg=(
@@ -245,6 +252,14 @@ class ErrorCode(Enum):
             "Learn more: https://xlsform.org/en/#setting-up-your-worksheets"
         ),
     )
+    SETTINGS_001: Detail = Detail(
+        name="Settings - invalid submission_url",
+        msg=(
+            "[row : 1] On the 'settings' sheet, the 'submission_url' value is invalid. "
+            "Submission URLs must be full HTTP or HTTPS URLs, for example "
+            "'https://example.com/submission'."
+        ),
+    )
     NAMES_001: Detail = Detail(
         name="Names - invalid duplicate name in same context",
         msg=(
diff --git a/pyxform/parsing/sheet_headers.py b/pyxform/parsing/sheet_headers.py
index 8343071a..e9482aed 100644
--- a/pyxform/parsing/sheet_headers.py
+++ b/pyxform/parsing/sheet_headers.py
@@ -11,6 +11,38 @@
 SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'}
 RE_SMART_QUOTES = re.compile(r"|".join(re.escape(old) for old in SMART_QUOTES))
 
+TRANSLATABLE_HEADER_PATHS = {
+    constants.SURVEY: {
+        (constants.LABEL,),
+        (constants.HINT,),
+        ("guidance_hint",),
+        ("media", "image"),
+        ("media", "big-image"),
+        ("media", "audio"),
+        ("media", "video"),
+        ("bind", "jr:constraintMsg"),
+        ("bind", "jr:requiredMsg"),
+        ("bind", "jr:noAppErrorString"),
+    },
+    constants.CHOICES: {
+        (constants.LABEL,),
+        ("media", "image"),
+        ("media", "big-image"),
+        ("media", "audio"),
+        ("media", "video"),
+    },
+    constants.SETTINGS: set(),
+    constants.EXTERNAL_CHOICES: set(),
+    constants.ENTITIES: set(),
+}
+
+GROUPABLE_HEADER_ROOTS = {
+    constants.SURVEY: {"attribute", "bind", "body", "control", "instance", "media"},
+    constants.CHOICES: {"media"},
+    constants.SETTINGS: {"attribute"},
+    constants.EXTERNAL_CHOICES: {"media"},
+}
+
 
 def clean_text_values(
     value: str,
@@ -191,6 +223,44 @@ def process_row(
     return out_row
 
 
+def validate_translatable_header(
+    sheet_name: str,
+    header: str,
+    new_header: str,
+    tokens: tuple[str, ...],
+    header_columns: Container[str],
+) -> None:
+    """
+    Raise a user-facing error for translated headers on non-translatable columns.
+    """
+    if len(tokens) < 2:
+        return
+
+    recognized_column = new_header != header or tokens[0] in header_columns
+    if not recognized_column:
+        return
+
+    translation_paths = TRANSLATABLE_HEADER_PATHS.get(sheet_name, set())
+    if tokens[:-1] in translation_paths:
+        return
+
+    groupable_roots = GROUPABLE_HEADER_ROOTS.get(sheet_name, set())
+    if len(tokens) == 2 and tokens[0] in groupable_roots:
+        return
+
+    if "::" in header:
+        base = header.rsplit("::", 1)[0]
+    else:
+        base = header.rsplit(":", 1)[0]
+    raise PyXFormError(
+        ErrorCode.HEADER_006.value.format(
+            sheet_name=sheet_name,
+            column=header,
+            base=base,
+        )
+    )
+
+
 def dealias_and_group_headers(
     sheet_name: str,
     sheet_data: Sequence[dict[str, str]],
@@ -248,6 +318,13 @@ def dealias_and_group_headers(
                     header_aliases=header_aliases,
                     header_columns=header_columns,
                 )
+                validate_translatable_header(
+                    sheet_name=sheet_name,
+                    header=header,
+                    new_header=new_header,
+                    tokens=tokens,
+                    header_columns=header_columns,
+                )
                 other_header = tokens_key.get(tokens)
                 if other_header and new_header != header:
                     raise PyXFormError(
diff --git a/pyxform/validators/pyxform/parameters_generic.py b/pyxform/validators/pyxform/parameters_generic.py
index ee5769a6..7da825fe 100644
--- a/pyxform/validators/pyxform/parameters_generic.py
+++ b/pyxform/validators/pyxform/parameters_generic.py
@@ -26,7 +26,9 @@ def parse(raw_parameters: str) -> PARAMETERS_TYPE:
             )
         k, v = param.split("=")[:2]
         key = maybe_strip(k.lower())
-        params[key] = v if key in CASE_SENSITIVE_VALUES else maybe_strip(v.lower())
+        params[key] = (
+            maybe_strip(v) if key in CASE_SENSITIVE_VALUES else maybe_strip(v.lower())
+        )
 
     return params
 
diff --git a/pyxform/validators/pyxform/settings.py b/pyxform/validators/pyxform/settings.py
index 2007f6eb..e4046ac4 100644
--- a/pyxform/validators/pyxform/settings.py
+++ b/pyxform/validators/pyxform/settings.py
@@ -1,3 +1,5 @@
+from urllib.parse import urlsplit
+
 from pyxform import constants as co
 from pyxform.errors import ErrorCode, PyXFormError
 from pyxform.parsing.expression import is_xml_tag
@@ -19,3 +21,26 @@ def validate_name(name: str | None, from_sheet: bool = True):
             )
         else:
             raise PyXFormError(ErrorCode.NAMES_009.value.format(name="form_name"))
+
+
+def validate_submission_url(submission_url: str | None):
+    """
+    The submission_url must be a full HTTP(S) URL.
+
+    :param submission_url: The value to check.
+    """
+    if submission_url in {None, ""}:
+        return
+
+    try:
+        parsed = urlsplit(submission_url)
+    except ValueError as err:
+        raise PyXFormError(ErrorCode.SETTINGS_001.value.format()) from err
+
+    if (
+        any(c.isspace() for c in submission_url)
+        or parsed.scheme not in {"http", "https"}
+        or not parsed.netloc
+        or parsed.hostname is None
+    ):
+        raise PyXFormError(ErrorCode.SETTINGS_001.value.format())
diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py
index 6a0b2022..0e89f45a 100644
--- a/pyxform/xls2json.py
+++ b/pyxform/xls2json.py
@@ -311,6 +311,9 @@ def workbook_to_json(
         )
         settings = settings_sheet.data[0]
         validate_settings.validate_name(name=settings.get(constants.NAME, None))
+        validate_settings.validate_submission_url(
+            submission_url=settings.get(constants.SUBMISSION_URL, None)
+        )
     else:
         similar = find_sheet_misspellings(key=constants.SETTINGS, keys=sheet_names)
         if similar is not None:
diff --git a/tests/test_external_instances_for_selects.py b/tests/test_external_instances_for_selects.py
index c559bd37..09d1bfc4 100644
--- a/tests/test_external_instances_for_selects.py
+++ b/tests/test_external_instances_for_selects.py
@@ -265,6 +265,27 @@ def test_param_value_case_preserved(self):
                     ],
                 )
 
+    def test_param_value_and_label_whitespace_trimmed_case_preserved(self):
+        """Should trim outer spaces for value/label params while preserving case."""
+        md = """
+        | survey | | | | |
+        | | type | name | label | parameters |
+        | | select_one_from_file cities{ext} | city | City | value = VAL , label = lBl |
+        | | select_multiple_from_file suburbs{ext} | suburbs | Suburbs | value = VAL , label = lBl |
+        """
+        for ext, xp_city, xp_subs in self.xp_test_args:
+            with self.subTest(msg=ext):
+                self.assertPyxformXform(
+                    name="test",
+                    md=md.format(ext=ext),
+                    xml__xpath_match=[
+                        xp_city.model_external_instance_and_bind(),
+                        xp_subs.model_external_instance_and_bind(),
+                        xp_city.body_itemset_nodeset_and_refs(value="VAL", label="lBl"),
+                        xp_subs.body_itemset_nodeset_and_refs(value="VAL", label="lBl"),
+                    ],
+                )
+
     def test_expected_error_message(self):
         """Should get helpful error when select_from_file is missing a file extension."""
         md = """
diff --git a/tests/test_settings.py b/tests/test_settings.py
index c954901c..d4c96613 100644
--- a/tests/test_settings.py
+++ b/tests/test_settings.py
@@ -29,6 +29,28 @@ def test_form_title(self):
             xml__xpath_match=[xps.form_title("My Form")],
         )
 
+    def test_form_title__translated__error(self):
+        """Should raise a clear error for translated form_title columns."""
+        md = """
+        | settings |
+        | | form_title::French (fr) |
+        | | Mon formulaire |
+        | survey | | | |
+        | | type | name | label |
+        | | text | q1 | hello |
+        """
+        self.assertPyxformXform(
+            md=md,
+            errored=True,
+            error__contains=[
+                ErrorCode.HEADER_006.value.format(
+                    sheet_name=co.SETTINGS,
+                    column="form_title::French (fr)",
+                    base="form_title",
+                )
+            ],
+        )
+
     def test_form_id(self):
         """Should find the instance id set in the XForm."""
         md = """
@@ -103,6 +125,55 @@ def test_name__from_file__invalid_characters__error(self):
             error__contains=[ErrorCode.NAMES_009.value.format(name="form_name")],
         )
 
+    def test_submission_url__http__valid(self):
+        """Should allow full HTTP submission URLs."""
+        md = """
+        | settings |
+        | | submission_url |
+        | | http://example.com/submit |
+
+        | survey |
+        | | type | name | label |
+        | | text | q1 | hello |
+        """
+        self.assertPyxformXform(
+            md=md,
+            xml__xpath_match=[
+                """
+                /h:html/h:head/x:model/x:submission[
+                  @action='http://example.com/submit'
+                  and @method='post'
+                ]
+                """
+            ],
+        )
+
+    def test_submission_url__invalid__error(self):
+        """Should raise an error for obviously invalid submission URLs."""
+        md = """
+        | settings |
+        | | submission_url |
+        | | {submission_url} |
+
+        | survey |
+        | | type | name | label |
+        | | text | q1 | hello |
+        """
+        bad_urls = (
+            "not_a_url",
+            "/submission",
+            "ftp://example.com/submission",
+            "https://",
+            "https://example .com/submission",
+        )
+        for submission_url in bad_urls:
+            with self.subTest(msg=submission_url):
+                self.assertPyxformXform(
+                    md=md.format(submission_url=submission_url),
+                    errored=True,
+                    error__contains=[ErrorCode.SETTINGS_001.value.format()],
+                )
+
     def test_clean_text_values__yes(self):
         """Should find clean_text_values=yes (default) collapses survey sheet whitespace."""
         md = """
diff --git a/tests/test_sheet_columns.py b/tests/test_sheet_columns.py
index 8f6d7ff8..c663f48b 100644
--- a/tests/test_sheet_columns.py
+++ b/tests/test_sheet_columns.py
@@ -143,6 +143,23 @@ def test_media_column__is_ignored(self):
             xml__excludes=["m.png"],
         )
 
+    def test_non_translatable_column__translated__error(self):
+        self.assertPyxformXform(
+            md="""
+            | survey | | | | |
+            | | type | name | label | appearance::French (fr) |
+            | | text | q1 | hello | minimal |
+            """,
+            errored=True,
+            error__contains=[
+                ErrorCode.HEADER_006.value.format(
+                    sheet_name=constants.SURVEY,
+                    column="appearance::French (fr)",
+                    base="appearance",
+                )
+            ],
+        )
+
     def test_column_case(self):
         """
         Ensure that column name is case insensitive