%% Provenance: the three EUnit tests below are the lines added to
%% test/diffy_tests.erl by the patch 42065a4..755281a (hunk
%% @@ -320,6 +320,98 @@). They sit right after the end of diff_test/0 and
%% before the "Helpers" section.

%% Exercises the {Start, Length} branch of compute_diff/3, i.e. the case where
%% binary:match(LongText, ShortText) succeeds because the short text is a
%% verbatim substring of the long text.
%%
%% The last two assertions are contrast cases: there the overlap is a common
%% suffix, so it is stripped before compute_diff runs and the substring branch
%% is not the one producing the result.
compute_diff_substring_match_test() ->
    %% "test" found inside "a-test-b": no common prefix ('t' /= 'a') and no
    %% common suffix ('t' /= 'b'), so split_pre_and_suffix leaves both texts
    %% unchanged. compute_diff sees ShortText = <<"test">>, LongText =
    %% <<"a-test-b">>, and binary:match finds "test" at byte offset 2.
    ?assertEqual([{insert, <<"a-">>}, {equal, <<"test">>}, {insert, <<"-b">>}],
                 diffy:diff(<<"test">>, <<"a-test-b">>)),

    %% Reversed direction: the surrounding text is deleted instead of inserted.
    ?assertEqual([{delete, <<"a-">>}, {equal, <<"test">>}, {delete, <<"-b">>}],
                 diffy:diff(<<"a-test-b">>, <<"test">>)),

    %% Contrast case, "barfoo" vs "foo": split_pre_and_suffix strips "foo" as
    %% the common suffix, compute_diff sees <<"bar">> vs <<>> and yields
    %% [{delete, <<"bar">>}]; the stripped suffix is appended as an equal.
    ?assertEqual([{delete, <<"bar">>}, {equal, <<"foo">>}],
                 diffy:diff(<<"barfoo">>, <<"foo">>)),

    %% Contrast case, "prefoo" vs "foo": no common prefix ('p' /= 'f'), common
    %% suffix "foo" stripped; compute_diff sees <<"pre">> vs <<>>.
    ?assertEqual([{delete, <<"pre">>}, {equal, <<"foo">>}],
                 diffy:diff(<<"prefoo">>, <<"foo">>)),

    ok.

%% Verifies that diff/2 handles non-ASCII text correctly when it precedes an
%% ASCII common suffix. U+0100 (Ā) encodes in UTF-8 as the two bytes 196,128,
%% so its first byte never equals the first byte of the ASCII text.
diff_non_ascii_prefix_test() ->
    %% diff(<<"a">>, <<Ā/utf8, "a">>): no common prefix (first bytes differ:
    %% 97 vs 196), but "a" is a common suffix. After stripping the suffix
    %% compute_diff sees <<>> vs <<196,128>>.
    ?assertEqual(
        [{insert, <<$\x{100}/utf8>>}, {equal, <<"a">>}],
        diffy:diff(<<"a">>, <<$\x{100}/utf8, "a">>)),

    %% Longer variant: two Ā codepoints precede "ab". The common suffix "ab"
    %% is stripped; compute_diff sees <<>> vs <<Ā/utf8, Ā/utf8>>.
    ?assertEqual(
        [{insert, <<$\x{100}/utf8, $\x{100}/utf8>>}, {equal, <<"ab">>}],
        diffy:diff(<<"ab">>, <<$\x{100}/utf8, $\x{100}/utf8, "ab">>)),

    %% Ā before "test": no common prefix (196 /= 116), common suffix "test"
    %% stripped; compute_diff sees <<>> vs <<Ā/utf8>>.
    ?assertEqual([{insert, <<$\x{100}/utf8>>}, {equal, <<"test">>}],
                 diffy:diff(<<"test">>, <<$\x{100}/utf8, "test">>)),

    ok.

%% Branch-by-branch overview of compute_diff/3, driven through diffy:diff/2.
%% Each section names the branch it targets and why the chosen input is
%% expected to reach it.
compute_diff_test() ->
    %% Branch 1: OldText is empty -> pure insert.
    ?assertEqual([{insert, <<"hello">>}], diffy:diff(<<>>, <<"hello">>)),

    %% Branch 2: NewText is empty -> pure delete.
    ?assertEqual([{delete, <<"hello">>}], diffy:diff(<<"hello">>, <<>>)),

    %% Branch 2 reached through affix stripping. In both calls OldText is the
    %% longer text and "foo" is shared at one end, so split_pre_and_suffix
    %% strips it and compute_diff only processes <<"bar">> vs <<>>. These are
    %% therefore pure-delete cases, not substring-match cases.
    ?assertEqual([{delete, <<"bar">>}, {equal, <<"foo">>}],
                 diffy:diff(<<"barfoo">>, <<"foo">>)),
    ?assertEqual([{equal, <<"foo">>}, {delete, <<"bar">>}],
                 diffy:diff(<<"foobar">>, <<"foo">>)),

    %% Branch 3: ShortText is a substring of LongText. The texts share neither
    %% a prefix nor a suffix, so nothing is stripped and binary:match has to
    %% locate "test" inside "a-test-b".
    %% OldText shorter -> the surrounding text is inserted.
    ?assertEqual([{insert, <<"a-">>}, {equal, <<"test">>}, {insert, <<"-b">>}],
                 diffy:diff(<<"test">>, <<"a-test-b">>)),
    %% OldText longer -> the surrounding text is deleted.
    ?assertEqual([{delete, <<"a-">>}, {equal, <<"test">>}, {delete, <<"-b">>}],
                 diffy:diff(<<"a-test-b">>, <<"test">>)),

    %% Branch 4a: single-codepoint ShortText with no match in LongText
    %% -> [{delete, OldText}, {insert, NewText}].
    ?assertEqual([{delete, <<"x">>}, {insert, <<"test">>}],
                 diffy:diff(<<"x">>, <<"test">>)),
    ?assertEqual([{delete, <<"test">>}, {insert, <<"x">>}],
                 diffy:diff(<<"test">>, <<"x">>)),

    %% Branch 4b: no substring relationship, more than one codepoint each;
    %% falls through to try_half_match / bisect. The exact edit script is not
    %% pinned here, only that it round-trips to both input texts.
    Old = <<"the cat sat on the mat">>,
    New = <<"the dog sat on the rug">>,
    Diffs = diffy:diff(Old, New),
    ?assertEqual(Old, diffy:source_text(Diffs)),
    ?assertEqual(New, diffy:destination_text(Diffs)),

    ok.

%%
%% Helpers