diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ca85721..7adb427 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -18,14 +18,14 @@ jobs:
 
     strategy:
       matrix:
-        otp_version: [22,23,24]
+        otp_version: [26,27,28]
         os: [ubuntu-latest]
 
     container:
       image: erlang:${{ matrix.otp_version }}
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Compile
         run: make
       - name: Test
diff --git a/Makefile b/Makefile
index 82eaca1..716ad98 100644
--- a/Makefile
+++ b/Makefile
@@ -28,13 +28,18 @@ xref: $(REBAR)
 dialyzer: $(REBAR)
 	$(REBAR) as test dialyzer
 
-clean: $(REBAR)
+clean: $(REBAR) clean_doc
 	$(REBAR) clean
 
-distclean:
-	rm -rf _build
-	rm $(REBAR)
+clean_doc:
+	@rm -rf doc
 
+distclean: clean_doc
+	@rm -rf _build
+	@rm -f $(REBAR)
+
+doc: $(REBAR)
+	$(REBAR) ex_doc --output doc --formatter html
 
 # dializer 
 
diff --git a/rebar.config b/rebar.config
index adb9366..ce7ffd3 100644
--- a/rebar.config
+++ b/rebar.config
@@ -1,19 +1,29 @@
 {erl_opts, [debug_info, warn_unused, warn_shadow_vars]}.
-{deps, [
-    {zotonic_stdlib, "1.2.3"}
-]}.
 
+{deps, [ ]}.
+
+{project_plugins, [rebar3_ex_doc]}.
+
+{hex, [{doc, ex_doc}]}.
+
+{ex_doc, [
+    {source_url, <<"https://github.com/zotonic/diffy">>},
+    {extras, [<<"README.md">>, <<"LICENSE">>]},
+    {main, <<"readme">>}
+]}.
 
 {profiles, [
     {test, [
         {deps, [
-            {proper, "1.2.0"}
+            {proper, "1.5.0"}
         ]},
 
         {xref_checks, [
             undefined_function_calls,
+            undefined_functions,
             locals_not_used,
-            deprecated_function_calls
+            deprecated_function_calls,
+            deprecated_functions
         ]},
 
         {xref_ignores, [
@@ -21,8 +31,16 @@
 
         {dialyzer, [
           {warnings, [
-              no_return
+              no_return,
+              extra_return,
+              missing_return,
+              underspecs
+          ]},
+          {plt_extra_apps, [
+              eunit,
+              proper
           ]}
         ]}
     ]}
 ]}.
+
diff --git a/rebar.lock b/rebar.lock
index 05bb477..57afcca 100644
--- a/rebar.lock
+++ b/rebar.lock
@@ -1,11 +1 @@
-{"1.2.0",
-[{<<"proper">>,{pkg,<<"proper">>,<<"1.2.0">>},0},
- {<<"zotonic_stdlib">>,{pkg,<<"zotonic_stdlib">>,<<"1.2.3">>},0}]}.
-[
-{pkg_hash,[
- {<<"proper">>, <<"1466492385959412A02871505434E72E92765958C60DBA144B43863554B505A4">>},
- {<<"zotonic_stdlib">>, <<"4A33B60C82379169C9934CCD1FC9E512CA16B922E131AD6B6D26E562F66DF9CC">>}]},
-{pkg_hash_ext,[
- {<<"proper">>, <<"CBC3766C08337806741343D330BF4BCB826155D2141BE8514C4B02858AA19FD3">>},
- {<<"zotonic_stdlib">>, <<"4712DD7A0C0C600AFEDAFDA738D40FEBF10CFC2485E62D109361FCC190F7381A">>}]}
-].
+[].
diff --git a/src/diffy.erl b/src/diffy.erl
index fc75a87..d9b7f5d 100644
--- a/src/diffy.erl
+++ b/src/diffy.erl
@@ -1,9 +1,10 @@
 %% @author Maas-Maarten Zeeman <mmzeeman@xs4all.nl>
-%% @copyright 2014-2019 Maas-Maarten Zeeman
+%% @copyright 2014-2026 Maas-Maarten Zeeman
 %%
 %% @doc Diffy, an erlang diff match and patch implementation 
+%% @end
 %%
-%% Copyright 2014-2019 Maas-Maarten Zeeman
+%% Copyright 2014-2026 Maas-Maarten Zeeman
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -16,12 +17,12 @@
 %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 %% See the License for the specific language governing permissions and
 %% limitations under the License.
-%% Erlang diff-match-patch implementation
 
 -module(diffy).
 
 -export([
     diff/2,
+    diff/3,
     diff_bisect/2,
     diff_linemode/2,
 
@@ -43,24 +44,30 @@
 
     text_size/1,
 
-    split_pre_and_suffix/2,
-    unique_match/2
+    split_pre_and_suffix/2
 ]).
 
 -type diff_op() :: delete | equal | insert.
 -type diff() :: {diff_op(), unicode:unicode_binary()}.
 -type diffs() :: list(diff()).
 
+-type diff_option() ::
+    semantic |
+    efficiency |
+    {efficiency, EditCost :: pos_integer()} |
+    no_linemode.
+
 -type for_fun() :: fun((integer(), term()) -> {continue, term()} | {break, term()}).
 
--export_type([diffs/0]).
+-export_type([diff_op/0, diff/0, diffs/0, diff_option/0]).
 
+-define(DEFAULT_EDIT_COST, 4).
 -define(PATCH_MARGIN, 4).
--define(PATCH_MAX_PATCH_LEN, 32).
-
--define(MATCH_MAXBITS, 31).
-
 -define(IS_INS_OR_DEL(Op), (Op =:= insert orelse Op =:= delete)).
+-define(IS_UTF32_ALIGNED(Offset), (Offset rem 4 =:= 0)).
+-define(IS_WS(C), (C =:= $\s orelse C =:= $\t orelse C =:= $\n orelse C =:= $\r orelse C =:= $\f orelse C =:= $\v)).
+-define(IS_LB(C), (C =:= $\n orelse C =:= $\r)).
+-define(IS_ALPHA(C), ((C >= $a andalso C =< $z) orelse (C >= $A andalso C =< $Z) orelse (C >= $0 andalso C =< $9))).
 
 -record(bisect_state, {
     k1start = 0, k1end = 0,
@@ -79,58 +86,86 @@
     length2 = 0
 }).
 
-% @doc Compute the difference between two binary texts
-%
+-dialyzer({no_match, for/5}).
+
+% @doc Compute the difference between two binary texts.
 -spec diff(unicode:unicode_binary(), unicode:unicode_binary()) -> diffs().
 diff(Text1, Text2) ->
-    diff(Text1, Text2, true).
+    diff(Text1, Text2, []).
 
-diff(<<>>, <<>>, _CheckLines) ->
-    [];
-diff(Text1, Text2, _CheckLines) when Text1 =:= Text2 ->
-    [{equal, Text1}];
-diff(Text1, Text2, CheckLines) ->
+% @doc Compute the difference between two binary texts with options.
+%
+% Options:
+%   semantic             - run cleanup_semantic/1 on the result
+%   efficiency           - run cleanup_efficiency/1 on the result (default edit cost 4)
+%   {efficiency, Cost}   - run cleanup_efficiency/2 with a custom edit cost
+%   no_linemode          - disable the linemode optimization for large texts
+%
+% Cleanups are always applied in the correct order: semantic first, then efficiency.
+-spec diff(unicode:unicode_binary(), unicode:unicode_binary(), [diff_option()]) -> diffs().
+diff(Text1, Text2, Options) when is_list(Options) ->
+    T1 = to_utf32(Text1),
+    T2 = to_utf32(Text2),
+    CheckLines = not proplists:get_value(no_linemode, Options, false),
+    Diffs32 = diff32(T1, T2, CheckLines),
+    Diffs1 = case proplists:get_value(semantic, Options) of
+                 true  -> cleanup_semantic32(Diffs32);
+                 _ -> Diffs32
+             end,
+    Diffs2 = case proplists:get_value(efficiency, Options) of
+                 NoEfficiency when NoEfficiency =:= undefined orelse NoEfficiency =:= false  -> Diffs1;
+                 true -> cleanup_efficiency32(Diffs1);
+                 Cost when is_integer(Cost) andalso Cost > 0 -> cleanup_efficiency32(Diffs1, Cost)
+             end,
+    %% Single conversion at the exit boundary.
+    [{Op, to_utf8(D)} || {Op, D} <- Diffs2].
+
+%% Internal diff working entirely in UTF-32 binaries.
+diff32(<<>>, <<>>, _CheckLines) -> [];
+diff32(<<>>, Text2, _CheckLines) -> [{insert, Text2}];
+diff32(Text1, <<>>, _CheckLines) -> [{delete, Text1}];
+diff32(Text1, Text2, _CheckLines) when Text1 =:= Text2 -> [{equal, Text1}];
+diff32(Text1, Text2, CheckLines) ->
     {Prefix, MText1, MText2, Suffix} = split_pre_and_suffix(Text1, Text2),
 
     Diffs = compute_diff(MText1, MText2, CheckLines),
 
     Diffs1 = case Suffix of
-        <<>> -> Diffs;
-        _ -> Diffs ++ [{equal, Suffix}]
-    end,
+                 <<>> -> Diffs;
+                 _ -> Diffs ++ [{equal, Suffix}]
+             end,
 
-    Diffs2 = case Prefix of 
-        <<>> -> Diffs1;
-        _ -> [{equal, Prefix} | Diffs1]
-    end,
+    Diffs2 = case Prefix of
+                 <<>> -> Diffs1;
+                 _ -> [{equal, Prefix} | Diffs1]
+             end,
 
-    cleanup_merge(Diffs2).
+    cleanup_merge32(Diffs2).
 
-%% This assumes Text1 and Text2 don't have a common prefix
-compute_diff(<<>>, NewText, _CheckLines) ->
-    [{insert, NewText}];
-compute_diff(OldText, <<>>, _CheckLines) ->
-    [{delete, OldText}];
+%% This assumes Text1 and Text2 don't have a common prefix. Operates on UTF-32.
+compute_diff(<<>>, NewText, _CheckLines) -> [{insert, NewText}];
+compute_diff(OldText, <<>>, _CheckLines) -> [{delete, OldText}];
 compute_diff(OldText, NewText, CheckLines) ->
     OldStNew = size(OldText) < size(NewText),
 
     {ShortText, LongText} = case OldStNew of
-        true -> {OldText, NewText};
-        false -> {NewText, OldText}
-    end,
+                                true -> {OldText, NewText};
+                                false -> {NewText, OldText}
+                            end,
 
-    case binary:match(LongText, ShortText) of
+    case aligned_utf32_match(LongText, ShortText, 0) of
         {Start, Length} ->
             <<Pre:Start/binary, _:Length/binary, Suf/binary>> = LongText,
             Op = diff_op(OldStNew),
-            [{Op, Pre}, {equal, ShortText}, {Op, Suf}]; 
+            [{Op, Pre}, {equal, ShortText}, {Op, Suf}];
         nomatch ->
-            case single_char(ShortText) of
+            %% In UTF-32, a single codepoint is exactly 4 bytes.
+            case size(ShortText) =:= 4 of
                 true ->
                     [{delete, OldText}, {insert, NewText}];
                 false ->
                     try_half_match(OldText, NewText, CheckLines)
-             end
+            end
     end.
 
 diff_op(true) -> insert;
@@ -140,49 +175,56 @@ diff_op(false) -> delete.
 try_half_match(OldText, NewText, CheckLines) ->
     case half_match(OldText, NewText) of
         {half_match, A1, A2, B1, B2, Common} ->
-            Diffs1 = diff(A1, B1, CheckLines),
-            Diffs2 = diff(A2, B2, CheckLines),
+            Diffs1 = diff32(A1, B1, CheckLines),
+            Diffs2 = diff32(A2, B2, CheckLines),
             Diffs1 ++ [{equal, Common} | Diffs2];
         undefined ->
             compute_diff1(OldText, NewText, CheckLines)
     end.
 
 %% Check if we can do a half-match diff, returns undefined if it is not advantageous.
+%% Operates on UTF-32 binaries — size comparisons are in bytes (4 bytes per codepoint).
 half_match(A, B) ->
-    AGtB = size(A) > size(B),
-    {Short, Long} = case AGtB of
-        true -> {B, A};
-        false -> {A, B}
-    end,
+    AgtB = size(A) > size(B),
+    {Short, Long} = case AgtB of
+                        true -> {B, A};
+                        false -> {A, B}
+                    end,
+
+    LongSize = size(Long),
+    ShortSize = size(Short),
 
-    case text_smaller_than(Long, 4) orelse size(Short) * 2 < size(Long) of
+    %% text_smaller_than(Long, 4) becomes size(Long) < 4*4 in UTF-32.
+    case LongSize < 16 orelse ShortSize * 2 < LongSize of
         true ->
             %% No point in looking.
             undefined;
         false ->
-            %% Note: this could split through a utf8 byte sequence.
-            Hm1 = half_match_i(Long, Short, (size(Long) + 3) div 4),
-            Hm2 = half_match_i(Long, Short, (size(Long) + 1) div 2),
+            %% Seed positions are quarter-way and half-way through Long,
+            %% expressed as byte offsets (codepoints * 4).
+            LongLen = LongSize div 4,  %% codepoint count
+            Hm1 = half_match_i(Long, Short, ((LongLen + 3) div 4) * 4),
+            Hm2 = half_match_i(Long, Short, ((LongLen + 1) div 2) * 4),
 
             %% Select the longest half-match.
             Hm = case {Hm1, Hm2} of
-                {undefined, undefined} -> 
-                    undefined;
-                {undefined, _} -> 
-                    Hm2;
-                {_, undefined} -> 
-                    Hm1;
-                {{half_match, _, _, _, _, C1}, {half_match, _, _, _, _, C2}} when size(C1) > size(C2) ->
-                    Hm1;
-                {_, _} ->
-                    Hm2
-            end,
+                     {undefined, undefined} -> 
+                         undefined;
+                     {undefined, _} -> 
+                         Hm2;
+                     {_, undefined} -> 
+                         Hm1;
+                     {{half_match, _, _, _, _, C1}, {half_match, _, _, _, _, C2}} when size(C1) > size(C2) ->
+                         Hm1;
+                     {_, _} ->
+                         Hm2
+                 end,
 
             %% Swap values if A was smaller than B
             case Hm of
                 undefined -> undefined;
                 {half_match, T1A, T1B, T2A, T2B, MidCommon} ->
-                    case AGtB of
+                    case AgtB of
                         true -> Hm;
                         false ->
                             {half_match, T2A, T2B, T1A, T1B, MidCommon}
@@ -190,24 +232,19 @@ half_match(A, B) ->
             end
     end.
 
-
 % Find the best common overlap at location I.
 half_match_i(Long, Short, I) ->
     {NewI, Seed} = seed(Long, I),
     case Seed of
-        <<>> -> 
-            undefined;
-        _ ->
-            best_common(Long, Short, Seed, NewI, 0, 
-                undefined, undefined, undefined, undefined, <<>>) 
+        <<>> -> undefined;
+        _ -> best_common(Long, Short, Seed, NewI, 0, <<>>, <<>>, <<>>, <<>>, <<>>) 
     end.
 
-
 %% Find the best common overlap inside two texts.
 best_common(Long, Short, Seed, SeedLoc, Start, 
         BestLongA, BestLongB, BestShortA, BestShortB, BestCommon) ->
     %% Check if we can find a match for Seed2 inside the shorttext.
-    case binary:match(Short, Seed, [{scope, {Start, size(Short)-Start}}]) of
+    case aligned_utf32_match(Short, Seed, Start) of
         nomatch -> 
             case size(BestCommon) * 2 >= size(Long) of
                 false -> 
@@ -248,45 +285,68 @@ best_common(Long, Short, Seed, SeedLoc, Start,
             end
     end.
 
-%% @doc Return the position of the next character.
-next_char(Bin, Pos) ->
-    <<_:Pos/binary, C/utf8, _Rest/binary>> = Bin,
-    %% The next char is at binary position...
-    Pos + size(<<C/utf8>>). 
+%% @doc Find a match whose start offset is aligned to a UTF-32 codepoint boundary.
+aligned_utf32_match(Bin, Pattern, Start)
+  when ?IS_UTF32_ALIGNED(Start) andalso Start >= 0 ->
+    case Start + size(Pattern) > size(Bin) of
+        true ->
+            nomatch;
+        false ->
+            case binary:match(Bin, Pattern, [{scope, {Start, size(Bin) - Start}}]) of
+                nomatch ->
+                    nomatch;
+                {MatchStart, Length} when ?IS_UTF32_ALIGNED(MatchStart) ->
+                    %% Match found, and it is correctly aligned.
+                    {MatchStart, Length};
+                {MatchStart, _Length} ->
+                    %% Misaligned hit. binary:match found the first byte-level match,
+                    %% so there is no aligned match before MatchStart. Skip directly
+                    %% to the next aligned boundary after MatchStart.
+                    aligned_utf32_match(Bin, Pattern, MatchStart + (4 - MatchStart rem 4))
+            end
+    end.
+
+%% @doc Return the byte position of the next codepoint in a UTF-32 binary.
+next_char(_Bin, Pos) ->
+    Pos + 4.
 
-%% 
+%%
+%% In UTF-32 every codepoint is exactly 4 bytes. Start is always a 4-byte-aligned
+%% byte offset, so no alignment step is needed.
 seed(Long, Start) ->
-    SeedSize = size(Long) div 4,
-
-    %% Note, need to split on utf8 character boundary here.
+    TotalCodepoints = size(Long) div 4,
+    SeedCodepoints = TotalCodepoints div 4,
+    SeedSize = SeedCodepoints * 4,
     <<_Pre:Start/binary, Seed:SeedSize/binary, _Post/binary>> = Long,
-
-    %% Utf-8 repair the seed's head and tail. 
-    {Pre, Seed1} = repair_head(Seed),
-    {Seed2, _} = repair_tail(Seed1),
-
-    %% return the start position of the seed and the seed itself.
-    {Start - size(Pre), Seed2}.
+    {Start, Seed}.
 
 
 %% Line diff
 compute_diff1(Text1, Text2, true) ->
-    diff_linemode(Text1, Text2);
-compute_diff1(Text1, Text2, false) when size(Text1) > 100 orelse size(Text2) > 100 ->
-    diff_linemode(Text1, Text2);
+    diff_linemode32(Text1, Text2);
+compute_diff1(Text1, Text2, false) when size(Text1) > 400 orelse size(Text2) > 400 ->
+    %% 100 UTF-8 bytes hold at most 100 codepoints, i.e. at most 400 UTF-32 bytes.
+    diff_linemode32(Text1, Text2);
 compute_diff1(Text1, Text2, false) ->
-    diff_bisect(Text1, Text2).
+    diff_bisect32(Text1, Text2).
 
 
-%% Compute diff in linemode
+%% Public entry: accepts UTF-8, converts at boundary.
 diff_linemode(Text1, Text2) ->
+    T1 = to_utf32(Text1),
+    T2 = to_utf32(Text2),
+    Diffs32 = diff_linemode32(T1, T2),
+    [{Op, to_utf8(D)} || {Op, D} <- Diffs32].
+
+%% Internal: operates entirely on UTF-32 binaries.
+diff_linemode32(Text1, Text2) ->
     {CharText1, CharText2, Lines} = lines_to_chars(Text1, Text2),
-    Diffs = diff(CharText1, CharText2, false),
+    Diffs = diff32(CharText1, CharText2, false),
 
     %% Transform the diffs back to lines.
-    Diffs1 = chars_to_lines(Diffs, Lines),
+    Diffs1 = decode_lines(Diffs, Lines),
 
-    Cleaned = cleanup_merge(Diffs1),
+    Cleaned = cleanup_merge32(Diffs1),
     cleanup_line_diff(Cleaned, <<>>, <<>>, [], []).
 
 
@@ -310,57 +370,56 @@ cleanup_line_diff([{equal, _}=E|Rest], DeleteData, InsertData, TmpAcc, Acc)
 
 %% Found leading insert and delete data, diff the texts and replace the operations.
 cleanup_line_diff([{equal, _}=E|Rest], DeleteData, InsertData, _TmpAcc, Acc) ->
-    %% rediff the delete and insert data.
-    Diffs = diff(DeleteData, InsertData, false),
+    %% Data is already UTF-32 — pass directly to diff32.
+    Diffs = diff32(DeleteData, InsertData, false),
     Acc1 = lists:reverse(Diffs) ++ Acc,
     cleanup_line_diff(Rest, <<>>, <<>>, [], [E|Acc1]).
 
 
-%% Diff lines
+%% Diff lines.
+%% Text1 and Text2 are UTF-32 binaries. Lines are stored as UTF-32 binaries.
+%% CharText1/CharText2 are UTF-32 binaries where each 4-byte word is a line index.
 lines_to_chars(Text1, Text2) ->
-    {CharText1, NextChar, Lines1, Dict1} = lines_to_chars(Text1, 0, <<>>, 0, [], dict:new()),
-    {CharText2, _, Lines2, _Dict2} = lines_to_chars(Text2, 0, <<>>, NextChar, Lines1, Dict1),
-
+    {CharText1, NextChar, Lines1, Map1} = lines_to_chars(Text1, 0, <<>>, 0, [], #{}),
+    {CharText2, _, Lines2, _Map2} = lines_to_chars(Text2, 0, <<>>, NextChar, Lines1, Map1),
     {CharText1, CharText2, lists:reverse(Lines2)}.
 
-% Transform each unique line into a single char
-lines_to_chars(Text, Idx, CharText, NextChar, Lines, D) when Idx >= size(Text) ->
-    {CharText, NextChar, Lines, D};
-lines_to_chars(Text, Idx, CharText, NextChar, Lines, D) ->
-    case binary:match(Text, <<"\n">>, [{scope, {Idx, size(Text)-Idx}}]) of
+%% Transform each unique line into a 4-byte index; store line content as UTF-32.
+lines_to_chars(Text, Idx, CharText, NextChar, Lines, Map) when Idx >= byte_size(Text) ->
+    {CharText, NextChar, Lines, Map};
+lines_to_chars(Text, Idx, CharText, NextChar, Lines, Map) when ?IS_UTF32_ALIGNED(Idx) ->
+    case aligned_utf32_match(Text, <<$\n:32>>, Idx) of
         nomatch ->
             <<_:Idx/binary, Line/binary>> = Text,
-            {Char, NextChar1, Lines1, D1} = insert_line(Line, Lines, D, NextChar),
-            CharText1 = <<CharText/binary, Char/utf8>>,
-            {CharText1, NextChar1, Lines1, D1};
+            {Char, NextChar1, Lines1, Map1} = insert_line(Line, Lines, Map, NextChar),
+            CharText1 = <<CharText/binary, Char:32>>,
+            {CharText1, NextChar1, Lines1, Map1};
         {Start, _} ->
-            LineLength = Start - Idx + 1,
+            LineLength = Start - Idx + 4,
             <<_:Idx/binary, Line:LineLength/binary, _/binary>> = Text,
-
-            {Char, NextChar1, Lines1, D1} = insert_line(Line, Lines, D, NextChar),
-            CharText1 = <<CharText/binary, Char/utf8>>,
-
-            lines_to_chars(Text, Idx + LineLength, CharText1, NextChar1, Lines1, D1) 
+            {Char, NextChar1, Lines1, Map1} = insert_line(Line, Lines, Map, NextChar),
+            CharText1 = <<CharText/binary, Char:32>>,
+            lines_to_chars(Text, Idx + LineLength, CharText1, NextChar1, Lines1, Map1)
     end.
 
-insert_line(Line, Lines, Dict, NextChar) ->
-    case dict:find(Line, Dict) of
-        {ok, Char} ->
-            {Char, NextChar, Lines, Dict};
-        error ->
-            {NextChar, NextChar+1, [Line|Lines], dict:store(Line, NextChar, Dict)}
+insert_line(Line, Lines, Map, NextChar) ->
+    case Map of
+        #{Line := Char} ->
+            {Char, NextChar, Lines, Map};
+        _ ->
+            {NextChar, NextChar + 1, [Line | Lines], Map#{Line => NextChar}}
     end.
 
-%%
-chars_to_lines(Diffs, Lines) when is_list(Lines) ->
-    A = array:from_list(Lines),
-    chars_to_lines(Diffs, A, []).
+decode_lines(Diffs, Lines) when is_list(Lines) ->
+    LinesTuple = list_to_tuple(Lines),
+    decode_lines(Diffs, LinesTuple, []).
 
-chars_to_lines([], _A, Acc) ->
+decode_lines([], _LinesTuple, Acc) ->
     lists:reverse(Acc);
-chars_to_lines([{Op, Data}|Rest], LineArray, Acc) ->
-    Data1 = << <<(array:get(C, LineArray))/binary>> || <<C/utf8>> <= Data >>,
-    chars_to_lines(Rest, LineArray, [{Op, Data1}|Acc]).
+decode_lines([{Op, Data} | Rest], LinesTuple, Acc) ->
+    %% Each index is a 32-bit word; lines are already UTF-32 — just concatenate.
+    Data1 = << <<(element(C + 1, LinesTuple))/binary>> || <<C:32>> <= Data >>,
+    decode_lines(Rest, LinesTuple, [{Op, Data1} | Acc]).
 
 
 % Find the 'middle snake' of a diff, split the problem in two
@@ -375,20 +434,26 @@ chars_to_lines([{Op, Data}|Rest], LineArray, Acc) ->
 %%    Returns:
 %%      Array of diff tuples.
 %%    """
+%% Public entry point — converts UTF-8 inputs to UTF-32, runs bisect, converts back.
 diff_bisect(A, B) when is_binary(A) andalso is_binary(B) ->
-    ArrA = array_from_binary(A),
-    ArrB = array_from_binary(B),
-    try compute_diff_bisect1(ArrA, ArrB, array:size(ArrA), array:size(ArrB)) of
-        no_overlap -> [{delete, A}, {insert, B}] 
+    Diffs32 = diff_bisect32(to_utf32(A), to_utf32(B)),
+    [{Op, to_utf8(D)} || {Op, D} <- Diffs32].
+
+%% Internal bisect working entirely on UTF-32 binaries.
+diff_bisect32(A, B) ->
+    M = byte_size(A) div 4,
+    N = byte_size(B) div 4,
+    try compute_diff_bisect1(A, B, M, N) of
+        no_overlap -> [{delete, A}, {insert, B}]
     catch
-        throw:{overlap, A1, B1, X, Y} ->
-            diff_bisect_split(A1, B1, X, Y)
+        throw:{overlap, X, Y} ->
+            diff_bisect_split(A, B, X, Y)
     end.
 
 compute_diff_bisect1(A, B, M, N) ->
     %% TODO, add deadline... 
     
-    MaxD = int_ceil((M + N) / 2),
+    MaxD = ceil((M + N) / 2),
 
     VOffset = MaxD,
     VLength = 2 * MaxD,
@@ -410,11 +475,13 @@ compute_diff_bisect1(A, B, M, N) ->
         S3 = for(-D + S1#bisect_state.k1start, D + 1 - S1#bisect_state.k1end, 2, fun(K1, S2) ->
             K1Offset = VOffset + K1,
 
-            X1 = case K1 =:= -D orelse (K1 =/= D andalso 
-                    (array:get(K1Offset-1, S2#bisect_state.v1) < array:get(K1Offset+1, S2#bisect_state.v1))) of
-                true -> array:get(K1Offset + 1, S2#bisect_state.v1);
-                false -> array:get(K1Offset - 1, S2#bisect_state.v1) + 1
-            end,
+            X1 = case K1 =:= -D
+                      orelse (K1 =/= D
+                              andalso (array:get(K1Offset-1, S2#bisect_state.v1) < array:get(K1Offset+1, S2#bisect_state.v1)))
+                 of
+                     true -> array:get(K1Offset + 1, S2#bisect_state.v1);
+                     false -> array:get(K1Offset - 1, S2#bisect_state.v1) + 1
+                 end,
 
             Y1 = X1 - K1,
             {X1_1, Y1_1} = match_front(X1, Y1, A, M, B, N),
@@ -439,12 +506,10 @@ compute_diff_bisect1(A, B, M, N) ->
                                 true ->
                                     % Mirror x2 onto top-left coordinate system.
                                     X2 = M - V2AtOffset,
-                                    if 
-                                        X1_1 >= X2 ->
-                                            % Overlap detected
-                                            throw({overlap, A, B, X1_1, Y1_1});
-                                        true ->
-                                            {continue, S2_1}
+                                    case X1_1 >= X2 of 
+                                        % Overlap detected
+                                        true -> throw({overlap, X1_1, Y1_1});
+                                        false -> {continue, S2_1}
                                     end;
                                 false -> {continue, S2_1}
                             end
@@ -456,13 +521,13 @@ compute_diff_bisect1(A, B, M, N) ->
         %% Walk the reverse path one step. (verdacht hetzelfde als het ding hierboven...)
         S5 = for(-D + S3#bisect_state.k2start, D + 1 - S3#bisect_state.k2end, 2, fun(K2, S4) ->
             K2Offset = VOffset + K2,
-            X2 = case K2 =:= -D orelse (K2 =/= D andalso 
-                        array:get(K2Offset-1, S4#bisect_state.v2) < array:get(K2Offset+1, S4#bisect_state.v2)) of
-                true -> 
-                    array:get(K2Offset + 1, S4#bisect_state.v2);
-                false -> 
-                    array:get(K2Offset - 1, S4#bisect_state.v2) + 1
-            end,
+            X2 = case K2 =:= -D
+                      orelse (K2 =/= D
+                              andalso array:get(K2Offset-1, S4#bisect_state.v2) < array:get(K2Offset+1, S4#bisect_state.v2))
+                 of
+                     true -> array:get(K2Offset + 1, S4#bisect_state.v2);
+                     false -> array:get(K2Offset - 1, S4#bisect_state.v2) + 1
+                 end,
 
             Y2 = X2 - K2,
 
@@ -488,13 +553,11 @@ compute_diff_bisect1(A, B, M, N) ->
                                 true ->
                                     X1 = V1AtOffset,
                                     Y1 = VOffset + X1 - K1Offset,
-                                    if 
-                                        % Mirror x2 onto top-left coordinate system.
-                                        X1 >= M - X2_1 ->
-                                            % Overlap detected
-                                            throw({overlap, A, B, X1, Y1});
-                                        true ->
-                                            {continue, S4_1}
+                                    % Mirror x2 onto top-left coordinate system.
+                                    case X1 >= M - X2_1 of
+                                        % Overlap detected
+                                        true -> throw({overlap, X1, Y1});
+                                        false -> {continue, S4_1}
                                     end;
                                 false -> {continue, S4_1}
                             end
@@ -507,60 +570,68 @@ compute_diff_bisect1(A, B, M, N) ->
 
     no_overlap.
 
-% @doc Split A and B and process the parts.
+% @doc Split A and B at the overlap point and recursively diff each half.
 diff_bisect_split(A, B, X, Y) ->
-    A1 = binary_from_array(0, X, A),
-    A2 = binary_from_array(0, Y, B),
+    A1 = binary:part(A, 0, X * 4),
+    A2 = binary:part(B, 0, Y * 4),
+    B1 = binary:part(A, X * 4, byte_size(A) - X * 4),
+    B2 = binary:part(B, Y * 4, byte_size(B) - Y * 4),
 
-    B1 = binary_from_array(X, array:size(A), A),
-    B2 = binary_from_array(Y, array:size(B), B),
-
-    Diffs = diff(A1, A2, false),
-    DiffsB = diff(B1, B2, false),
-
-    Diffs ++ DiffsB.
+    diff32(A1, A2, false) ++ diff32(B1, B2, false).
 
 % @doc Convert the diffs into a pretty html report
--spec pretty_html(diffs()) -> iolist().
 pretty_html(Diffs) ->
     pretty_html(Diffs, []).
 
 pretty_html([], Acc) ->
     lists:reverse(Acc);
-pretty_html([{Op, Data}|T], Acc) ->
-    Text = z_html:escape(Data),
+pretty_html([{Op, Data} | T], Acc) ->
+    Safe = html_escape(Data),
     HTML = case Op of
         insert ->
-            [<<"<ins style='background:#e6ffe6;'>">>, Text, <<"</ins>">>];
+            [<<"<ins style='background:#e6ffe6;'>">>, Safe, <<"</ins>">>];
         delete ->
-            [<<"<del style='background:#ffe6e6;'>">>, Text, <<"</del>">>];
+            [<<"<del style='background:#ffe6e6;'>">>, Safe, <<"</del>">>];
         equal ->
-            [<<"<span>">>, Text, <<"</span>">>]
+            [<<"<span>">>, Safe, <<"</span>">>]
     end,
-    pretty_html(T, [HTML|Acc]).
+    pretty_html(T, [HTML | Acc]).
+
+-if(?OTP_RELEASE >= 27).
+html_escape(B) when is_binary(B) ->
+    binary:replace(B,
+                   [<<"&">>, <<"<">>, <<">">>, <<"\"">>, <<"'">>],
+                   fun (<<"&">>)   -> <<"&amp;">>;
+                       (<<"<">>)   -> <<"&lt;">>;
+                       (<<">">>)   -> <<"&gt;">>;
+                       (<<"\"">>)  -> <<"&quot;">>;
+                       (<<"'">>)   -> <<"&#39;">>
+                   end,
+                   [global]).
+-else.
+html_escape(B) when is_binary(B) ->
+    lists:foldl(fun({From, To}, Acc) ->
+                        binary:replace(Acc, From, To, [global])
+                end,
+                B,
+                [
+                 {<<"&">>,  <<"&amp;">>},
+                 {<<"<">>,  <<"&lt;">>},
+                 {<<">">>,  <<"&gt;">>},
+                 {<<"\"">>, <<"&quot;">>},
+                 {<<"'">>,  <<"&#39;">>}
+                ]).
+-endif.
 
+
+% NOTE: the -else variant of html_escape/1 above can be removed once OTP 27 is the lowest supported release.
 % @doc Compute the source text from a list of diffs.
 source_text(Diffs) ->
-    source_text(Diffs, <<>>).
-
-source_text([], Acc) ->
-    Acc;
-source_text([{insert, _Data}|T], Acc) ->
-    source_text(T, Acc);
-source_text([{_Op, Data}|T], Acc) ->
-    source_text(T, <<Acc/binary, Data/binary>>).
-    
+    iolist_to_binary([Data || {Op, Data} <- Diffs, Op =/= insert]).
 
 % @doc Compute the destination text from a list of diffs.
 destination_text(Diffs) ->
-    destination_text(Diffs, <<>>).
-    
-destination_text([], Acc) -> 
-    Acc;
-destination_text([{delete, _Data}|T], Acc) ->
-    destination_text(T, Acc);
-destination_text([{_Op, Data}|T], Acc) ->
-    destination_text(T, <<Acc/binary, Data/binary>>).
+    iolist_to_binary([Data || {Op, Data} <- Diffs, Op =/= delete]).
     
 % @doc Compute the Levenshtein distance, the number of inserted, deleted or substituted characters.
 levenshtein(Diffs) ->
@@ -581,117 +652,361 @@ levenshtein([{equal, _Data}|T], Insertions, Deletions, Levenshtein) ->
 %
 -spec cleanup_merge(diffs()) -> diffs().
 cleanup_merge(Diffs) ->
-    cleanup_merge(Diffs, []). 
+    Diffs32 = [{Op, to_utf32(D)} || {Op, D} <- Diffs],
+    [{Op, to_utf8(D)} || {Op, D} <- cleanup_merge32(Diffs32)].
+
+%% Internal cleanup_merge operating on UTF-32 diffs.
+cleanup_merge32(Diffs) ->
+    cleanup_merge32(Diffs, []).
 
 %% Done
-cleanup_merge([], Acc) ->
+cleanup_merge32([], Acc) ->
     lists:reverse(Acc);
 %% Remove operations without data.
-cleanup_merge([{_Op, <<>>}|T], Acc) ->
-    cleanup_merge(T, Acc);
-%% Merge data from equal operations
-cleanup_merge([{Op2, Data2}|T], [{Op1, Data1}|Acc]) when Op1 =:= Op2 ->
-    cleanup_merge(T, [{Op1, <<Data1/binary, Data2/binary>>}|Acc]);
-%% Cleanup edits before equal operation
-cleanup_merge([{Op1, Data1}|T], [{Op2, _}=I, {Op3, Data3}|Acc]) when Op1 =/= Op2 andalso Op1 =:= Op3 andalso Op2 =/= equal andalso Op3 =/= equal ->
-    cleanup_merge(T, [I, {Op3, <<Data3/binary, Data1/binary>>}|Acc]);
-%% Check if Op1Data and Op2Data have common prefixes.
-cleanup_merge([{equal, E1}|T], [{Op1, Op1Data}, {Op2, Op2Data}, {equal, E2}|Acc]) when Op1 =/= Op2 andalso Op1 =/= equal andalso Op2 =/= equal ->
+cleanup_merge32([{_Op, <<>>}|T], Acc) ->
+    cleanup_merge32(T, Acc);
+%% Ensure delete/insert ordering: if insert is on top and a delete arrives, sink the insert.
+cleanup_merge32([{delete, _}=D|T], [{insert, _}=I|Acc]) ->
+    cleanup_merge32([D, I|T], Acc);
+%% Merge data from consecutive operations of the same type.
+cleanup_merge32([{Op2, Data2}|T], [{Op1, Data1}|Acc]) when Op1 =:= Op2 ->
+    cleanup_merge32(T, [{Op1, <<Data1/binary, Data2/binary>>}|Acc]);
+%% Cleanup edits before equal operation — re-queue merged op for further processing.
+cleanup_merge32([{Op1, Data1}|T], [{Op2, _}=I, {Op3, Data3}|Acc])
+        when Op1 =/= Op2 andalso Op1 =:= Op3 andalso Op2 =/= equal andalso Op3 =/= equal ->
+    cleanup_merge32([I, {Op3, <<Data3/binary, Data1/binary>>} | T], Acc);
+%% Factor out common prefixes and suffixes from adjacent insert/delete pairs.
+cleanup_merge32([{equal, E1}|T], [{Op1, Op1Data}, {Op2, Op2Data}, {equal, E2}|Acc])
+        when Op1 =/= Op2 andalso Op1 =/= equal andalso Op2 =/= equal ->
     {Prefix, Op1DataD, Op2DataD, Suffix} = split_pre_and_suffix(Op1Data, Op2Data),
-    cleanup_merge(T, [{equal, <<Suffix/binary, E1/binary>>}, 
+    cleanup_merge32(T, [{equal, <<Suffix/binary, E1/binary>>},
         {Op1, Op1DataD}, {Op2, Op2DataD}, {equal, <<E2/binary, Prefix/binary>>}|Acc]);
-%% Check for slide left and slide right edits
-cleanup_merge([{equal, E1}=H|T], [{Op, I}, {equal, E2}|AccTail]=Acc) when Op =:= insert orelse Op =:= delete ->
+%% Slide edits left and right.
+cleanup_merge32([{equal, E1}=H|T], [{Op, I}, {equal, E2}|AccTail]=Acc)
+        when Op =:= insert orelse Op =:= delete ->
     case is_suffix(E2, I) of
         false ->
             case is_prefix(E1, I) of
                 false ->
-                    cleanup_merge(T, [H|Acc]);
+                    cleanup_merge32(T, [H|Acc]);
                 true ->
                     P = size(E1),
                     <<_:P/binary, Post/binary>> = I,
-                    cleanup_merge([{equal, <<E2/binary, E1/binary>>}, {Op, <<Post/binary, E1/binary>>}|T], AccTail)
+                    cleanup_merge32([{equal, <<E2/binary, E1/binary>>}, {Op, <<Post/binary, E1/binary>>}|T], AccTail)
             end;
         true ->
             R = size(I) - size(E2),
-            <<Pre:R/binary,  Post/binary>> = I,
-            cleanup_merge([{Op, <<E2/binary, Pre/binary>>}, {equal, <<Post/binary, E1/binary>>}|T], AccTail)
+            <<Pre:R/binary, Post/binary>> = I,
+            cleanup_merge32([{Op, <<E2/binary, Pre/binary>>}, {equal, <<Post/binary, E1/binary>>}|T], AccTail)
     end;
-cleanup_merge([H|T], Acc) ->
-    cleanup_merge(T, [H|Acc]).
+cleanup_merge32([H|T], Acc) ->
+    cleanup_merge32(T, [H|Acc]).
 
 % @doc Do semantic cleanup of diffs
 %
 -spec cleanup_semantic(diffs()) -> diffs().
 cleanup_semantic(Diffs) ->
-    cleanup_semantic(Diffs, []).
+    Diffs32 = [{Op, to_utf32(D)} || {Op, D} <- Diffs],
+    [{Op, to_utf8(D)} || {Op, D} <- cleanup_semantic32(Diffs32)].
+
+%% Internal semantic cleanup operating on UTF-32 diffs.
+cleanup_semantic32(Diffs) ->
+    Diffs1 = cleanup_semantic_breakpoints(Diffs),
+    Diffs2 = cleanup_merge32(Diffs1),
+    Diffs3 = cleanup_semantic_lossless(Diffs2),
+    cleanup_semantic_overlaps(Diffs3).
+
+cleanup_semantic_breakpoints(Diffs) ->
+    case find_breakpoint(Diffs, [], 0, 0, 0, 0, undefined) of
+        {found, NewDiffs} -> cleanup_semantic_breakpoints(NewDiffs);
+        not_found -> Diffs
+    end.
 
-cleanup_semantic([], Acc) ->
-    lists:reverse(Acc);
-cleanup_semantic([H|T], Acc) ->
-    cleanup_semantic(T, [H|Acc]).
+find_breakpoint([], _Acc, _LI1, _LD1, _LI2, _LD2, _LE) ->
+    not_found;
+find_breakpoint([{equal, Data} | T], Acc, _LI1, _LD1, LI2, LD2, _LE) ->
+    find_breakpoint(T, [{equal, Data} | Acc], LI2, LD2, 0, 0, Data);
+find_breakpoint([{insert, Data} | T], Acc, LI1, LD1, LI2, LD2, LE) ->
+    NewLI2 = LI2 + text_size32(Data),
+    case is_breakpoint(LE, LI1, LD1, NewLI2, LD2) of
+        true -> {found, apply_breakpoint(LE, Acc, [{insert, Data} | T])};
+        false -> find_breakpoint(T, [{insert, Data} | Acc], LI1, LD1, NewLI2, LD2, LE)
+    end;
+find_breakpoint([{delete, Data} | T], Acc, LI1, LD1, LI2, LD2, LE) ->
+    NewLD2 = LD2 + text_size32(Data),
+    case is_breakpoint(LE, LI1, LD1, LI2, NewLD2) of
+        true -> {found, apply_breakpoint(LE, Acc, [{delete, Data} | T])};
+        false -> find_breakpoint(T, [{delete, Data} | Acc], LI1, LD1, LI2, NewLD2, LE)
+    end.
+
+is_breakpoint(undefined, _, _, _, _) -> false;
+is_breakpoint(LE, LI1, LD1, LI2, LD2) ->
+    LEN = text_size32(LE),
+    LEN =< max(LI1, LD1) andalso LEN =< max(LI2, LD2).
+
+apply_breakpoint(LE, Acc, T) ->
+    replace_equality(LE, Acc, T).
+
+replace_equality(LE, [{equal, LE} | T_Acc], T) ->
+    lists:reverse(T_Acc) ++ [{delete, LE}, {insert, LE} | T];
+replace_equality(LE, [H | T_Acc], T) ->
+    replace_equality(LE, T_Acc, [H | T]).
+
+cleanup_semantic_lossless(Diffs) ->
+    cleanup_semantic_lossless(Diffs, []).
+
+cleanup_semantic_lossless([{equal, E1}, {Op, Edit}, {equal, E2} | T], Acc) when ?IS_INS_OR_DEL(Op) ->
+    {NewE1, NewEdit, NewE2} = slide_edit(E1, Edit, E2),
+    case NewE1 of
+        <<>> ->
+            cleanup_semantic_lossless(lists:reverse(Acc, [{Op, NewEdit}, {equal, NewE2} | T]), []);
+        _ ->
+            case NewE2 of
+                <<>> ->
+                    cleanup_semantic_lossless(lists:reverse(Acc, [{equal, NewE1}, {Op, NewEdit} | T]), []);
+                _ ->
+                    cleanup_semantic_lossless([{Op, NewEdit}, {equal, NewE2} | T], [{equal, NewE1} | Acc])
+            end
+    end;
+cleanup_semantic_lossless([H | T], Acc) ->
+    cleanup_semantic_lossless(T, [H | Acc]);
+cleanup_semantic_lossless([], Acc) ->
+    lists:reverse(Acc).
+
+slide_edit(E1, Edit, E2) ->
+    Suffix = common_suffix(E1, Edit),
+    {E1_1, Edit_1, E2_1} = case Suffix of
+                               <<>> -> {E1, Edit, E2};
+                               _ ->
+                                   SLen = size(Suffix),
+                                   { binary:part(E1, 0, size(E1) - SLen),
+                                     <<Suffix/binary, (binary:part(Edit, 0, size(Edit) - SLen))/binary>>,
+                                     <<Suffix/binary, E2/binary>> }
+                           end,
+    find_best_slide(E1_1, Edit_1, E2_1).
+
+find_best_slide(E1, Edit, E2) ->
+    Score = cleanup_semantic_score(E1, Edit) + cleanup_semantic_score(Edit, E2),
+    find_best_slide(E1, Edit, E2, Score, E1, Edit, E2).
+
+find_best_slide(E1, Edit, E2, BestScore, BestE1, BestEdit, BestE2) ->
+    case can_slide_right(Edit, E2) of
+        {true, Char, RestEdit, RestE2} ->
+            NewE1 = <<E1/binary, Char/binary>>,
+            NewEdit = <<RestEdit/binary, Char/binary>>,
+            NewE2 = RestE2,
+            NewScore = cleanup_semantic_score(NewE1, NewEdit) + cleanup_semantic_score(NewEdit, NewE2),
+            case NewScore >= BestScore of 
+                true -> find_best_slide(NewE1, NewEdit, NewE2, NewScore, NewE1, NewEdit, NewE2);
+                false -> find_best_slide(NewE1, NewEdit, NewE2, BestScore, BestE1, BestEdit, BestE2)
+            end;
+        false ->
+            {BestE1, BestEdit, BestE2}
+    end.
+
+%% In UTF-32 each codepoint is exactly 4 bytes, so a fixed 32-bit match compares whole codepoints.
+can_slide_right(<<Char:32, RestEdit/binary>>, <<Char:32, RestE2/binary>>) ->
+    {true, <<Char:32>>, RestEdit, RestE2};
+can_slide_right(_, _) ->
+    false.
+
+cleanup_semantic_score(<<>>, _) -> 6;
+cleanup_semantic_score(_, <<>>) -> 6;
+cleanup_semantic_score(One, Two) ->
+    Char1 = last_char(One),
+    Char2 = first_char(Two),
+    NonAlphaNumeric1 = not ?IS_ALPHA(Char1),
+    NonAlphaNumeric2 = not ?IS_ALPHA(Char2),
+    Whitespace1 = NonAlphaNumeric1 andalso ?IS_WS(Char1),
+    Whitespace2 = NonAlphaNumeric2 andalso ?IS_WS(Char2),
+    LineBreak1 = Whitespace1 andalso ?IS_LB(Char1),
+    LineBreak2 = Whitespace2 andalso ?IS_LB(Char2),
+    BlankLine1 = LineBreak1 andalso is_blankline_end(One),
+    BlankLine2 = LineBreak2 andalso is_blankline_start(Two),
+    if
+        BlankLine1 orelse BlankLine2 -> 5;
+        LineBreak1 orelse LineBreak2 -> 4;
+        NonAlphaNumeric1 andalso (not Whitespace1) andalso Whitespace2 -> 3;
+        Whitespace1 orelse Whitespace2 -> 2;
+        NonAlphaNumeric1 orelse NonAlphaNumeric2 -> 1;
+        true -> 0
+    end.
+
+cleanup_semantic_overlaps(Diffs) ->
+    cleanup_semantic_overlaps(Diffs, []).
+
+cleanup_semantic_overlaps([{delete, Del}, {insert, Ins} | T], Acc) ->
+    Overlap1 = common_overlap(Del, Ins),
+    Overlap2 = common_overlap(Ins, Del),
+    TDel = text_size32(Del),
+    TIns = text_size32(Ins),
+    case Overlap1 >= Overlap2 of
+        true ->
+            case Overlap1 * 2 >= TDel orelse Overlap1 * 2 >= TIns of
+                true ->
+                    Common = binary:part(Ins, 0, Overlap1 * 4),
+                    NewDel = binary:part(Del, 0, (TDel - Overlap1) * 4),
+                    NewIns = binary:part(Ins, Overlap1 * 4, (TIns - Overlap1) * 4),
+                    cleanup_semantic_overlaps([{insert, NewIns} | T], [{equal, Common}, {delete, NewDel} | Acc]);
+                false ->
+                    cleanup_semantic_overlaps([{insert, Ins} | T], [{delete, Del} | Acc])
+            end;
+        false ->
+            case Overlap2 * 2 >= TIns orelse Overlap2 * 2 >= TDel of
+                true ->
+                    Common = binary:part(Ins, (TIns - Overlap2) * 4, Overlap2 * 4),
+                    NewIns = binary:part(Ins, 0, (TIns - Overlap2) * 4),
+                    NewDel = binary:part(Del, Overlap2 * 4, (TDel - Overlap2) * 4),
+                    cleanup_semantic_overlaps([{delete, NewDel} | T], [{equal, Common}, {insert, NewIns} | Acc]);
+                false ->
+                    cleanup_semantic_overlaps([{insert, Ins} | T], [{delete, Del} | Acc])
+            end
+    end;
+cleanup_semantic_overlaps([H | T], Acc) ->
+    cleanup_semantic_overlaps(T, [H | Acc]);
+cleanup_semantic_overlaps([], Acc) ->
+    lists:reverse(Acc).
+
+%% In UTF-32 every codepoint is exactly 4 bytes, so all byte/codepoint conversions
+%% are simple multiplications and binary:part calls.
+
+%% @doc Return the first Len codepoints of Bin as a binary.
+substring_start(Bin, Len) ->
+    binary:part(Bin, 0, Len * 4).
+
+%% @doc Return the last Len codepoints of Bin as a binary.
+substring_end(Bin, Len) ->
+    TotalLen = text_size32(Bin),
+    case TotalLen =< Len of
+        true -> Bin;
+        false -> binary:part(Bin, (TotalLen - Len) * 4, Len * 4)
+    end.
+
+common_overlap(<<>>, _) -> 0;
+common_overlap(_, <<>>) -> 0;
+common_overlap(Text1, Text2) ->
+    T1Len = text_size32(Text1),
+    T2Len = text_size32(Text2),
+    {T1, T2, TMin} = if
+                         T1Len > T2Len ->
+                             {substring_end(Text1, T2Len), Text2, T2Len};
+                         T1Len < T2Len ->
+                             {Text1, substring_start(Text2, T1Len), T1Len};
+                         true ->
+                             {Text1, Text2, T1Len}
+                     end,
+    case T1 =:= T2 of
+        true -> TMin;
+        false -> common_overlap_loop(T1, T2, TMin, 0, 1)
+    end.
+
+common_overlap_loop(T1, T2, TMin, Best, Length) when Length =< TMin ->
+    Pattern = substring_end(T1, Length),
+    case aligned_utf32_match(T2, Pattern, 0) of
+        nomatch ->
+            Best;
+        {FoundByteOffset, _} ->
+            %% In UTF-32, byte offset maps directly to codepoint count.
+            FoundCharCount = FoundByteOffset div 4,
+            NewLength = Length + FoundCharCount,
+            case NewLength > TMin of
+                true -> Best;
+                false ->
+                    case substring_end(T1, NewLength) =:= substring_start(T2, NewLength) of
+                        true ->
+                            common_overlap_loop(T1, T2, TMin, NewLength, NewLength + 1);
+                        false ->
+                            common_overlap_loop(T1, T2, TMin, Best, NewLength + 1)
+                    end
+            end
+    end;
+common_overlap_loop(_T1, _T2, _TMin, Best, _Length) ->
+    Best.
+
+%% In UTF-32 the first and last codepoints are always at fixed byte offsets.
+first_char(<<C:32, _/binary>>) -> C;
+first_char(_) -> undefined.
+
+last_char(<<>>) -> undefined;
+last_char(Bin) ->
+    Size = byte_size(Bin),
+    <<_:(Size-4)/binary, C:32>> = Bin,
+    C.
+
+%% In UTF-32 each codepoint is 4 bytes, so newline patterns are fixed-width.
+is_blankline_end(Bin) when byte_size(Bin) >= 8 ->
+    Size = byte_size(Bin),
+    case Bin of
+        <<_:(Size-8)/binary,  $\n:32, $\n:32>>         -> true;
+        <<_:(Size-12)/binary, $\n:32, $\r:32, $\n:32>> -> true;
+        _ -> false
+    end;
+is_blankline_end(_) -> false.
+
+is_blankline_start(Bin) when byte_size(Bin) >= 8 ->
+    case Bin of
+        <<$\n:32, $\n:32, _/binary>>                 -> true;
+        <<$\n:32, $\r:32, $\n:32, _/binary>>         -> true;
+        <<$\r:32, $\n:32, $\n:32, _/binary>>         -> true;
+        <<$\r:32, $\n:32, $\r:32, $\n:32, _/binary>> -> true;
+        _ -> false
+    end;
+is_blankline_start(_) -> false.
 
 % @doc Do efficiency cleanup of diffs.
 %
 -spec cleanup_efficiency(diffs()) -> diffs().
 cleanup_efficiency(Diffs) ->
-    cleanup_efficiency(Diffs, 4).
+    cleanup_efficiency(Diffs, ?DEFAULT_EDIT_COST).
 
+-spec cleanup_efficiency(diffs(), pos_integer()) -> diffs().
 cleanup_efficiency(Diffs, EditCost) ->
-    cleanup_efficiency(Diffs, false, EditCost, []).
+    Diffs32 = [{Op, to_utf32(D)} || {Op, D} <- Diffs],
+    [{Op, to_utf8(D)} || {Op, D} <- cleanup_efficiency32(Diffs32, EditCost)].
+
+%% Internal efficiency cleanup operating on UTF-32 diffs.
+cleanup_efficiency32(Diffs) ->
+    cleanup_efficiency32(Diffs, ?DEFAULT_EDIT_COST).
+
+cleanup_efficiency32(Diffs, EditCost) ->
+    cleanup_efficiency32(Diffs, false, EditCost, []).
 
 %% Done.
-cleanup_efficiency([], Changed, _EditCost, Acc) ->
+cleanup_efficiency32([], Changed, _EditCost, Acc) ->
     Diffs = lists:reverse(Acc),
     case Changed of
         false -> Diffs;
-        true -> cleanup_merge(Diffs)
+        true -> cleanup_merge32(Diffs)
     end;
 %% Any equality which is surrounded on both sides by an insertion and deletion need less then 
 %% EditCost characters for it to be advantageous to split.
-cleanup_efficiency([{O1, _}=A, {equal, XY}=E, {O2, _}=B | T], Changed, EditCost, Acc) when 
-        O1 =/= O2 andalso ?IS_INS_OR_DEL(O1) andalso ?IS_INS_OR_DEL(O2) ->
+cleanup_efficiency32([{O1, _}=A, {equal, XY}=E, {O2, _}=B | T], Changed, EditCost, Acc)
+  when O1 =/= O2 andalso ?IS_INS_OR_DEL(O1) andalso ?IS_INS_OR_DEL(O2) ->
     case text_smaller_than(XY, EditCost) of
         true ->
-            %% Split
             Del = {delete, XY},
             Ins = {insert, XY},
-
-            cleanup_efficiency([Ins, B | T], true, EditCost, [Del, A | Acc]);
+            cleanup_efficiency32([Ins, B | T], true, EditCost, [Del, A | Acc]);
         false ->
-            %% Equal is big enough, move A and equal out of the way.
-            cleanup_efficiency([B | T], Changed, EditCost, [E, A |Acc])
+            cleanup_efficiency32([B | T], Changed, EditCost, [E, A | Acc])
     end;
 %% Any equality which is surrounded on one side by an existing insertion and deletion and on the 
-%% other side by an exisiting insertion or deletion needs by less than half C characters long for it 
-%% to be advantagous to split.
-cleanup_efficiency([{O1, _}=A, {O2, _}=B, {equal, X}=E, {O3, _}=C | T], Changed, EditCost, Acc) when
-    O1 =/= O2 andalso ?IS_INS_OR_DEL(O1) andalso ?IS_INS_OR_DEL(O2) andalso ?IS_INS_OR_DEL(O3) ->
+%% other side by an existing insertion or deletion needs to be at most half EditCost characters long for it 
+%% to be advantageous to split.
+cleanup_efficiency32([{O1, _}=A, {O2, _}=B, {equal, X}=E, {O3, _}=C | T], Changed, EditCost, Acc)
+  when O1 =/= O2 andalso ?IS_INS_OR_DEL(O1) andalso ?IS_INS_OR_DEL(O2) andalso ?IS_INS_OR_DEL(O3) ->
     case text_smaller_than(X, EditCost div 2 + 1) of
         true ->
-            %% Split
             Del = {delete, X},
             Ins = {insert, X},
-            cleanup_efficiency([Ins, C | T], true, EditCost, [Del, B, A | Acc]);
+            cleanup_efficiency32([Ins, C | T], true, EditCost, [Del, B, A | Acc]);
         false ->
-            %% Equal is big enough, move delete and equal out of the way.
-            cleanup_efficiency([B, E, C | T], Changed, EditCost, [A |Acc])
+            cleanup_efficiency32([B, E, C | T], Changed, EditCost, [A | Acc])
     end;
-cleanup_efficiency([H|T], Changed, EditCost, Acc) ->
-    cleanup_efficiency(T, Changed, EditCost, [H|Acc]).
+cleanup_efficiency32([H | T], Changed, EditCost, Acc) ->
+    cleanup_efficiency32(T, Changed, EditCost, [H | Acc]).
 
 
-% @doc Return true iff the text is smaller than specified 
-text_smaller_than(_, 0) ->
-    false;
-text_smaller_than(<<>>, _Size) ->
-    true;
-text_smaller_than(<<_C/utf8, Rest/binary>>, Size) when Size > 0 ->
-    text_smaller_than(Rest, Size-1);
-text_smaller_than(<<_C, Rest/binary>>, Size) when Size > 0 ->
-    %% Illegal utf-8 string, just count this as a single character and continue
-    text_smaller_than(Rest, Size-1).
-
 % @doc create a patch from a list of diffs
 make_patch(Diffs) when is_list(Diffs) ->
     %% Reconstruct the source-text from the diffs.
@@ -710,10 +1025,8 @@ make_patch(Diffs, SourceText) when is_list(Diffs) andalso is_binary(SourceText)
 
 make_patch([], _PrePatchText, _PostPatchText, _Count1, _Count2, [Patch|Rest]=Patches) ->
     case Patch#patch.diffs of
-        [] -> 
-            lists:reverse(Rest);
-        _ -> 
-            lists:reverse(Patches)
+        [] -> lists:reverse(Rest);
+        _ -> lists:reverse(Patches)
     end;
     
 make_patch([{insert, Data}=D|T], PrePatchText, PostPatchText, Count1, Count2, [Patch|Rest]) ->
@@ -766,33 +1079,10 @@ make_patch([{equal, Data}|T], PrePatchText, PostPatchText, Count1, Count2, [Patc
         
     make_patch(T, PrePatchText, PostPatchText, Count1+Size, Count2+Size, [P|Rest]).
 
-    
-% @doc Returns true iff Pattern is a unique match inside Text.
-unique_match(Pattern, Text) ->
-    TextSize = size(Text),
-    case binary:match(Text, Pattern) of
-        nomatch -> 
-            error(nomatch);
-        {Start, Length} when Start + 1 + Length < TextSize ->
-            %% We have a match, and we can search..
-            case binary:match(Text, Pattern, [{scope, {Start+1, TextSize-Start-1}}]) of
-                nomatch -> true;
-                {_, _} -> false
-            end;
-        {_, _} ->
-            true
-    end.
-
-
 %%
 %% Helpers
 %%
 
-% @doc Return true iff binary is a single character.
-single_char(<<>>) -> false;
-single_char(<<_C/utf8>>) -> true;
-single_char(Bin) when is_binary(Bin) -> false.
-
 % @doc Return true iff A is a prefix of B
 is_prefix(A, B) when size(A) > size(B) ->
     false;
@@ -806,26 +1096,22 @@ is_suffix(A, B) ->
     size(A) =:= binary:longest_common_suffix([A, B]).
 
 %
-match_front(X1, Y1, A, M, B, N) when X1 < M andalso Y1 < N ->
-    case array:get(X1, A) =:= array:get(Y1, B) of
-        true -> 
-	    match_front(X1+1, Y1+1, A, M, B, N);
-        false -> 
-	    {X1, Y1}
-    end;
+match_front(X1, Y1, A32, M, B32, N) when X1 < M andalso Y1 < N ->
+    APart = binary:part(A32, X1 * 4, (M - X1) * 4),
+    BPart = binary:part(B32, Y1 * 4, (N - Y1) * 4),
+    Steps = binary:longest_common_prefix([APart, BPart]) div 4,
+    {X1 + Steps, Y1 + Steps};
 match_front(X1, Y1, _, _, _, _) ->
     {X1, Y1}.
 
 %
-match_reverse(X1, Y1, A, M, B, N) when X1 < M andalso Y1 < N ->
-    case array:get(M-X1-1, A) =:= array:get(N-Y1-1, B) of
-        true -> 
-	    match_reverse(X1+1, Y1+1, A, M, B, N);
-        false -> 
-	    {X1, Y1}
-    end;
-match_reverse(X1, Y1, _, _, _, _) ->
-    {X1, Y1}.
+match_reverse(X2, Y2, A32, M, B32, N) when X2 < M andalso Y2 < N ->
+    APart = binary:part(A32, 0, (M - X2) * 4),
+    BPart = binary:part(B32, 0, (N - Y2) * 4),
+    Steps = binary:longest_common_suffix([APart, BPart]) div 4,
+    {X2 + Steps, Y2 + Steps};
+match_reverse(X2, Y2, _, _, _, _) ->
+    {X2, Y2}.
 
 
 %% Implementation of the for statement
@@ -837,10 +1123,8 @@ for(From, To, _Step, _Fun, State) when From >= To ->
     State;
 for(From, To, Step, Fun, State) ->
     case Fun(From, State) of
-        {continue, S1} ->
-            for(From + Step, To, Step, Fun, S1);
-        {break, S1} ->
-            S1
+        {continue, S1} -> for(From + Step, To, Step, Fun, S1);
+        {break, S1} -> S1
     end.
 
 split_pre_and_suffix(Text1, Text2) ->
@@ -859,144 +1143,58 @@ split_pre_and_suffix(Text1, Text2) ->
     {Prefix, MiddleText1, MiddleText2, Suffix}.
 
     
-% @doc Return the common prefix of Text1 and Text2. (utf8 aware)
+% @doc Return the common prefix of Text1 and Text2. Works on UTF-32 — always codepoint-aligned.
 common_prefix(Text1, Text2) ->
     Length = binary:longest_common_prefix([Text1, Text2]),
-    Prefix = binary:part(Text1, 0, Length),
-    
-    %% Utf-8 repair the tail of the prefix. It could contain a half utf-8 char.
-    {Prefix1, _} = repair_tail(Prefix),
-    Prefix1.
+    %% Round down to 4-byte boundary (should already be aligned for valid UTF-32).
+    binary:part(Text1, 0, (Length div 4) * 4).
 
-% @doc Return the common prefix of Text1 and Text2 (utf8 aware)
+% @doc Return the common suffix of Text1 and Text2. Works on UTF-32 — always codepoint-aligned.
 common_suffix(Text1, Text2) ->
     Length = binary:longest_common_suffix([Text1, Text2]),
-    Suffix = binary:part(Text1, size(Text1), -Length),
+    binary:part(Text1, byte_size(Text1), -((Length div 4) * 4)).
 
-    %% Utf-8 repair the head of the suffix. Could contain a half utf8 char
-    {_, Suffix1} = repair_head(Suffix),
-    Suffix1.
 
-
-% @doc Count the number of characters in a utf8 binary.
+% @doc Count the number of codepoints in a UTF-8 binary.
+% @deprecated Use text_size32/1 internally. This public function may be removed in a future version.
+-spec text_size(unicode:unicode_binary()) -> non_neg_integer().
 text_size(Text) when is_binary(Text) ->
-    text_size(Text, 0).
+    byte_size(to_utf32(Text)) div 4.
+
+% @doc Count the number of codepoints in a UTF-32 binary. O(1).
+text_size32(Text) when is_binary(Text) ->
+    byte_size(Text) div 4.
 
-text_size(<<>>, Count) ->
-    Count;
-text_size(<<_C/utf8, Rest/binary>>, Count) ->
-    text_size(Rest, Count+1);
-text_size(_, _) ->
-    error(badarg).
+% @doc Return true iff Text has fewer than Size codepoints. O(1) for UTF-32.
+text_smaller_than(_, 0) ->
+    false;
+text_smaller_than(Text, Size) ->
+    byte_size(Text) < Size * 4.
 
 %%
-%% Array utilities
+%% UTF-32 boundary helpers
 %%
 
-% @doc Create an array from a utf8 binary.
-array_from_binary(Bin) when is_binary(Bin) ->
-    array_from_binary(Bin, 0, array:new()).
-
-array_from_binary(<<>>, _N, Array) ->
-    array:fix(Array);
-array_from_binary(<<C/utf8, Rest/binary>>, N, Array) ->
-    array_from_binary(Rest, N+1, array:set(N, C, Array)).
-
-% @doc Create a binary from an array containing unicode characters.
-binary_from_array(Start, End, Array) ->
-    binary_from_array(Start, End, Array, <<>>).
-    
-binary_from_array(N, End, Array, Acc) when N < End ->
-    C = array:get(N, Array),
-    binary_from_array(N+1, End, Array, <<Acc/binary, C/utf8>>);
-binary_from_array(_, _, _, Acc) ->
-    Acc.
-
-%% @doc Checks the trailing bytes for utf8 prefix bytes.
-repair_tail(<<>>) ->
-    {<<>>, <<>>};
-%% Checks 
-repair_tail(Bin) ->
-    Size = size(Bin),
-    Size1 = Size-1, Size2 = Size-2, Size3 = Size-3, Size4 = Size-4,
-    case Bin of
-        %% Valid 1 -byte
-        <<_:Size1/binary, 2#0:1, _A:7>> ->
-             {Bin, <<>>}; 
-
-        %% Invalid 1-byte
-        <<Pre:Size1/binary, 2#110:3, A:5>> ->
-            {Pre, <<2#110:3, A:5>>};
-        <<Pre:Size1/binary, 2#1110:4, A:4>> ->
-            {Pre, <<2#1110:4, A:4>>};
-        <<Pre:Size1/binary, 2#11110:5, A:3>> ->
-            {Pre, <<2#11110:5, A:3>>};
-
-        %% Valid 2-byte ending
-        <<_:Size2/binary, 2#110:3, _A:5, 2#10:2, _B:6>> ->
-             {Bin, <<>>};
-
-        %% Invalid 2-byte ending
-        <<Pre:Size2/binary, 2#1110:4, A:4, 2#10:2, B:6>> ->
-            {Pre, <<2#1110:4, A:4, 2#10:2, B:6>>};
-        <<Pre:Size2/binary, 2#11110:5, A:3, 2#10:2, B:6>> ->
-            {Pre, <<2#11110:5, A:3, 2#10:2, B:6>>};
-
-        %% Valid 3-byte ending
-        <<_:Size3/binary, 2#1110:4, _A:4,  2#10:2, _B:6,  2#10:2, _C:6>> ->
-             {Bin, <<>>};
-
-        %% Invalid 3-byte ending
-        <<Pre:Size3/binary, 2#11110:5, A:3,  2#10:2, B:6, 2#10:2, C:6>> ->
-            {Pre, <<2#11110:5, A:3, 2#10:2, B:6, 2#10:2, C:6>>};
-
-        %% Valid 4-byte ending
-        <<_:Size4/binary, 2#11110:5, _A:3,  2#10:2, _B:6,   2#10:2, _C:6,  2#10:2, _D:6>> ->
-             {Bin, <<>>};
-
-        %% Illegal utf-8 sequence.
-        _ ->
-	    %% Can't repair it, just return
-	    {Bin, <<>>}
+% @doc Convert a UTF-8 binary to UTF-32, crashing on invalid input.
+to_utf32(Bin) ->
+    case unicode:characters_to_binary(Bin, utf8, utf32) of
+        Out when is_binary(Out) ->
+            Out;
+        {error, _, _} ->
+            error(badarg);
+        {incomplete, _, _} ->
+            error(badarg)
     end.
 
-% @doc Checks the beginning of a binary and strips of partial utf-8 encoded bytes.
-repair_head(<<>>) ->
-    {<<>>, <<>>};
-% valid 1-byte beginning
-repair_head(<<2#0:1, _A:7, _Rest/binary>>=Bin) ->
-    {<<>>, Bin};
-% valid 4-byte beginning
-repair_head(<<2#11110:5, _A:3,  2#10:2, _B:6, 2#10:2, _C:6,  2#10:2, _D:6, _Rest/binary>>=Bin) ->
-    {<<>>, Bin};
-% valid 3-byte beginning
-repair_head(<<2#1110:4, _A:4,  2#10:2, _B:6,  2#10:2, _C:6, _Rest/binary>>=Bin) ->
-    {<<>>, Bin};
-% invalid 3-byte beginning
-repair_head(<<2#10:2, A:6, 2#10:2, B:6, 2#10:2, C:6, Rest/binary>>) ->
-    {<<2#10:2, A:6, 2#10:2, B:6, 2#10:2, C:6>>, Rest};
-% valid 2-byte beginning
-repair_head(<<2#110:3, _A:5, 2#10:2, _B:6, _Rest/binary>>=Bin) ->
-    {<<>>, Bin};
-% invalid 2-byte beginnings
-repair_head(<<2#10:2, A:6, 2#10:2, B:6, Rest/binary>>) ->
-    {<<2#10:2, A:6, 2#10:2, B:6>>, Rest};
-% invalid 1-byte beginning
-repair_head(<<2#10:2, A:6, Rest/binary>>) ->
-    {<<2#10:2, A:6>>, Rest};
-repair_head(Bin) ->
-    %% Illegal sequence, can't repair it.
-    {<<>>, Bin}.
-
-
-%% This function can go away when we support OTP 20 and up.
-%%
-int_ceil(Number) ->
-    T = trunc(Number),
-    case (Number - T) of
-        Neg when Neg < 0 -> T;
-        Pos when Pos > 0 -> T + 1;
-        _ -> T
+% @doc Convert a UTF-32 binary to UTF-8, crashing on invalid input.
+to_utf8(Bin) ->
+    case unicode:characters_to_binary(Bin, utf32, utf8) of
+        Out when is_binary(Out) ->
+            Out;
+        {error, _, _} ->
+            error(badarg);
+        {incomplete, _, _} ->
+            error(badarg)
     end.
 
 %%
@@ -1007,50 +1205,11 @@ int_ceil(Number) ->
 
 -include_lib("eunit/include/eunit.hrl").
 
-repair_tail_test() ->
-    ?assertEqual({<<>>, <<>>}, repair_tail(<<>>)),
-    ?assertEqual({<<"aap">>, <<>>}, repair_tail(<<"aap">>)),
-    ?assertEqual({<<200/utf8>>, <<>>}, repair_tail(<<200/utf8>>)),
-    ?assertEqual({<<600/utf8>>, <<>>}, repair_tail(<<600/utf8>>)),
-    ?assertEqual({<<1000/utf8>>, <<>>}, repair_tail(<<1000/utf8>>)),
-
-    ?assertEqual({<<"aap">>, <<200>>}, repair_tail(<<"aap", 200>>)),
-
-    ?assertEqual({<<"test">>, <<240, 159, 159>>}, repair_tail(<<116,101,115,116,240,159,159>>)),
-
-    ok.
-
-repair_head_test() -> 
-    ?assertEqual({<<>>, <<>>}, repair_head(<<>>)),
-    ?assertEqual({<<>>, <<"a">>}, repair_head(<<"a">>)),
-    ?assertEqual({<<>>, <<"aap">>}, repair_head(<<"aap">>)),
-    ?assertEqual({<<>>, <<200/utf8>>}, repair_head(<<200/utf8>>)),
-    ?assertEqual({<<>>, <<600/utf8>>}, repair_head(<<600/utf8>>)),
-    ?assertEqual({<<>>, <<1000/utf8>>}, repair_head(<<1000/utf8>>)),
-
-    %%
-    ?assertEqual({<<2#10:2, 10:6>>, <<"aap">>}, 
-        repair_head(<<2#10:2, 10:6, "aap">>)),
-    ?assertEqual({<<2#10:2, 60:6, 2#10:2, 10:6>>, <<"aap">>}, 
-        repair_head(<<2#10:2, 60:6, 2#10:2, 10:6, "aap">>)),
-    ?assertEqual({<<2#10:2, 60:6, 2#10:2, 10:6, 2#10:2, 13:6>>, <<"aap">>}, 
-        repair_head(<<2#10:2, 60:6, 2#10:2, 10:6, 2#10:2, 13:6, "aap">>)),
-
-    ok.
-    
-
 for_test() ->
     ?assertEqual(9, for(0, 10, fun(I, _N) -> {continue, I} end, undefined)),
     ?assertEqual(0, for(0, 10, fun(I, _N) -> {break, I} end, undefined)),
     ok.
 
-array_test() ->
-    ?assertEqual(20, array:size(array_from_binary(<<"de apen eten bananen">>))),
-    ?assertEqual(<<"broodje aap">>, binary_from_array(0, 11, array_from_binary(<<"broodje aap">>))),
-    ?assertEqual(<<"aa">>, binary_from_array(0, 2, array_from_binary(<<"aap">>))),
-    ?assertEqual(<<"ap">>, binary_from_array(1, 3, array_from_binary(<<"aap">>))),
-    ok.
-
 diff_utf8_test() ->
     ?assertEqual([{equal, <<208,174, 208,189, 208,184, 208,186, 208,190, 208,180>>}], 
         diff(<<208,174,208,189,208,184,208,186,208,190,208,180>>, 
@@ -1077,10 +1236,6 @@ diff_bisect_test() ->
                   {equal,<<" a banana">>}], diff_bisect(<<"fruit flies like a banana">>, 
                                                         <<"fruit flies eat a banana">>)),
 
-
-    %?assertEqual([{delete,<<"cat">>},
-    %              {insert,<<"map">>}], diff_bisect(<<"cat">>, <<"map">>)), 
-
     ?assertEqual([{delete,<<"c">>},
                   {insert,<<"m">>},
                   {equal,<<"a">>},
@@ -1096,139 +1251,278 @@ diff_bisect_test() ->
 
     ?assertEqual([{equal, <<"text">>}],
                  diff_bisect(<<"text">>, <<"text">>)),
-                 
 
     ok.
 
+%% half_match operates on UTF-32 internally; wrap inputs/outputs for testing.
+half_match_utf8(A, B) ->
+    case half_match(to_utf32(A), to_utf32(B)) of
+        undefined -> undefined;
+        {half_match, A1, A2, B1, B2, C} ->
+            {half_match, to_utf8(A1), to_utf8(A2), to_utf8(B1), to_utf8(B2), to_utf8(C)}
+    end.
+
 half_match_test() ->
-    ?assertEqual(undefined, half_match(<<"1234567890">>, <<"abcdef">>)),
-    ?assertEqual(undefined, half_match(<<"12345">>, <<"23">>)),
+    ?assertEqual(undefined, half_match_utf8(<<"1234567890">>, <<"abcdef">>)), ?assertEqual(undefined, half_match_utf8(<<"12345">>, <<"23">>)),
 
     %% Single Match
-    ?assertEqual({half_match, <<"12">>, <<"90">>, <<"a">>, <<"z">>, <<"345678">>}, 
-        half_match(<<"1234567890">>, <<"a345678z">>)),
+    ?assertEqual({half_match, <<"12">>, <<"90">>, <<"a">>, <<"z">>, <<"345678">>}, half_match_utf8(<<"1234567890">>, <<"a345678z">>)),
     ?assertEqual({half_match, <<"a">>, <<"z">>, <<"12">>, <<"90">>, <<"345678">>}, 
-        half_match(<<"a345678z">>, <<"1234567890">>)),
+        half_match_utf8(<<"a345678z">>, <<"1234567890">>)),
     ?assertEqual({half_match, <<"abc">>, <<"z">>, <<"1234">>, <<"0">>, <<"56789">>}, 
-        half_match(<<"abc56789z">>, <<"1234567890">>)),
+        half_match_utf8(<<"abc56789z">>, <<"1234567890">>)),
     ?assertEqual({half_match, <<"a">>, <<"xyz">>, <<"1">>, <<"7890">>, <<"23456">>}, 
-        half_match(<<"a23456xyz">>, <<"1234567890">>)),
+        half_match_utf8(<<"a23456xyz">>, <<"1234567890">>)),
 
     %% Multiple Matches
     ?assertEqual({half_match, <<"12123">>, <<"123121">>, <<"a">>, <<"z">>, <<"1234123451234">>}, 
-        half_match(<<"121231234123451234123121">>, <<"a1234123451234z">>)),
+        half_match_utf8(<<"121231234123451234123121">>, <<"a1234123451234z">>)),
 
     ?assertEqual({half_match, <<"">>, <<"-=-=-=-=-=">>, <<"x">>, <<"">>, <<"x-=-=-=-=-=-=-=">>}, 
-        half_match(<<"x-=-=-=-=-=-=-=-=-=-=-=-=">>, <<"xx-=-=-=-=-=-=-=">>)),
+        half_match_utf8(<<"x-=-=-=-=-=-=-=-=-=-=-=-=">>, <<"xx-=-=-=-=-=-=-=">>)),
 
     ?assertEqual({half_match, <<"-=-=-=-=-=">>, <<"">>, <<"">>, <<"y">>, <<"-=-=-=-=-=-=-=y">>}, 
-        half_match(<<"-=-=-=-=-=-=-=-=-=-=-=-=y">>, <<"-=-=-=-=-=-=-=yy">>)),
+        half_match_utf8(<<"-=-=-=-=-=-=-=-=-=-=-=-=y">>, <<"-=-=-=-=-=-=-=yy">>)),
 
-    % Non-optimal halfmatch.
-    % Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy
     ?assertEqual({half_match, <<"qHillo">>, <<"w">>, <<"x">>, <<"Hulloy">>, <<"HelloHe">>}, 
-        half_match(<<"qHilloHelloHew">>, <<"xHelloHeHulloy">>)),
+        half_match_utf8(<<"qHilloHelloHew">>, <<"xHelloHeHulloy">>)),
+
+    ?assertEqual({half_match, <<"qHillo"/utf8>>, <<"w"/utf8>>, <<"x"/utf8>>, <<"eHull💯y"/utf8>>, <<"🐶🐱🐭🐹🐰H❤️"/utf8>>}, 
+        half_match_utf8(<<"qHillo🐶🐱🐭🐹🐰H❤️w"/utf8>>, <<"x🐶🐱🐭🐹🐰H❤️eHull💯y"/utf8>>)),
+
+    %% Unicode: é is 2 UTF-8 bytes but 1 codepoint (4 UTF-32 bytes).
+    %% With the old bug, size(Long) div 4 gave the wrong seed position
+    %% because byte_size in UTF-32 ≠ codepoint_count for multi-byte UTF-8 chars.
+    %% Long = éééééééééé (10 chars), Short = a + éééééééé + z (10 chars).
+    %% half_match should find the 8-char common section of é's.
+    E = <<233/utf8>>,
+    ULong = binary:copy(E, 10),
+    UShort = <<"a", (binary:copy(E, 8))/binary, "z">>,
+    UDiff = diff(ULong, UShort),
+    ?assertEqual(ULong, source_text(UDiff)),
+    ?assertEqual(UShort, destination_text(UDiff)),
+    %% The 8-char run of é must appear as a single equal op.
+    Equal8 = binary:copy(E, 8),
+    ?assert(lists:member({equal, Equal8}, UDiff)),
 
     ok.
 
-
+%% common_prefix/suffix operate on UTF-32; wrap for testing.
 common_prefix_test() ->
-    ?assertEqual(<<>>, common_prefix(<<"Text">>, <<"Next">>)),
-    ?assertEqual(<<"T">>, common_prefix(<<"Text">>, <<"Tax">>)),
-    ?assertEqual(<<"text">>, common_prefix(<<"text">>, <<"text">>)),
+    Prefix = fun(A, B) -> to_utf8(common_prefix(to_utf32(A), to_utf32(B))) end,
 
-    ?assertEqual(<<"test🟡"/utf8>>, common_prefix(<<"test🟡123"/utf8>>, <<"test🟡456"/utf8>>)),
+    ?assertEqual(<<>>, Prefix(<<"Text">>, <<"Next">>)),
+    ?assertEqual(<<"T">>, Prefix(<<"Text">>, <<"Tax">>)),
+    ?assertEqual(<<"text">>, Prefix(<<"text">>, <<"text">>)),
 
-    ?assertEqual(<<"test">>, common_prefix(<<"test🟢123"/utf8>>, <<"test🟡123"/utf8>>)),
-    ?assertEqual(<<"test">>, common_prefix(<<"test🟡123"/utf8>>, <<"test🟢123"/utf8>>)),
-    
-    ?assertEqual(<<"test">>, common_prefix(<<"test🟡123"/utf8>>, <<"test🔵123"/utf8>>)),
-    ?assertEqual(<<"test">>, common_prefix(<<"test🔵123"/utf8>>, <<"test🟡123"/utf8>>)),
+    ?assertEqual(<<"test🟡"/utf8>>, Prefix(<<"test🟡123"/utf8>>, <<"test🟡456"/utf8>>)),
+    ?assertEqual(<<"test">>, Prefix(<<"test🟢123"/utf8>>, <<"test🟡123"/utf8>>)),
+    ?assertEqual(<<"test">>, Prefix(<<"test🟡123"/utf8>>, <<"test🟢123"/utf8>>)),
+    ?assertEqual(<<"test">>, Prefix(<<"test🟡123"/utf8>>, <<"test🔵123"/utf8>>)),
+    ?assertEqual(<<"test">>, Prefix(<<"test🔵123"/utf8>>, <<"test🟡123"/utf8>>)),
+    ?assertEqual(<<"test">>, Prefix(<<"test🟡123"/utf8>>, <<"test⚫️123"/utf8>>)),
+    ?assertEqual(<<"test">>, Prefix(<<"test⚫️123"/utf8>>, <<"test🟡123"/utf8>>)),
 
-    ?assertEqual(<<"test">>, common_prefix(<<"test🟡123"/utf8>>, <<"test⚫️123"/utf8>>)),
-    ?assertEqual(<<"test">>, common_prefix(<<"test⚫️123"/utf8>>, <<"test🟡123"/utf8>>)),
+    ok.
 
+common_suffix_test() ->
+    Suffix = fun(A, B) -> to_utf8(common_suffix(to_utf32(A), to_utf32(B))) end,
 
+    ?assertEqual(<<"ext">>, Suffix(<<"Text">>, <<"Next">>)),
+    ?assertEqual(<<>>, Suffix(<<"Text">>, <<"Tax">>)),
+    ?assertEqual(<<"text">>, Suffix(<<"text">>, <<"text">>)),
     ok.
 
+%% split_pre_and_suffix operates on UTF-32; wrap for testing.
+split_pre_and_suffix_test() ->
+    Split = fun(A, B) ->
+        {P, M1, M2, S} = split_pre_and_suffix(to_utf32(A), to_utf32(B)),
+        {to_utf8(P), to_utf8(M1), to_utf8(M2), to_utf8(S)}
+    end,
 
-common_suffix_test() ->
-    ?assertEqual(<<"ext">>, common_suffix(<<"Text">>, <<"Next">>)),
-    ?assertEqual(<<>>, common_suffix(<<"Text">>, <<"Tax">>)),
-    ?assertEqual(<<"text">>, common_suffix(<<"text">>, <<"text">>)),
+    ?assertEqual({<<>>, <<>>, <<>>, <<>>}, Split(<<>>, <<>>)),
+    ?assertEqual({<<>>, <<"a">>, <<"b">>, <<>>}, Split(<<"a">>, <<"b">>)),
+    ?assertEqual({<<"a">>, <<"b">>, <<"c">>, <<"d">>}, Split(<<"abd">>, <<"acd">>)),
+    ?assertEqual({<<"aa">>, <<"bb">>, <<"cc">>, <<"dd">>}, Split(<<"aabbdd">>, <<"aaccdd">>)),
+    ?assertEqual({<<"aa">>, <<"bb">>, <<"c">>, <<"dd">>}, Split(<<"aabbdd">>, <<"aacdd">>)),
+    ?assertEqual({<<"cat ">>, <<>>, <<"mouse dog ">>, <<>>},
+                 Split(<<"cat ">>, <<"cat mouse dog ">>)),
     ok.
 
-split_pre_and_suffix_test() ->
-    ?assertEqual({<<>>, <<>>, <<>>, <<>>}, split_pre_and_suffix(<<>>, <<>>)),
+text_smaller_than_test() ->
+    %% text_smaller_than now works on UTF-32 binaries.
+    ?assertEqual(true,  text_smaller_than(to_utf32(<<>>), 5)),
+    ?assertEqual(true,  text_smaller_than(to_utf32(<<>>), 1)),
+    ?assertEqual(false, text_smaller_than(to_utf32(<<>>), 0)),
+    ?assertEqual(false, text_smaller_than(to_utf32(<<"abc">>), 0)),
+    ?assertEqual(false, text_smaller_than(to_utf32(<<"abc">>), 1)),
+    ?assertEqual(true,  text_smaller_than(to_utf32(<<"abc">>), 4)),
+
+    %% Multi-byte UTF-8 characters each become exactly 4 bytes in UTF-32.
+    Utf32 = to_utf32(<<1046/utf8, 1011/utf8, 1022/utf8, 127/utf8>>),
+    ?assertEqual(true,  text_smaller_than(Utf32, 5)),
+    ?assertEqual(false, text_smaller_than(Utf32, 4)),
 
-    ?assertEqual({<<>>, <<"a">>, <<"b">>, <<>>}, split_pre_and_suffix(<<"a">>, <<"b">>)),
-    
-    ?assertEqual({<<"a">>, <<"b">>, <<"c">>, <<"d">>}, 
-       split_pre_and_suffix(<<"abd">>, <<"acd">>)),
-    ?assertEqual({<<"aa">>, <<"bb">>, <<"cc">>, <<"dd">>}, 
-       split_pre_and_suffix(<<"aabbdd">>, <<"aaccdd">>)),
-    ?assertEqual({<<"aa">>, <<"bb">>, <<"c">>, <<"dd">>}, 
-       split_pre_and_suffix(<<"aabbdd">>, <<"aacdd">>)),
+    ok.
 
-    ?assertEqual({<<"cat ">>, <<>>, <<"mouse dog ">>, <<>>},
-                 split_pre_and_suffix(<<"cat ">>, <<"cat mouse dog ">>)),
+lines_to_chars_test() ->
+    %% lines_to_chars takes UTF-32 input, returns UTF-32 index sequences and UTF-32 lines.
+    {C1, C2, Lines} = lines_to_chars(to_utf32(<<>>), to_utf32(<<>>)),
+    ?assertEqual(<<>>, C1),
+    ?assertEqual(<<>>, C2),
+    ?assertEqual([], Lines),
+
+    {C3, C4, Lines2} = lines_to_chars(to_utf32(<<"hello\nworld\n">>), to_utf32(<<"hello\nmaas\n">>)),
+    %% Lines are stored as UTF-32 binaries.
+    ?assertEqual([to_utf32(<<"hello\n">>), to_utf32(<<"world\n">>), to_utf32(<<"maas\n">>)], Lines2),
+    ?assertEqual(<<0:32, 1:32>>, C3),
+    ?assertEqual(<<0:32, 2:32>>, C4),
 
-    ok. 
+    ok.
+
+diff_linemode_test() ->
+    ?assertEqual([{equal, <<"hello\n">>}, {delete, <<"world\n">>}, {insert, <<"maas\n">>}], 
+        diff_linemode(<<"hello\nworld\n">>, <<"hello\nmaas\n">>)),
 
-unique_match_test() ->
-    ?assertEqual(true, unique_match(<<"a">>, <<"abc">>)),
-    ?assertEqual(true, unique_match(<<"b">>, <<"abc">>)),
-    ?assertEqual(true, unique_match(<<"c">>, <<"abc">>)),
-    ?assertEqual(false, unique_match(<<"ab">>, <<"abab">>)),
     ok.
 
+diff_options_test() ->
+    A = <<"cat">>,
+    B = <<"map">>,
 
-text_smaller_than_test() ->
-    ?assertEqual(true, text_smaller_than(<<>>, 5)),
-    ?assertEqual(true, text_smaller_than(<<>>, 1)),
+    %% No options — same as diff/2.
+    ?assertEqual(diff(A, B), diff(A, B, [])),
+
+    %% no_linemode: result is structurally equivalent (same source/dest text).
+    NoLinemode = diff(A, B, [no_linemode]),
+    ?assertEqual(source_text(diff(A, B)),      source_text(NoLinemode)),
+    ?assertEqual(destination_text(diff(A, B)), destination_text(NoLinemode)),
+
+    %% semantic option applies cleanup_semantic to the raw diff.
+    ?assertEqual(cleanup_semantic(diff(A, B)), diff(A, B, [semantic])),
 
-    ?assertEqual(false, text_smaller_than(<<>>, 0)),
+    %% efficiency option applies cleanup_efficiency to the raw diff.
+    ?assertEqual(cleanup_efficiency(diff(A, B)), diff(A, B, [efficiency])),
 
-    ?assertEqual(false, text_smaller_than(<<"abc">>, 0)),
-    ?assertEqual(false, text_smaller_than(<<"abc">>, 1)),
-    ?assertEqual(true, text_smaller_than(<<"abc">>, 4)),
+    %% {efficiency, Cost} applies cleanup_efficiency/2 with the given cost.
+    ?assertEqual(cleanup_efficiency(diff(A, B), 2), diff(A, B, [{efficiency, 2}])),
 
-    %% Test if we count characters.
-    Utf8Binary = <<1046/utf8, 1011/utf8, 1022/utf8, 127/utf8>>,
-    ?assertEqual(true, size(Utf8Binary) > 5), % binary is larger due to utf8 encoding
-    ?assertEqual(true, text_smaller_than(Utf8Binary, 5)),
-    ?assertEqual(false, text_smaller_than(Utf8Binary, 4)),
+    %% Both: semantic first, then efficiency.
+    ?assertEqual(
+        cleanup_efficiency(cleanup_semantic(diff(A, B))),
+        diff(A, B, [semantic, efficiency])),
 
-    %% Test illegal utf8 sequence, the chars are counted as normal chars
-    ?assertEqual(false, text_smaller_than(<<149,157,112,8>>, 4)),
+    %% Order of options in list does not affect cleanup order.
+    ?assertEqual(
+        diff(A, B, [semantic, efficiency]),
+        diff(A, B, [efficiency, semantic])),
 
     ok.
 
-lines_to_chars_test() ->
-    ?assertEqual({<<>>, <<>>, []}, lines_to_chars(<<>>, <<>>)),
-
-    %% Simple text
-    ?assertEqual({<<0, 1>>, <<0, 2>>, [<<"hello\n">>, <<"world\n">>, <<"maas\n">>]}, 
-        lines_to_chars(<<"hello\n\world\n">>, <<"hello\nmaas\n">>)),
-
-    %% No newline at the end.
-    ?assertEqual({<<0, 1>>, <<0, 2>>, [<<"hello\n">>, <<"world\n">>, <<"maas">>]}, 
-        lines_to_chars(<<"hello\n\world\n">>, <<"hello\nmaas">>)),
-   
-    %% No newline at the end.
-    ?assertEqual({<<0, 1>>, <<0, 2>>, [<<"hello\n">>, <<"world\n">>, <<"maas">>]}, 
-        lines_to_chars(<<"hello\n\world\n">>, <<"hello\nmaas">>)),
-    
-    %% With empty lines 
-    ?assertEqual({<<0, 1, 2>>, <<0, 1, 3>>, [<<"hello\n">>, <<"\n">>, <<"world\n">>, <<"maas">>]}, 
-        lines_to_chars(<<"hello\n\nworld\n">>, <<"hello\n\nmaas">>)),
+seed_test() ->
+    %% 1. Empty binary: no codepoints, seed is empty.
+    ?assertEqual({0, <<>>}, seed(<<>>, 0)),
+
+    %% 2. Binary shorter than 4 codepoints (3 codepoints): 3 div 4 = 0, seed is empty.
+    Short3 = to_utf32(<<"abc">>),
+    ?assertEqual({0, <<>>}, seed(Short3, 0)),
+
+    %% 3. Exactly 4 codepoints, Start=0: seed is 1 codepoint (the first one).
+    Exact4 = to_utf32(<<"abcd">>),
+    ?assertEqual({0, to_utf32(<<"a">>)}, seed(Exact4, 0)),
+
+    %% 4. 8 codepoints, Start=0: seed is 2 codepoints starting at offset 0.
+    Long8 = to_utf32(<<"12345678">>),
+    ?assertEqual({0, to_utf32(<<"12">>)}, seed(Long8, 0)),
+
+    %% 5. 16 codepoints, Start=8 (8 bytes = 2 codepoints * 4 bytes/codepoint):
+    %%    seed is 4 codepoints; returned Start equals 8 and seed bytes are the correct slice.
+    Long16 = to_utf32(<<"abcdefghijklmnop">>),
+    {S5, Seed5} = seed(Long16, 8),
+    ?assertEqual(8, S5),
+    ?assertEqual(to_utf32(<<"cdef">>), Seed5),
+
+    %% 6. ASCII text round-trip: "1234567890" (10 chars), quarter-length seed taken at offset 0.
+    Ascii10 = to_utf32(<<"1234567890">>),
+    %% TotalCodepoints=10, SeedCodepoints=2; Start=0 to keep the offset 4-byte-aligned.
+    {_, SeedAscii} = seed(Ascii10, 0),
+    ?assertEqual(<<"12">>, to_utf8(SeedAscii)),
+
+    %% 7. Multi-byte codepoint alignment: 10 Greek letters (2 UTF-8 bytes each, 4 UTF-32 bytes each).
+    Greek10 = to_utf32(<<"αβγδεζηθικ"/utf8>>),
+    {Start7, Seed7} = seed(Greek10, 0),
+    %% Returned Start is 0.
+    ?assertEqual(0, Start7),
+    %% Seed is 4-byte-aligned.
+    ?assertEqual(0, byte_size(Seed7) rem 4),
+    %% Seed length = (10 div 4) * 4 = 8 bytes = 2 codepoints.
+    ?assertEqual((10 div 4) * 4, byte_size(Seed7)),
+    %% Seed decodes back to the first 2 Greek letters.
+    ?assertEqual(<<"αβ"/utf8>>, to_utf8(Seed7)),
+
+    %% 8. Emoji (4-byte UTF-8 codepoints): 10 emoji, seed is first 2.
+    Emoji10 = to_utf32(<<"🐶🐱🐭🐹🐰🐨🐯🦁🐮🐷"/utf8>>),
+    {_, SeedEmoji} = seed(Emoji10, 0),
+    %% Seed length = (10 div 4) * 4 = 8 bytes = 2 codepoints.
+    ?assertEqual((10 div 4) * 4, byte_size(SeedEmoji)),
+    %% Seed decodes back to the first 2 emoji.
+    ?assertEqual(<<"🐶🐱"/utf8>>, to_utf8(SeedEmoji)),
+
+    %% 9. Seed start offset preserved: non-zero Start is returned unchanged.
+    Long12 = to_utf32(<<"abcdefghijkl">>),
+    {Start9, _} = seed(Long12, 8),
+    ?assertEqual(8, Start9),
+
+    %% 10. Seed is a contiguous slice of Long: binary:part(Long, Start, byte_size(Seed)) =:= Seed.
+    Long20 = to_utf32(<<"abcdefghijklmnopqrst">>),
+    {Start10, Seed10} = seed(Long20, 8),
+    ?assertEqual(Seed10, binary:part(Long20, Start10, byte_size(Seed10))),
 
     ok.
 
+aligned_utf32_match_test() ->
+    ?assertEqual(nomatch, aligned_utf32_match(<<>>, <<0,0,0,0>>, 0)),
+    ?assertEqual(nomatch, aligned_utf32_match(<<>>, <<0,0,0,0>>, 4)),
 
-diff_linemode_test() ->
-    ?assertEqual([{equal, <<"hello\n">>}, {delete, <<"world\n">>}, {insert, <<"maas\n">>}], 
-        diff_linemode(<<"hello\nworld\n">>, <<"hello\nmaas\n">>)),
+    ?assertError(function_clause, aligned_utf32_match(<<>>, <<0,0,0,0>>, 3)),
+
+    ?assertEqual({0, 4}, aligned_utf32_match(<<1,2,3,4>>, <<1,2,3,4>>, 0)),
+    ?assertEqual({4, 4}, aligned_utf32_match(<<0,0,0,0, 1,2,3,4>>, <<1,2,3,4>>, 0)),
+
+    %% A byte-level match that does not start on a UTF-32 (4-byte) boundary must be skipped.
+    ?assertEqual(nomatch, aligned_utf32_match(<<0,0,1,2, 3,4,5,6>>, <<1,2,3,4>>, 0)),
+    ?assertEqual({8,4}, aligned_utf32_match(<<0,0,1,2, 3,4,5,6, 1,2,3,4>>, <<1,2,3,4>>, 0)),
+    ?assertEqual({8,4}, aligned_utf32_match(<<0,0,1,2, 3,4,5,6, 1,2,3,4>>, <<1,2,3,4>>, 4)),
+    ?assertEqual(nomatch, aligned_utf32_match(<<0,0,1,2, 3,4,5,1, 2,3,4,0>>, <<1,2,3,4>>, 4)),
+
+    %% Some longer matches
+    ?assertEqual({40, 20}, aligned_utf32_match(to_utf32(<<"the quick brown fox jumps over the lazy dog"/utf8>>),
+                                               to_utf32(<<"brown"/utf8>>), 0)),
+    ?assertEqual(nomatch, aligned_utf32_match(to_utf32(<<"the quick brown fox jumps over the lazy dog"/utf8>>),
+                                              to_utf32(<<"blue"/utf8>>), 0)),
+
+    %% Emoji needles in an emoji haystack: absent gives nomatch, present gives its byte offset and length.
+    ?assertEqual(nomatch, aligned_utf32_match(to_utf32(<<"😔😟😕🙁☹️😣😖😫😩🥺🥶"/utf8>>),
+                                              to_utf32(<<"💩"/utf8>>), 0)),
+    ?assertEqual({16,12}, aligned_utf32_match(to_utf32(<<"😔😟😕🙁☹️💩😣😖😫😩🥺🥶"/utf8>>),
+                                              to_utf32(<<"☹️💩"/utf8>>), 0)),
+
+    ok.
+
+common_overlap_test() ->
+    A = to_utf32(<<"Fire at Will">>),
+    B = to_utf32(<<"William Riker is number one">>),
+    ?assertEqual(4, common_overlap(A, B)),
+    ok.
+
+common_overlap_loop_test() ->
+    Abc = to_utf32(<<"abc">>),
+    Cde = to_utf32(<<"cde">>),
+    ?assertEqual(1, common_overlap_loop(Abc, Cde, text_size32(Cde), 0, 1)),
+
+    Abcdef = to_utf32(<<"abcdef">>),
+    Efde = to_utf32(<<"efde">>),
+    ?assertEqual(2, common_overlap_loop(Abcdef, Efde, text_size32(Efde), 0, 1)),
 
     ok.
 
diff --git a/src/diffy_simple_patch.erl b/src/diffy_simple_patch.erl
index 66a6ae8..29b5753 100644
--- a/src/diffy_simple_patch.erl
+++ b/src/diffy_simple_patch.erl
@@ -2,8 +2,9 @@
 %% @copyright 2014 Maas-Maarten Zeeman
 %%
 %% @doc Diffy, an erlang diff match and patch implementation 
+%% @end
 %%
-%% Copyright 2014 Maas-Maarten Zeeman
+%% Copyright 2014-2026 Maas-Maarten Zeeman
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
diff --git a/src/diffy_term.erl b/src/diffy_term.erl
index ed25387..23c23b6 100644
--- a/src/diffy_term.erl
+++ b/src/diffy_term.erl
@@ -3,8 +3,9 @@
 %%
 %% @doc Diffy, an erlang diff match and patch implementation 
 %%      Adapted from diffy.erl for simple diff on a list of Erlang terms
+%% @end
 %%
-%% Copyright 2014-2015 Maas-Maarten Zeeman, Marc Worrell
+%% Copyright 2014-2026 Maas-Maarten Zeeman, Marc Worrell
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -25,11 +26,10 @@
     diff/2
 ]).
 
--type diff_op() :: delete | equal | insert.
--type diff() :: {diff_op(), term()}.
+-type diff() :: {diffy:diff_op(), term()}.
 -type diffs() :: list(diff()).
 
--export_type([ diffs/0 ]).
+-export_type([ diff/0, diffs/0 ]).
 
 -spec diff(list(), list()) -> diffs().
 diff(A, A) ->
@@ -41,7 +41,7 @@ diff([], B) ->
 diff(A, B) when is_list(A), is_list(B) ->
     {Dict0, N} = term_dict(A, dict:new(), 0),
     {Dict, _N} = term_dict(B, Dict0, N),
-    Diff = diffy:diff(map_terms(A, Dict), map_terms(B, Dict)),
+    Diff = diffy:diff(map_terms(A, Dict), map_terms(B, Dict), [no_linemode]),
     unmap_diff(Diff, Dict).
 
 term_dict([], D, N) ->
@@ -68,7 +68,6 @@ unmap_diff_1({Op, B}, RDict) ->
     {Op, [ dict:fetch(C, RDict) || C <- Cs ]}.
 
 
-
 -ifdef(TEST).
 
 -include_lib("eunit/include/eunit.hrl").
@@ -88,7 +87,5 @@ diffy_term_test() ->
         diffy_term:diff([a,b,c,d,e], [a,e,b,c,d])),
     ok.
 
-
 -endif.
 
-
diff --git a/test/diffy_tests.erl b/test/diffy_tests.erl
index 42065a4..c7c9781 100644
--- a/test/diffy_tests.erl
+++ b/test/diffy_tests.erl
@@ -1,9 +1,9 @@
 %% @author Maas-Maarten Zeeman <mmzeeman@xs4all.nl>
-%% @copyright 2014 Maas-Maarten Zeeman
+%% @copyright 2014-2026 Maas-Maarten Zeeman
 %%
 %% @doc Diffy, an erlang diff match and patch implementation 
 %%
-%% Copyright 2014 Maas-Maarten Zeeman
+%% Copyright 2014-2026 Maas-Maarten Zeeman
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -23,32 +23,59 @@
 -include_lib("proper/include/proper.hrl").
 -include_lib("eunit/include/eunit.hrl").
 
+-dialyzer({no_opaque, [
+    cleanup_merge_prop_test/0,
+    cleanup_efficiency_prop_test/0,
+    cleanup_semantic_prop_test/0,
+    random_inner_diff_prop_test/0,
+    random_diffs_prop_test/0
+]}).
+
+-define(NUM_TESTS, 500).
+
 %%
 %% Properties
 %%
 
 prop_cleanup_merge() ->
-    ?FORALL(Diffs, diffy:diffs(),
+    ?FORALL(Diffs, list({diff_op(), proper_unicode:utf8()}),
         begin
             SourceText = diffy:source_text(Diffs),
             DestinationText = diffy:destination_text(Diffs),
-
             CleanDiffs = cleanup_merge(Diffs),
 
-            SourceText == diffy:source_text(CleanDiffs) andalso
-            DestinationText == diffy:destination_text(CleanDiffs)
+            SourceText =:= diffy:source_text(CleanDiffs)
+            andalso DestinationText =:= diffy:destination_text(CleanDiffs)
+        end).
+
+prop_cleanup_merge_idempotent() ->
+    ?FORALL(Diffs, list({diff_op(), proper_unicode:utf8()}),
+        begin
+            % Cleaning the diffs again should not result in more changes
+            Cleaned = cleanup_merge(Diffs),
+            Cleaned =:= cleanup_merge(Cleaned)
         end).
 
 prop_cleanup_efficiency() ->
-    ?FORALL(Diffs, diffy:diffs(),
+    ?FORALL(Diffs, list({diff_op(), proper_unicode:utf8()}),
         begin
             SourceText = diffy:source_text(Diffs),
             DestinationText = diffy:destination_text(Diffs),
-
             EfficientDiffs = cleanup_efficiency(Diffs),
 
-            SourceText == diffy:source_text(EfficientDiffs) andalso
-            DestinationText == diffy:destination_text(EfficientDiffs)
+            SourceText =:= diffy:source_text(EfficientDiffs)
+            andalso DestinationText =:= diffy:destination_text(EfficientDiffs)
+        end).
+
+prop_cleanup_semantic() ->
+    ?FORALL(Diffs, list({diff_op(), proper_unicode:utf8()}),
+        begin
+            SourceText = diffy:source_text(Diffs),
+            DestinationText = diffy:destination_text(Diffs),
+            SemanticDiffs = cleanup_semantic(Diffs),
+
+            SourceText =:= diffy:source_text(SemanticDiffs)
+            andalso DestinationText =:= diffy:destination_text(SemanticDiffs)
         end).
 
 html_like() ->
@@ -58,7 +85,7 @@ html_like() ->
                                         {2, utf8(4)},              % Some small portions of unicode chars.
                                         {2, range($0, $9)},        % numbers
                                         {2, $\s},                  % whitespace
-                                        {4,  $\n},                 % linebreaks
+                                        {4, $\n},                 % linebreaks
                                         {2, oneof([$., $-, $!, $?, $,])}   % punctuation
                                        ]))).
 
@@ -123,8 +150,13 @@ pretty_html_test() ->
     ?assertEqual(<<"<span>test</span>">>, pretty_html([{equal, <<"test">>}])),
     ?assertEqual(<<"<del style='background:#ffe6e6;'>foo</del><span>test</span>">>, 
         pretty_html([{delete, <<"foo">>}, {equal, <<"test">>}])),
+
     ?assertEqual(<<"<ins style='background:#e6ffe6;'>foo</ins><span>test</span>">>, 
         pretty_html([{insert, <<"foo">>}, {equal, <<"test">>}])),
+
+    %% escaping.
+    ?assertEqual(<<"<ins style='background:#e6ffe6;'>&lt;span&gt;foo&lt;/span&gt;</ins><span>&amp; &lt; &gt; &quot; &#39;</span>">>, 
+        pretty_html([{insert, <<"<span>foo</span>">>}, {equal, <<"& < > \" '">>}])),
     ok.
 
 source_text_test() ->
@@ -209,8 +241,26 @@ cleanup_merge_test() ->
 
     ok.
 
+%% delete/insert/delete — the two deletes merge into a single delete that is
+%% emitted before the insert (the input has no equal op, so only two ops remain).
+requeue_i_test() ->
+    ?assertEqual([{delete, <<"aXa">>}, {insert, <<"b">>}],
+                 cleanup_merge([{delete, <<"a">>}, {insert, <<"b">>}, {delete, <<"Xa">>}])).
+
+%% Three deletes interleaved with three inserts collapse into one delete and one insert
+triple_delete_test() ->
+    ?assertEqual([{delete, <<"abc">>}, {insert, <<"xyz">>}],
+                 cleanup_merge([{delete, <<"a">>}, {insert, <<"x">>},
+                                {delete, <<"b">>}, {insert, <<"y">>},
+                                {delete, <<"c">>}, {insert, <<"z">>}])).
+
+%% After sliding, the two equals on either side should merge into one
+slide_merge_test() ->
+    ?assertEqual([{insert, <<"aX">>}, {equal, <<"abc">>}],
+                 cleanup_merge([{equal, <<"a">>}, {insert, <<"Xa">>}, {equal, <<"bc">>}])).
+
 cleanup_merge_prop_test() ->
-    ?assertEqual(true, proper:quickcheck(prop_cleanup_merge(), [{numtests, 500}, {to_file, user}])),
+    ?assertEqual(true, proper:quickcheck(prop_cleanup_merge(), [{numtests, ?NUM_TESTS}, {to_file, user}])),
     ok.
 
 cleanup_semantic_test() ->
@@ -225,27 +275,39 @@ cleanup_semantic_test() ->
     ?assertEqual([{delete, <<"abc">>}, {insert, <<"ABC">>}, {equal, <<"1234">>}, {delete, <<"wxyz">>}], 
         cleanup_semantic([{delete, <<"abc">>}, {insert, <<"ABC">>}, {equal, <<"1234">>}, {delete, <<"wxyz">>}])),
 
-    % % Simple elimination.
-    % ?assertEqual([{delete, <<"abc">>}, {insert, <<"b">>}], 
-    %     cleanup_semantic([{delete, <<"a">>}, {equal, <<"b">>}, {delete, <<"c">>}])),
+    % Simple elimination.
+    ?assertEqual([{delete, <<"abc">>}, {insert, <<"b">>}], 
+        cleanup_semantic([{delete, <<"a">>}, {equal, <<"b">>}, {delete, <<"c">>}])),
 
-    % % Multiple eliminations.
-    % ?assertEqual([{delete, <<"AB_AB">>}, {insert, <<"1A2_1A2">>}], 
-    %     cleanup_semantic([{insert, <<"1">>}, {equal, <<"A">>}, {delete, <<"B">>}, {insert, <<"2">>}, 
-    %         {equal, <<"_">>}, {insert, <<"1">>}, {equal, <<"A">>}, {delete, <<"B">>}, {insert, <<"2">>}])),
+    % Multiple eliminations.
+    ?assertEqual([{delete, <<"AB_AB">>}, {insert, <<"1A2_1A2">>}],
+        cleanup_semantic([{insert, <<"1">>}, {equal, <<"A">>}, {delete, <<"B">>}, {insert, <<"2">>},
+            {equal, <<"_">>}, {insert, <<"1">>}, {equal, <<"A">>}, {delete, <<"B">>}, {insert, <<"2">>}])),
+
+    % Regression test for UTF-8 data loss in cleanup_semantic_overlaps
+    % Ins1 = <<0,32,204,128,0,0>> (size 6, text_size 5)
+    % Ins2 = <<0,0,0,0,0,0,0,0>> (size 8, text_size 8)
+    % Total Dest size 14, text_size 13
+    Diffs = [{delete,<<0,0,0,0,0,0,0,0>>},{insert,<<0,32,204,128,0,0>>},{insert,<<0,0,0,0,0,0,0,0>>}],
+    Cleaned = cleanup_semantic(Diffs),
+    ?assertEqual(diffy:destination_text(Diffs), diffy:destination_text(Cleaned)),
 
     ok.
 
 cleanup_efficiency_prop_test() ->
-    ?assertEqual(true, proper:quickcheck(prop_cleanup_efficiency(), [{numtests, 500}, {to_file, user}])),
+    ?assertEqual(true, proper:quickcheck(prop_cleanup_efficiency(), [{numtests, ?NUM_TESTS}, {to_file, user}])),
+    ok.
+
+cleanup_semantic_prop_test() ->
+    ?assertEqual(true, proper:quickcheck(prop_cleanup_semantic(), [{numtests, ?NUM_TESTS}, {to_file, user}])),
     ok.
 
 random_diffs_prop_test() ->
-    ?assertEqual(true, proper:quickcheck(prop_make_diff(), [{numtests, 500}, {to_file, user}])),
+    ?assertEqual(true, proper:quickcheck(prop_make_diff(), [{numtests, ?NUM_TESTS}, {to_file, user}])),
     ok.
 
 random_inner_diff_prop_test() ->
-    ?assertEqual(true, proper:quickcheck(prop_inner_diff(), [{numtests, 500}, {to_file, user}])),
+    ?assertEqual(true, proper:quickcheck(prop_inner_diff(), [{numtests, ?NUM_TESTS}, {to_file, user}])),
     ok.
 
 cleanup_efficiency_test() ->
@@ -261,7 +323,7 @@ cleanup_efficiency_test() ->
         cleanup_efficiency([{delete, <<"ab">>}, {insert, <<"12">>}, {equal, <<"xyz">>}, {delete, <<"cd">>}, {insert, <<"34">>}])),
 
     % Three-edit elimination
-    ?assertEqual([{insert, <<"12x34">>}, {delete, <<"xcd">>}], 
+    ?assertEqual([{delete, <<"xcd">>}, {insert, <<"12x34">>}], 
         cleanup_efficiency([{insert, <<"12">>}, {equal, <<"x">>}, {delete, <<"cd">>}, {insert, <<"34">>}])),
 
     % Backpass elimination
@@ -320,11 +382,109 @@ diff_test() ->
                             <<"cat mouse dog ">>)),
     ok.
 
+  
+%% Corner cases for diffy:diff_linemode/2, plus one larger diffy:diff/2 input; checks exact ops or text round-trip.
+diff_linemode_corners_test() ->
+    %% Empty inputs.
+    ?assertEqual([], diffy:diff_linemode(<<>>, <<>>)),
+    ?assertEqual([{insert, <<"hello\n">>}], diffy:diff_linemode(<<>>, <<"hello\n">>)),
+    ?assertEqual([{delete, <<"hello\n">>}], diffy:diff_linemode(<<"hello\n">>, <<>>)),
+
+    %% Identical input — single equal op.
+    ?assertEqual([{equal, <<"hello\nworld\n">>}],
+        diffy:diff_linemode(<<"hello\nworld\n">>, <<"hello\nworld\n">>)),
+
+    %% No newline at end of file — last line treated as its own token.
+    ?assertEqual(
+        [{equal, <<"hello\n">>}, {delete, <<"world">>}, {insert, <<"maas">>}],
+        diffy:diff_linemode(<<"hello\nworld">>, <<"hello\nmaas">>)),
+
+    %% Blank lines — exercise is_blankline_start/end and the \n\n pattern.
+    %% The rediff within cleanup_line_diff splits b\n vs c\n at character level.
+    ?assertEqual(
+        [{equal, <<"a\n\n">>}, {delete, <<"b">>}, {insert, <<"c">>}, {equal, <<"\nd\n">>}],
+        diffy:diff_linemode(<<"a\n\nb\nd\n">>, <<"a\n\nc\nd\n">>)),
+
+    %% \r\n line endings — CRLF stays attached to each line. NOTE(review): input has no \r\n\r\n blank line.
+    ?assertEqual(
+        [{equal, <<"hello\r\n">>}, {delete, <<"world\r\n">>}, {insert, <<"maas\r\n">>}],
+        diffy:diff_linemode(<<"hello\r\nworld\r\n">>, <<"hello\r\nmaas\r\n">>)),
+
+    %% Repeated lines — the same line appearing multiple times should reuse the same index.
+    ?assertEqual(
+        [{equal, <<"a\nb\na\n">>}, {insert, <<"b\n">>}],
+        diffy:diff_linemode(<<"a\nb\na\n">>, <<"a\nb\na\nb\n">>)),
+
+    %% Large enough to trigger linemode via compute_diff1 size threshold.
+    %% Build two texts that differ only in one line buried in > 100 chars of context.
+    Prefix = binary:copy(<<"padding line\n">>, 10),
+    Suffix = binary:copy(<<"trailing line\n">>, 10),
+    Text1 = <<Prefix/binary, "old line\n", Suffix/binary>>,
+    Text2 = <<Prefix/binary, "new line\n", Suffix/binary>>,
+    Diffs = diffy:diff(Text1, Text2),
+    %% Source and destination text must be preserved exactly.
+    ?assertEqual(Text1, diffy:source_text(Diffs)),
+    ?assertEqual(Text2, diffy:destination_text(Diffs)),
+    %% Must contain at least one delete and one insert — the changed line.
+    ?assert(lists:any(fun({delete, _}) -> true; (_) -> false end, Diffs)),
+    ?assert(lists:any(fun({insert, _}) -> true; (_) -> false end, Diffs)),
+
+    %% Multi-byte UTF-8 lines — verify encoding survives the linemode round-trip.
+    ?assertEqual(
+        [{equal, <<"héllo\n"/utf8>>}, {delete, <<"wörld\n"/utf8>>}, {insert, <<"wörlt\n"/utf8>>}],
+        diffy:diff_linemode(<<"héllo\nwörld\n"/utf8>>, <<"héllo\nwörlt\n"/utf8>>)),
+
+    %% Two changed lines before an unchanged one (aimed at the cleanup_line_diff rediff); round-trip only.
+    T1 = <<"aaa\nbbb\nccc\n">>,
+    T2 = <<"aab\nbbc\nccc\n">>,
+    RediffDiffs = diffy:diff_linemode(T1, T2),
+    ?assertEqual(T1, diffy:source_text(RediffDiffs)),
+    ?assertEqual(T2, diffy:destination_text(RediffDiffs)),
+
+    ok.
+
+%% Checks that each diffy:diff/3 option gives the same result as applying the matching cleanup by hand.
+diff_options_test() ->
+    A = <<"one two x four five">>,
+    B = <<"one TWO x FOUR five">>,
+    %% Raw diff computed once; the cleanup expectations below are derived from it.
+    Raw = diffy:diff(A, B),
+
+    %% No options — same as diff/2.
+    ?assertEqual(Raw, diffy:diff(A, B, [])),
+
+    %% no_linemode: checked against the known inputs, not against another diff's reconstruction.
+    NoLinemode = diffy:diff(A, B, [no_linemode]),
+    ?assertEqual(A, diffy:source_text(NoLinemode)),
+    ?assertEqual(B, diffy:destination_text(NoLinemode)),
+
+    %% semantic option applies cleanup_semantic to the raw diff.
+    ?assertEqual(diffy:cleanup_semantic(Raw), diffy:diff(A, B, [semantic])),
+
+    %% efficiency option applies cleanup_efficiency to the raw diff.
+    ?assertEqual(diffy:cleanup_efficiency(Raw), diffy:diff(A, B, [efficiency])),
+
+    %% {efficiency, Cost} applies cleanup_efficiency/2 with the given cost.
+    ?assertEqual(diffy:cleanup_efficiency(Raw, 2), diffy:diff(A, B, [{efficiency, 2}])),
+
+    %% Both: semantic first, then efficiency.
+    Both = diffy:cleanup_efficiency(diffy:cleanup_semantic(Raw)),
+    ?assertEqual(Both, diffy:diff(A, B, [semantic, efficiency])),
+
+    %% Order of options in list does not affect cleanup order.
+    ?assertEqual(Both, diffy:diff(A, B, [efficiency, semantic])),
+
+    ok.
+
+
 
 %%
 %% Helpers
 %%
 
+diff_op() ->  % PropEr generator: picks one of the three diff operation tags
+    proper_types:oneof([insert, delete, equal]).
+
 pretty_html(Diffs) ->
     iolist_to_binary(diffy:pretty_html(Diffs)).