|
@doc false
# Returns `str` as a binary in which each run of bytes that does not decode as
# UTF-8 is replaced by a single "�"; valid code points are copied unchanged.
def to_utf8(str) do
  # Start at offset 0 with an empty pending run and an empty accumulator.
  # `str` is passed twice: once as the input still to be scanned and once as
  # the untouched original that the valid slices are later cut from.
  str
  |> to_utf8(0, 0, str, [])
  |> IO.iodata_to_binary()
end
|
|
|
# The helpers below build `[acc | binary]` improper lists on purpose: the result
# is only ever handed to `IO.iodata_to_binary/1`, which accepts them as iodata.
# This silences the Dialyzer warning about those constructions.
@dialyzer {:no_improper_lists, to_utf8: 5, to_utf8_escape: 5}

# Scans the input while it decodes as UTF-8. `from` and `len` delimit the run
# of valid bytes inside `original` that has been seen but not yet added to `acc`.

# The next code point is valid: grow the pending run by its encoded size.
defp to_utf8(<<codepoint::utf8, tail::binary>>, from, len, original, acc) do
  to_utf8(tail, from, len + utf8_size(codepoint), original, acc)
end

# The next byte does not start a valid code point: flush the pending run and
# switch to skipping. The invalid run begins at `from + len` and is so far
# one byte long.
defp to_utf8(<<_byte, tail::binary>>, from, len, original, acc) do
  pending = binary_part(original, from, len)
  to_utf8_escape(tail, from + len, 1, original, [acc | pending])
end

# End of input: flush whatever valid run is still pending.
defp to_utf8(<<>>, from, len, original, acc) do
  pending = binary_part(original, from, len)
  [acc | pending]
end
|
|
|
# Skips over a run of invalid bytes. `from` is the offset of that run inside
# `original` and `len` is the number of invalid bytes consumed so far.

# A valid code point ends the run: emit a single "�" for the whole run and
# resume normal scanning, with this code point as the new pending valid run.
defp to_utf8_escape(<<codepoint::utf8, tail::binary>>, from, len, original, acc) do
  to_utf8(tail, from + len, utf8_size(codepoint), original, [acc | "�"])
end

# Still invalid: extend the run by one byte and keep skipping.
defp to_utf8_escape(<<_byte, tail::binary>>, from, len, original, acc) do
  to_utf8_escape(tail, from, len + 1, original, acc)
end

# The input ended inside an invalid run: close it with a single "�".
defp to_utf8_escape(<<>>, _from, _len, _original, acc), do: [acc | "�"]
|
|
|
# Byte length of the UTF-8 encoding of code point `cp`: one to four bytes,
# selected by the range the code point falls in. Clauses are tried in order,
# so each guard only needs the upper bound of its range.
@compile {:inline, utf8_size: 1}

defp utf8_size(cp) when cp <= 0x7F do
  1
end

defp utf8_size(cp) when cp <= 0x7FF do
  2
end

defp utf8_size(cp) when cp <= 0xFFFF do
  3
end

defp utf8_size(cp) when cp <= 0x10FFFF do
  4
end
Consider replacing
ch/lib/ch/row_binary.ex
Lines 619 to 658 in 972f8d5