From 2632be803cc92173f21278eda1d68a989f758b54 Mon Sep 17 00:00:00 2001 From: Mikhail Ivanov Date: Fri, 30 Sep 2016 18:05:33 +0300 Subject: [PATCH 1/5] Added "escape" option for the advanced display of non-printable and wide characters --- examples/try_me_escape.pl | 59 +++++++++ lib/Data/Printer.pm | 209 +++++++++++++++++++++++++------ t/40-escape.t | 251 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 482 insertions(+), 37 deletions(-) create mode 100755 examples/try_me_escape.pl create mode 100644 t/40-escape.t diff --git a/examples/try_me_escape.pl b/examples/try_me_escape.pl new file mode 100755 index 0000000..7e3becc --- /dev/null +++ b/examples/try_me_escape.pl @@ -0,0 +1,59 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use utf8; +use open qw(:std :utf8); + +# This sample code is available to you so you +# can see Data::Printer working out of the box. +# It can be used as a quick way to test your +# color palette scheme! + +use DDP { + show_unicode => 1, + color => { escaped => 'white', }, +}; + +my $string = "string" # basiclatin + . "\0" # c0controls + . "with" # basiclatin + . "\x{A0}" # latin1 + . "vertical" # basiclatin + . "\x{0B}" # c0controls + . "tabulation," # basiclatin + . "\x{88}" # c1controls (HTS) + . "record" # basiclatin + . "\x{1E}" # c0controls + . "separator, new" # basiclatin + . "\n" # c0controls + . "line and " # basiclatin + . "\x{7F}" # c0controls (DEL) + . "юникод", # multibyte + ; + +print STDERR qq{as is: "$string"\n}; + + +print STDERR "as hex: "; +p $string, + print_escapes => 0, + escape => { c0controls => 'hex', c1controls => 'hex', latin1 => 'hex', multibyte => 'hex' }, +; + +print STDERR "as char: "; +p $string, + print_escapes => 0, + escape => { c0controls => 'char', c1controls => 'char', latin1 => 'char', multibyte => 'char', }, +; + +print STDERR "as picture: "; +p $string, + print_escapes => 0, + escape => { c0controls => 'picture', c1controls => 'picture', latin1 => 'picture', multibyte => 'picture', }, +; + +print STDERR "as picture/hex: "; +p $string, + print_escapes => 0, + escape => { c0controls => 'picture', c1controls => 'hex', latin1 => 'picture', multibyte => 'picture', }, +; \ No newline at end of file diff --git a/lib/Data/Printer.pm b/lib/Data/Printer.pm index 845a52d..c15f548 100644 --- a/lib/Data/Printer.pm +++ b/lib/Data/Printer.pm @@ -117,6 +117,60 @@ my $properties = { _tie => 0, # used internally }; +my %c0controls_char = ( + "\0" => '\0', # NULL (NUL) + "\a" => '\a', # BELL (BEL) + "\b" => '\b', # BACKSPACE (BS) + "\e" => '\e', # ESCAPE (ESC) + "\f" => '\f', # FORM FEED (FF) + "\n" => '\n', # LINE FEED (LF) + "\r" => '\r', # CARRIAGE RETURN (CR) + "\t" => '\t', # CHARACTER TABULATION (HT) +); + +my %c0controls_picture = ( + "\N{U+0000}" => "\N{U+2400}", # NULL (NUL) + "\N{U+0001}" => "\N{U+2401}", # START OF HEADING (SOH) + "\N{U+0002}" => "\N{U+2402}", # START OF TEXT (STX) + "\N{U+0003}" => "\N{U+2403}", # END OF TEXT (ETX) + "\N{U+0004}" => "\N{U+2404}", # END OF TRANSMISSION (EOT) + "\N{U+0005}" => "\N{U+2405}", # ENQUIRY (ENQ) + "\N{U+0006}" => "\N{U+2406}", # ACKNOWLEDGE (ACK) + "\N{U+0007}" => "\N{U+2407}", # BELL (BEL) + "\N{U+0008}" => "\N{U+2408}", # BACKSPACE (BS) + "\N{U+0009}" => "\N{U+2409}", # CHARACTER TABULATION (HT) + "\N{U+000A}" => "\N{U+240A}", # LINE FEED (LF) + "\N{U+000B}" => "\N{U+240B}", # LINE TABULATION (VT) + "\N{U+000C}" => "\N{U+240C}", # FORM FEED (FF) + "\N{U+000D}" => "\N{U+240D}", # CARRIAGE RETURN (CR) + "\N{U+000E}" => "\N{U+240E}", # SHIFT OUT (SO) + "\N{U+000F}" => "\N{U+240F}", # SHIFT IN (SI) + "\N{U+0010}" => "\N{U+2410}", # DATA LINK ESCAPE (DLE) + "\N{U+0011}" => "\N{U+2411}", # DEVICE CONTROL ONE (DC1) + "\N{U+0012}" => "\N{U+2412}", # DEVICE CONTROL TWO (DC2) + "\N{U+0013}" => "\N{U+2413}", # DEVICE CONTROL THREE (DC3) + "\N{U+0014}" => "\N{U+2414}", # DEVICE CONTROL FOUR (DC4) + "\N{U+0015}" => "\N{U+2415}", # NEGATIVE ACKNOWLEDGE (NAK) + "\N{U+0016}" => "\N{U+2416}", # SYNCHRONOUS IDLE (SYN) + "\N{U+0017}" => "\N{U+2417}", # END OF TRANSMISSION BLOCK (ETB) + "\N{U+0018}" => "\N{U+2418}", # CANCEL (CAN) + "\N{U+0019}" => "\N{U+2419}", # END OF MEDIUM (EM) + "\N{U+001A}" => "\N{U+241A}", # SUBSTITUTE (SUB) + "\N{U+001B}" => "\N{U+241B}", # ESCAPE (ESC) + "\N{U+001C}" => "\N{U+241C}", # INFORMATION SEPARATOR FOUR (FS) + "\N{U+001D}" => "\N{U+241D}", # INFORMATION SEPARATOR THREE (GS) + "\N{U+001E}" => "\N{U+241E}", # INFORMATION SEPARATOR TWO (RS) + "\N{U+001F}" => "\N{U+241F}", # INFORMATION SEPARATOR ONE (US) + "\N{U+007F}" => "\N{U+2421}", # DELETE (DEL) +); + +my %basiclatin_picture = ( + "\N{U+0020}" => "\N{U+2420}", # SPACE (SP) +); + +my %latin1_picture = ( + "\N{U+00A0}" => "\N{U+2423}", # SPACE (NBSP) +); sub import { my $class = shift; @@ -378,54 +432,135 @@ sub _is_number { } sub _escape_chars { - my ($str, $orig_color, $p) = @_; + my ( $str, $orig_color, $p ) = @_; - $orig_color = color( $orig_color ); + $orig_color = color($orig_color); my $esc_color = color( $p->{color}{escaped} ); - # if we're escaping everything then we don't need to keep swapping - # colors in and out, and we need to return right away because - # we no longer need to print_escapes - if ($p->{escape_chars} eq 'all') { - return $esc_color - . join('', map { sprintf '\x{%02x}', ord $_ } split //, $str) - . $orig_color + # for backward compatibility + if ( $p->{escape_chars} eq 'all' ) { + $p->{escape}->{c0controls} = 'hex'; + $p->{escape}->{basiclatin} = 'hex'; + $p->{escape}->{c1controls} = 'hex'; + $p->{escape}->{latin1} = 'hex'; + $p->{escape}->{multibyte} = 'hex'; + } + elsif ( $p->{escape_chars} eq 'nonascii' ) { + $p->{escape}->{c1controls} = 'hex'; + $p->{escape}->{latin1} = 'hex'; + $p->{escape}->{multibyte} = 'hex'; + } + elsif ( $p->{escape_chars} eq 'nonlatin1' ) { + $p->{escape}->{multibyte} = 'hex'; } - $str =~ s/\e/$esc_color\\e$orig_color/g if $p->{print_escapes}; - - if ($p->{escape_chars} eq 'nonascii') { - $str =~ s{([^\x{00}-\x{7f}]+)}{ - $esc_color - . (join '', map { sprintf '\x{%02x}', ord $_ } split //, $1) - . $orig_color - }ge; - } elsif ($p->{escape_chars} eq 'nonlatin1') { - $str =~ s{([^\x{00}-\x{ff}]+)}{ - $esc_color - . (join '', map { sprintf '\x{%02x}', ord $_ } split //, $1) . $orig_color - }ge; + # for backward compatibility + if ( $p->{print_escapes} ) { + $p->{escape}->{c0controls} = 'char'; } - if ($p->{print_escapes}) { - my %escaped = ( - "\n" => '\n', - "\r" => '\r', - "\t" => '\t', - "\f" => '\f', - "\b" => '\b', - "\a" => '\a', - ); - foreach my $k ( keys %escaped ) { - $str =~ s/$k/$esc_color$escaped{$k}$orig_color/g; + # color ON/OFF + my $color = 0; + + my @str = map { + my $esc_char = _escape_char( $_, $p ); + + if ($esc_char) { + if ( !$color ) { + $color = 1; + $esc_color . $esc_char; + } + else { + $esc_char; + } } - } - # always escape the null character - $str =~ s/\0/$esc_color\\0$orig_color/g; + else { + if ($color) { + $color = 0; + $orig_color . $_; + } + else { + $_; + } + } + + } split //, $str; + + # if the last character had been colored + push( @str, $orig_color ) if $color; - return $str; + return join( '', @str ); } +sub _escape_char { + my ( $c, $p ) = @_; + + my $n = ord $c; + + # C0 Controls (0..31, 127) (0000..001F, 007F) + if ( ( $n >= 0 and $n <= 31 ) or $n == 127 ) { + if ( $p->{escape}->{c0controls} ) { + if ( $p->{escape}->{c0controls} eq 'hex' ) { + sprintf( '\x{%02x}', $n ); + } + elsif ( $p->{escape}->{c0controls} eq 'char' ) { + $c0controls_char{$c} if ( exists $c0controls_char{$c} ); + } + elsif ( $p->{escape}->{c0controls} eq 'picture' ) { + $c0controls_picture{$c} if ( exists $c0controls_picture{$c} ); + } + } + else { + + # always escape the null character (well, almost always...) + if ( $c eq "\0" ) { + '\0'; + } + } + } + + # Basic Latin (32..126) (0020..007E) + elsif ( $n >= 32 and $n <= 126 ) { + if ( $p->{escape}->{basiclatin} ) { + if ( $p->{escape}->{basiclatin} eq 'hex' ) { + sprintf( '\x{%02x}', $n ); + } + elsif ( $p->{escape}->{basiclatin} eq 'picture' ) { + $basiclatin_picture{$c} if ( exists $basiclatin_picture{$c} ); + } + } + } + + # C1 Controls (128..159) (0080..009F) + elsif ( $n >= 128 and $n <= 159 ) { + if ( $p->{escape}->{c1controls} ) { + if ( $p->{escape}->{c1controls} eq 'hex' ) { + sprintf( '\x{%02x}', $n ); + } + } + } + + # Latin-1 Supplement (160..255) (00A0..00FF) + elsif ( $n >= 160 and $n <= 255 ) { + if ( $p->{escape}->{latin1} ) { + if ( $p->{escape}->{latin1} eq 'hex' ) { + sprintf( '\x{%02x}', $n ); + } + elsif ( $p->{escape}->{latin1} eq 'picture' ) { + $latin1_picture{$c} if ( exists $latin1_picture{$c} ); + } + } + } + + # To multibyte... and beyond! (256...) (0100...) + elsif ( $n >= 256 ) { + if ( $p->{escape}->{multibyte} ) { + if ( $p->{escape}->{multibyte} eq 'hex' ) { + sprintf( '\x{%02x}', $n ); + } + } + } +} sub ARRAY { my ($item, $p) = @_; diff --git a/t/40-escape.t b/t/40-escape.t new file mode 100644 index 0000000..1bfdcc4 --- /dev/null +++ b/t/40-escape.t @@ -0,0 +1,251 @@ +use strict; +use warnings; +use utf8; +use open qw(:std :utf8); + +use Test::More; + +BEGIN { + delete $ENV{ANSI_COLORS_DISABLED}; + delete $ENV{DATAPRINTERRC}; + use File::HomeDir::Test; # avoid user's .dataprinter + use_ok ('Term::ANSIColor'); + use_ok ( + 'Data::Printer', + return_value => 'dump', + colored => 1, + ); +}; + +my $ec = color('bright_red'); +my $oc = color('bright_yellow'); + +my @stuff = ( + + # C0 Controls + { original => "\0", + c0controls => { + hex => $ec . '\x{00}' . $oc, + char => $ec . '\0' . $oc, + picture => $ec . '␀' . $oc, + }, + basiclatin => { + hex => $ec . '\0' . $oc, + char => $ec . '\0' . $oc, + picture => $ec . '\0' . $oc, + }, + c1controls => { + hex => $ec . '\0' . $oc, + char => $ec . '\0' . $oc, + picture => $ec . '\0' . $oc, + }, + latin1 => { + hex => $ec . '\0' . $oc, + char => $ec . '\0' . $oc, + picture => $ec . '\0' . $oc, + }, + multibyte => { + hex => $ec . '\0' . $oc, + char => $ec . '\0' . $oc, + picture => $ec . '\0' . $oc, + } + }, + { original => "\a", + c0controls => { + hex => $ec . '\x{07}' . $oc, + char => $ec . '\a' . $oc, + picture => $ec . '␇' . $oc, + } + }, + { original => "\b", + c0controls => { + hex => $ec . '\x{08}' . $oc, + char => $ec . '\b' . $oc, + picture => $ec . '␈' . $oc, + } + }, + { original => "\e", + c0controls => { + hex => $ec . '\x{1b}' . $oc, + char => $ec . '\e' . $oc, + picture => $ec . '␛' . $oc, + } + }, + { original => "\f", + c0controls => { + hex => $ec . '\x{0c}' . $oc, + char => $ec . '\f' . $oc, + picture => $ec . '␌' . $oc, + } + }, + { original => "\n", + c0controls => { + hex => $ec . '\x{0a}' . $oc, + char => $ec . '\n' . $oc, + picture => $ec . '␊' . $oc, + } + }, + { original => "\r", + c0controls => { + hex => $ec . '\x{0d}' . $oc, + char => $ec . '\r' . $oc, + picture => $ec . '␍' . $oc, + } + }, + { original => "\t", + c0controls => { + hex => $ec . '\x{09}' . $oc, + char => $ec . '\t' . $oc, + picture => $ec . '␉' . $oc, + } + }, + { original => "\x{1E}", + c0controls => { + hex => $ec . '\x{1e}' . $oc, + picture => $ec . '␞' . $oc, + } + }, + { original => "\x{7F}", + c0controls => { + hex => $ec . '\x{7f}' . $oc, + picture => $ec . '␡' . $oc, + } + }, + + # Basic Latin + { original => " ", + basiclatin => { + hex => $ec . '\x{20}' . $oc, + picture => $ec . '␠' . $oc, + } + }, + { original => "~", + basiclatin => { + hex => $ec . '\x{7e}' . $oc, + } + }, + + # C1 Controls + { original => "\x{80}", # PAD + c1controls => { + hex => $ec . '\x{80}' . $oc, + } + }, + { original => "\x{9F}", # APC + c1controls => { + hex => $ec . '\x{9f}' . $oc, + } + }, + + # Latin-1 Supplement + { original => "\x{A0}", # NBSP + latin1 => { + hex => $ec . '\x{a0}' . $oc, + picture => $ec . '␣' . $oc, + } + }, + { original => "ÿ", + latin1 => { + hex => $ec . '\x{ff}' . $oc, + } + }, + + # Multibyte + { original => "ё", + multibyte => { + hex => $ec . '\x{451}' . $oc, + } + }, + { original => "■", + multibyte => { + hex => $ec . '\x{ffed}' . $oc, + } + }, + { original => "😀", + multibyte => { + hex => $ec . '\x{1f600}' . $oc, + } + }, + + # Mix + { original => "string" # basiclatin + . "\0" # c0controls + . "with" # basiclatin + . "\x{A0}" # latin1 + . "vertical" # basiclatin + . "\x{0B}" # c0controls + . "tabulation," # basiclatin + . "\x{88}" # c1controls (HTS) + . "record" # basiclatin + . "\x{1E}" # c0controls + . "separator, new" # basiclatin + . "\n" # c0controls + . "line and " # basiclatin + . "\x{7F}" # c0controls (DEL) + . "юникод", # multibyte + c0controls => { + hex => "string${ec}\\x{00}${oc}with\x{A0}vertical${ec}\\x{0b}${oc}tabulation,\x{88}record${ec}\\x{1e}${oc}separator, new${ec}\\x{0a}${oc}line and ${ec}\\x{7f}${oc}юникод", + char => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new${ec}\\n${oc}line and \x{7f}юникод", + picture => "string${ec}␀${oc}with\x{A0}vertical${ec}␋${oc}tabulation,\x{88}record${ec}␞${oc}separator, new${ec}␊${oc}line and ${ec}␡${oc}юникод", + }, + basiclatin => { + hex => ${ec} . '\x{73}\x{74}\x{72}\x{69}\x{6e}\x{67}' . '\0' . '\x{77}\x{69}\x{74}\x{68}' + . ${oc} . "\x{A0}" + . ${ec} . '\x{76}\x{65}\x{72}\x{74}\x{69}\x{63}\x{61}\x{6c}' + . ${oc} . "\x{0B}" + . ${ec} . '\x{74}\x{61}\x{62}\x{75}\x{6c}\x{61}\x{74}\x{69}\x{6f}\x{6e}\x{2c}' + . ${oc} . "\x{88}" + . ${ec} . '\x{72}\x{65}\x{63}\x{6f}\x{72}\x{64}' + . ${oc} . "\x{1E}" + . ${ec} . '\x{73}\x{65}\x{70}\x{61}\x{72}\x{61}\x{74}\x{6f}\x{72}\x{2c}\x{20}\x{6e}\x{65}\x{77}' + . ${oc} . "\n" + . ${ec} . '\x{6c}\x{69}\x{6e}\x{65}\x{20}\x{61}\x{6e}\x{64}\x{20}' + . ${oc} . "\x{7f}" . 'юникод', + char => "string${ec}\\0${oc}with\x{A0}vertical\x{0B}tabulation,\x{88}record\x{1E}separator, new\nline and \x{7f}юникод", + picture => "string${ec}\\0${oc}with\x{A0}vertical\x{0B}tabulation,\x{88}record\x{1E}separator,${ec}␠${oc}new\nline${ec}␠${oc}and${ec}␠${oc}\x{7f}юникод", + }, + c1controls => { + hex => "string${ec}\\0${oc}with\x{A0}vertical\x{0B}tabulation,${ec}\\x{88}${oc}record\x{1E}separator, new\nline and \x{7f}юникод", + char => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + picture => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + }, + latin1 => { + hex => "string${ec}\\0${oc}with" + . ${ec} . '\x{a0}' + . ${oc} . "vertical\x{0B}tabulation,\x{88}record\x{1E}separator, new\nline and \x{7f}юникод", + char => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + picture => "string${ec}\\0${oc}with${ec}␣${oc}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + }, + multibyte => { + hex => "string${ec}\\0${oc}with\x{A0}vertical\x{0B}tabulation,\x{88}record\x{1E}separator, new\nline and \x{7f}" + . ${ec} . '\x{44e}\x{43d}\x{438}\x{43a}\x{43e}\x{434}' + . ${oc}, + char => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + picture => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + }, + }, +); + +for my $range ( qw(c0controls basiclatin c1controls latin1 multibyte) ) { + for my $esc ( qw(hex char picture) ) { + + foreach my $item (@stuff) { + + my $printed = join('', map { sprintf( '\x{%02x}', ord $_) } split //, $item->{original} ); + my $colored = $item->{$range}->{$esc} || $item->{original}; + + is( + p( $item->{original}, escape => {$range => $esc} ), + color('reset') + . '"' + . color('bright_yellow') + . $colored + . color('reset') + . '"', + qq{$range to $esc for "$printed"} + ); + } + } +} + +done_testing; From a3b863b31aa5ab9d369651695f6bd66ae3a0e83f Mon Sep 17 00:00:00 2001 From: Mikhail Ivanov Date: Fri, 30 Sep 2016 18:05:33 +0300 Subject: [PATCH 2/5] Added "escape" option for the advanced display of non-printable and wide characters --- examples/try_me_escape.pl | 59 +++++++++ lib/Data/Printer.pm | 209 +++++++++++++++++++++++++------ t/40-escape.t | 251 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 482 insertions(+), 37 deletions(-) create mode 100755 examples/try_me_escape.pl create mode 100644 t/40-escape.t diff --git a/examples/try_me_escape.pl b/examples/try_me_escape.pl new file mode 100755 index 0000000..7e3becc --- /dev/null +++ b/examples/try_me_escape.pl @@ -0,0 +1,59 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use utf8; +use open qw(:std :utf8); + +# This sample code is available to you so you +# can see Data::Printer working out of the box. +# It can be used as a quick way to test your +# color palette scheme! + +use DDP { + show_unicode => 1, + color => { escaped => 'white', }, +}; + +my $string = "string" # basiclatin + . "\0" # c0controls + . "with" # basiclatin + . "\x{A0}" # latin1 + . "vertical" # basiclatin + . "\x{0B}" # c0controls + . "tabulation," # basiclatin + . "\x{88}" # c1controls (HTS) + . "record" # basiclatin + . "\x{1E}" # c0controls + . "separator, new" # basiclatin + . "\n" # c0controls + . "line and " # basiclatin + . "\x{7F}" # c0controls (DEL) + . "юникод", # multibyte + ; + +print STDERR qq{as is: "$string"\n}; + + +print STDERR "as hex: "; +p $string, + print_escapes => 0, + escape => { c0controls => 'hex', c1controls => 'hex', latin1 => 'hex', multibyte => 'hex' }, +; + +print STDERR "as char: "; +p $string, + print_escapes => 0, + escape => { c0controls => 'char', c1controls => 'char', latin1 => 'char', multibyte => 'char', }, +; + +print STDERR "as picture: "; +p $string, + print_escapes => 0, + escape => { c0controls => 'picture', c1controls => 'picture', latin1 => 'picture', multibyte => 'picture', }, +; + +print STDERR "as picture/hex: "; +p $string, + print_escapes => 0, + escape => { c0controls => 'picture', c1controls => 'hex', latin1 => 'picture', multibyte => 'picture', }, +; \ No newline at end of file diff --git a/lib/Data/Printer.pm b/lib/Data/Printer.pm index 845a52d..c15f548 100644 --- a/lib/Data/Printer.pm +++ b/lib/Data/Printer.pm @@ -117,6 +117,60 @@ my $properties = { _tie => 0, # used internally }; +my %c0controls_char = ( + "\0" => '\0', # NULL (NUL) + "\a" => '\a', # BELL (BEL) + "\b" => '\b', # BACKSPACE (BS) + "\e" => '\e', # ESCAPE (ESC) + "\f" => '\f', # FORM FEED (FF) + "\n" => '\n', # LINE FEED (LF) + "\r" => '\r', # CARRIAGE RETURN (CR) + "\t" => '\t', # CHARACTER TABULATION (HT) +); + +my %c0controls_picture = ( + "\N{U+0000}" => "\N{U+2400}", # NULL (NUL) + "\N{U+0001}" => "\N{U+2401}", # START OF HEADING (SOH) + "\N{U+0002}" => "\N{U+2402}", # START OF TEXT (STX) + "\N{U+0003}" => "\N{U+2403}", # END OF TEXT (ETX) + "\N{U+0004}" => "\N{U+2404}", # END OF TRANSMISSION (EOT) + "\N{U+0005}" => "\N{U+2405}", # ENQUIRY (ENQ) + "\N{U+0006}" => "\N{U+2406}", # ACKNOWLEDGE (ACK) + "\N{U+0007}" => "\N{U+2407}", # BELL (BEL) + "\N{U+0008}" => "\N{U+2408}", # BACKSPACE (BS) + "\N{U+0009}" => "\N{U+2409}", # CHARACTER TABULATION (HT) + "\N{U+000A}" => "\N{U+240A}", # LINE FEED (LF) + "\N{U+000B}" => "\N{U+240B}", # LINE TABULATION (VT) + "\N{U+000C}" => "\N{U+240C}", # FORM FEED (FF) + "\N{U+000D}" => "\N{U+240D}", # CARRIAGE RETURN (CR) + "\N{U+000E}" => "\N{U+240E}", # SHIFT OUT (SO) + "\N{U+000F}" => "\N{U+240F}", # SHIFT IN (SI) + "\N{U+0010}" => "\N{U+2410}", # DATA LINK ESCAPE (DLE) + "\N{U+0011}" => "\N{U+2411}", # DEVICE CONTROL ONE (DC1) + "\N{U+0012}" => "\N{U+2412}", # DEVICE CONTROL TWO (DC2) + "\N{U+0013}" => "\N{U+2413}", # DEVICE CONTROL THREE (DC3) + "\N{U+0014}" => "\N{U+2414}", # DEVICE CONTROL FOUR (DC4) + "\N{U+0015}" => "\N{U+2415}", # NEGATIVE ACKNOWLEDGE (NAK) + "\N{U+0016}" => "\N{U+2416}", # SYNCHRONOUS IDLE (SYN) + "\N{U+0017}" => "\N{U+2417}", # END OF TRANSMISSION BLOCK (ETB) + "\N{U+0018}" => "\N{U+2418}", # CANCEL (CAN) + "\N{U+0019}" => "\N{U+2419}", # END OF MEDIUM (EM) + "\N{U+001A}" => "\N{U+241A}", # SUBSTITUTE (SUB) + "\N{U+001B}" => "\N{U+241B}", # ESCAPE (ESC) + "\N{U+001C}" => "\N{U+241C}", # INFORMATION SEPARATOR FOUR (FS) + "\N{U+001D}" => "\N{U+241D}", # INFORMATION SEPARATOR THREE (GS) + "\N{U+001E}" => "\N{U+241E}", # INFORMATION SEPARATOR TWO (RS) + "\N{U+001F}" => "\N{U+241F}", # INFORMATION SEPARATOR ONE (US) + "\N{U+007F}" => "\N{U+2421}", # DELETE (DEL) +); + +my %basiclatin_picture = ( + "\N{U+0020}" => "\N{U+2420}", # SPACE (SP) +); + +my %latin1_picture = ( + "\N{U+00A0}" => "\N{U+2423}", # SPACE (NBSP) +); sub import { my $class = shift; @@ -378,54 +432,135 @@ sub _is_number { } sub _escape_chars { - my ($str, $orig_color, $p) = @_; + my ( $str, $orig_color, $p ) = @_; - $orig_color = color( $orig_color ); + $orig_color = color($orig_color); my $esc_color = color( $p->{color}{escaped} ); - # if we're escaping everything then we don't need to keep swapping - # colors in and out, and we need to return right away because - # we no longer need to print_escapes - if ($p->{escape_chars} eq 'all') { - return $esc_color - . join('', map { sprintf '\x{%02x}', ord $_ } split //, $str) - . $orig_color + # for backward compatibility + if ( $p->{escape_chars} eq 'all' ) { + $p->{escape}->{c0controls} = 'hex'; + $p->{escape}->{basiclatin} = 'hex'; + $p->{escape}->{c1controls} = 'hex'; + $p->{escape}->{latin1} = 'hex'; + $p->{escape}->{multibyte} = 'hex'; + } + elsif ( $p->{escape_chars} eq 'nonascii' ) { + $p->{escape}->{c1controls} = 'hex'; + $p->{escape}->{latin1} = 'hex'; + $p->{escape}->{multibyte} = 'hex'; + } + elsif ( $p->{escape_chars} eq 'nonlatin1' ) { + $p->{escape}->{multibyte} = 'hex'; } - $str =~ s/\e/$esc_color\\e$orig_color/g if $p->{print_escapes}; - - if ($p->{escape_chars} eq 'nonascii') { - $str =~ s{([^\x{00}-\x{7f}]+)}{ - $esc_color - . (join '', map { sprintf '\x{%02x}', ord $_ } split //, $1) - . $orig_color - }ge; - } elsif ($p->{escape_chars} eq 'nonlatin1') { - $str =~ s{([^\x{00}-\x{ff}]+)}{ - $esc_color - . (join '', map { sprintf '\x{%02x}', ord $_ } split //, $1) . $orig_color - }ge; + # for backward compatibility + if ( $p->{print_escapes} ) { + $p->{escape}->{c0controls} = 'char'; } - if ($p->{print_escapes}) { - my %escaped = ( - "\n" => '\n', - "\r" => '\r', - "\t" => '\t', - "\f" => '\f', - "\b" => '\b', - "\a" => '\a', - ); - foreach my $k ( keys %escaped ) { - $str =~ s/$k/$esc_color$escaped{$k}$orig_color/g; + # color ON/OFF + my $color = 0; + + my @str = map { + my $esc_char = _escape_char( $_, $p ); + + if ($esc_char) { + if ( !$color ) { + $color = 1; + $esc_color . $esc_char; + } + else { + $esc_char; + } } - } - # always escape the null character - $str =~ s/\0/$esc_color\\0$orig_color/g; + else { + if ($color) { + $color = 0; + $orig_color . $_; + } + else { + $_; + } + } + + } split //, $str; + + # if the last character had been colored + push( @str, $orig_color ) if $color; - return $str; + return join( '', @str ); } +sub _escape_char { + my ( $c, $p ) = @_; + + my $n = ord $c; + + # C0 Controls (0..31, 127) (0000..001F, 007F) + if ( ( $n >= 0 and $n <= 31 ) or $n == 127 ) { + if ( $p->{escape}->{c0controls} ) { + if ( $p->{escape}->{c0controls} eq 'hex' ) { + sprintf( '\x{%02x}', $n ); + } + elsif ( $p->{escape}->{c0controls} eq 'char' ) { + $c0controls_char{$c} if ( exists $c0controls_char{$c} ); + } + elsif ( $p->{escape}->{c0controls} eq 'picture' ) { + $c0controls_picture{$c} if ( exists $c0controls_picture{$c} ); + } + } + else { + + # always escape the null character (well, almost always...) + if ( $c eq "\0" ) { + '\0'; + } + } + } + + # Basic Latin (32..126) (0020..007E) + elsif ( $n >= 32 and $n <= 126 ) { + if ( $p->{escape}->{basiclatin} ) { + if ( $p->{escape}->{basiclatin} eq 'hex' ) { + sprintf( '\x{%02x}', $n ); + } + elsif ( $p->{escape}->{basiclatin} eq 'picture' ) { + $basiclatin_picture{$c} if ( exists $basiclatin_picture{$c} ); + } + } + } + + # C1 Controls (128..159) (0080..009F) + elsif ( $n >= 128 and $n <= 159 ) { + if ( $p->{escape}->{c1controls} ) { + if ( $p->{escape}->{c1controls} eq 'hex' ) { + sprintf( '\x{%02x}', $n ); + } + } + } + + # Latin-1 Supplement (160..255) (00A0..00FF) + elsif ( $n >= 160 and $n <= 255 ) { + if ( $p->{escape}->{latin1} ) { + if ( $p->{escape}->{latin1} eq 'hex' ) { + sprintf( '\x{%02x}', $n ); + } + elsif ( $p->{escape}->{latin1} eq 'picture' ) { + $latin1_picture{$c} if ( exists $latin1_picture{$c} ); + } + } + } + + # To multibyte... and beyond! (256...) (0100...) + elsif ( $n >= 256 ) { + if ( $p->{escape}->{multibyte} ) { + if ( $p->{escape}->{multibyte} eq 'hex' ) { + sprintf( '\x{%02x}', $n ); + } + } + } +} sub ARRAY { my ($item, $p) = @_; diff --git a/t/40-escape.t b/t/40-escape.t new file mode 100644 index 0000000..1bfdcc4 --- /dev/null +++ b/t/40-escape.t @@ -0,0 +1,251 @@ +use strict; +use warnings; +use utf8; +use open qw(:std :utf8); + +use Test::More; + +BEGIN { + delete $ENV{ANSI_COLORS_DISABLED}; + delete $ENV{DATAPRINTERRC}; + use File::HomeDir::Test; # avoid user's .dataprinter + use_ok ('Term::ANSIColor'); + use_ok ( + 'Data::Printer', + return_value => 'dump', + colored => 1, + ); +}; + +my $ec = color('bright_red'); +my $oc = color('bright_yellow'); + +my @stuff = ( + + # C0 Controls + { original => "\0", + c0controls => { + hex => $ec . '\x{00}' . $oc, + char => $ec . '\0' . $oc, + picture => $ec . '␀' . $oc, + }, + basiclatin => { + hex => $ec . '\0' . $oc, + char => $ec . '\0' . $oc, + picture => $ec . '\0' . $oc, + }, + c1controls => { + hex => $ec . '\0' . $oc, + char => $ec . '\0' . $oc, + picture => $ec . '\0' . $oc, + }, + latin1 => { + hex => $ec . '\0' . $oc, + char => $ec . '\0' . $oc, + picture => $ec . '\0' . $oc, + }, + multibyte => { + hex => $ec . '\0' . $oc, + char => $ec . '\0' . $oc, + picture => $ec . '\0' . $oc, + } + }, + { original => "\a", + c0controls => { + hex => $ec . '\x{07}' . $oc, + char => $ec . '\a' . $oc, + picture => $ec . '␇' . $oc, + } + }, + { original => "\b", + c0controls => { + hex => $ec . '\x{08}' . $oc, + char => $ec . '\b' . $oc, + picture => $ec . '␈' . $oc, + } + }, + { original => "\e", + c0controls => { + hex => $ec . '\x{1b}' . $oc, + char => $ec . '\e' . $oc, + picture => $ec . '␛' . $oc, + } + }, + { original => "\f", + c0controls => { + hex => $ec . '\x{0c}' . $oc, + char => $ec . '\f' . $oc, + picture => $ec . '␌' . $oc, + } + }, + { original => "\n", + c0controls => { + hex => $ec . '\x{0a}' . $oc, + char => $ec . '\n' . $oc, + picture => $ec . '␊' . $oc, + } + }, + { original => "\r", + c0controls => { + hex => $ec . '\x{0d}' . $oc, + char => $ec . '\r' . $oc, + picture => $ec . '␍' . $oc, + } + }, + { original => "\t", + c0controls => { + hex => $ec . '\x{09}' . $oc, + char => $ec . '\t' . $oc, + picture => $ec . '␉' . $oc, + } + }, + { original => "\x{1E}", + c0controls => { + hex => $ec . '\x{1e}' . $oc, + picture => $ec . '␞' . $oc, + } + }, + { original => "\x{7F}", + c0controls => { + hex => $ec . '\x{7f}' . $oc, + picture => $ec . '␡' . $oc, + } + }, + + # Basic Latin + { original => " ", + basiclatin => { + hex => $ec . '\x{20}' . $oc, + picture => $ec . '␠' . $oc, + } + }, + { original => "~", + basiclatin => { + hex => $ec . '\x{7e}' . $oc, + } + }, + + # C1 Controls + { original => "\x{80}", # PAD + c1controls => { + hex => $ec . '\x{80}' . $oc, + } + }, + { original => "\x{9F}", # APC + c1controls => { + hex => $ec . '\x{9f}' . $oc, + } + }, + + # Latin-1 Supplement + { original => "\x{A0}", # NBSP + latin1 => { + hex => $ec . '\x{a0}' . $oc, + picture => $ec . '␣' . $oc, + } + }, + { original => "ÿ", + latin1 => { + hex => $ec . '\x{ff}' . $oc, + } + }, + + # Multibyte + { original => "ё", + multibyte => { + hex => $ec . '\x{451}' . $oc, + } + }, + { original => "■", + multibyte => { + hex => $ec . '\x{ffed}' . $oc, + } + }, + { original => "😀", + multibyte => { + hex => $ec . '\x{1f600}' . $oc, + } + }, + + # Mix + { original => "string" # basiclatin + . "\0" # c0controls + . "with" # basiclatin + . "\x{A0}" # latin1 + . "vertical" # basiclatin + . "\x{0B}" # c0controls + . "tabulation," # basiclatin + . "\x{88}" # c1controls (HTS) + . "record" # basiclatin + . "\x{1E}" # c0controls + . "separator, new" # basiclatin + . "\n" # c0controls + . "line and " # basiclatin + . "\x{7F}" # c0controls (DEL) + . "юникод", # multibyte + c0controls => { + hex => "string${ec}\\x{00}${oc}with\x{A0}vertical${ec}\\x{0b}${oc}tabulation,\x{88}record${ec}\\x{1e}${oc}separator, new${ec}\\x{0a}${oc}line and ${ec}\\x{7f}${oc}юникод", + char => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new${ec}\\n${oc}line and \x{7f}юникод", + picture => "string${ec}␀${oc}with\x{A0}vertical${ec}␋${oc}tabulation,\x{88}record${ec}␞${oc}separator, new${ec}␊${oc}line and ${ec}␡${oc}юникод", + }, + basiclatin => { + hex => ${ec} . '\x{73}\x{74}\x{72}\x{69}\x{6e}\x{67}' . '\0' . '\x{77}\x{69}\x{74}\x{68}' + . ${oc} . "\x{A0}" + . ${ec} . '\x{76}\x{65}\x{72}\x{74}\x{69}\x{63}\x{61}\x{6c}' + . ${oc} . "\x{0B}" + . ${ec} . '\x{74}\x{61}\x{62}\x{75}\x{6c}\x{61}\x{74}\x{69}\x{6f}\x{6e}\x{2c}' + . ${oc} . "\x{88}" + . ${ec} . '\x{72}\x{65}\x{63}\x{6f}\x{72}\x{64}' + . ${oc} . "\x{1E}" + . ${ec} . '\x{73}\x{65}\x{70}\x{61}\x{72}\x{61}\x{74}\x{6f}\x{72}\x{2c}\x{20}\x{6e}\x{65}\x{77}' + . ${oc} . "\n" + . ${ec} . '\x{6c}\x{69}\x{6e}\x{65}\x{20}\x{61}\x{6e}\x{64}\x{20}' + . ${oc} . "\x{7f}" . 'юникод', + char => "string${ec}\\0${oc}with\x{A0}vertical\x{0B}tabulation,\x{88}record\x{1E}separator, new\nline and \x{7f}юникод", + picture => "string${ec}\\0${oc}with\x{A0}vertical\x{0B}tabulation,\x{88}record\x{1E}separator,${ec}␠${oc}new\nline${ec}␠${oc}and${ec}␠${oc}\x{7f}юникод", + }, + c1controls => { + hex => "string${ec}\\0${oc}with\x{A0}vertical\x{0B}tabulation,${ec}\\x{88}${oc}record\x{1E}separator, new\nline and \x{7f}юникод", + char => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + picture => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + }, + latin1 => { + hex => "string${ec}\\0${oc}with" + . ${ec} . '\x{a0}' + . ${oc} . "vertical\x{0B}tabulation,\x{88}record\x{1E}separator, new\nline and \x{7f}юникод", + char => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + picture => "string${ec}\\0${oc}with${ec}␣${oc}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + }, + multibyte => { + hex => "string${ec}\\0${oc}with\x{A0}vertical\x{0B}tabulation,\x{88}record\x{1E}separator, new\nline and \x{7f}" + . ${ec} . '\x{44e}\x{43d}\x{438}\x{43a}\x{43e}\x{434}' + . ${oc}, + char => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + picture => "string${ec}\\0${oc}with\x{A0}vertical\x{0b}tabulation,\x{88}record\x{1e}separator, new\nline and \x{7f}юникод", + }, + }, +); + +for my $range ( qw(c0controls basiclatin c1controls latin1 multibyte) ) { + for my $esc ( qw(hex char picture) ) { + + foreach my $item (@stuff) { + + my $printed = join('', map { sprintf( '\x{%02x}', ord $_) } split //, $item->{original} ); + my $colored = $item->{$range}->{$esc} || $item->{original}; + + is( + p( $item->{original}, escape => {$range => $esc} ), + color('reset') + . '"' + . color('bright_yellow') + . $colored + . color('reset') + . '"', + qq{$range to $esc for "$printed"} + ); + } + } +} + +done_testing; From 4f01ab76401c8bb8cdda8d5a180370eba54d0acc Mon Sep 17 00:00:00 2001 From: Mikhail Ivanov Date: Fri, 30 Sep 2016 21:39:17 +0300 Subject: [PATCH 3/5] Test "print_escapes" for the string "mixed" is fixed (now colored the entire string instead of each character separately) --- t/30-print_escapes.t | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/t/30-print_escapes.t b/t/30-print_escapes.t index 389d9d0..d451643 100644 --- a/t/30-print_escapes.t +++ b/t/30-print_escapes.t @@ -60,7 +60,6 @@ foreach my $item (@stuff) { $mixed->{original} .= $item->{original}; $mixed->{unescaped} .= $item->{unescaped}; - $mixed->{colored} .= $colored; is( p( $item->{original} ), @@ -74,6 +73,11 @@ foreach my $item (@stuff) { ); } +$mixed->{colored} = color('bright_red') + . $mixed->{unescaped} + . color('bright_yellow') + ; + is( p( $mixed->{original} ), color('reset') From a4bae58242f0dccadc6fb28fc43447ebcbc365a2 Mon Sep 17 00:00:00 2001 From: Mikhail Ivanov Date: Mon, 3 Oct 2016 12:14:42 +0300 Subject: [PATCH 4/5] Removed unnecessary default values --- lib/Data/Printer.pm | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/lib/Data/Printer.pm b/lib/Data/Printer.pm index c15f548..2e98308 100644 --- a/lib/Data/Printer.pm +++ b/lib/Data/Printer.pm @@ -44,8 +44,6 @@ my $properties = { 'show_weak' => 1, 'show_readonly' => 0, 'show_lvalue' => 1, - 'print_escapes' => 0, - 'escape_chars' => 'none', 'quote_keys' => 'auto', 'scalar_quotes' => '"', 'use_prototypes' => 1, @@ -438,20 +436,22 @@ sub _escape_chars { my $esc_color = color( $p->{color}{escaped} ); # for backward compatibility - if ( $p->{escape_chars} eq 'all' ) { - $p->{escape}->{c0controls} = 'hex'; - $p->{escape}->{basiclatin} = 'hex'; - $p->{escape}->{c1controls} = 'hex'; - $p->{escape}->{latin1} = 'hex'; - $p->{escape}->{multibyte} = 'hex'; - } - elsif ( $p->{escape_chars} eq 'nonascii' ) { - $p->{escape}->{c1controls} = 'hex'; - $p->{escape}->{latin1} = 'hex'; - $p->{escape}->{multibyte} = 'hex'; - } - elsif ( $p->{escape_chars} eq 'nonlatin1' ) { - $p->{escape}->{multibyte} = 'hex'; + if ( defined $p->{escape_chars} ) { + if ( $p->{escape_chars} eq 'all' ) { + $p->{escape}->{c0controls} = 'hex'; + $p->{escape}->{basiclatin} = 'hex'; + $p->{escape}->{c1controls} = 'hex'; + $p->{escape}->{latin1} = 'hex'; + $p->{escape}->{multibyte} = 'hex'; + } + elsif ( $p->{escape_chars} eq 'nonascii' ) { + $p->{escape}->{c1controls} = 'hex'; + $p->{escape}->{latin1} = 'hex'; + $p->{escape}->{multibyte} = 'hex'; + } + elsif ( $p->{escape_chars} eq 'nonlatin1' ) { + $p->{escape}->{multibyte} = 'hex'; + } } # for backward compatibility From 9bdc6715d08e7c607e6a4e30a9679ec214b7c792 Mon Sep 17 00:00:00 2001 From: Mikhail Ivanov Date: Wed, 5 Oct 2016 21:22:56 +0300 Subject: [PATCH 5/5] Description of option "escape" is added --- lib/Data/Printer.pm | 63 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/lib/Data/Printer.pm b/lib/Data/Printer.pm index 2e98308..476800f 100644 --- a/lib/Data/Printer.pm +++ b/lib/Data/Printer.pm @@ -1,6 +1,7 @@ package Data::Printer; use strict; use warnings; +use utf8; use Term::ANSIColor qw(color colored); use Scalar::Util; use Sort::Naturally; @@ -1541,6 +1542,15 @@ customization options available, as shown below (with default values): print_escapes => 0, # print non-printable chars as "\n", "\t", etc. escape_chars => 'none', # escape chars into \x{...} form. Values are # "none", "nonascii", "nonlatin1", "all" + + escape => { + c0controls => 'picture', # Escape chars "c0controls" into "picture" form, + multibyte => 'hex', # and "multibyte" into "\x{XX}" form. + # Keys are "c0controls", "basiclatin", + # "c1controls", "latin1" and "multibyte". + # Values are "hex", "char", "picture". + }, + quote_keys => 'auto', # quote hash keys (1 for always, 0 for never). # 'auto' will quote when key is empty/space-only. scalar_quotes => '"', # the quote symbols to enclose scalar values @@ -1584,6 +1594,59 @@ customization options available, as shown below (with default values): Note: setting C to C<0> will also set C and C to C<0>. +=head2 Escaping of characters + +Occasionally there is necessity to display some characters differently from how +they are stored in the string. For example it can be non-printable characters +like newline character (LF) or multibyte characters of which you want to know +what bytes they consist of. + +In this case you can point out which characters and how should be displayed. + +The key 'which characters' is assigned by names of characters blocks from the Unicode table: + +=over 4 + +=item * C - C0 Controls (U+0000..U+001F, U+007F) + +=item * C - Basic Latin (U+0020..U+007E) + +=item * C - C1 Controls (U+0080..U+009F) + +=item * C - Latin-1 Supplement (U+00A0..U+00FF) + +=item * C - All the rest (U+0100...) + +=back + +Value 'how' is assigned by the following names: + +=over 4 + +=item * C - Hexadecimal escape sequence (for example C for the tab character) + +=item * C - Character escape sequence (for example C<\t> for the tab character) + +=item * C - Special Unicode pictures (for example C<␉> for the tab character) + +=back + +You can assign one or several blocks of characters and for each block a way of +displaying can be set. + +For example: + + use Data::Printer { + escape => { + c0controls => 'picture', + multibyte => 'hex' + } + }; + +In this example characters from block C0 Controls will be displayed as special +Unicode picturess and characters witn numbers more than FF will be displayed as +hexadecimal escape sequences. + =head1 FILTERS Data::Printer offers you the ability to use filters to override