Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions examples/try_me_escape.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use open qw(:std :utf8);

# This sample code is available to you so you
# can see Data::Printer working out of the box.
# It can be used as a quick way to test your
# color palette scheme!

use DDP {
show_unicode => 1,
color => { escaped => 'white', },
};

my $string = "string" # basiclatin
. "\0" # c0controls
. "with" # basiclatin
. "\x{A0}" # latin1
. "vertical" # basiclatin
. "\x{0B}" # c0controls
. "tabulation," # basiclatin
. "\x{88}" # c1controls (HTS)
. "record" # basiclatin
. "\x{1E}" # c0controls
. "separator, new" # basiclatin
. "\n" # c0controls
. "line and " # basiclatin
. "\x{7F}" # c0controls (DEL)
. "юникод", # multibyte
;

print STDERR qq{as is: "$string"\n};


print STDERR "as hex: ";
p $string,
print_escapes => 0,
escape => { c0controls => 'hex', c1controls => 'hex', latin1 => 'hex', multibyte => 'hex' },
;

print STDERR "as char: ";
p $string,
print_escapes => 0,
escape => { c0controls => 'char', c1controls => 'char', latin1 => 'char', multibyte => 'char', },
;

print STDERR "as picture: ";
p $string,
print_escapes => 0,
escape => { c0controls => 'picture', c1controls => 'picture', latin1 => 'picture', multibyte => 'picture', },
;

print STDERR "as picture/hex: ";
p $string,
print_escapes => 0,
escape => { c0controls => 'picture', c1controls => 'hex', latin1 => 'picture', multibyte => 'picture', },
;
276 changes: 237 additions & 39 deletions lib/Data/Printer.pm
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package Data::Printer;
use strict;
use warnings;
use utf8;
use Term::ANSIColor qw(color colored);
use Scalar::Util;
use Sort::Naturally;
Expand Down Expand Up @@ -44,8 +45,6 @@ my $properties = {
'show_weak' => 1,
'show_readonly' => 0,
'show_lvalue' => 1,
'print_escapes' => 0,
'escape_chars' => 'none',
'quote_keys' => 'auto',
'scalar_quotes' => '"',
'use_prototypes' => 1,
Expand Down Expand Up @@ -117,6 +116,60 @@ my $properties = {
_tie => 0, # used internally
};

my %c0controls_char = (
"\0" => '\0', # NULL (NUL)
"\a" => '\a', # BELL (BEL)
"\b" => '\b', # BACKSPACE (BS)
"\e" => '\e', # ESCAPE (ESC)
"\f" => '\f', # FORM FEED (FF)
"\n" => '\n', # LINE FEED (LF)
"\r" => '\r', # CARRIAGE RETURN (CR)
"\t" => '\t', # CHARACTER TABULATION (HT)
);

my %c0controls_picture = (
"\N{U+0000}" => "\N{U+2400}", # NULL (NUL)
"\N{U+0001}" => "\N{U+2401}", # START OF HEADING (SOH)
"\N{U+0002}" => "\N{U+2402}", # START OF TEXT (STX)
"\N{U+0003}" => "\N{U+2403}", # END OF TEXT (ETX)
"\N{U+0004}" => "\N{U+2404}", # END OF TRANSMISSION (EOT)
"\N{U+0005}" => "\N{U+2405}", # ENQUIRY (ENQ)
"\N{U+0006}" => "\N{U+2406}", # ACKNOWLEDGE (ACK)
"\N{U+0007}" => "\N{U+2407}", # BELL (BEL)
"\N{U+0008}" => "\N{U+2408}", # BACKSPACE (BS)
"\N{U+0009}" => "\N{U+2409}", # CHARACTER TABULATION (HT)
"\N{U+000A}" => "\N{U+240A}", # LINE FEED (LF)
"\N{U+000B}" => "\N{U+240B}", # LINE TABULATION (VT)
"\N{U+000C}" => "\N{U+240C}", # FORM FEED (FF)
"\N{U+000D}" => "\N{U+240D}", # CARRIAGE RETURN (CR)
"\N{U+000E}" => "\N{U+240E}", # SHIFT OUT (SO)
"\N{U+000F}" => "\N{U+240F}", # SHIFT IN (SI)
"\N{U+0010}" => "\N{U+2410}", # DATA LINK ESCAPE (DLE)
"\N{U+0011}" => "\N{U+2411}", # DEVICE CONTROL ONE (DC1)
"\N{U+0012}" => "\N{U+2412}", # DEVICE CONTROL TWO (DC2)
"\N{U+0013}" => "\N{U+2413}", # DEVICE CONTROL THREE (DC3)
"\N{U+0014}" => "\N{U+2414}", # DEVICE CONTROL FOUR (DC4)
"\N{U+0015}" => "\N{U+2415}", # NEGATIVE ACKNOWLEDGE (NAK)
"\N{U+0016}" => "\N{U+2416}", # SYNCHRONOUS IDLE (SYN)
"\N{U+0017}" => "\N{U+2417}", # END OF TRANSMISSION BLOCK (ETB)
"\N{U+0018}" => "\N{U+2418}", # CANCEL (CAN)
"\N{U+0019}" => "\N{U+2419}", # END OF MEDIUM (EM)
"\N{U+001A}" => "\N{U+241A}", # SUBSTITUTE (SUB)
"\N{U+001B}" => "\N{U+241B}", # ESCAPE (ESC)
"\N{U+001C}" => "\N{U+241C}", # INFORMATION SEPARATOR FOUR (FS)
"\N{U+001D}" => "\N{U+241D}", # INFORMATION SEPARATOR THREE (GS)
"\N{U+001E}" => "\N{U+241E}", # INFORMATION SEPARATOR TWO (RS)
"\N{U+001F}" => "\N{U+241F}", # INFORMATION SEPARATOR ONE (US)
"\N{U+007F}" => "\N{U+2421}", # DELETE (DEL)
);

my %basiclatin_picture = (
"\N{U+0020}" => "\N{U+2420}", # SPACE (SP)
);

my %latin1_picture = (
"\N{U+00A0}" => "\N{U+2423}", # SPACE (NBSP)
);

sub import {
my $class = shift;
Expand Down Expand Up @@ -378,54 +431,137 @@ sub _is_number {
}

sub _escape_chars {
my ($str, $orig_color, $p) = @_;
my ( $str, $orig_color, $p ) = @_;

$orig_color = color( $orig_color );
$orig_color = color($orig_color);
my $esc_color = color( $p->{color}{escaped} );

# if we're escaping everything then we don't need to keep swapping
# colors in and out, and we need to return right away because
# we no longer need to print_escapes
if ($p->{escape_chars} eq 'all') {
return $esc_color
. join('', map { sprintf '\x{%02x}', ord $_ } split //, $str)
. $orig_color
# for backward compatibility
if ( defined $p->{escape_chars} ) {
if ( $p->{escape_chars} eq 'all' ) {
$p->{escape}->{c0controls} = 'hex';
$p->{escape}->{basiclatin} = 'hex';
$p->{escape}->{c1controls} = 'hex';
$p->{escape}->{latin1} = 'hex';
$p->{escape}->{multibyte} = 'hex';
}
elsif ( $p->{escape_chars} eq 'nonascii' ) {
$p->{escape}->{c1controls} = 'hex';
$p->{escape}->{latin1} = 'hex';
$p->{escape}->{multibyte} = 'hex';
}
elsif ( $p->{escape_chars} eq 'nonlatin1' ) {
$p->{escape}->{multibyte} = 'hex';
}
}

$str =~ s/\e/$esc_color\\e$orig_color/g if $p->{print_escapes};

if ($p->{escape_chars} eq 'nonascii') {
$str =~ s{([^\x{00}-\x{7f}]+)}{
$esc_color
. (join '', map { sprintf '\x{%02x}', ord $_ } split //, $1)
. $orig_color
}ge;
} elsif ($p->{escape_chars} eq 'nonlatin1') {
$str =~ s{([^\x{00}-\x{ff}]+)}{
$esc_color
. (join '', map { sprintf '\x{%02x}', ord $_ } split //, $1) . $orig_color
}ge;
# for backward compatibility
if ( $p->{print_escapes} ) {
$p->{escape}->{c0controls} = 'char';
}

if ($p->{print_escapes}) {
my %escaped = (
"\n" => '\n',
"\r" => '\r',
"\t" => '\t',
"\f" => '\f',
"\b" => '\b',
"\a" => '\a',
);
foreach my $k ( keys %escaped ) {
$str =~ s/$k/$esc_color$escaped{$k}$orig_color/g;
# color ON/OFF
my $color = 0;

my @str = map {
my $esc_char = _escape_char( $_, $p );

if ($esc_char) {
if ( !$color ) {
$color = 1;
$esc_color . $esc_char;
}
else {
$esc_char;
}
}
}
# always escape the null character
$str =~ s/\0/$esc_color\\0$orig_color/g;
else {
if ($color) {
$color = 0;
$orig_color . $_;
}
else {
$_;
}
}

} split //, $str;

return $str;
# if the last character had been colored
push( @str, $orig_color ) if $color;

return join( '', @str );
}

sub _escape_char {
my ( $c, $p ) = @_;

my $n = ord $c;

# C0 Controls (0..31, 127) (0000..001F, 007F)
if ( ( $n >= 0 and $n <= 31 ) or $n == 127 ) {
if ( $p->{escape}->{c0controls} ) {
if ( $p->{escape}->{c0controls} eq 'hex' ) {
sprintf( '\x{%02x}', $n );
}
elsif ( $p->{escape}->{c0controls} eq 'char' ) {
$c0controls_char{$c} if ( exists $c0controls_char{$c} );
}
elsif ( $p->{escape}->{c0controls} eq 'picture' ) {
$c0controls_picture{$c} if ( exists $c0controls_picture{$c} );
}
}
else {

# always escape the null character (well, almost always...)
if ( $c eq "\0" ) {
'\0';
}
}
}

# Basic Latin (32..126) (0020..007E)
elsif ( $n >= 32 and $n <= 126 ) {
if ( $p->{escape}->{basiclatin} ) {
if ( $p->{escape}->{basiclatin} eq 'hex' ) {
sprintf( '\x{%02x}', $n );
}
elsif ( $p->{escape}->{basiclatin} eq 'picture' ) {
$basiclatin_picture{$c} if ( exists $basiclatin_picture{$c} );
}
}
}

# C1 Controls (128..159) (0080..009F)
elsif ( $n >= 128 and $n <= 159 ) {
if ( $p->{escape}->{c1controls} ) {
if ( $p->{escape}->{c1controls} eq 'hex' ) {
sprintf( '\x{%02x}', $n );
}
}
}

# Latin-1 Supplement (160..255) (00A0..00FF)
elsif ( $n >= 160 and $n <= 255 ) {
if ( $p->{escape}->{latin1} ) {
if ( $p->{escape}->{latin1} eq 'hex' ) {
sprintf( '\x{%02x}', $n );
}
elsif ( $p->{escape}->{latin1} eq 'picture' ) {
$latin1_picture{$c} if ( exists $latin1_picture{$c} );
}
}
}

# To multibyte... and beyond! (256...) (0100...)
elsif ( $n >= 256 ) {
if ( $p->{escape}->{multibyte} ) {
if ( $p->{escape}->{multibyte} eq 'hex' ) {
sprintf( '\x{%02x}', $n );
}
}
}
}

sub ARRAY {
my ($item, $p) = @_;
Expand Down Expand Up @@ -1406,6 +1542,15 @@ customization options available, as shown below (with default values):
print_escapes => 0, # print non-printable chars as "\n", "\t", etc.
escape_chars => 'none', # escape chars into \x{...} form. Values are
# "none", "nonascii", "nonlatin1", "all"

escape => {
c0controls => 'picture', # Escape chars "c0controls" into "picture" form,
multibyte => 'hex', # and "multibyte" into "\x{XX}" form.
# Keys are "c0controls", "basiclatin",
# "c1controls", "latin1" and "multibyte".
# Values are "hex", "char", "picture".
},

quote_keys => 'auto', # quote hash keys (1 for always, 0 for never).
# 'auto' will quote when key is empty/space-only.
scalar_quotes => '"', # the quote symbols to enclose scalar values
Expand Down Expand Up @@ -1449,6 +1594,59 @@ customization options available, as shown below (with default values):

Note: setting C<multiline> to C<0> will also set C<index> and C<indent> to C<0>.

=head2 Escaping of characters

Occasionally there is necessity to display some characters differently from how
they are stored in the string. For example it can be non-printable characters
like newline character (LF) or multibyte characters of which you want to know
what bytes they consist of.

In this case you can point out which characters and how should be displayed.

The key 'which characters' is assigned by names of characters blocks from the Unicode table:

=over 4

=item * C<c0controls> - C0 Controls (U+0000..U+001F, U+007F)

=item * C<basiclatin> - Basic Latin (U+0020..U+007E)

=item * C<c1controls> - C1 Controls (U+0080..U+009F)

=item * C<latin1> - Latin-1 Supplement (U+00A0..U+00FF)

=item * C<multibyte> - All the rest (U+0100...)

=back

Value 'how' is assigned by the following names:

=over 4

=item * C<hex> - Hexadecimal escape sequence (for example C<x{0D}> for the tab character)

=item * C<char> - Character escape sequence (for example C<\t> for the tab character)

=item * C<picture> - Special Unicode pictures (for example C<␉> for the tab character)

=back

You can assign one or several blocks of characters and for each block a way of
displaying can be set.

For example:

use Data::Printer {
escape => {
c0controls => 'picture',
multibyte => 'hex'
}
};

In this example characters from block C0 Controls will be displayed as special
Unicode picturess and characters witn numbers more than FF will be displayed as
hexadecimal escape sequences.

=head1 FILTERS

Data::Printer offers you the ability to use filters to override
Expand Down
Loading