package charnames; use strict; use warnings; our $VERSION = '1.48'; use unicore::Name; # mktables-generated algorithmically-defined names use _charnames (); # The submodule for this where most of the work gets done use bytes (); # for $bytes::hint_bits use re "/aa"; # Everything in here should be ASCII # Translate between Unicode character names and their code points. # This is a wrapper around the submodule C<_charnames>. This design allows # C<_charnames> to be autoloaded to enable use of \N{...}, but requires this # module to be explicitly requested for the functions API. $Carp::Internal{ (__PACKAGE__) } = 1; sub import { shift; ## ignore class name _charnames->import(@_); } # Cache of already looked-up values. This is set to only contain # official values, and user aliases can't override them, so scoping is # not an issue. my %viacode; sub viacode { return _charnames::viacode(@_); } sub vianame { if (@_ != 1) { _charnames::carp "charnames::vianame() expects one name argument"; return () } # Looks up the character name and returns its ordinal if # found, undef otherwise. my $arg = shift; return () unless length $arg; if ($arg =~ /^U\+([0-9a-fA-F]+)$/) { # khw claims that this is poor interface design. The function should # return either a an ord or a chr for all inputs; not be bipolar. But # can't change it because of backward compatibility. New code can use # string_vianame() instead. my $ord = CORE::hex $1; return pack("U", $ord) if $ord <= 255 || ! ((caller 0)[8] & $bytes::hint_bits); _charnames::carp _charnames::not_legal_use_bytes_msg($arg, chr $ord); return; } # The first 1 arg means wants an ord returned; the second that we are in # runtime, and this is the first level routine called from the user return _charnames::lookup_name($arg, 1, 1); } # vianame sub string_vianame { # Looks up the character name and returns its string representation if # found, undef otherwise. if (@_ != 1) { _charnames::carp "charnames::string_vianame() expects one name argument"; return; } my $arg = shift; return () unless length $arg; if ($arg =~ /^U\+([0-9a-fA-F]+)$/) { my $ord = CORE::hex $1; return pack("U", $ord) if $ord <= 255 || ! ((caller 0)[8] & $bytes::hint_bits); _charnames::carp _charnames::not_legal_use_bytes_msg($arg, chr $ord); return; } # The 0 arg means wants a string returned; the 1 arg means that we are in # runtime, and this is the first level routine called from the user return _charnames::lookup_name($arg, 0, 1); } # string_vianame 1; __END__ =encoding utf8 =head1 NAME charnames - access to Unicode character names and named character sequences; also define character names =head1 SYNOPSIS use charnames ':full'; print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n"; print "\N{LATIN CAPITAL LETTER E WITH VERTICAL LINE BELOW}", " is an officially named sequence of two Unicode characters\n"; use charnames ':loose'; print "\N{Greek small-letter sigma}", "can be used to ignore case, underscores, most blanks," "and when you aren't sure if the official name has hyphens\n"; use charnames ':short'; print "\N{greek:Sigma} is an upper-case sigma.\n"; use charnames qw(cyrillic greek); print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n"; use utf8; use charnames ":full", ":alias" => { e_ACUTE => "LATIN SMALL LETTER E WITH ACUTE", mychar => 0xE8000, # Private use area "自転車に乗る人" => "BICYCLIST" }; print "\N{e_ACUTE} is a small letter e with an acute.\n"; print "\N{mychar} allows me to name private use characters.\n"; print "And I can create synonyms in other languages,", " such as \N{自転車に乗る人} for "BICYCLIST (U+1F6B4)\n"; use charnames (); print charnames::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE" printf "%04X", charnames::vianame("GOTHIC LETTER AHSA"); # prints # "10330" print charnames::vianame("LATIN CAPITAL LETTER A"); # prints 65 on # ASCII platforms; # 193 on EBCDIC print charnames::string_vianame("LATIN CAPITAL LETTER A"); # prints "A" =head1 DESCRIPTION Pragma C