#! /usr/local/bin/perl -w
# $Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $
#
# Copyright (c) 2001 Japan Network Information Center.  All rights reserved.
#
# By using this file, you agree to the terms and conditions set forth bellow.
#
#                       LICENSE TERMS AND CONDITIONS
#
# The following License Terms and Conditions apply, unless a different
# license is obtained from Japan Network Information Center ("JPNIC"),
# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
# Chiyoda-ku, Tokyo 101-0047, Japan.
#
# 1. Use, Modification and Redistribution (including distribution of any
#    modified or derived work) in source and/or binary forms is permitted
#    under this License Terms and Conditions.
#
# 2. Redistribution of source code must retain the copyright notices as they
#    appear in each source code file, this License Terms and Conditions.
#
# 3. Redistribution in binary form must reproduce the Copyright Notice,
#    this License Terms and Conditions, in the documentation and/or other
#    materials provided with the distribution.  For the purposes of binary
#    distribution the "Copyright Notice" refers to the following language:
#    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
#
# 4. The name of JPNIC may not be used to endorse or promote products
#    derived from this Software without specific prior written approval of
#    JPNIC.
#
# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
#    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
#    PARTICULAR PURPOSE ARE DISCLAIMED.
#    IN NO EVENT SHALL JPNIC BE LIABLE
#    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
#    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
#    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
#    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
#    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
#

use v5.6.0;             # for pack('U')
use bytes;
use strict;

use lib qw(.);

use SparseMap;
use Getopt::Long;

(my $myid = '$Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $') =~ s/\$([^\$]+)\$/\$-$1-\$/;

# Bit widths handed to SparseMap (BITS => [...]) for each generated table;
# also emitted verbatim as <NAME>_BITS_<n> macros by bits_definition().
my @map_bits = (9, 7, 5);
my @proh_bits = (7, 7, 7);
my @unas_bits = (7, 7, 7);
my @bidi_bits = (9, 7, 5);

# Names accepted by the "% BIDI_TYPE <name>" directive.  The index of a name
# in this list is the integer stored in the bidi map for its code points.
my @bidi_types = ('OTHERS', 'R_AL', 'L');

my $dir = '.';
my @versions = ();

GetOptions('dir=s', \$dir) or usage();
@versions = @ARGV;

print_header();

bits_definition("MAP", @map_bits);
bits_definition("PROH", @proh_bits);
bits_definition("UNAS", @unas_bits);
bits_definition("BIDI", @bidi_bits);

generate_data($_) foreach @versions;

#
# Print the command line synopsis and terminate.
#
sub usage {
    die "Usage: $0 [-dir dir] version..\n";
}

#
# Return true if $version is one of the versions given on the command line.
#
# A "% SAME-AS <version>" directive may only be turned into a set of aliases
# when the version it refers to is generated by this very run.
#
sub is_requested_version {
    my $version = shift;
    return scalar grep { $_ eq $version } @versions;
}

#
# Generate all tables (map, prohibited, unassigned, bidi) for one version.
# The input files are looked up as "$dir/nameprep.$version.<kind>".
#
sub generate_data {
    my $version = shift;
    generate_mapdata($version, "$dir/nameprep.$version.map");
    generate_prohibiteddata($version, "$dir/nameprep.$version.prohibited");
    generate_unassigneddata($version, "$dir/nameprep.$version.unassigned");
    generate_bididata($version, "$dir/nameprep.$version.bidi");
}

#
# Generate mapping data.
#
# Reads $file line by line.  If the very first line is "% SAME-AS <v>" and
# <v> is also being generated, only alias macros are printed.  Otherwise
# every line that is neither blank nor a '#' comment is registered and the
# resulting table is printed.  Dies if $file cannot be opened.
#
sub generate_mapdata {
    my ($version, $file) = @_;

    my $map = SparseMap::Int->new(BITS => [@map_bits],
                                  MAX => 0x110000,
                                  MAPALL => 1,
                                  DEFAULT => 0);
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    my $mapbuf = "\0";  # dummy, so that no real entry gets offset 0
    my %maphash = ();
    while (my $line = <$fh>) {
        if ($. == 1 and $line =~ /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            if (is_requested_version($same_as)) {
                generate_map_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if $line =~ /^\#/;
        next if $line =~ /^\s*$/;
        register_map($map, \$mapbuf, \%maphash, $line);
    }
    close $fh;
    generate_map($version, $map, \$mapbuf);
}

#
# Generate prohibited character data.
#
# Same file handling as generate_mapdata(); data lines are code points or
# code point ranges registered through register_prohibited().
#
sub generate_prohibiteddata {
    my ($version, $file) = @_;

    my $proh = SparseMap::Bit->new(BITS => [@proh_bits],
                                   MAX => 0x110000);
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    while (my $line = <$fh>) {
        if ($. == 1 and $line =~ /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            if (is_requested_version($same_as)) {
                generate_prohibited_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if $line =~ /^\#/;
        next if $line =~ /^\s*$/;
        register_prohibited($proh, $line);
    }
    close $fh;
    generate_prohibited($version, $proh);
}

#
# Generate unassigned codepoint data.
#
# Same file handling as generate_mapdata(); data lines are code points or
# code point ranges registered through register_unassigned().
#
sub generate_unassigneddata {
    my ($version, $file) = @_;

    my $unas = SparseMap::Bit->new(BITS => [@unas_bits],
                                   MAX => 0x110000);
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    while (my $line = <$fh>) {
        if ($. == 1 and $line =~ /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            if (is_requested_version($same_as)) {
                generate_unassigned_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        next if $line =~ /^\#/;
        next if $line =~ /^\s*$/;
        register_unassigned($unas, $line);
    }
    close $fh;
    generate_unassigned($version, $unas);
}

#
# Generate data of bidi "R" or "AL" characters.
#
# In addition to the handling shared with generate_mapdata(), a line of the
# form "% BIDI_TYPE <name>" selects the type (an index into @bidi_types)
# applied to the data lines that follow it; the initial type is index 0.
# Dies on a BIDI_TYPE name that is not listed in @bidi_types.
#
sub generate_bididata {
    my ($version, $file) = @_;

    my $bidi = SparseMap::Int->new(BITS => [@bidi_bits],
                                   MAX => 0x110000);
    open my $fh, '<', $file or die "cannot open $file: $!\n";

    my $type = 0;
    while (my $line = <$fh>) {
        if ($. == 1 and $line =~ /^%\s*SAME-AS\s+(\S+)/) {
            my $same_as = $1;
            if (is_requested_version($same_as)) {
                # Alias the bidi tables, not the unassigned ones.
                generate_bidi_ref($version, $same_as);
                close $fh;
                return;
            }
            next;
        }
        if ($line =~ /^%\s*BIDI_TYPE\s+(\S+)$/) {
            my $type_name = $1;
            my $i;
            for ($i = 0; $i < @bidi_types; $i++) {
                if ($type_name eq $bidi_types[$i]) {
                    $type = $i;
                    last;
                }
            }
            die "unrecognized line: $line" if ($i >= @bidi_types);
            next;
        }
        next if $line =~ /^\#/;
        next if $line =~ /^\s*$/;
        register_bidi($bidi, $type, $line);
    }
    close $fh;

    generate_bidi($version, $bidi);
}

#
# Print the banner comment placed at the top of the generated C source.
#
sub print_header {
    print <<"END";
/* \$Id\$ */
/* $myid */
/*
 * Do not edit this file!
 * This file is generated from NAMEPREP specification.
 */

END
}

#
# Print one "#define <name>_BITS_<n> <value>" line per element of the given
# bit-width list, followed by an empty line.
#
sub bits_definition {
    my $name = shift;
    my @bits = @_;
    my $i = 0;

    foreach my $n (@bits) {
        print "#define ${name}_BITS_$i\t$n\n";
        $i++;
    }
    print "\n";
}

#
# Register one line of a mapping file ("<from>; <to> <to> ...;").
#
#   $map     - SparseMap::Int object receiving code point => buffer offset
#   $bufref  - reference to the byte buffer holding all mapped sequences
#   $hashref - reference to a hash (packed sequence => offset) used to
#              store every distinct sequence only once
#   $line    - the input line
#
# Each sequence is stored as a length byte followed by the code points packed
# as 32-bit little-endian values with trailing NUL bytes removed.  Dies if
# the line does not name exactly one source code point, or if the packed
# sequence does not fit the one-byte length field.
#
sub register_map {
    my ($map, $bufref, $hashref, $line) = @_;

    my ($from, $to) = split /;/, $line;
    $to = '' unless defined $to;        # no ';' at all: treat as empty mapping
    my @fcode = map {hex($_)} split ' ', $from;
    my @tcode = map {hex($_)} split ' ', $to;

    # Validate before touching the buffer.
    die "unrecognized line: $line" if @fcode != 1;

    my $ucs4 = pack('V*', @tcode);
    $ucs4 =~ s/\000+$//;
    die "mapping too long: $line" if length($ucs4) > 255;

    my $offset;
    if (exists $hashref->{$ucs4}) {
        $offset = $hashref->{$ucs4};
    } else {
        $offset = length $$bufref;
        $$bufref .= pack('C', length($ucs4)) . $ucs4;
        $hashref->{$ucs4} = $offset;
    }

    $map->add($fcode[0], $offset);
}

#
# Print the mapping table for $version: the C code returned by the map's
# cprog() method followed by the byte buffer as nameprep_<version>_map_data.
#
sub generate_map {
    my ($version, $map, $bufref) = @_;

    $map->fix();

    print $map->cprog(NAME => "nameprep_${version}_map");
    print "\nstatic const unsigned char nameprep_${version}_map_data[] = \{\n";
    print_uchararray($$bufref);
    print "};\n\n";
}

#
# Print macros aliasing the mapping tables of $version to those of
# $refversion.  (The _imap/_table names are presumably the symbols produced
# by SparseMap's cprog() -- confirm against SparseMap.pm.)
#
sub generate_map_ref {
    my ($version, $refversion) = @_;
    print <<"END";
#define nameprep_${version}_map_imap nameprep_${refversion}_map_imap
#define nameprep_${version}_map_table nameprep_${refversion}_map_table
#define nameprep_${version}_map_data nameprep_${refversion}_map_data

END
}

#
# Print the bytes of a string as a C initializer list, twelve values per
# tab-indented line.
#
sub print_uchararray {
    my @chars = unpack 'C*', $_[0];
    my $i = 0;
    foreach my $v (@chars) {
        if ($i % 12 == 0) {
            print "\n" if $i != 0;
            print "\t";
        }
        printf "%3d, ", $v;
        $i++;
    }
    print "\n";
}

#
# Register one line of a prohibited-character file.
#
sub register_prohibited {
    my $proh = shift;
    register_bitmap($proh, @_);
}

#
# Register one line of an unassigned-codepoint file.
#
sub register_unassigned {
    my $unas = shift;
    register_bitmap($unas, @_);
}

#
# Register one line of a bidi file with the given type index.
#
sub register_bidi {
    my $bidi = shift;
    my $type = shift;
    register_intmap($bidi, $type, @_);
}

#
# Print the prohibited-character table for $version.
#
sub generate_prohibited {
    my ($version, $proh) = @_;
    generate_bitmap($proh, "nameprep_${version}_prohibited");
    print "\n";
}

#
# Print macros aliasing the prohibited-character tables of $version to those
# of $refversion.
#
sub generate_prohibited_ref {
    my ($version, $refversion) = @_;
    print <<"END";
#define nameprep_${version}_prohibited_imap nameprep_${refversion}_prohibited_imap
#define nameprep_${version}_prohibited_bitmap nameprep_${refversion}_prohibited_bitmap

END
}

#
# Print the unassigned-codepoint table for $version.
#
sub generate_unassigned {
    my ($version, $unas) = @_;
    generate_bitmap($unas, "nameprep_${version}_unassigned");
    print "\n";
}

#
# Print macros aliasing the unassigned-codepoint tables of $version to those
# of $refversion.
#
sub generate_unassigned_ref {
    my ($version, $refversion) = @_;
    print <<"END";
#define nameprep_${version}_unassigned_imap nameprep_${refversion}_unassigned_imap
#define nameprep_${version}_unassigned_bitmap nameprep_${refversion}_unassigned_bitmap

END
}

#
# Print the bidi table for $version: the C code returned by the map's
# cprog() method followed by nameprep_<version>_bidi_data, an array with one
# idn_biditype_<name> entry per element of @bidi_types (same order, so the
# integers stored in the map index into it).
#
sub generate_bidi {
    my ($version, $bidi) = @_;

    $bidi->fix();

    print $bidi->cprog(NAME => "nameprep_${version}_bidi");
    print "\n";
    print "static const unsigned char nameprep_${version}_bidi_data[] = \{\n";

    foreach my $type (@bidi_types) {
        printf "\tidn_biditype_%s, \n", lc($type);
    }
    print "};\n\n";
}

#
# Print macros aliasing the bidi tables of $version to those of $refversion.
# The _bidi_data alias is included because generate_bidi() emits that symbol
# for every version that is generated in full.
#
sub generate_bidi_ref {
    my ($version, $refversion) = @_;
    print <<"END";
#define nameprep_${version}_bidi_imap nameprep_${refversion}_bidi_imap
#define nameprep_${version}_bidi_table nameprep_${refversion}_bidi_table
#define nameprep_${version}_bidi_data nameprep_${refversion}_bidi_data

END
}

#
# Add the code point or range at the start of $line ("XXXX" or "XXXX-YYYY",
# hexadecimal) to the bit map $map.  Dies if $line does not start that way.
#
sub register_bitmap {
    my $map = shift;
    my $line = shift;

    # Match the argument explicitly rather than whatever $_ happens to hold.
    $line =~ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/
        or die "unrecognized line: $line";
    my $start = hex($1);
    my $end = defined($2) ? hex($2) : undef;
    if (defined $end) {
        $map->add($start .. $end);
    } else {
        $map->add($start);
    }
}

#
# Map the code point or range at the start of $line ("XXXX" or "XXXX-YYYY",
# hexadecimal) to $value in the integer map $map.  Dies if $line does not
# start that way.
#
sub register_intmap {
    my $map = shift;
    my $value = shift;
    my $line = shift;

    # Match the argument explicitly rather than whatever $_ happens to hold.
    $line =~ /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/
        or die "unrecognized line: $line";
    my $start = hex($1);
    my $end = defined($2) ? hex($2) : $start;
    for (my $i = $start; $i <= $end; $i++) {
        $map->add($i, $value);
    }
}

#
# Finalize the bit map and print the C code returned by its cprog() method
# under the given symbol name.
#
sub generate_bitmap {
    my $map = shift;
    my $name = shift;
    $map->fix();
    #$map->stat();
    print $map->cprog(NAME => $name);
}