#!/usr/bin/env perl #-*- Mode: perl; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- ############################################### # Please : do not use this functions yet as they need renaming and the order of the # parameters are going to chamge. Chema. ############################################### # Functions for tokenizing files # # Copyright (C) 2001 Ximian, Inc. # # Authors: Chema Celorio # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU Library General Public License as published # by the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Library General Public License for more details. # # You should have received a copy of the GNU Library General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. $SCRIPTSDIR = "@scriptsdir@"; if ($SCRIPTSDIR =~ /^@scriptsdir[@]/) { $SCRIPTSDIR = "."; $DOTIN = ".in"; } require "$SCRIPTSDIR/file.pl$DOTIN"; # -------------------------------- Test --------------------------- sub gst_tokenizer_test { my ($file) = @_; my $in, $out; my $token; $out = \@dummy_arrayt; # Otherwise push @$out doesn't work $in = &gst_tokenize ($file); while (defined ($token = &gst_tokenize_get_token ($in, $out))) { # Replace a token if ("subnet" eq $token) { my $subnet = &gst_tokenize_get_token ($in, $out); &gst_tokenize_replace_token (out, "123\.123\.123\.123"); } # Append tokens if ("option" eq $token) { &gst_tokenize_append_token ($out, " now"); } # Remove and skip till if ("range" eq $token) { &gst_tokenize_remove_till ($out,";"); &gst_tokenize_skip_till ($in, ";"); } } &gst_file_buffer_save ($out, $file . ".new"); printf STDERR "Tested tokenizer with $file. Output in $file.new\n"; printf STDERR "You can :\n"; printf STDERR "diff $file $file.new\n"; } #&gst_tokenizer_test ("/etc/dhcpd.conf"); # -------------------------------- token functions ------------------- sub gst_tokenize_warning { my ($mess) = @_; printf STDERR "Warning ***: $mess\n"; } sub gst_tokenize_error { my ($mess) = @_; printf STDERR "Error ***: $mess\n"; exit; } sub gst_tokenize_verify_token { my ($in, $verifyme, $out) = @_; my $token = &gst_tokenize_get_token ($in, $out); if ($token ne $verifyme) { &gst_tokenize_warning ("Expected \"" . $verifyme . "\" found \"" . $token . "\" at line ?"); } } sub gst_tokenize_get_token { my ($in, $out) = @_; my $token = ""; while ($token eq "") { if (not ($token = shift @$in)) { return undef; } if (defined $out) { push (@$out, $token); } $token =~ s/\s//g; $token =~ s/\#.*//; } return $token; } sub gst_tokenize_get_token_unclean { my ($in, $out) = @_; my $token = ""; if (not ($token = shift @$in)) { return undef; } if (defined $out) { push (@$out, $token); } return $token; } sub gst_tokenize_get_token_till { my ($in, $out, $till_me) = @_; my $resp; while (defined ($token = &gst_tokenize_get_token ($in, $out)) and ($token ne $till_me)){ if (defined $resp) { $resp .= " "; } $resp .= $token; } return $resp; } sub gst_tokenize_replace_token { my ($out, $new_token) = @_; my $token = pop (@$out); my $clean = $token; $clean =~ s/\s//g; $clean =~ s/\#.*//; $token =~ s/$clean/$new_token/; push (@$out, $token); } sub gst_tokenize_append_token { my ($out, $token) = @_; # Fixme, split tokens while adding them so that # we maintain consistency push (@$out, $token); } sub gst_tokenize_remove_token { my ($out, $number) = @_; if (not defined ($number)) { $number = 1; } while ($number gt 0) { pop (@$out); $number --; } } sub gst_tokenize_skip_token { my ($in, $number) = @_; if (not defined ($number)) { $number = 1; } while ($number gt 0) { shift (@$in); $number --; } } sub gst_tokenize_undo { my ($in, $out, $number) = @_; if (not defined ($number)) { $number = 1; } while ($number gt 0) { @$in = (my $token = pop @$out, @$in); $token =~ s/\s//g; $token =~ s/\#.*//; if ($token ne "") { $number --; } } } sub gst_tokenize_advance_till { my ($in, $till_me, $out) = @_; while (defined (my $token = &gst_tokenize_get_token ($in, $out))) { if ($token eq $till_me) { return; } } } sub gst_tokenize_skip_till { my ($in, $till_me) = @_; my $token; while (1) { $token = &gst_tokenize_get_token ($in, $out); &gst_tokenize_remove_token ($out); if ($token eq $till_me) { return; } } } sub gst_tokenize_remove_till { my ($out, $till_me) = @_; my @dont_remove; while (1) { my $removed = pop (@$out); $clean = $removed; $clean =~ s/\s//g; $clean =~ s/\#.*//; if ($clean eq "") { # Don't remove lines we ignore @dont_remove = ($removed, @dont_remove); } if ($clean eq $till_me) { push (@$out, $removed); @$out = (@$out, @dont_remove); return; } } } # -------------------------------- tokenizer ------------------- sub gst_tokenize { my ($file) = @_; my $buf = &gst_file_buffer_load ($file); my @out; my $out_index; while (@$buf) { my $line = $$buf[0]; my $i = 0; if (&gst_ignore_line ($line)) { $out[$out_index++] = $line; shift @$buf; next; } # first skip all the white space/tabs my $line_copy = $line; $line_copy =~ s/^(\s+)//; # save what we removed for later my $leading = $1; # Now, handle token delimiters $line_copy =~ s/\;/ \; /g; # ";" is a token by itself $line_copy =~ s/,/ , /g; # "," is a token by itself # Remove leading white space again, this time don't save it # since we added it ourselves $line_copy =~ s/^(\s+)//; # Get the first token in the string by splitting it my @list = split (' ', $line_copy); my $token = $list[0]; # Add the leading white space that we removed $token = $leading . $token; # Now remove our token from the buffer, to "eat" it from it $$buf[0] =~ s/$token//; # Tambet had this code, which looks right but when i run it # it behaves the same as the one above. It escapes the # pattern before running the s// if (0) { my $pattern = $token; $pattern =~ s/(\W)/\\$1/g; $$buf[0] =~ s/$pattern//; } # This is a special case, if what follows our token is a newline # we want it to be part of the token so that if we delete it we will # also delete the newline. If we don't do this, our files can build up # a lot of newlines. We can't do this above because split will consume # newlines. # # Use the remainder of the last operation where we remove $token from $buf $_ = $'; # We have 2 matches posible # a) whitespace + newline and # b) ws + comment (with an implicit newline) if ((/^\s*\n/) || (/^\s*\#/)) { # If it matches, add it to our token and remove it from $buf $token = $token . $_ ; $$buf[0] =~ s/$_//; } $out[$out_index++] = $token; # If we removed all of the current line, shift buffer # so that in the next iteration $line is not empy if ($$buf[0] eq "") { shift @$buf; next; } } #construc the input file to compare it. if (0) { my $res = ""; @copy = @out; while (@copy) { $res .= $copy[0]; shift @copy; } } return \@out; } 1;