# vim: set cindent expandtab ts=4 sw=4: # # Copyright (c) 1998-2005 Chi-Keung Ho. All rights reserved. # # This programe is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # Extmail - a high-performance webmail to maildir # $Id$ package Ext::Utils; use strict; use Exporter; use Fcntl qw(SEEK_CUR :flock); use vars qw(@ISA @EXPORT @EXPORT_OK); @ISA = qw(Exporter); @EXPORT = qw(str2url url2str html_escape reset_working_path get_remoteip str2ncr from_to filename2std untaint nl2br _index _substr _length time_offset txt2link expire_calc human_size txt2html html2txt htmlsanity foldername_ok myceil ); @EXPORT_OK = qw(lock unlock haslock); # Converts a string into URL-encoded format sub str2url { my $rv = $_[0]; $rv =~ s/([^A-Za-z0-9])/sprintf("%%%2.2X", ord($1))/ge; return $rv; } # Converts a URL-encoded string to the original sub url2str { my $rv = $_[0]; $rv =~ s/\+/ /g; # if it's javascript escaped string, we call ucs4_to_utf8 if ($rv =~ /%u([0-9a-fA-F]{4})/) { $rv=~ s/%u([0-9a-fA-F]{4})/ucs4_to_utf8(hex($1))/ge; return $rv; } # XXX here we replace c with C, or perl will complain, but # we should take further research the difference between # c and C $rv =~ s/%(..)/pack("C",hex($1))/ge; return $rv; } # convert UCS4 to UTF8 - from openwebmail # string passed by with javascript escape() will encode CJK char to unicode # like %u5B78%u9577, this is used to turn %u.... back to the CJK char # eg: $str=~ s/%u([0-9a-fA-F]{4})/ucs4_to_utf8(hex($1))/ge; sub ucs4_to_utf8 { my $val=$_[0]; my $c; if ($val < 0x7f){ #0000-007f $c .= chr($val); } elsif ($val < 0x800) { #0080-0800 $c .= chr(0xC0 | ($val / 64)); $c .= chr(0x80 | ($val % 64)); } else { #0800-ffff $c .= chr(0xe0 | (($val / 64) / 64)); $c .= chr(0x80 | (($val / 64) % 64)); $c .= chr(0x80 | ($val % 64)); } } # get remote client ipv4 address sub get_remoteip { $ENV{REMOTE_ADDR}; } # html_escape - to escape from special HTML tags sub html_escape { my $s = $_[0]; my $o = $_[1] || 'NO_AMP'; return $s unless($s); $s =~ s!&!&!g unless ($o eq 'NO_AMP'); $s =~ s!'!'!g; $s =~ s!"!"!g unless ($o eq 'NO_QUOTES'); $s =~ s!>!>!g unless ($o eq 'NO_QUOTE'); $s =~ s!\n!g; $s; } sub human_size { my $s = $_[0]; if($s<1024) { return sprintf("%0.2f", $s/1024)."K"; }elsif($s<1024*1024) { return sprintf("%0.1f", $s/1024)."K"; }else { return sprintf("%0.1f", $s/(1024*1024))."M"; } } sub myceil { my $num = shift; return 0 if ($num ==0); return int($num)+1 if ($num =~ /\./); $num; } # html or text filter # # how to convert a text to html format ? # # step1: escape &, ', "", >, <, \r*\n # step2: special convert space to # step3: do txt2link conversion (with care) # step4: convert space back to sub txt2html { my $str = shift; my %opt = @_; if ($opt{html_escape}) { $str = html_escape($str, 'NO_SPACE'); } if ($opt{txt2link}) { $str = txt2link($str); $str =~ s/ / /gs; $str =~ s/$1://$2!gs; $s; } sub html2txt { my $s = shift; $s =~ s!\r*\n!!g; $s =~ s!!\r\n!gi; $s =~ s!

!\r\n\r\n!gi; $s =~ s!!\r\n!gi; $s =~ s!]*?>.*?!!gsi; $s =~ s!<\s*/?\s*br\s*/?\s*>!\r\n!gsi; $s =~ s!<[\!]*?[^<>]*?>!!gsi; $s =~ s!&(quot|\#34);!"!gi; $s =~ s!&(amp|\#38);!&!gi; $s =~ s!&(lt|\#60);!!gi; $s =~ s!&(nbsp|\#160);! !gi; $s =~ s!&(iexcl|\#161);!chr(161)!egi; $s =~ s!&(cent|\#162);!chr(162)!egi; $s =~ s!&(pound|\#163);!chr(163)!egi; $s =~ s!&(copy|\#169);!chr(169)!egi; $s =~ s!&\#(\d+);!chr($1)!egi; $s; } sub htmlsanity { my $html = shift; $html =~ s!.*!!gsi; $html =~ m!]+>(.*)!is; $html = $1 || $html; $html; } # nl2br - convert newline to
\n, same as php4 nl2br() sub nl2br { my $s = $_[0]; $s =~ s!(
)*\r*\n!
\n!g; $s; } sub reset_working_path { my $src=$ENV{SCRIPT_FILENAME}; $src=~s#(.*)/[^\/]+$#$1#; chdir(untaint($src)); undef $src; } sub untaint { local $_ = shift; # this line makes param into a new variable. don't remove it. local $1; # fix perl $1 taintness propagation bug m/^(.*)$/s; return $1; } # str2ncr => string to NCR (Numeric character reference) # need perl 5.8.0 abover or Text::Iconv sub str2ncr { my($chst, $str) = @_; my $nstr = ""; my $cvt = ""; return $str unless($str); $chst = _fixcharset($chst); eval { require Text::Iconv; $cvt = Text::Iconv->new($chst, 'UTF-16'); }; if($@=~/Can't locate/) {# means not found undef $@; # cleanup $nstr = $str; # save a copy eval { require Encode; Encode::from_to($nstr, $chst, 'UTF-16LE'); }; return $str if($@); }elsif($@) { return $str; }else { $nstr = $cvt->convert($str); return $str unless($nstr); # return if nstr null } my @s = split(//,$nstr); my $out = ""; for(my $i=0;$i 0x100(00) my $code = ord($s[$i+1])*256+ord($s[$i]); if($code < 128) { $out .= chr($code); }elsif($code != 65279) { # UTF16 prefix $out .= '&#'.$code.';'; } } $out; } # a small enhanced function to do iconv(3) sub from_to { my ($str, $fchar, $tchar) = @_; my $nstr = ''; my $cvt = ''; return $str unless($str); $fchar = _fixcharset($fchar); $tchar = _fixcharset($tchar); eval { require Text::Iconv; $cvt = Text::Iconv->new($fchar, $tchar); }; if($@=~/Can't locate/) {# means not found undef $@; # cleanup $nstr = $str; # save a copy eval { require Encode; Encode::from_to($nstr, $fchar, $tchar); }; return $str if($@); }elsif($@) { return $str; }else { $nstr = $cvt->convert($str); return $str unless($nstr); # return if nstr null } $nstr; } # normalize filename to a standard one sub filename2std { my $filename = shift; if($filename=~/\\/) { # win32 filename, eg: c:\\doc\\test.gif $filename=~ s#.*\\+([^\\]+)$#$1#; }elsif($filename=~/\//) { # Unix path name, eg: /path/to/test.gif $filename=~ s#.*\/+([^\/]+)$#$1#; }else { $filename=~ s#\s##g; # remove all space } $filename } # it will fix a bug in some c iconv(3) lib, that can't handle # gb2312 + some GBlized big5 characters, wait for new fix sub _fixcharset { my $char = shift; if ($char =~ /^gb2312$/i) { return 'GBK'; } $char; } # a filehandler oriented pseudo index function # _index FILEHANDLE SUBSTR POSITION sub _index { my ($fh, $substr, $pos) = @_; my $BUFSIZ = 65536; my ($buf, $offset); if (defined($pos)) { seek($fh, $pos, 0); $offset = $pos; } else { $offset = tell $fh; } my ($i, $len); while (read($fh,$buf,$BUFSIZ)) { $i = index($buf,$substr); # found it and return if($i != -1) { $offset += $i; return $offset; } # search the last new line $i = rindex($buf, "\n"); $len = length($buf); if ($len == $BUFSIZ && $i<($len-1) && ($i!=-1)) { seek($fh, $i+1-$len, 0); $offset += $i+1; next; } $offset += $BUFSIZ; } -1; } # a filehandler oriented pseudo substr function # _substr FILEHANDLE OFFSET LENGTH sub _substr { my ($fh, $offset, $len) = @_; my $pos = tell $fh; # XXX FIXME the $len if (not defined($len) or $len <=0) { seek($fh, 0, 2); # to the end $len = tell($fh)-$offset+1; } my $buf; seek($fh, $offset, 0); # if the $len set to very large, this read # will eat a lot of memory, be careful read($fh, $buf, $len); seek($fh, $pos, 0); return $buf; } # a filehanlder oriented pseudo leng function # _length FILEHANDLE sub _length { my $fh = $_[0]; my $pos = tell $fh; seek ($fh,0,2); # to the end my $len = tell ($fh) + 1; seek ($fh, $pos, 0); return $len; } # a function similar to tell() for sys* call sub systell { sysseek($_[0], 0, SEEK_CUR) } # locking / unlocking function, simple but useful for # unique process. sub lock { my $fh = $_[0]; flock ($fh, LOCK_EX|LOCK_NB); } sub unlock { my $fh = $_[0]; flock ($fh, LOCK_UN); 1; } sub haslock { my $fh = $_[0]; if (lock($fh)) { unlock($fh); return 0; # means no lock } 1; } # a function to convert timezone to time offset compare to GMT sub time_offset { my $timez = shift; $timez =~ s/ //g; if ($timez =~ /^(-|\+)(\d+)/) { my ($tok, $hour) = ($1, $2); $hour =~ s/0//g; $hour ||= 0; return $tok.$hour * 3600; } 0; } sub expire_calc { my $time = shift; my %multi = ( 's' => 1, 'm' => 60, 'h' => 60*60, 'd' => 60*60*24, 'M' => 60*60*24*30, 'y' => 60*60*24*30*365, ); my $offset; if (!$time || (lc $time eq 'now')) { $offset = 0; } elsif ($time =~ /^\d+$/) { # advoid 6h return! return $time; } elsif ($time =~ /^([+-]?(?:\d+|\d*\.\d*))([mhdMy]?)/) { $offset = ($multi{$2} || 1)*$1; } else { return $time; } return (time + $offset); } sub foldername_ok { my ($dir, $len) = @_; if ($dir =~ /[!~#\$\%\^\&\<\>\?\/\\]/) { # [\^\%\/\#\!\~(\)]/) { return 0; } if (defined $len && $len >0) { # utf8 will use 3 bytes as one charactor return 0 if (length $dir > $len); } 1; } 1;