/*
myregex.* - regex() wrapper class
Copyright (C) 1999-2003 Matthew Mueller <donut AT dakotacom.net>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include "myregex.h"
// Cached results of the probing done by regex_init(); each string holds the
// operator text returned by regex_test_op(), or stays empty if none was chosen.
// Beginning-of-word operator, only filled in when the chosen one does not start with '('.
static string regex_match_word_beginning_safe_str;
// Beginning-of-word operator (may be the grouping fallback).
static string regex_match_word_beginning_str;
// End-of-word operator.
static string regex_match_word_end_str;
// Set to true at the end of regex_init() so the probing is only run once.
static bool regex_initialized=0;
// Find the first operator in a NULL-terminated candidate list that works with
// the regex library in use.
//   ops    - NULL-terminated array of candidate operator strings, tried in order.
//   pat    - printf-style format containing a single %s where the operator goes.
//   match1 - first string the resulting regex must match.
//   match2 - second string the resulting regex must match.
// Returns the first candidate whose regex compiles and matches both strings,
// or an empty string if none does.
static string regex_test_op(const char **ops, const char *pat, const char *match1, const char *match2){
	char buf[100];
	for (; *ops; ops++) {
		// Bounded formatting: a candidate that does not fit cannot be tested
		// reliably, so it is skipped rather than overflowing or truncating.
		const int len = snprintf(buf, sizeof(buf), pat, *ops);
		if (len < 0 || len >= (int)sizeof(buf))
			continue;
		try {
			c_regex_nosub rx(buf, REG_EXTENDED);
			if (match1==rx && match2==rx)
				return *ops;
		} catch (const RegexEx &) {
			//ignore any errors here, since they would just mean that op didn't work, so go on and try the next one.
		}
	}
	return "";
}
// Probe the regex library once for usable word-boundary operators and cache
// the results in the file-static strings. Warns via PERROR when no
// beginning-of-word operator free of grouping could be found.
static void regex_init(void) {
	// Candidate lists; regex_test_op() returns the first entry that works.
	const char *begin_candidates[]={"\\<", "\\b", "[[:<:]]", "(^|[^A-Za-z0-9])", NULL};
	const char *end_candidates[]={"\\>", "\\b", "[[:>:]]", "($|[^A-Za-z0-9])", NULL};

	regex_match_word_beginning_str = regex_test_op(begin_candidates, "%sfoo", "a fooa", "fooa");
	const bool have_beginning = !regex_match_word_beginning_str.empty();

	// An operator starting with '(' is the grouping fallback, which is not "safe".
	if (have_beginning && regex_match_word_beginning_str[0]!='(')
		regex_match_word_beginning_safe_str = regex_match_word_beginning_str;

	if (regex_match_word_beginning_safe_str.empty()) {
		PERROR("\nWARNING: your regex library is not sufficient for nget's internal use.");
		if (have_beginning) {
			PERROR("It can match the beginning or ending of a word, but with an ugly hack\n"
					"which can't be used in some cases due to the use of grouping.");
		} else {
			PERROR("It doesn't seem possible to match the beginning or end of a word.\n");
		}
		PERROR("Please consider compiling nget with a better regex library, such as PCRE.\n"
				"If your library does support word boundary matching, please let me know the\n"
				"proper codes for it, so it may be supported in a future version.\n");
	}

	regex_match_word_end_str = regex_test_op(end_candidates, "foo%s", "afoo a", "afoo");

	PDEBUG(DEBUG_MIN,"regex_init regex_match_word_beginning:%s regex_match_word_end:%s",regex_match_word_beginning_str.c_str(), regex_match_word_end_str.c_str());
	regex_initialized=true;
}
// Returns the cached beginning-of-word operator that does not rely on
// grouping (empty if none was found), running regex_init() on first use.
const string& regex_match_word_beginning_safe(void) {
	if (regex_initialized)
		return regex_match_word_beginning_safe_str;
	regex_init();
	return regex_match_word_beginning_safe_str;
}
// Returns the cached beginning-of-word operator (empty if none was found),
// running regex_init() on first use.
const string& regex_match_word_beginning(void) {
	if (regex_initialized)
		return regex_match_word_beginning_str;
	regex_init();
	return regex_match_word_beginning_str;
}
// Returns the cached end-of-word operator (empty if none was found),
// running regex_init() on first use.
const string& regex_match_word_end(void){
	if (regex_initialized)
		return regex_match_word_end_str;
	regex_init();
	return regex_match_word_end_str;
}
// Append a copy of s to buf with every regex metacharacter prefixed by a
// backslash, so the result matches s literally.
//   s   - text to escape.
//   buf - output string; the escaped text is appended, existing content is kept.
void regex_escape_string(const string &s, string &buf){
	static const char special_chars[]="{}()|[]\\.+*?^$";
	for (string::const_iterator cp=s.begin(); cp!=s.end(); ++cp) {
		// strchr() also "finds" the terminating NUL of the set, so an embedded
		// '\0' in s must be excluded explicitly or it would get escaped too.
		if (*cp!='\0' && strchr(special_chars,*cp))
			buf+='\\';//escape special chars
		buf+=*cp;
	}
}
// Compile pattern into the member regex with regcomp().
//   pattern - regular expression text; NULL is treated as the empty pattern.
//   cflags  - flags passed straight through to regcomp().
// Throws RegexEx carrying the regerror() message if compilation fails.
c_regex_base::c_regex_base(const char * pattern,int cflags){
	if (!pattern)
		pattern="";
	const int re_err=regcomp(&regex,pattern,cflags);
	if (re_err){
		char buf[256];
		regerror(re_err,&regex,buf,sizeof(buf));
		// The destructor does not run when a constructor throws, so release here.
		// NOTE(review): POSIX leaves the regex_t contents undefined after a failed
		// regcomp(); confirm regfree() is safe here on all supported libraries.
		regfree(&regex);
		throw RegexEx(Ex_INIT, "regcomp: %s", buf);
	}
}
// Release the compiled regex with regfree().
c_regex_base::~c_regex_base(){
	regfree(&regex);
}
// Compile pattern with REG_NOSUB added to the caller's flags; all the work
// is done by the c_regex_base constructor.
c_regex_nosub::c_regex_nosub(const char * pattern,int cflags)
	: c_regex_base(pattern, cflags|REG_NOSUB)
{
}
// Run regexec() for regex against str and, on success, copy every reported
// submatch into rsub.
//   regex - compiled expression to execute.
//   str   - subject string.
// Returns 0 on a match; otherwise the nonzero regexec() result, in which case
// previously stored submatches are left untouched.
int c_regex_subs::doregex(regex_t *regex,const char *str){
	re_err=regexec(regex,str,nregmatch,regmatch,0);
	if (re_err)
		return re_err;

	freesub();
	rnsub=regex->re_nsub;
	assert(nregmatch>=rnsub);
	if (rnsub<0 || !nregmatch)
		return 0;

	// Slot 0 is the whole match, slots 1..rnsub are the subexpressions.
	rsub=new string[rnsub+1];
	for (int idx=0;idx<=rnsub;++idx){
		const regmatch_t &span=regmatch[idx];
		assert(span.rm_eo>=span.rm_so);
		if (span.rm_so<0)
			continue;//negative start offset: leave this slot as an empty string
		rsub[idx].assign(str+span.rm_so,span.rm_eo-span.rm_so);
	}
	return 0;
}
// Resize the regmatch array to hold num match slots (plus one spare entry).
// Does nothing when the size is unchanged; a non-positive num leaves
// regmatch NULL.
void c_regex_subs::setnregmatch(int num){
	if (nregmatch==num)
		return;

	delete [] regmatch;//deleting a null pointer is a no-op
	nregmatch=num;
	regmatch=(nregmatch>0) ? new regmatch_t[nregmatch+1] : NULL;
}
// Create an empty result holder with room for nregm regmatch slots.
// nregmatch starts at -1 so the setnregmatch() call below sees a size change
// for any other requested value.
c_regex_subs::c_regex_subs(int nregm):nregmatch(-1){
	rsub=NULL;
	rnsub=-1;
	regmatch=NULL;
	setnregmatch(nregm);
}
// Release the stored submatch strings and the regmatch array.
c_regex_subs::~c_regex_subs(){
	freesub();
	delete [] regmatch;//deleting a null pointer is a no-op
}
// Discard any stored submatch strings and reset the holder to its
// "no results" state (rsub NULL, rnsub -1).
void c_regex_subs::freesub(void){
	delete [] rsub;//deleting a null pointer is a no-op
	rsub=NULL;
	rnsub=-1;
}
int c_regex_r::match(const char *str,c_regex_subs*subs){
subs->setnregmatch(nregmatch);
return subs->doregex(®ex,str);
}
// Match str against this regex into a newly allocated c_regex_subs.
// The object is returned whether or not the match succeeded; it is created
// with new here and not freed by this function.
c_regex_subs * c_regex_r::match(const char *str){
	c_regex_subs *result=new c_regex_subs(nregmatch);
	match(str,result);//return code intentionally not inspected here
	return result;
}
// Compile pattern and estimate how many regmatch slots a match will need.
//   pattern - regular expression text; NULL is accepted (the base constructor
//             compiles it as the empty pattern).
//   cflags  - regcomp() flags; with REG_NOSUB no match slots are reserved.
// Throws whatever the c_regex_base constructor throws on a compile failure.
c_regex_r::c_regex_r(const char * pattern,int cflags):c_regex_base(pattern,cflags){
	if (cflags&REG_NOSUB){
		nregmatch=0;
	}else{
		nregmatch=1;
		// The base constructor only replaces NULL in its own copy of the
		// parameter, so guard here before walking the pattern.
		if (pattern){
			for (;*pattern!=0;pattern++){
				if (*pattern=='(')//this could give more regmatches than we need, considering
					nregmatch++;//escaping and such, but its a lot better than a static number
			}
		}
		//pcre needs more matching space for something?
#ifdef HAVE_PKG_pcre
		//nregmatch=nregmatch*15/10;
		nregmatch=nregmatch*2;
#endif
	}
}
// syntax highlighted by Code2HTML, v. 0.9.1