/* pcre_extract.c -- compile and exec a PCRE and return captured strings * (C) 2002 by Matthias Andree * * This library is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU Lesser General Public License as * published by the Free Software Foundation. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library (look for the COPYING.LGPL file); if * not, write to the Free Software Foundation, Inc., 59 Temple Place, * Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #ifdef TEST #include #endif #include "config.h" #include "critmem.h" #include "ln_log.h" #include "leafnode.h" #include "strlcpy.h" /* This function compiles and executes the regular expression in * "pattern" against the string in "input" and can optionally put the * matching string and captured substring into the output vector. * * Output may be NULL if and only if num is zero. * * If num is nonzero, the matching substring is strdup()ed into * output[0]. If num is > 1, pcre_extract copies the first num - 1 * captured substrings into output[...]. If a capturing subpattern did * not match, the corresponding output[] element is NULL. * * The return value is the same as that of pcre_exec. Short: 0 if num * too small to hold all captured strings, otherwise the number of * captured patterns, where the matching string is always captured, or * the PCRE error code. * * WARNING: if compiling the pattern fails, the program logs this * condition and aborts. This function is NOT safe to be used with * user-accessible patterns. Use constant patterns. * * You MUST free() the valid parts in your output[] vector yourself. You * can use pcre_extract_free (which see) to do this. */ int pcre_extract(const char *input, const char *pattern, char **output, size_t num) { const char *errstr; int errpos, rc, i; const int options = PCRE_CASELESS|PCRE_DOTALL|PCRE_MULTILINE; int ovecs = (num + 1) * 3; int *ovec; pcre *p; if (num > INT_MAX / 3) { ln_log(LNLOG_SERR, LNLOG_CTOP, "pcre_extract: array size too large, aborting."); abort(); } ovec = (int *)critmalloc(ovecs * sizeof(int), "pcre_extract"); p = pcre_compile(pattern, options, &errstr, &errpos #ifdef NEW_PCRE_COMPILE , NULL #endif ); if (!p) { ln_log(LNLOG_SERR, LNLOG_CTOP, "pcre_extract: cannot compile \"%s\": %s at pos. #%d", pattern, errstr, errpos); free(ovec); return -1; } rc = pcre_exec(p, NULL, input, strlen(input), #ifdef NEW_PCRE_EXEC 0, #endif 0, ovec, ovecs); if (rc >= 0) { for (i = 0 ; i < (int)num && i < rc ; i++) { if (ovec[i*2] < 0) { output[i] = NULL; } else { size_t l = ovec[i*2 + 1] - ovec[i*2]; if (ovec[i*2+1] == ovec[i*2]) { ln_log(LNLOG_SERR, LNLOG_CTOP, "pcre_extract: cannot handle copying NULL string, aborting."); abort(); } output[i] = critmalloc(l + 1, "pcre_extract"); (void)memcpy(output[i], input + ovec[i*2], l); output[i][l] = '\0'; } } } free(ovec); pcre_free(p); return rc; } /* free a vector as allocated by pcre_extract * vec should be the same as output in pcre_extract * count should be the value obtained from pcre_extract * it is safe to pass a negative or zero count. * * vec must NOT be NULL unless count is zero or negative. */ void pcre_extract_free(char **vec, int count) { int i; for (i = 0; i < count; i++) { if (vec[i]) { free(vec[i]); vec[i] = NULL; } } } #ifdef TEST #define MAX 30 int debug = 0; int verbose = 0; /* test pcre_extract capturing. */ int main(int argc, char **argv) { char *out[MAX]; /* RATS: ignore */ int rc, n = MAX; if (argc < 3 || argc > 4) { fprintf(stderr, "usage: %s string PCRE [num]\n", argv[0]); exit(1); } if (argc == 4) { n = atoi(argv[3]); if (n > MAX) { n = MAX; printf("warning: clamping max from %d to %d\n", n, MAX); } } rc = pcre_extract(argv[1], argv[2], out, argc == 4 ? atoi(argv[3]) : MAX); printf("pcre_extract returned %d\n", rc); if (rc >= 0) { int i; for(i = 0; i < rc; i++) { printf("substring #%d: \"%s\"\n", i, out[i] ? out[i] : "(NULL)"); } } pcre_extract_free(out, rc); exit(0); } #endif