/* pcre_extract.c -- compile and exec a PCRE and return captured strings
* (C) 2002 by Matthias Andree <matthias.andree@gmx.de>
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU Lesser General Public License as
* published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library (look for the COPYING.LGPL file); if
* not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*/
#include <pcre.h>
#include <string.h>
#include <limits.h>
#include <stdlib.h>
#ifdef TEST
#include <stdio.h>
#endif
#include "config.h"
#include "critmem.h"
#include "ln_log.h"
#include "leafnode.h"
#include "strlcpy.h"
/* pcre_extract -- compile "pattern", execute it against "input" and
 * optionally copy the matching string and the captured substrings into
 * the output vector.
 *
 * The pattern is compiled on every call with PCRE_CASELESS, PCRE_DOTALL
 * and PCRE_MULTILINE; the compiled pattern is released before returning.
 *
 * output may be NULL if and only if num is zero.
 *
 * On a successful match output[0] receives a freshly allocated copy of
 * the whole matching substring and output[1], output[2], ... receive
 * copies of the captured substrings.  A subpattern that did not take
 * part in the match yields NULL; a subpattern that matched the empty
 * string yields an allocated "".  Only output[0] .. output[rc - 1] are
 * written (rc being the return value), all other elements are left
 * untouched.
 *
 * Return value (that of pcre_exec run with room for num substrings):
 *   > 0  number of substrings stored, the whole match counting as one;
 *        never larger than num.
 *   0    the pattern matched but num is too small to hold the match and
 *        all captured substrings (always the case for num == 0);
 *        nothing is stored in output[].
 *   < 0  PCRE error code from pcre_exec.
 *
 * WARNING: if compiling the pattern fails, the error is logged and -1 is
 * returned.  PCRE_ERROR_NOMATCH is also -1, so the return value alone
 * does not tell a broken pattern from a non-matching input.  This
 * function is NOT meant for user-accessible patterns.  Use constant
 * patterns.
 *
 * The program aborts if num is so large that the offset vector size
 * cannot be represented.
 *
 * You MUST free() the valid parts in your output[] vector yourself. You
 * can use pcre_extract_free (which see) to do this.
 */
int pcre_extract(const char *input, const char *pattern, char **output, size_t num)
{
    const char *errstr;
    int errpos, rc, i;
    const int options = PCRE_CASELESS|PCRE_DOTALL|PCRE_MULTILINE;
    int slots;          /* offset slots handed to pcre_exec: 3 per string */
    int *ovec;
    pcre *p;

    /* Validate num before any arithmetic is done with it: (num + 1) * 3
     * has to fit into an int, and the byte count of the vector has to
     * fit into a size_t. */
    if (num >= (size_t)INT_MAX / 3
        || num >= (size_t)-1 / (3 * sizeof(int))) {
        ln_log(LNLOG_SERR, LNLOG_CTOP, "pcre_extract: array size too large, aborting.");
        abort();
    }
    slots = (int)num * 3;

    p = pcre_compile(pattern, options, &errstr, &errpos
#ifdef NEW_PCRE_COMPILE
            , NULL
#endif
            );
    if (!p) {
        ln_log(LNLOG_SERR, LNLOG_CTOP, "pcre_extract: cannot compile \"%s\": %s at pos. #%d", pattern, errstr, errpos);
        return -1;
    }

    /* Allocate one spare triple so the request is never zero bytes, but
     * let pcre_exec use only num triples: that way it reports 0 instead
     * of a count that exceeds the caller's output[] vector. */
    ovec = (int *)critmalloc(((size_t)slots + 3) * sizeof(int), "pcre_extract");

    rc = pcre_exec(p, NULL, input, (int)strlen(input),
#ifdef NEW_PCRE_EXEC
            0,
#endif
            0, ovec, slots);

    /* rc <= 0 (no room, no match, error) skips the loop entirely. */
    for (i = 0; i < rc && i < (int)num; i++) {
        const int start = ovec[i * 2];
        const int end = ovec[i * 2 + 1];
        size_t len;

        if (start < 0) {
            /* subpattern did not participate in the match */
            output[i] = NULL;
            continue;
        }
        /* len may be 0: an empty capture becomes an allocated "" */
        len = (size_t)(end - start);
        output[i] = critmalloc(len + 1, "pcre_extract");
        (void)memcpy(output[i], input + start, len);
        output[i][len] = '\0';
    }

    free(ovec);
    pcre_free(p);
    return rc;
}
/* pcre_extract_free -- release the strings pcre_extract stored in a vector
 *
 * vec    the vector that was passed as "output" to pcre_extract
 * count  the value pcre_extract returned; zero or a negative value is
 *        accepted and makes this function a no-op
 *
 * Every non-NULL element among the first count entries is free()d and
 * then reset to NULL.  The vector itself is not freed.
 *
 * vec must NOT be NULL unless count is zero or negative.
 */
void pcre_extract_free(char **vec, int count)
{
    char **slot;
    char **stop;

    /* nothing to release; vec is not looked at in this case */
    if (count <= 0)
        return;

    stop = vec + count;
    for (slot = vec; slot != stop; ++slot) {
        if (*slot == NULL)
            continue;
        free(*slot);
        *slot = NULL;
    }
}
#ifdef TEST
#define MAX 30
/* presumably referenced by the library code this test program links
 * against -- TODO confirm */
int debug = 0;
int verbose = 0;

/* Test driver for pcre_extract capturing.
 *
 * usage: prog string PCRE [num]
 *
 * Runs PCRE against string, asking for at most num substrings (default
 * and upper limit: MAX), prints the return value of pcre_extract and
 * every substring it stored, then frees them.
 *
 * Exits with status 1 on a usage error or if num is not a non-negative
 * decimal integer, with status 0 otherwise.
 */
int main(int argc, char **argv)
{
    char *out[MAX]; /* RATS: ignore */
    int rc, shown, i;
    int n = MAX;

    if (argc < 3 || argc > 4) {
        fprintf(stderr, "usage: %s string PCRE [num]\n", argv[0]);
        exit(1);
    }
    if (argc == 4) {
        char *end;
        long want = strtol(argv[3], &end, 10);

        if (end == argv[3] || *end != '\0' || want < 0) {
            fprintf(stderr, "%s: num must be a non-negative integer\n", argv[0]);
            exit(1);
        }
        if (want > MAX) {
            /* report the requested value before it is overwritten */
            printf("warning: clamping max from %ld to %d\n", want, MAX);
            want = MAX;
        }
        n = (int)want;
    }

    /* pass the clamped value: out[] has room for MAX pointers only */
    rc = pcre_extract(argv[1], argv[2], out, (size_t)n);
    printf("pcre_extract returned %d\n", rc);

    /* defensive clamp: never touch more elements than pcre_extract was
     * asked to fill; a negative rc leaves nothing to show or free */
    shown = rc < n ? rc : n;
    for (i = 0; i < shown; i++) {
        printf("substring #%d: \"%s\"\n", i, out[i] ? out[i] : "(NULL)");
    }
    pcre_extract_free(out, shown);
    exit(0);
}
#endif
/* syntax highlighted by Code2HTML, v. 0.9.1 */