/* pcre_extract.c -- compile and exec a PCRE and return captured strings
 * (C) 2002 by Matthias Andree <matthias.andree@gmx.de>
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU Lesser General Public License as
 * published by the Free Software Foundation.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library (look for the COPYING.LGPL file); if
 * not, write to the Free Software Foundation, Inc., 59 Temple Place,
 * Suite 330, Boston, MA 02111-1307 USA
 */

#include <pcre.h>
#include <string.h>
#include <limits.h>
#ifdef TEST
#include <stdio.h>
#endif

#include "config.h"
#include "critmem.h"
#include "ln_log.h"
#include "leafnode.h"
#include "strlcpy.h"

/* Compile the regular expression in "pattern", execute it once against
 * the string in "input", and optionally copy the matching string and
 * the captured substrings into the output vector.
 *
 * The pattern is always compiled with PCRE_CASELESS, PCRE_DOTALL and
 * PCRE_MULTILINE.
 *
 * input:   NUL-terminated subject string.
 * pattern: NUL-terminated regular expression.
 * output:  vector with room for at least num pointers. It may be NULL
 *          if and only if num is zero.
 * num:     number of elements in output.
 *
 * All num elements of output are reset to NULL before the pattern is
 * compiled, so on return every element is either NULL or a string that
 * the caller owns, whatever the return value is.
 *
 * On a match, output[0] receives a copy of the whole matching
 * substring (if num is nonzero), and output[1] ... output[num - 1]
 * receive copies of the first num - 1 captured substrings. If a
 * capturing subpattern did not match, the corresponding output[]
 * element stays NULL; a subpattern that matched the empty string
 * yields an empty string.
 *
 * The return value is the same as that of pcre_exec. Short: 0 if num is
 * too small to hold all captured strings, otherwise the number of
 * captured patterns, where the matching string is always captured, or
 * the negative PCRE error code. The offset vector has room for num + 1
 * pairs, so the return value can be as large as num + 1; never index
 * output[] with more than num elements.
 *
 * If compiling the pattern fails, the condition is logged and -1 is
 * returned; callers cannot tell this apart from a pcre_exec result of
 * -1 (PCRE_ERROR_NOMATCH in pcre.h). Prefer constant, known-good
 * patterns.
 *
 * If num is so large that the size of the offset vector cannot be
 * represented, the function logs this and aborts.
 *
 * You MUST free() the valid parts in your output[] vector yourself. You
 * can use pcre_extract_free (which see) to do this.
 */
int pcre_extract(const char *input, const char *pattern, char **output, size_t num)
{
    const char *errstr;
    int errpos, rc, i;
    const int options = PCRE_CASELESS|PCRE_DOTALL|PCRE_MULTILINE;
    int ovecs;
    int *ovec;
    size_t k;
    pcre *p;

    /* Validate num BEFORE doing arithmetic with it: (num + 1) * 3 must
     * fit into an int (pcre_exec takes an int size) and the byte count
     * of the vector must fit into a size_t. */
    if (num >= INT_MAX / 3
	    || (num + 1) * 3 > (size_t)-1 / sizeof *ovec) {
	ln_log(LNLOG_SERR, LNLOG_CTOP, "pcre_extract: array size too large, aborting.");
	abort();
    }
    ovecs = (int)((num + 1) * 3);

    /* pcre_exec only reports pairs up to the highest-numbered subpattern
     * that matched, so clear everything up front; otherwise trailing
     * elements would be left uninitialized. */
    for (k = 0; k < num; k++)
	output[k] = NULL;

    ovec = (int *)critmalloc((size_t)ovecs * sizeof *ovec, "pcre_extract");

    p = pcre_compile(pattern, options, &errstr, &errpos
#ifdef NEW_PCRE_COMPILE
	    , NULL
#endif
	    );

    if (!p) {
	ln_log(LNLOG_SERR, LNLOG_CTOP, "pcre_extract: cannot compile \"%s\": %s at pos. #%d", pattern, errstr, errpos);
	free(ovec);
	return -1;
    }

    rc = pcre_exec(p, NULL, input, strlen(input), 
#ifdef NEW_PCRE_EXEC
	    0,
#endif
	    0, ovec, ovecs);

    if (rc >= 0) {
	/* rc may be num + 1, hence the additional bound on num. */
	for (i = 0 ; i < (int)num && i < rc ; i++) {
	    const int start = ovec[i*2];
	    const int end = ovec[i*2 + 1];
	    size_t l;

	    if (start < 0) {
		/* subpattern did not take part in the match */
		continue;
	    }

	    /* l may be 0: an empty capture is copied as "". */
	    l = (size_t)(end - start);
	    output[i] = critmalloc(l + 1, "pcre_extract");
	    (void)memcpy(output[i], input + start, l);
	    output[i][l] = '\0';
	}
    }
    free(ovec);
    pcre_free(p);
    return rc;
}

/* Release the strings stored in a vector filled by pcre_extract.
 *
 * vec:   the vector that was passed as "output" to pcre_extract.
 * count: number of leading elements to release, usually the value
 *        returned by pcre_extract. Zero and negative counts are
 *        accepted and make the call a no-op.
 *
 * Every non-NULL element among the first count is free()d and then
 * reset to NULL; NULL elements are skipped.
 *
 * vec must NOT be NULL unless count is zero or negative.
 */
void pcre_extract_free(char **vec, int count)
{
    char **slot;
    char **stop;

    /* nothing to do, and vec may legitimately be NULL here */
    if (count <= 0)
	return;

    stop = vec + count;
    for (slot = vec; slot != stop; ++slot) {
	if (*slot == NULL)
	    continue;
	free(*slot);
	*slot = NULL;
    }
}

#ifdef TEST
/* capacity of the output vector used by the test driver */
#define MAX 30

/* NOTE(review): presumably referenced by the leafnode code this test
 * program is linked against -- confirm before removing. */
int debug = 0;
int verbose = 0;

/* Test pcre_extract capturing.
 *
 * usage: prog string PCRE [num]
 *
 * Runs pcre_extract on "string" with pattern "PCRE", asking for at most
 * num (default and upper limit: MAX) strings, prints the return value
 * and every string that was stored, then releases the strings.
 * Exits with status 1 on a usage error and 0 otherwise.
 */
int main(int argc, char **argv)
{
    char *out[MAX] = { NULL }; /* RATS: ignore */
    int rc, n = MAX;
    int filled;
    int i;

    if (argc < 3 || argc > 4) {
	fprintf(stderr, "usage: %s string PCRE [num]\n", argv[0]);
	exit(1);
    }

    if (argc == 4) {
	n = atoi(argv[3]);
	if (n < 0) {
	    /* a negative value would turn into a huge size_t below */
	    fprintf(stderr, "%s: num must not be negative\n", argv[0]);
	    exit(1);
	}
	if (n > MAX) {
	    /* report the requested value before overwriting it */
	    printf("warning: clamping max from %d to %d\n", n, MAX);
	    n = MAX;
	}
    }

    /* pass the clamped value so that out[] cannot be overrun */
    rc = pcre_extract(argv[1], argv[2], out, (size_t)n);
    printf("pcre_extract returned %d\n", rc);

    /* rc can exceed n (pcre_extract sizes its offset vector for n + 1
     * pairs), but at most n elements of out[] were filled. */
    filled = rc < n ? rc : n;
    for (i = 0; i < filled; i++) {
	printf("substring #%d: \"%s\"\n", i, out[i] ? out[i] : "(NULL)");
    }
    pcre_extract_free(out, filled);

    exit(0);
}
#endif


/* syntax highlighted by Code2HTML, v. 0.9.1 */