/*
PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
Copyright (C) 1999-2006 Markus Mottl
email: markus.mottl@gmail.com
WWW: http://www.ocaml.info
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* $Id: pcre_stubs.c,v 1.23 2005/06/08 23:42:14 mottl Exp $ */
#if defined(_WIN32) && defined(_DLL)
# define PCREextern __declspec(dllexport)
#else
# define PCREextern
#endif
#include <ctype.h>
#include <string.h>
#include <caml/mlvalues.h>
#include <caml/alloc.h>
#include <caml/memory.h>
#include <caml/fail.h>
#include <caml/callback.h>
#include <pcre.h>
typedef const unsigned char *chartables; /* Type of chartable sets */
/* Contents of callout data */
struct cod {
value *v_substrings_p; /* Pointer to substrings matched so far */
value *v_cof_p; /* Pointer to callout function */
value v_exn; /* Possible exception raised by callout function */
};
/* Cache for exceptions */
static value *pcre_exc_Not_found = NULL; /* Exception [Not_found] */
static value *pcre_exc_Partial = NULL; /* Exception [Partial] */
static value *pcre_exc_BadPartial = NULL; /* Exception [BadPartial] */
static value *pcre_exc_BadPattern = NULL; /* Exception [BadPattern] */
static value *pcre_exc_BadUTF8 = NULL; /* Exception [BadUTF8] */
static value *pcre_exc_BadUTF8Offset = NULL; /* Exception [BadUTF8Offset] */
static value *pcre_exc_InternalError = NULL; /* Exception [InternalError] */
static value *pcre_exc_MatchLimit = NULL; /* Exception [MatchLimit] */
static value *pcre_exc_Backtrack = NULL; /* Exception [Backtrack] */
/* Cache for polymorphic variants */
static value var_Start_only; /* Variant [`Start_only] */
static value var_ANCHORED; /* Variant [`ANCHORED] */
static value var_Char; /* Variant [`Char char] */
static value var_Not_studied; /* Variant [`Not_studied] */
static value var_Studied; /* Variant [`Studied] */
static value var_Optimal; /* Variant [`Optimal] */
static value None = Val_int(0);
/* Callout handler */
static int pcre_callout_handler(pcre_callout_block* cb)
{
struct cod *cod = (struct cod *) cb->callout_data;
if (cod != NULL) {
/* Callout is available */
value v_res;
/* Set up parameter array */
value v_callout_data = caml_alloc_small(6, 0);
const value v_substrings = *cod->v_substrings_p;
const int capture_top = cb->capture_top;
int subgroups2 = capture_top << 1;
const int subgroups2_1 = subgroups2 - 1;
const int *ovec_src = cb->offset_vector + subgroups2_1;
long int *ovec_dst = &Field(Field(v_substrings, 1), 0) + subgroups2_1;
/* Copy preliminary substring information */
while (subgroups2--) {
*ovec_dst = Val_int(*ovec_src);
--ovec_src; --ovec_dst;
}
Field(v_callout_data, 0) = Val_int(cb->callout_number);
Field(v_callout_data, 1) = v_substrings;
Field(v_callout_data, 2) = Val_int(cb->start_match);
Field(v_callout_data, 3) = Val_int(cb->current_position);
Field(v_callout_data, 4) = Val_int(capture_top);
Field(v_callout_data, 5) = Val_int(cb->capture_last);
Field(v_callout_data, 6) = Val_int(cb->pattern_position);
Field(v_callout_data, 7) = Val_int(cb->next_item_length);
/* Perform callout */
v_res = callback_exn(*cod->v_cof_p, v_callout_data);
if (Is_exception_result(v_res)) {
/* Callout raised an exception */
const value v_exn = Extract_exception(v_res);
if (Field(v_exn, 0) == *pcre_exc_Backtrack) return 1;
cod->v_exn = v_exn;
return PCRE_ERROR_CALLOUT;
}
}
return 0;
}
/* Fetchs the named OCaml-values + caches them and
calculates + caches the variant hash values */
CAMLprim value pcre_ocaml_init(value unit)
{
pcre_exc_Not_found = caml_named_value("Pcre.Not_found");
pcre_exc_Partial = caml_named_value("Pcre.Partial");
pcre_exc_BadPartial = caml_named_value("Pcre.BadPartial");
pcre_exc_BadPattern = caml_named_value("Pcre.BadPattern");
pcre_exc_BadUTF8 = caml_named_value("Pcre.BadUTF8");
pcre_exc_InternalError = caml_named_value("Pcre.InternalError");
pcre_exc_MatchLimit = caml_named_value("Pcre.MatchLimit");
pcre_exc_Backtrack = caml_named_value("Pcre.Backtrack");
var_Start_only = hash_variant("Start_only");
var_ANCHORED = hash_variant("ANCHORED");
var_Char = hash_variant("Char");
var_Not_studied = hash_variant("Not_studied");
var_Studied = hash_variant("Studied");
var_Optimal = hash_variant("Optimal");
pcre_callout = &pcre_callout_handler;
return Val_unit;
}
/* Finalizing deallocation function for chartable sets */
static void pcre_dealloc_tables(value v_table)
{ (pcre_free)((void *) Field(v_table, 1)); }
/* Finalizing deallocation function for compiled regular expressions */
static void pcre_dealloc_regexp(value v_rex)
{
void *extra = (void *) Field(v_rex, 2);
(pcre_free)((void *) Field(v_rex, 1));
if (extra != NULL) (pcre_free)(extra);
}
/* Raises exceptions which take two arguments */
static void raise_with_two_args(value tag, value arg1, value arg2)
{
value v_exc;
/* Protects tag, arg1 and arg2 from being reclaimed by the garbage
collector when the exception value is allocated */
Begin_roots3(tag, arg1, arg2);
v_exc = caml_alloc_small(3, 0);
Field(v_exc, 0) = tag;
Field(v_exc, 1) = arg1;
Field(v_exc, 2) = arg2;
End_roots();
caml_raise(v_exc);
}
/* Makes OCaml-string from PCRE-version */
CAMLprim value pcre_version_stub(value unit) {
return caml_copy_string((char *) pcre_version());
}
/* Makes compiled regular expression from compilation options, an optional
value of chartables and the pattern string */
CAMLprim value pcre_compile_stub(value v_opt, value v_tables, value v_pat)
{
value v_rex; /* Final result -> value of type [regexp] */
const char *error = NULL; /* pointer to possible error message */
int error_ofs = 0; /* offset in the pattern at which error occurred */
/* If v_tables = [None], then pointer to tables is NULL, otherwise
set it to the appropriate value */
chartables tables =
(v_tables == None) ? NULL : (chartables) Field(Field(v_tables, 0), 1);
/* Compiles the pattern */
pcre *regexp = pcre_compile(String_val(v_pat), Int_val(v_opt), &error,
&error_ofs, tables);
/* Raises appropriate exception [BadPattern] if the pattern could not
be compiled */
if (regexp == NULL) raise_with_two_args(*pcre_exc_BadPattern,
caml_copy_string((char *) error),
Val_int(error_ofs));
/* Finalized value: GC will do a full cycle every 500 regexp allocations
(one regexp consumes in average probably less than 100 bytes ->
maximum of 50000 bytes unreclaimed regexps) */
v_rex = caml_alloc_final(4, pcre_dealloc_regexp, 100, 50000);
/* Field[1]: compiled regular expression (Field[0] is finalizing
function! See above!) */
Field(v_rex, 1) = (value) regexp;
/* Field[2]: extra information about regexp when it has been studied
successfully */
Field(v_rex, 2) = (value) NULL;
/* Field[3]: If 0 -> regexp has not yet been studied
1 -> regexp has already been studied */
Field(v_rex, 3) = 0;
return v_rex;
}
/* Studies a regexp */
CAMLprim value pcre_study_stub(value v_rex)
{
/* If it has not yet been studied */
if (! (int) Field(v_rex, 3)) {
const char *error = NULL;
pcre_extra *extra = pcre_study((pcre *) Field(v_rex, 1), 0, &error);
if (error != NULL) invalid_argument((char *) error);
Field(v_rex, 2) = (value) extra;
Field(v_rex, 3) = Val_int(1);
}
return v_rex;
}
/* Sets a match limit for a regular expression imperatively */
CAMLprim value pcre_set_imp_match_limit_stub(value v_rex, value v_lim){
pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
if (extra == NULL) {
extra = pcre_malloc(sizeof(pcre_extra));
extra->flags = PCRE_EXTRA_MATCH_LIMIT;
Field(v_rex, 2) = (value) extra;
}
else {
unsigned long int *flags_ptr = &extra->flags;
*flags_ptr = PCRE_EXTRA_MATCH_LIMIT | *flags_ptr;
}
extra->match_limit = Int_val(v_lim);
return v_rex;
}
/* Gets the match limit of a regular expression if it exists */
CAMLprim value pcre_get_match_limit_stub(value v_rex){
pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
if (extra == NULL) return None;
if (extra->flags & PCRE_EXTRA_MATCH_LIMIT) {
value lim = Val_int(extra->match_limit);
value res = caml_alloc_small(1, 0);
Field(res, 0) = lim;
return res;
}
return None;
}
/* Performs the call to the pcre_fullinfo function */
static value pcre_fullinfo_stub(value v_rex, int what, void *where)
{
return pcre_fullinfo((pcre *) Field(v_rex, 1), (pcre_extra *) Field(v_rex, 2),
what, where);
}
/* Some stubs for info-functions */
/* Generic macro for getting integer results from pcre_fullinfo */
#define make_int_info(name, option) \
CAMLprim value pcre_##name##_stub(value v_rex) \
{ \
int options; \
const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_##option, &options); \
if (ret != 0) \
raise_with_string(*pcre_exc_InternalError, "pcre_##name##_stub"); \
return Val_int(options); \
}
make_int_info(options, OPTIONS)
make_int_info(size, SIZE)
make_int_info(studysize, STUDYSIZE)
make_int_info(capturecount, CAPTURECOUNT)
make_int_info(backrefmax, BACKREFMAX)
make_int_info(namecount, NAMECOUNT)
make_int_info(nameentrysize, NAMEENTRYSIZE)
CAMLprim value pcre_firstbyte_stub(value v_rex)
{
int firstbyte;
const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTBYTE, &firstbyte);
if (ret != 0) raise_with_string(*pcre_exc_InternalError,
"pcre_firstbyte_stub");
switch (firstbyte) {
case -1 : return var_Start_only; break; /* [`Start_only] */
case -2 : return var_ANCHORED; break; /* [`ANCHORED] */
default :
if (firstbyte < 0 ) /* Should not happen */
raise_with_string(*pcre_exc_InternalError, "pcre_firstbyte_stub");
else {
value v_firstbyte;
/* Allocates the non-constant constructor [`Char of char] and fills
in the appropriate value */
v_firstbyte = caml_alloc_small(2, 0);
Field(v_firstbyte, 0) = var_Char;
Field(v_firstbyte, 1) = Val_int(firstbyte);
return v_firstbyte;
}
}
}
CAMLprim value pcre_firsttable_stub(value v_rex)
{
const unsigned char *ftable;
int ret =
pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTTABLE, (void *) &ftable);
if (ret != 0) raise_with_string(*pcre_exc_InternalError,
"pcre_firsttable_stub");
if (ftable == NULL) return None;
else {
value v_res, v_res_str;
char *ptr;
int i;
Begin_roots1(v_rex);
v_res_str = caml_alloc_string(32);
End_roots();
ptr = String_val(v_res_str);
for (i = 0; i <= 31; ++i) { *ptr = *ftable; ++ptr; ++ftable; }
Begin_roots1(v_res_str);
/* Allocates [Some string] from firsttable */
v_res = caml_alloc_small(1, 0);
End_roots();
Field(v_res, 0) = v_res_str;
return v_res;
}
}
CAMLprim value pcre_lastliteral_stub(value v_rex)
{
int lastliteral;
const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_LASTLITERAL,
&lastliteral);
if (ret != 0) raise_with_string(*pcre_exc_InternalError,
"pcre_lastliteral_stub");
if (lastliteral == -1) return None;
if (lastliteral < 0) raise_with_string(*pcre_exc_InternalError,
"pcre_lastliteral_stub");
else {
/* Allocates [Some char] */
value v_res = caml_alloc_small(1, 0);
Field(v_res, 0) = Val_int(lastliteral);
return v_res;
}
}
CAMLprim value pcre_study_stat_stub(value v_rex)
{
/* Generates the appropriate constant constructor [`Optimal] or
[`Studied] if regexp has already been studied */
if (Field(v_rex, 3))
return ((pcre_extra *) Field(v_rex, 2) == NULL) ? var_Optimal : var_Studied;
return var_Not_studied; /* otherwise [`Not_studied] */
}
/* Executes a pattern match with runtime options, a regular expression, a
string offset, a string length, a subject string, a number of subgroup
offsets, an offset vector and an optional callout function */
CAMLprim value pcre_exec_stub(value v_opt, value v_rex, value v_ofs,
value v_subj, value v_subgroups2, value v_ovec,
value v_maybe_cof)
{
const int ofs = Int_val(v_ofs), len = string_length(v_subj);
if (ofs > len || ofs < 0)
invalid_argument("Pcre.pcre_exec_stub: illegal offset");
{
const pcre *code = (pcre *) Field(v_rex, 1); /* Compiled pattern */
const pcre_extra *extra = (pcre_extra *) Field(v_rex, 2); /* Extra info */
const char *ocaml_subj = String_val(v_subj); /* Subject string */
const int opt = Int_val(v_opt); /* Runtime options */
int subgroups2 = Int_val(v_subgroups2);
const int subgroups2_1 = subgroups2 - 1;
const int subgroups3 = (subgroups2 >> 1) + subgroups2;
/* Special case when no callout functions specified */
if (v_maybe_cof == None) {
int *ovec = (int *) &Field(v_ovec, 0);
/* Performs the match */
const int ret =
pcre_exec(code, extra, ocaml_subj, len, ofs, opt, ovec, subgroups3);
if (ret < 0) {
switch(ret) {
case PCRE_ERROR_NOMATCH : raise_constant(*pcre_exc_Not_found);
case PCRE_ERROR_PARTIAL : raise_constant(*pcre_exc_Partial);
case PCRE_ERROR_MATCHLIMIT : raise_constant(*pcre_exc_MatchLimit);
case PCRE_ERROR_BADPARTIAL : raise_constant(*pcre_exc_BadPartial);
case PCRE_ERROR_BADUTF8 : raise_constant(*pcre_exc_BadUTF8);
case PCRE_ERROR_BADUTF8_OFFSET :
raise_constant(*pcre_exc_BadUTF8Offset);
default :
raise_with_string(*pcre_exc_InternalError, "pcre_exec_stub");
}
}
else {
const int *ovec_src = ovec + subgroups2_1;
long int *ovec_dst = (long int *) ovec + subgroups2_1;
/* Converts offsets from C-integers to OCaml-Integers
This is a bit tricky, because there are 32- and 64-bit platforms
around and OCaml chooses the larger possibility for representing
integers when available (also in arrays) - not so the PCRE */
while (subgroups2--) {
*ovec_dst = Val_int(*ovec_src);
--ovec_src; --ovec_dst;
}
}
}
/* There are callout functions */
else {
value v_cof = Field(v_maybe_cof, 0);
value v_substrings;
char *subj = malloc(sizeof(char) * len);
int *ovec = malloc(sizeof(int) * subgroups3);
int ret;
struct cod cod = { (value *) NULL, (value *) NULL, (value) NULL };
struct pcre_extra new_extra = { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL };
memcpy(subj, ocaml_subj, len);
Begin_roots3(v_rex, v_cof, v_substrings);
Begin_roots2(v_subj, v_ovec);
v_substrings = caml_alloc_small(2, 0);
End_roots();
Field(v_substrings, 0) = v_subj;
Field(v_substrings, 1) = v_ovec;
cod.v_substrings_p = &v_substrings;
cod.v_cof_p = &v_cof;
new_extra.callout_data = &cod;
if (extra == NULL) {
ret = pcre_exec(code, &new_extra, subj, len, ofs, opt, ovec,
subgroups3);
}
else {
new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags;
new_extra.study_data = extra->study_data;
new_extra.match_limit = extra->match_limit;
ret = pcre_exec(code, &new_extra, subj, len, ofs, opt, ovec,
subgroups3);
}
free(subj);
End_roots();
if (ret < 0) {
free(ovec);
switch(ret) {
case PCRE_ERROR_NOMATCH : raise_constant(*pcre_exc_Not_found);
case PCRE_ERROR_PARTIAL : raise_constant(*pcre_exc_Partial);
case PCRE_ERROR_MATCHLIMIT : raise_constant(*pcre_exc_MatchLimit);
case PCRE_ERROR_BADPARTIAL : raise_constant(*pcre_exc_BadPartial);
case PCRE_ERROR_BADUTF8 : raise_constant(*pcre_exc_BadUTF8);
case PCRE_ERROR_BADUTF8_OFFSET :
raise_constant(*pcre_exc_BadUTF8Offset);
case PCRE_ERROR_CALLOUT : caml_raise(cod.v_exn);
default :
raise_with_string(*pcre_exc_InternalError, "pcre_exec_stub");
}
}
else {
int *ovec_src = ovec + subgroups2_1;
long int *ovec_dst = &Field(v_ovec, 0) + subgroups2_1;
while (subgroups2--) {
*ovec_dst = Val_int(*ovec_src);
--ovec_src; --ovec_dst;
}
free(ovec);
}
}
}
return Val_unit;
}
/* Byte-code hook for pcre_exec_stub
Needed, because there are more than 5 arguments */
CAMLprim value pcre_exec_stub_bc(value *argv, int argn)
{
return pcre_exec_stub(argv[0], argv[1], argv[2], argv[3],
argv[4], argv[5], argv[6]);
}
/* Generates a new set of chartables for the current locale (see man
page of PCRE */
CAMLprim value pcre_maketables_stub(value unit)
{
/* GC will do a full cycle every 100 table set allocations
(one table set consumes 864 bytes -> maximum of 86400 bytes
unreclaimed table sets) */
const value v_res = caml_alloc_final(2, pcre_dealloc_tables, 864, 86400);
Field(v_res, 1) = (value) pcre_maketables();
return v_res;
}
/* Wraps around the isspace-function */
CAMLprim value pcre_isspace_stub(value v_c)
{
return Val_bool(isspace(Int_val(v_c)));
}
/* Returns number of substring associated with a name */
CAMLprim value pcre_get_stringnumber_stub(value v_rex, value v_name)
{
const int ret = pcre_get_stringnumber((pcre *) Field(v_rex, 1),
String_val(v_name));
if (ret == PCRE_ERROR_NOSUBSTRING) invalid_argument("Named string not found");
return Val_int(ret);
}
/* Generic stub for getting integer results from pcre_config */
static int pcre_config_int(int what)
{
int ret;
pcre_config(what, (void *) &ret);
return ret;
}
/* Some stubs for config-functions */
/* Returns boolean indicating UTF8-support */
CAMLprim value pcre_config_utf8_stub(value unit)
{ return Val_bool(pcre_config_int(PCRE_CONFIG_UTF8)); }
/* Returns character used as newline */
CAMLprim value pcre_config_newline_stub(value unit)
{ return Val_int(pcre_config_int(PCRE_CONFIG_NEWLINE)); }
/* Returns number of bytes used for internal linkage of regular expressions */
CAMLprim value pcre_config_link_size_stub(value unit)
{ return Val_int(pcre_config_int(PCRE_CONFIG_LINK_SIZE)); }
/* Returns default limit for calls to internal matching function */
CAMLprim value pcre_config_match_limit_stub(value unit)
{ return Val_int(pcre_config_int(PCRE_CONFIG_MATCH_LIMIT)); }
/* Returns boolean indicating use of stack recursion */
CAMLprim value pcre_config_stackrecurse_stub(value unit)
{ return Val_bool(pcre_config_int(PCRE_CONFIG_STACKRECURSE)); }
syntax highlighted by Code2HTML, v. 0.9.1