/* Generated parser/lexer template for a CPython 2.x extension module.
   Requires Python 2.2+ built with Unicode support. */
#if PY_VERSION_HEX < 0x02020000 || !defined(Py_USING_UNICODE)
#error "Python 2.2+ with unicode support required"
#endif

/* Static Definitions */
#define YYEMPTY -2                    /* no lookahead token has been read */
#define YYERROR -1                    /* lexer signalled an error */
#define YYEOF 0                       /* end-of-input token */
#define YYINITDEPTH 1000              /* fixed parser stack depth */
#define LEXER_INITIAL_BACKTRACKS 20   /* initial backtrack-stack capacity */

/* Parsing objects */

/* The parser instance exposed to Python. */
typedef struct {
  PyObject_HEAD
  int verbose;        /* non-zero enables stderr tracing ("debug" member) */
  PyObject *dict;     /* per-instance __dict__ */
} parserobject;

/* Scanner state over a single unicode input string. */
typedef struct {
  PyObject *text;           /* owned reference to the unicode input */
  int last;
  int state;                /* current lexer start-state (indexes pattern tables) */
  Py_UNICODE *end;          /* one past the last character of text */
  Py_UNICODE *position;     /* current scan position within text */
  /* backtracking stack */
  int backtracks;           /* number of saved positions in use */
  Py_UNICODE **positions;   /* saved positions for repeat backtracking */
  int allocated;            /* capacity of the positions array */
} lexerobject;

/* forward declarations */
static int parser_yylex(parserobject *, lexerobject *, PyObject **);
static lexerobject *lexer_new(PyObject *);
static void lexer_free(lexerobject *);
static int lexer_save_position(lexerobject *);
static Py_UNICODE *lexer_restore_position(lexerobject *);
static int lexer_charset(parserobject *, Py_UCS4 *, Py_UCS4, int);
static int lexer_match(parserobject *, lexerobject *, Py_UCS4 *);
static void lexer_error(lexerobject *);
static char *unicode_escape(Py_UNICODE *, int);
static PyObject *report_error(int state, PyObject *lval, lexerobject *lexer);
static void print_reduce(int ruleno);
static void print_state_stack(int *stack, int *end);

/* Parser Methods */

/* TRACE writes to stderr only when the instance's debug flag is set;
   it expands inside functions that have a `self` in scope. */
#define TRACE(...) if (self->verbose) PySys_WriteStderr(__VA_ARGS__)
/* REGEX_TRACE additionally requires a compile-time LEXER_DEBUG define,
   since regex-engine tracing is extremely noisy. */
#ifdef LEXER_DEBUG
#define REGEX_TRACE(...) TRACE(__VA_ARGS__)
#else
#define REGEX_TRACE(...)
#endif

static char parser_parse_doc[] = "\
parse(string) -> object\n\
Converts the given string to a parse tree and return the top-most\n\
element of the tree.";

/* The LALR(1) driver loop.  Tables (action_idx, default_action, rhs_size,
   derives, goto_idx, yycheck, yytable, default_goto, token_names) are emitted
   by the generator; `$$parser` is replaced by the grammar's action routines. */
static PyObject *parser_parse(register parserobject *self, PyObject *args)
{
  PyObject *text;
  register int yystate;
  register int yyn;
  PyObject *yylval = NULL;              /* semantic value from the lexer */
  PyObject *yyval = NULL;               /* semantic value of a reduction */
  int state_stack[YYINITDEPTH];
  int *state_ptr;
  PyObject *value_stack[YYINITDEPTH];
  PyObject **value_ptr;
  int yylen;
  int yychar = YYEMPTY;                 /* cause a token to be read */
  int yychar1 = 0;                      /* yychar translated to internal form */
  lexerobject *lexer;

  if (!PyArg_ParseTuple(args, "O:parse", &text)) return NULL;

  lexer = lexer_new(text);
  if (lexer == NULL) return NULL;

  TRACE("Starting parse\n");

  /* Initialize stack pointers
     Waste one element of value and location stack so that they stay on the
     same level as the state stack.  The wasted elements are never
     initialized. */
  state_ptr = state_stack - 1;
  value_ptr = value_stack;
  yystate = 0;

  while (1) {
    /* Push a new state, which is found in yystate. */
    /* In all cases, when you get here, the value and location stacks have
       just been pushed.  So pushing a state here evens the stacks. */
    *++state_ptr = yystate;
    TRACE("Entering state %d\n", yystate);

    /* Do appropriate processing given the current state. */
    /* Read a lookahead token if we need one and don't already have one. */
    /* First try to decide what to do without reference to lookahead
       token. */
    yyn = action_idx[yystate];
    if (yyn == YYFLAG) {
      /* this state's action is independent of the lookahead */
      yyn = default_action[yystate];
      if (yyn == 0) {
        /* NOTE(review): report_error does not release the lexer struct or
           its backtrack array here; see report_error for the cleanup. */
        return report_error(yystate, yylval, lexer);
      }

      /* Do a reduction.  yyn is the number of a rule to reduce with. */
      if (self->verbose) print_reduce(yyn);
      yylen = rhs_size[yyn];
      /* pop the rule's right-hand side off both stacks */
      state_ptr -= yylen;
      value_ptr -= yylen;
      /* default semantic value is the first RHS value */
      if (yylen > 0) yyval = value_ptr[1];

      /* Action routines */
      switch (yyn) {
        $$parser
      }
      if (!yyval) {
        /* an action routine raised an exception */
        lexer_free(lexer);
        return NULL;
      }
      *++value_ptr = yyval;
      if (self->verbose) print_state_stack(state_stack, state_ptr);

      /* Now "shift" the result of the reduction.  Determine what state
         that goes to, based on the state we popped back to and the rule
         number reduced by. */
      yyn = derives[yyn] - YYNTBASE;
      yystate = goto_idx[yyn] + *state_ptr;
      if (yystate >= 0 && yystate <= YYLAST &&
          yycheck[yystate] == *state_ptr) {
        yystate = yytable[yystate];
      } else {
        yystate = default_goto[yyn];
      }
      continue;
    } /* if (yyn == YYFLAG) */

    /* Not known => get a lookahead token if don't already have one. */
    /* yychar is either YYEMPTY, YYEOF or a valid token in external form */
    if (yychar == YYEMPTY) {
      TRACE("Reading a token: ");
      yychar = parser_yylex(self, lexer, &yylval);
    }

    /* Convert token to internal form (in yychar1) for indexing tables
       with */
    if (yychar <= 0) {
      if (yychar == YYERROR) {
        /* the lexer already set an exception */
        lexer_free(lexer);
        return NULL;
      }
      /* This means end-of-input. */
      yychar1 = 0;
      TRACE("Now at end of input.\n");
    } else {
      yychar1 = YYTRANSLATE(yychar);
      TRACE("Next token is %d (%s)\n", yychar, token_names[yychar1]);
      yyn += yychar1;
    }

    if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1) {
      /* comes here after end of input */
      yyn = default_action[yystate];
      if (yyn == 0) {
        return report_error(yystate, NULL, lexer);
      }

      /* Do a reduction.  yyn is the number of a rule to reduce with. */
      if (self->verbose) print_reduce(yyn);
      yylen = rhs_size[yyn];
      state_ptr -= yylen;
      value_ptr -= yylen;
      if (yylen > 0) yyval = value_ptr[1];

      /* Action routines */
      switch (yyn) {
        $$parser
      }
      if (!yyval) {
        lexer_free(lexer);
        return NULL;
      }
      *++value_ptr = yyval;
      if (self->verbose) print_state_stack(state_stack, state_ptr);

      /* Now "shift" the result of the reduction.  Determine what state
         that goes to, based on the state we popped back to and the rule
         number reduced by. */
      yyn = derives[yyn] - YYNTBASE;
      yystate = goto_idx[yyn] + *state_ptr;
      if (yystate >= 0 && yystate <= YYLAST &&
          yycheck[yystate] == *state_ptr) {
        yystate = yytable[yystate];
      } else {
        yystate = default_goto[yyn];
      }
      continue;
    }

    yyn = yytable[yyn];
    /* yyn is what to do for this token type in this state.
         Negative => reduce, -yyn is rule number.
         Positive => shift, yyn is new state.  New state is final state =>
           don't bother to shift just return success.
         0, or max negative number => error. */
    if (yyn > YYFLAG && yyn < 0) {
      yyn = -yyn;

      /* Do a reduction.  yyn is the number of a rule to reduce with. */
      if (self->verbose) print_reduce(yyn);
      yylen = rhs_size[yyn];
      state_ptr -= yylen;
      value_ptr -= yylen;
      if (yylen > 0) yyval = value_ptr[1];

      /* Action routines */
      switch (yyn) {
        $$parser
      }
      if (!yyval) {
        lexer_free(lexer);
        return NULL;
      }
      *++value_ptr = yyval;
      if (self->verbose) print_state_stack(state_stack, state_ptr);

      /* Now "shift" the result of the reduction.  Determine what state
         that goes to, based on the state we popped back to and the rule
         number reduced by. */
      yyn = derives[yyn] - YYNTBASE;
      yystate = goto_idx[yyn] + *state_ptr;
      if (yystate >= 0 && yystate <= YYLAST &&
          yycheck[yystate] == *state_ptr) {
        yystate = yytable[yystate];
      } else {
        yystate = default_goto[yyn];
      }
      continue;
    } else if (yyn == YYFINAL) {
      /* Hooray!  Process complete. */
      lexer_free(lexer);
      return value_ptr[-1];
    } else if (yyn <= 0) {
      /* Now it is either 0 or YYFLAG */
      return report_error(yystate, yylval, lexer);
    }

    /* Shift the lookahead token. */
    TRACE("Shifting token %d (%s), ", yychar, token_names[yychar1]);
    /* Discard the token being shifted unless it is eof. */
    if (yychar != YYEOF) {
      yychar = YYEMPTY;
    }
    *++value_ptr = yylval;
    yystate = yyn;
    continue;
  }
  /* should never get here */
  Py_INCREF(Py_None);
  lexer_free(lexer);
  return Py_None;
}

/** lexer routines ****************************************************/

/* Allocate a lexer over `text` (coerced to unicode).  Returns NULL with an
   exception set on failure.  Release with lexer_free(). */
static lexerobject *lexer_new(PyObject *text)
{
  lexerobject *lexer;

  lexer = PyMem_New(lexerobject, 1);
  if (lexer == NULL) {
    PyErr_NoMemory();
    return NULL;
  }

  /* attempt to coerce given object to unicode using default rules */
  lexer->text = PyUnicode_FromObject(text);
  if (lexer->text == NULL) {
    PyMem_Free(lexer);
    return NULL;
  }
  lexer->position = PyUnicode_AS_UNICODE(lexer->text);
  lexer->end = lexer->position + PyUnicode_GET_SIZE(lexer->text);
  lexer->state = LEXER_INITIAL;

  /* create initial backtracking stack */
  lexer->positions = PyMem_New(Py_UNICODE *, LEXER_INITIAL_BACKTRACKS);
  if (lexer->positions == NULL) {
    PyErr_NoMemory();
    Py_DECREF(lexer->text);
    PyMem_Free(lexer);
    return NULL;
  }
  lexer->allocated = LEXER_INITIAL_BACKTRACKS;
  lexer->backtracks = 0;
  return lexer;
}

/* Release everything lexer_new() allocated. */
static void lexer_free(lexerobject *lexer)
{
  PyMem_Free(lexer->positions);
  Py_DECREF(lexer->text);
  PyMem_Free(lexer);
}

/* Push the current scan position on the backtrack stack, growing the stack
   if needed.  Returns 0 on success, -1 (with MemoryError set) on failure. */
static int lexer_save_position(lexerobject *lexer)
{
  Py_UNICODE **positions;
  size_t new_allocated;
  int allocated, newsize;

  /* Bypass realloc() when a previous overallocation is large enough to
     accommodate the newsize. */
  newsize = lexer->backtracks + 1;
  allocated = lexer->allocated;
  positions = lexer->positions;
  if (newsize >= allocated) {
    /* This over-allocates proportional to the list size, making room
     * for additional growth.  The over-allocation is mild, but is
     * enough to give linear-time amortized behavior over a long
     * sequence of appends() in the presence of a poorly-performing
     * system realloc().
     * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
     */
    new_allocated = (newsize >> 3) + (newsize < 9 ?
3 : 6) + newsize;
    if (PyMem_Resize(positions, Py_UNICODE *, new_allocated) == NULL) {
      PyErr_NoMemory();
      return -1;
    }
    lexer->allocated = new_allocated;
    lexer->positions = positions;
  }
  lexer->positions[lexer->backtracks] = lexer->position;
  lexer->backtracks = newsize;
  return 0;
}

/* Pop the most recently saved position and make it current.
   The caller must ensure at least one position is saved. */
static Py_UNICODE *lexer_restore_position(lexerobject *lexer)
{
  assert(lexer->backtracks > 0);
  lexer->position = lexer->positions[--lexer->backtracks];
  return lexer->position;
}

/* Test character `ch` against a compiled character-set program `set`.
   Returns `ok` on membership, `!ok` on non-membership, -1 on a corrupt
   program.  Note, the tests are stored sorted to allow for quick exits. */
static int lexer_charset(parserobject *self, Py_UCS4 *set, Py_UCS4 ch, int ok)
{
  unsigned char *charset;

  /* check if character is a member of the given set */
  for (;;) {
    switch (*set++) {
    case LEXER_CHARSET_LITERAL:
      /* <literal> */
      REGEX_TRACE("CHARSET_LITERAL, %d == %d\n", ch, set[0]);
      if (ch < set[0]) return !ok;            /* sorted: no later test can match */
      else if ((Py_UCS4)ch == set[0]) return ok;
      set++;
      break;
    case LEXER_CHARSET_RANGE:
      /* <lower> <upper> */
      REGEX_TRACE("CHARSET_RANGE, %d <= %d <= %d\n", set[0], ch, set[1]);
      if (ch < set[0]) return !ok;
      else if (ch <= set[1]) return ok;
      set += 2;
      break;
    case LEXER_CHARSET_SMALL:
      /* <bitmap-index>; 256-bit bitmap for Latin-1 characters */
      REGEX_TRACE("CHARSET_SMALL, index=%d\n", set[0]);
      charset = lexer_charsets[*set++];
      if (ch < 256 && (charset[ch >> 3] & (1 << (ch & 7)))) return ok;
      break;
    case LEXER_CHARSET_BIG:
      /* <blockmap-index>; two-level bitmap for BMP characters */
      REGEX_TRACE("CHARSET_BIG, index=%d\n", set[0]);
      charset = lexer_charsets[lexer_blockmaps[*set++][ch >> 8]];
      if (ch < 65536 && charset[(ch & 255) >> 3] & (1 << (ch & 7))) return ok;
      break;
    case LEXER_CHARSET_FAILURE:
      /* nothing matched in charset */
      REGEX_TRACE("CHARSET_FAILURE\n");
      return !ok;
    default:
      REGEX_TRACE("**INTERNAL CHARSET ERROR**\n");
      return -1;
    }
  }
}

/* return values: 1 -> successful match, 0 -> no match, -1 -> error */
#ifdef Py_UNICODE_WIDE
#define GET_CHAR_AND_ADVANCE() ch = *ptr++;
#else
/* On narrow (UTF-16) builds, combine a surrogate pair into one code point. */
#define GET_CHAR_AND_ADVANCE() \
  if ((0xD800 <= ptr[0] && ptr[0] <= 0xDBFF) && \
      (0xDC00 <= ptr[1] && ptr[1] <= 0xDFFF)) { \
    ch = (((ptr[0] & 0x03FF) << 10) | (ptr[1] & 0x03FF)) + 0x00010000; \
    ptr += 2; \
  } else { \
    ch = *ptr++; \
  }
#endif

/* Execute the compiled pattern program `pattern` at the lexer's current
   position.  On success (return 1) lexer->position is advanced past the
   match; on failure (0) or internal error (-1) its final value is
   unspecified and the caller must reset it. */
static int lexer_match(parserobject *self, lexerobject *lexer, Py_UCS4 *pattern)
{
  Py_UNICODE *ptr = lexer->position;
  Py_UNICODE *end;
  Py_UCS4 ch;
  int i, count;

  REGEX_TRACE("LEXER_MATCH, position %d\n",
              lexer->position - PyUnicode_AS_UNICODE(lexer->text));
  while (1) {
    switch (*pattern++) {
    case LEXER_OP_FAILURE:
      /* immediate failure */
      REGEX_TRACE("OP_FAILURE\n");
      return 0;
    case LEXER_OP_SUCCESS:
      /* end of pattern */
      REGEX_TRACE("OP_SUCCESS\n");
      lexer->position = ptr;
      return 1;
    case LEXER_OP_BOL:
      /* beginning of line */
      REGEX_TRACE("OP_BOL\n");
      if (ptr == PyUnicode_AS_UNICODE(lexer->text) || ptr[-1] == '\n') break;
      return 0;
    case LEXER_OP_EOL:
      /* end of line */
      REGEX_TRACE("OP_EOL\n");
      if (ptr >= lexer->end || ptr[0] == '\n') break;
      return 0;
    case LEXER_OP_EOF:
      /* end of file */
      REGEX_TRACE("OP_EOF\n");
      if (ptr >= lexer->end) break;
      return 0;
    case LEXER_OP_ANY:
      /* match anything (except a newline) */
      REGEX_TRACE("OP_ANY\n");
      if (ptr >= lexer->end || ptr[0] == '\n') return 0;
      ptr++;
      break;
    case LEXER_OP_LITERAL:
      /* match literal character: <code point> */
      if (ptr >= lexer->end) return 0;
      GET_CHAR_AND_ADVANCE();
      REGEX_TRACE("OP_LITERAL, %d == %d\n", ch, pattern[0]);
      if (ch != pattern[0]) return 0;
      pattern++;
      break;
    case LEXER_OP_NOT_LITERAL:
      /* match anything that is not literal character: <code point> */
      if (ptr >= lexer->end) return 0;
      GET_CHAR_AND_ADVANCE();
      REGEX_TRACE("OP_NOT_LITERAL, %d != %d\n", ch, pattern[0]);
      if (ch == pattern[0]) return 0;
      pattern++;
      break;
    case LEXER_OP_CHARSET:
      /* match set member: <skip> <charset program> */
      if (ptr >= lexer->end) return 0;
      GET_CHAR_AND_ADVANCE();
      REGEX_TRACE("OP_CHARSET, skip %d\n", pattern[0]);
      i = lexer_charset(self, pattern + 1, ch, 1);
      if (i <= 0) return i;
      pattern += pattern[0];
      break;
    case LEXER_OP_NOT_CHARSET:
      /* match set non-member: <skip> <charset program> */
      if (ptr >= lexer->end) return 0;
      GET_CHAR_AND_ADVANCE();
      REGEX_TRACE("OP_NOT_CHARSET, skip %d\n", pattern[0]);
      i = lexer_charset(self, pattern + 1, ch, 0);
      if (i <= 0) return i;
      pattern += pattern[0];
      break;
    case LEXER_OP_ASSERT:
      /* lookahead assertion: <skip> <sub-pattern>; consumes no input */
      REGEX_TRACE("OP_ASSERT, skip %d\n", pattern[0]);
      lexer->position = ptr;
      i = lexer_match(self, lexer, pattern + 1);
      if (i <= 0) return i;
      pattern += pattern[0];
      break;
    case LEXER_OP_BRANCH:
      /* alternation: (<skip> <sub-pattern>)... <0>; longest match wins */
      end = NULL;
      count = 0;
      while (pattern[0]) {
        /* reset start position each time through */
        REGEX_TRACE("OP_BRANCH %d, skip %d\n", count++, pattern[0]);
        lexer->position = ptr;
        i = lexer_match(self, lexer, pattern + 1);
        if (i < 0) return i;
        else if (i && lexer->position > end)
          /* successful match which is longer than the current best
             matched */
          end = lexer->position;
        /* advance to the next pattern */
        pattern += pattern[0];
      }
      /* advance pattern past NULL */
      pattern++;
      /* advance to the best matching position if there was a match */
      if (end) {
        lexer->position = ptr = end;
        break;
      }
      return 0;
    case LEXER_OP_REPEAT:
      /* repetition: <0=skip> <1=min> item */
      {
        Py_UCS4 *item = pattern + 2;
        Py_UCS4 *next = pattern + pattern[0];
        int minimum = pattern[1];
        int backtracks;

        lexer->position = ptr;
        /* mandatory minimum matches */
        for (count = 0, i = 1; i == 1 && count < minimum; count++) {
          REGEX_TRACE("OP_REPEAT, min %d, now %d\n", minimum, count);
          i = lexer_match(self, lexer, item);
        }
        /* either internal error or failed minimum matches */
        if (i <= 0) return i;

        backtracks = lexer->backtracks;
        /* match as many items as possible, saving each position */
        for (; i == 1; count++) {
          REGEX_TRACE("OP_REPEAT, now %d\n", count);
          if (lexer_save_position(lexer) < 0) return -1;
          i = lexer_match(self, lexer, item);
        }
        if (i < 0) {
          /* internal error */
          lexer->backtracks = backtracks;
          return i;
        }

        /* backtracking assert of tail match until success */
        do {
          REGEX_TRACE("OP_REPEAT, now %d\n", count);
          /* update position to previous successful match */
          ptr = lexer_restore_position(lexer);
          if (ptr == NULL) return -1;
          i = lexer_match(self, lexer, next);
        } while (i == 0 && --count > minimum);
        /* discard remaining backtrack positions */
        lexer->backtracks = backtracks;
        if (i <= 0) {
          return i;
        }
        pattern = next;
      }
      break;
    case LEXER_OP_REPEAT_RANGE:
      /* repetition: <0=skip> <1=min> <2=max> item */
      {
        Py_UCS4 *item = pattern + 3;
        Py_UCS4 *next = pattern + pattern[0];
        int minimum = pattern[1];
        int maximum = pattern[2];
        int backtracks;

        lexer->position = ptr;
        /* mandatory minimum matches */
        for (count = 0, i = 1; i == 1 && count < minimum; count++) {
          REGEX_TRACE("OP_REPEAT_RANGE, min %d, now %d\n", minimum, count);
          i = lexer_match(self, lexer, item);
        }
        /* either internal error or failed minimum matches */
        if (i <= 0) return i;

        backtracks = lexer->backtracks;
        /* consume up to 'maximum' matches */
        for (; i == 1 && count < maximum; count++) {
          REGEX_TRACE("OP_REPEAT_RANGE, max %d, now %d\n", maximum, count);
          if (lexer_save_position(lexer) < 0) return -1;
          i = lexer_match(self, lexer, item);
        }
        if (i < 0) {
          /* internal error */
          lexer->backtracks = backtracks;
          return i;
        }
        /* maximum matches reached, update saved position */
        if (i == 1)
          ptr = lexer->position;

        /* backtracking assert of tail match until success */
        do {
          REGEX_TRACE("OP_REPEAT_RANGE, now %d\n", count);
          if (i == 0) {
            /* update position to last successful match */
            ptr = lexer_restore_position(lexer);
            if (ptr == NULL) return -1;
          }
          i = lexer_match(self, lexer, next);
        } while (i == 0 && --count > minimum);
        /* discard remaining backtrack positions */
        lexer->backtracks = backtracks;
        if (i <= 0) return i;
        pattern = next;
      }
      break;
    default:
      REGEX_TRACE("**INTERNAL MATCH ERROR**\n");
      return -1;
    }
  }
}

/* Scan the next token.  Tries every pattern of the current lexer state,
   keeps the longest match, then runs the generated action block which may
   set `yychar` and `*yylval`.  Returns the external token number, YYEOF at
   end of input, or -1 with an exception set. */
static int parser_yylex(parserobject *self, lexerobject *lexer,
                        PyObject **yylval)
{
  int yychar = YYEMPTY;
  int yylen;
  Py_UNICODE *yytext = lexer->position;

  while (yytext < lexer->end && yychar == YYEMPTY) {
    Py_UNICODE *best_end = NULL;
    int yyaccept = 0;
    int i;
    Py_UCS4 **patterns = (Py_UCS4 **)lexer_patterns[lexer->state];
    const int *actions = lexer_actions[lexer->state];
    REGEX_TRACE("Using patterns from lexer state %d\n", lexer->state);
    for (i = 0; patterns[i]; i++) {
      int matched; /* reset position each
time through */ lexer->position = yytext; REGEX_TRACE("--- pattern %d...\n", i); matched = lexer_match(self, lexer, patterns[i]); if (matched > 0 && lexer->position > best_end) { /* successful match which is longer than the current best matched */ best_end = lexer->position; yyaccept = i; } else if (matched < 0) { /* internal error */ REGEX_TRACE("--- pattern %d internal error\n", i); PyErr_SetString(PyExc_RuntimeError, "internal error in regular expression engine"); return -1; } REGEX_TRACE("--- pattern %d %s\n", i, matched ? "success" : "failed"); } if (best_end == NULL) { /* no matches */ lexer->position = yytext; lexer_error(lexer); return -1; } lexer->position = best_end; yylen = best_end - yytext; /* get the action block for this match */ switch (actions[yyaccept]) { $$lexer } } if (yychar == YYEMPTY) { /* Reached end of input */ yychar = YYEOF; } return yychar; } /** Type Object *******************************************************/ static int parser_traverse(parserobject *self, visitproc visit, void *arg) { int rv; if (self->dict) { rv = visit(self->dict, arg); if (rv != 0) return rv; } return 0; } static int parser_clear(parserobject *self) { PyObject *tmp; if (self->dict) { tmp = self->dict; self->dict = NULL; Py_DECREF(tmp); } return 0; } static void parser_dealloc(parserobject *self) { parser_clear(self); self->ob_type->tp_free((PyObject *) self); } static int parser_init(parserobject *self, PyObject *args, PyObject *kwds) { PyObject *debug=NULL; static char *kwlist[] = { "debug", NULL }; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:" PARSER_NAME, kwlist, &debug)) return -1; if (debug) { self->verbose = PyObject_IsTrue(debug); } return 0; } static PyObject *parser_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { parserobject *self; self = (parserobject *) type->tp_alloc(type, 0); if (self != NULL) { self->dict = PyDict_New(); if (self->dict == NULL) { Py_DECREF(self); return NULL; } self->verbose = 0; } return (PyObject *) self; } static 
PyMethodDef parser_methods[] = {
  /* the only public method: parse(string) -> parse tree */
  { "parse", (PyCFunction) parser_parse, METH_VARARGS, parser_parse_doc },
  { NULL, NULL }
};

static PyMemberDef parser_members[] = {
  /* expose the trace flag as a writable int attribute */
  { "debug", T_INT, offsetof(parserobject, verbose) },
  { NULL }
};

static char parser_doc[] = PARSER_NAME "\
([debug]) -> parser\n\
Create a new parser object.\n\
\n\
The optional debug argument, when true, enables the builtin trace facility.\n\
The trace facility uses stderr to display each step taken by the parser.";

static PyTypeObject Parser_Type = {
  /* PyObject_HEAD     */ PyObject_HEAD_INIT(NULL)
  /* ob_size           */ 0,
  /* tp_name           */ PROJECT_NAME "." PARSER_NAME,
  /* tp_basicsize      */ sizeof(parserobject),
  /* tp_itemsize       */ 0,
  /* tp_dealloc        */ (destructor) parser_dealloc,
  /* tp_print          */ (printfunc) 0,
  /* tp_getattr        */ (getattrfunc) 0,
  /* tp_setattr        */ (setattrfunc) 0,
  /* tp_compare        */ (cmpfunc) 0,
  /* tp_repr           */ (reprfunc) 0,
  /* tp_as_number      */ (PyNumberMethods *) 0,
  /* tp_as_sequence    */ (PySequenceMethods *) 0,
  /* tp_as_mapping     */ (PyMappingMethods *) 0,
  /* tp_hash           */ (hashfunc) 0,
  /* tp_call           */ (ternaryfunc) 0,
  /* tp_str            */ (reprfunc) 0,
  /* tp_getattro       */ (getattrofunc) 0,
  /* tp_setattro       */ (setattrofunc) 0,
  /* tp_as_buffer      */ (PyBufferProcs *) 0,
  /* tp_flags          */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
  /* tp_doc            */ (char *) parser_doc,
  /* tp_traverse       */ (traverseproc) parser_traverse,
  /* tp_clear          */ (inquiry) parser_clear,
  /* tp_richcompare    */ (richcmpfunc) 0,
  /* tp_weaklistoffset */ 0,
  /* tp_iter           */ (getiterfunc) 0,
  /* tp_iternext       */ (iternextfunc) 0,
  /* tp_methods        */ (PyMethodDef *) parser_methods,
  /* tp_members        */ (PyMemberDef *) parser_members,
  /* tp_getset         */ (PyGetSetDef *) 0,
  /* tp_base           */ (PyTypeObject *) 0,
  /* tp_dict           */ (PyObject *) 0,
  /* tp_descr_get      */ (descrgetfunc) 0,
  /* tp_descr_set      */ (descrsetfunc) 0,
  /* tp_dictoffset     */ offsetof(parserobject, dict),
  /* tp_init           */ (initproc) parser_init,
  /* tp_alloc          */ (allocfunc) 0,
  /* tp_new            */ (newfunc) parser_new,
  /* tp_free           */ 0,
};

/* Helper functions */

/*
caller is responsible for releasing the memory */ static char *unicode_escape(Py_UNICODE *s, int len) { static const char *hexdigit = "0123456789ABCDEF"; char *repr, *p; int i, size; /* Do one pass to get the repr'ed size */ size = 1; /* zero terminator */ for (i = 0; i < len; i++) { #ifdef Py_UNICODE_WIDE if (s[i] >= 65536) size += 10; /* \UHHHHHHHH */ else #endif if (s[i] >= 256) size += 6; /* \uHHHH */ else if (s[i] == 9 || s[i] == 10 || s[i] == 13) size += 2; /* \t \n \r */ else if (s[i] < 32 || s[i] >= 128) size += 4; /* \xHH */ else size++; /* printable US-ASCII */ } repr = p = PyMem_New(char, size + 1); if (repr == NULL) return NULL; while (len-- > 0) { Py_UNICODE ch = *s++; #ifdef Py_UNICODE_WIDE /* Map 32-bit characters to '\Uxxxxxxxx' */ if (ch >= 65536) { *p++ = '\\'; *p++ = 'U'; *p++ = hexdigit[(ch >> 28) & 0xf]; *p++ = hexdigit[(ch >> 24) & 0xf]; *p++ = hexdigit[(ch >> 20) & 0xf]; *p++ = hexdigit[(ch >> 16) & 0xf]; *p++ = hexdigit[(ch >> 12) & 0xf]; *p++ = hexdigit[(ch >> 8) & 0xf]; *p++ = hexdigit[(ch >> 4) & 0xf]; *p++ = hexdigit[ch & 15]; } /* Map 16-bit characters to '\uxxxx' */ else #endif if (ch >= 256) { *p++ = '\\'; *p++ = 'u'; *p++ = hexdigit[(ch >> 12) & 0xf]; *p++ = hexdigit[(ch >> 8) & 0xf]; *p++ = hexdigit[(ch >> 4) & 0xf]; *p++ = hexdigit[ch & 15]; } /* Map special whitespace to '\t', \n', '\r' */ else if (ch == 9) { *p++ = '\\'; *p++ = 't'; } else if (ch == 10) { *p++ = '\\'; *p++ = 'n'; } else if (ch == 13) { *p++ = '\\'; *p++ = 'r'; } /* Map non-printable US ASCII to '\xhh' */ else if (ch < 32 || ch >= 128) { *p++ = '\\'; *p++ = 'x'; *p++ = hexdigit[(ch >> 4) & 0xf]; *p++ = hexdigit[ch & 15]; } /* Copy everything else as-is */ else *p++ = (char) ch; } *p = '\0'; return repr; } static void calculate_position(lexerobject *lexer, int *line, int *column) { /* Determine line and column numbers */ Py_UNICODE *p; *line = 1; *column = 1; for (p = PyUnicode_AS_UNICODE(lexer->text); p < lexer->end; p++) { if ((char)*p == '\n') { *line += 1; 
*column = 1; } else { *column += 1; } } } static const char error_format_str[] = "parse error at line %d, column %d: matched '%s'"; static const char error_format_eof_str[] = "parse error at line %d, column %d: reached end-of-input"; static PyObject *report_error(int state, PyObject* lval, lexerobject *lexer) { int line, column; int ruleno = action_idx[state]; char *matched = NULL; if (lval) { matched = unicode_escape(PyUnicode_AS_UNICODE(lval), PyUnicode_GET_SIZE(lval)); if (matched == NULL) return NULL; } calculate_position(lexer, &line, &column); Py_DECREF(lexer->text); if (ruleno > YYFLAG && ruleno < YYLAST) { /* There are expected tokens */ int x, count; int size = 60; /* Initial format string */ char *msg; /* Start X at -yyn if nec to avoid negative indexes in yycheck. */ for (x = (ruleno < 0 ? -ruleno : 0); x < (sizeof(token_names) / sizeof(char *)); x++) { if (yycheck[x + ruleno] == x) { size += strlen(token_names[x]) + 15; } } msg = PyMem_New(char, size); if (msg == NULL) { PyMem_Del(matched); return NULL; } if (lval) { strcpy(msg, error_format_str); } else { strcpy(msg, error_format_eof_str); } count = 0; for (x = (ruleno < 0 ? -ruleno : 0); x < (sizeof(token_names) / sizeof(char *)); x++) { if (yycheck[x + ruleno] == x) { strcat(msg, count == 0 ? 
", expecting '" : " or '"); strcat(msg, token_names[x]); strcat(msg, "'"); count++; } } if (matched) { PyErr_Format(PyExc_SyntaxError, msg, line, column, matched); } else { PyErr_Format(PyExc_SyntaxError, msg, line, column); } PyMem_Del(msg); } else { if (matched) { PyErr_Format(PyExc_SyntaxError, error_format_str, line, column, matched); } else { PyErr_Format(PyExc_SyntaxError, error_format_eof_str, line, column); } } if (matched) { PyMem_Del(matched); } return NULL; } static const char lexer_error_str[] = "lexical error at line %d, column %d: no action found for '%s'"; static void lexer_error(lexerobject *lexer) { int line, column; char *repr = unicode_escape(lexer->position, (lexer->end - lexer->position)); if (repr == NULL) return; calculate_position(lexer, &line, &column); PyErr_Format(PyExc_SyntaxError, lexer_error_str, line, column, repr); PyMem_Del(repr); return; } static void print_reduce(int ruleno) { int count; const int *token; PySys_WriteStderr("Reducing via rule %d (%s), ", ruleno, rule_info[ruleno]); /* print the symbols being reduced and their result. 
*/ count = ruleno; token = rhs_tokens; while (--count) while (*++token); while (*++token) { PySys_WriteStderr("%s ", token_names[*token]); } PySys_WriteStderr("-> %s\n", token_names[derives[ruleno]]); } static void print_state_stack(int *stack, int *end) { int *curr = stack; PySys_WriteStderr("state stack now"); while (curr <= end) { PySys_WriteStderr(" %d", *curr++); } PySys_WriteStderr("\n"); } static PyObject *import_from(char *modulename, char *fromname) { PyObject *fromlist, *name, *module; fromlist = PyTuple_New(1); if (fromlist == NULL) return NULL; name = PyString_FromString(fromname); if (name == NULL) { Py_DECREF(fromlist); return NULL; } Py_INCREF(name); PyTuple_SET_ITEM(fromlist, 0, name); module = PyImport_ImportModuleEx(modulename, NULL, NULL, fromlist); Py_DECREF(fromlist); if (module == NULL) { Py_DECREF(name); return NULL; } fromlist = PyObject_GetAttr(module, name); Py_DECREF(module); Py_DECREF(name); return fromlist; } static PyMethodDef module_methods[] = { { NULL } };