static int casecmp(const chr *, const chr *, size_t);
-/* info we need during compilation about a known capturing subexpression */
-struct subinfo
-{
- struct state *left; /* left end of its sub-NFA */
- struct state *right; /* right end of its sub-NFA */
-};
-
/* internal variables, bundled for easy passing around */
struct vars
{
int nexttype; /* type of next token */
chr nextvalue; /* value (if any) of next token */
int lexcon; /* lexical context type (see regc_lex.c) */
- int nsubexp; /* number of known capturing subexpressions */
- struct subinfo *subs; /* info about known capturing subexpressions */
- size_t nsubs; /* allocated length of subs[] vector */
- struct subinfo sub10[10]; /* initial vector, enough for most */
+ int nsubexp; /* subexpression count */
+ struct subre **subs; /* subRE pointer vector */
+ size_t nsubs; /* length of vector */
+ struct subre *sub10[10]; /* initial vector, enough for most */
struct nfa *nfa; /* the NFA */
struct colormap *cm; /* character color map */
color nlcolor; /* color of newline */
v->subs = v->sub10;
v->nsubs = 10;
for (j = 0; j < v->nsubs; j++)
- v->subs[j].left = v->subs[j].right = NULL;
+ v->subs[j] = NULL;
v->nfa = NULL;
v->cm = NULL;
v->nlcolor = COLORLESS;
}
/*
- * moresubs - enlarge capturing-subexpressions vector
+ * moresubs - enlarge subRE vector
*/
static void
moresubs(struct vars *v,
int wanted) /* want enough room for this one */
{
/*
 * Grow v->subs to n entries so that index "wanted" becomes valid, and
 * set every newly added slot to NULL.  This excerpt is diff-formatted:
 * "-" lines are the old text, "+" lines the new text.
 */
- struct subinfo *p;
+ struct subre **p;
size_t n;
/* only called when "wanted" lies at or beyond the current vector length */
assert(wanted > 0 && (size_t) wanted >= v->nsubs);
/*
 * NOTE(review): no assignment to n appears in the visible lines, yet n is
 * used below as the new length.  Presumably the line that computes it is
 * unchanged context that this excerpt elided -- confirm against the file.
 */
if (v->subs == v->sub10)
{
/*
 * Still using the sub10[] array embedded in struct vars: get a fresh
 * block and copy the existing v->nsubs entries into it, rather than
 * passing sub10 to REALLOC (presumably because it is not heap storage).
 */
- p = (struct subinfo *) MALLOC(n * sizeof(struct subinfo));
+ p = (struct subre **) MALLOC(n * sizeof(struct subre *));
if (p != NULL)
memcpy(VS(p), VS(v->subs),
- v->nsubs * sizeof(struct subinfo));
+ v->nsubs * sizeof(struct subre *));
}
else
/* vector was already enlarged once: resize it in place */
- p = (struct subinfo *) REALLOC(v->subs, n * sizeof(struct subinfo));
+ p = (struct subre **) REALLOC(v->subs, n * sizeof(struct subre *));
if (p == NULL)
{
/* allocation failed: report out-of-space */
ERR(REG_ESPACE);
/*
 * NOTE(review): no return is visible after ERR().  As shown, control
 * would continue to "v->subs = p" with p == NULL.  Presumably a
 * "return;" is unchanged context elided from this excerpt -- confirm.
 */
}
v->subs = p;
/* NULL-fill the new tail, advancing v->nsubs until it reaches n */
for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++)
- p->left = p->right = NULL;
+ *p = NULL;
/* postconditions: length is exactly n, and "wanted" is now a valid index */
assert(v->nsubs == n);
assert((size_t) wanted < v->nsubs);
}
s = newstate(v->nfa);
s2 = newstate(v->nfa);
NOERRN();
+ /* We may not need these arcs, but keep things connected for now */
EMPTYARC(lp, s);
EMPTYARC(s2, rp);
NOERRN();
NOERRN();
if (cap)
{
- /* save the sub-NFA's endpoints for future backrefs to use */
- assert(v->subs[subno].left == NULL);
- v->subs[subno].left = s;
- v->subs[subno].right = s2;
if (atom->capno == 0)
{
/* normal case: just mark the atom as capturing */
t->child = atom;
atom = t;
}
+ assert(v->subs[subno] == NULL);
+ v->subs[subno] = atom;
}
/* postpone everything else pending possible {0} */
break;
case BACKREF: /* the Feature From The Black Lagoon */
INSIST(type != LACON, REG_ESUBREG);
INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
- INSIST(v->subs[v->nextvalue].left != NULL, REG_ESUBREG);
+ INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
NOERRN();
assert(v->nextvalue > 0);
atom = subre(v, 'b', BACKR, lp, rp);
if (atom != NULL)
freesubre(v, atom);
if (atomtype == '(')
- v->subs[subno].left = v->subs[subno].right = NULL;
+ v->subs[subno] = NULL;
delsub(v->nfa, lp, rp);
EMPTYARC(lp, rp);
return top;
NOERRN();
}
+ /*
+ * For what follows, we need the atom to have its own begin/end states
+ * that are distinct from lp/rp, so that we can wrap iteration structure
+ * around it. The parenthesized-atom case above already made suitable
+ * states (and we don't want to modify a capturing subre, since it's
+ * already recorded in v->subs[]). Otherwise, we need more states.
+ */
+ if (atom->begin == lp || atom->end == rp)
+ {
+ s = newstate(v->nfa);
+ s2 = newstate(v->nfa);
+ NOERRN();
+ moveouts(v->nfa, lp, s);
+ moveins(v->nfa, rp, s2);
+ atom->begin = s;
+ atom->end = s2;
+ }
+ else
+ {
+ /* The atom's OK, but we must temporarily disconnect it from lp/rp */
+ /* (this removes the EMPTY arcs we made above) */
+ delsub(v->nfa, lp, atom->begin);
+ delsub(v->nfa, atom->end, rp);
+ }
+
/*----------
* Prepare a general-purpose state skeleton.
*
* In the no-backrefs case, we want this:
*
- * [lp] ---> [s] ---prefix---> [begin] ---atom---> [end] ---rest---> [rp]
+ * [lp] ---> [s] ---prefix---> ---atom---> ---rest---> [rp]
*
- * where prefix is some repetitions of atom. In the general case we need
+ * where prefix is some repetitions of atom, and "rest" is the remainder
+ * of the branch. In the general case we need:
*
* [lp] ---> [s] ---iterator---> [s2] ---rest---> [rp]
*
- * where the iterator wraps around [begin] ---atom---> [end]
+ * where the iterator wraps around the atom.
*
* We make the s state here for both cases; s2 is made below if needed
*----------
*/
- s = newstate(v->nfa); /* first, new endpoints for the atom */
- s2 = newstate(v->nfa);
- NOERRN();
- moveouts(v->nfa, lp, s);
- moveins(v->nfa, rp, s2);
- NOERRN();
- atom->begin = s;
- atom->end = s2;
s = newstate(v->nfa); /* set up starting state */
NOERRN();
EMPTYARC(lp, s);
{
assert(atom->begin->nouts == 1); /* just the EMPTY */
delsub(v->nfa, atom->begin, atom->end);
- assert(v->subs[subno].left != NULL);
+ assert(v->subs[subno] != NULL);
/*
* And here's why the recursion got postponed: it must wait until the
* skeleton is filled in, because it may hit a backref that wants to
* copy the filled-in skeleton.
*/
- dupnfa(v->nfa, v->subs[subno].left, v->subs[subno].right,
+ dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
atom->begin, atom->end);
NOERRN();