--- /dev/null
+/* This program is free software: you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation, either version 3 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License
+ and a copy of the GCC Runtime Library Exception along with this
+ program; see the files COPYING and COPYING.RUNTIME respectively.
+ If not, see <http://www.gnu.org/licenses/>. */
+
+/* This file, originally provided under the MIT license, is relicensed
+ here under the GPL3 for distribution with InterAspect. The
+ original license is provided at the end of the file but does not
+ apply to any changes to this file or to any other part of the
+ InterAspect distribution. */
+
+/*
+ * Regular expression implementation.
+ * Supports only ( | ) * + ?. No escapes.
+ * Compiles to NFA and then simulates NFA
+ * using Thompson's algorithm.
+ *
+ * See also http://swtch.com/~rsc/regexp/ and
+ * Thompson, Ken. Regular Expression Search Algorithm,
+ * Communications of the ACM 11(6) (June 1968), pp. 419-422.
+ *
+ * Copyright (c) 2007 Russ Cox.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Convert infix regexp re to postfix notation.
+ * Insert . as explicit concatenation operator.
+ * Cheesy parser, return static buffer.
+ */
+char*
+re2post(char *re)
+{
+ int nalt, natom;
+ static char buf[8000];
+ char *dst;
+ struct {
+ int nalt;
+ int natom;
+ } paren[100], *p;
+
+ p = paren;
+ dst = buf;
+ nalt = 0;
+ natom = 0;
+ if(strlen(re) >= sizeof buf/2)
+ return NULL;
+ for(; *re; re++){
+ switch(*re){
+ case '(':
+ if(natom > 1){
+ --natom;
+ *dst++ = '.';
+ }
+ if(p >= paren+100)
+ return NULL;
+ p->nalt = nalt;
+ p->natom = natom;
+ p++;
+ nalt = 0;
+ natom = 0;
+ break;
+ case '|':
+ if(natom == 0)
+ return NULL;
+ while(--natom > 0)
+ *dst++ = '.';
+ nalt++;
+ break;
+ case ')':
+ if(p == paren)
+ return NULL;
+ if(natom == 0)
+ return NULL;
+ while(--natom > 0)
+ *dst++ = '.';
+ for(; nalt > 0; nalt--)
+ *dst++ = '|';
+ --p;
+ nalt = p->nalt;
+ natom = p->natom;
+ natom++;
+ break;
+ case '*':
+ case '+':
+ case '?':
+ if(natom == 0)
+ return NULL;
+ *dst++ = *re;
+ break;
+ default:
+ if(natom > 1){
+ --natom;
+ *dst++ = '.';
+ }
+ *dst++ = *re;
+ natom++;
+ break;
+ }
+ }
+ if(p != paren)
+ return NULL;
+ while(--natom > 0)
+ *dst++ = '.';
+ for(; nalt > 0; nalt--)
+ *dst++ = '|';
+ *dst = 0;
+ return buf;
+}
+
+/*
+ * Represents an NFA state plus zero or one or two arrows exiting.
+ * if c == Match, no arrows out; matching state.
+ * If c == Split, unlabeled arrows to out and out1 (if != NULL).
+ * If c < 256, labeled arrow with character c to out.
+ */
+enum
+{
+ Match = 256,
+ Split = 257
+};
+typedef struct State State;
+struct State
+{
+ int c;
+ State *out;
+ State *out1;
+ int lastlist;
+};
+State matchstate = { Match }; /* matching state */
+int nstate;
+
+/* Allocate and initialize State */
+State*
+state(int c, State *out, State *out1)
+{
+ State *s;
+
+ nstate++;
+ s = malloc(sizeof *s);
+ s->lastlist = 0;
+ s->c = c;
+ s->out = out;
+ s->out1 = out1;
+ return s;
+}
+
+/*
+ * A partially built NFA without the matching state filled in.
+ * Frag.start points at the start state.
+ * Frag.out is a list of places that need to be set to the
+ * next state for this fragment.
+ */
+typedef struct Frag Frag;
+typedef union Ptrlist Ptrlist;
+struct Frag
+{
+ State *start;
+ Ptrlist *out;
+};
+
+/* Initialize Frag struct. */
+Frag
+frag(State *start, Ptrlist *out)
+{
+ Frag n = { start, out };
+ return n;
+}
+
+/*
+ * Since the out pointers in the list are always
+ * uninitialized, we use the pointers themselves
+ * as storage for the Ptrlists.
+ */
+union Ptrlist
+{
+ Ptrlist *next;
+ State *s;
+};
+
+/* Create singleton list containing just outp. */
+Ptrlist*
+list1(State **outp)
+{
+ Ptrlist *l;
+
+ l = (Ptrlist*)outp;
+ l->next = NULL;
+ return l;
+}
+
+/* Patch the list of states at out to point to start. */
+void
+patch(Ptrlist *l, State *s)
+{
+ Ptrlist *next;
+
+ for(; l; l=next){
+ next = l->next;
+ l->s = s;
+ }
+}
+
+/* Join the two lists l1 and l2, returning the combination. */
+Ptrlist*
+append(Ptrlist *l1, Ptrlist *l2)
+{
+ Ptrlist *oldl1;
+
+ oldl1 = l1;
+ while(l1->next)
+ l1 = l1->next;
+ l1->next = l2;
+ return oldl1;
+}
+
+/*
+ * Convert postfix regular expression to NFA.
+ * Return start state.
+ */
+State*
+post2nfa(char *postfix)
+{
+ char *p;
+ Frag stack[1000], *stackp, e1, e2, e;
+ State *s;
+
+ // fprintf(stderr, "postfix: %s\n", postfix);
+
+ if(postfix == NULL)
+ return NULL;
+
+ #define push(s) *stackp++ = s
+ #define pop() *--stackp
+
+ stackp = stack;
+ for(p=postfix; *p; p++){
+ switch(*p){
+ default:
+ s = state(*p, NULL, NULL);
+ push(frag(s, list1(&s->out)));
+ break;
+ case '.': /* catenate */
+ e2 = pop();
+ e1 = pop();
+ patch(e1.out, e2.start);
+ push(frag(e1.start, e2.out));
+ break;
+ case '|': /* alternate */
+ e2 = pop();
+ e1 = pop();
+ s = state(Split, e1.start, e2.start);
+ push(frag(s, append(e1.out, e2.out)));
+ break;
+ case '?': /* zero or one */
+ e = pop();
+ s = state(Split, e.start, NULL);
+ push(frag(s, append(e.out, list1(&s->out1))));
+ break;
+ case '*': /* zero or more */
+ e = pop();
+ s = state(Split, e.start, NULL);
+ patch(e.out, s);
+ push(frag(s, list1(&s->out1)));
+ break;
+ case '+': /* one or more */
+ e = pop();
+ s = state(Split, e.start, NULL);
+ patch(e.out, s);
+ push(frag(e.start, list1(&s->out1)));
+ break;
+ }
+ }
+
+ e = pop();
+ if(stackp != stack)
+ return NULL;
+
+ patch(e.out, &matchstate);
+ return e.start;
+#undef pop
+#undef push
+}
+
+typedef struct List List;
+struct List
+{
+ State **s;
+ int n;
+};
+List l1, l2;
+static int listid;
+
+void addstate(List*, State*);
+void step(List*, int, List*);
+
+/* Compute initial state list */
+List*
+startlist(State *start, List *l)
+{
+ l->n = 0;
+ listid++;
+ addstate(l, start);
+ return l;
+}
+
+/* Check whether state list contains a match. */
+int
+ismatch(List *l)
+{
+ int i;
+
+ for(i=0; i<l->n; i++)
+ if(l->s[i] == &matchstate)
+ return 1;
+ return 0;
+}
+
+/* Add s to l, following unlabeled arrows. */
+void
+addstate(List *l, State *s)
+{
+ if(s == NULL || s->lastlist == listid)
+ return;
+ s->lastlist = listid;
+ if(s->c == Split){
+ /* follow unlabeled arrows */
+ addstate(l, s->out);
+ addstate(l, s->out1);
+ return;
+ }
+ l->s[l->n++] = s;
+}
+
+/*
+ * Step the NFA from the states in clist
+ * past the character c,
+ * to create next NFA state set nlist.
+ */
+void
+step(List *clist, int c, List *nlist)
+{
+ int i;
+ State *s;
+
+ listid++;
+ nlist->n = 0;
+ for(i=0; i<clist->n; i++){
+ s = clist->s[i];
+ if(s->c == c)
+ addstate(nlist, s->out);
+ }
+}
+
+/* Run NFA to determine whether it matches s. */
+int
+match(State *start, char *s)
+{
+ int i, c;
+ List *clist, *nlist, *t;
+
+ clist = startlist(start, &l1);
+ nlist = &l2;
+ for(; *s; s++){
+ c = *s & 0xFF;
+ step(clist, c, nlist);
+ t = clist; clist = nlist; nlist = t; /* swap clist, nlist */
+ }
+ return ismatch(clist);
+}
+
+int
+main(int argc, char **argv)
+{
+ int i;
+ char *post;
+ State *start;
+
+ if(argc < 3){
+ fprintf(stderr, "usage: nfa regexp string...\n");
+ return 1;
+ }
+
+ post = re2post(argv[1]);
+ if(post == NULL){
+ fprintf(stderr, "bad regexp %s\n", argv[1]);
+ return 1;
+ }
+
+ start = post2nfa(post);
+ if(start == NULL){
+ fprintf(stderr, "error in post2nfa %s\n", post);
+ return 1;
+ }
+
+ l1.s = malloc(nstate*sizeof l1.s[0]);
+ l2.s = malloc(nstate*sizeof l2.s[0]);
+ for(i=2; i<argc; i++)
+ if(match(start, argv[i]))
+ printf("%s\n", argv[i]);
+ return 0;
+}
+
+/* The original license for this file is provided here but does not
+ apply to any changes to the file or to any other part of the
+ InterAspect distribution. */
+
+/*
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the
+ * Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
+ * sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall
+ * be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+ * KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
+ * OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */