Initial community commit

author: Jef <jef@targetspot.com> 2024-09-24 08:54:57 -0400
committer: Jef <jef@targetspot.com> 2024-09-24 08:54:57 -0400
commit: 20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch)
tree: 12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/nu/regexp.cpp
parent: 537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff)
download: winamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz
1 files changed, 233 insertions, 0 deletions
diff --git a/Src/nu/regexp.cpp b/Src/nu/regexp.cpp
new file mode 100644
index 00000000..6908a49a
--- /dev/null
+++ b/Src/nu/regexp.cpp
@@ -0,0 +1,233 @@
+#include "regexp.h"
+// TODO: make a little more multi-byte safe
+
+
+
+// regexp match functions
+
+// A match means the entire string TEXT is used up in matching.
+// In the pattern string:
+//      `*' matches any sequence of characters (zero or more)
+//      `?' matches any character
+//      [SET] matches any character in the specified set,
+//      [!SET] or [^SET] matches any character not in the specified set.
+
+// A set is composed of characters or ranges; a range looks like
+// character hyphen character (as in 0-9 or A-Z).  [0-9a-zA-Z_] is the
+// minimal set of characters allowed in the [..] pattern construct.
+// Other characters are allowed (i.e. 8-bit characters) if your system
+// will support them.
+
+// To suppress the special syntactic significance of any of `[]*?!^-\',
+// and match the character exactly, precede it with a `\'.
+
+enum {   /* result codes returned by Matche() and matche_after_star() */
+    MATCH_VALID   = 1,  /* pattern matched the whole text */
+    MATCH_END     = 2,  /* pattern ran out before the text did */
+    MATCH_ABORT   = 3,  /* text ran out before the pattern did */
+    MATCH_RANGE   = 4,  /* text character rejected by a [..] construct */
+    MATCH_LITERAL = 5,  /* text character differs from a literal */
+    MATCH_PATTERN = 6   /* pattern is malformed */
+};
+
+enum {   /* pattern-validation codes; none is referenced by the code in this file */
+    PATTERN_EMPTY = -4,   /* [..] construct is empty */
+    PATTERN_CLOSE = -3,   /* [..] construct has no closing bracket */
+    PATTERN_RANGE = -2,   /* malformed range inside a [..] construct */
+    PATTERN_ESC   = -1,   /* literal escape at the end of the pattern */
+    PATTERN_VALID =  0    /* pattern is well formed */
+};
+
+int Matche(const regchar_t *p, const regchar_t *t);  // forward declaration: matche_after_star() below calls Matche(), which in turn calls matche_after_star()
+
+// TODO: make this multi-byte aware
+// Continues a match after Matche() has met a '*' in the pattern.
+//   p - pattern, positioned on that '*' (a run of '*' and '?' may follow)
+//   t - text, positioned on the first character the wildcards may consume
+// Returns MATCH_VALID when the rest of the pattern matches the text from some
+// position onwards, MATCH_ABORT when the text runs out first, or
+// MATCH_PATTERN when the pattern is malformed.
+int matche_after_star(const regchar_t *p, const regchar_t *t)
+{
+	/* plain locals: the 'register' specifier was removed in C++17 */
+	int match = 0;
+	regchar_t nextp;
+	/* pass over consecutive ? and * in the pattern */
+	while (*p == '?' || *p == '*')
+	{
+		/* each ? must consume one text character; a * consumes none here */
+		if (*p == '?' && !*t++)
+			return MATCH_ABORT;   /* text ended before every ? was satisfied */
+		p++;
+	}
+	/* pattern ends with the wildcards: any remaining text is accepted */
+	if (!*p) return MATCH_VALID;
+	/* first character the remaining pattern needs: a literal, or '[' */
+	nextp = *p;
+	if (nextp == '\\')
+	{
+		nextp = p[1];
+		/* a '\' at the very end of the pattern is malformed */
+		if (!nextp) return MATCH_PATTERN;
+	}
+	/* try successive text positions until a definite result is seen */
+	do
+	{
+		/* only attempt a full match where the next pattern character equals the
+		   current text character, or is '[' (a set, which Matche() must judge) */
+		if (nextp == *t || nextp == '[') match = Matche(p, t);
+		/* that was the end of the text: nothing left to retry from */
+		if (!*t++) match = MATCH_ABORT;
+	}
+	while (match != MATCH_VALID && match != MATCH_ABORT && match != MATCH_PATTERN);
+	return match;
+}
+
+
+// Matches pattern p against the whole of text t, using the syntax described
+// at the top of this file.
+//   p - pattern; scanned up to its terminating '\0'
+//   t - text; scanned up to its terminating '\0'
+// Returns MATCH_VALID on a match; otherwise MATCH_END, MATCH_ABORT,
+// MATCH_RANGE, MATCH_LITERAL or MATCH_PATTERN tells why the match failed.
+// A '*' hands the rest of the work to matche_after_star(), which calls back
+// into this function.
+int Matche(const regchar_t *p, const regchar_t *t)
+{
+	regchar_t range_start, range_end;  /* bounds of the current set element */
+
+	bool invert;             /* is this [!..] / [^..] rather than [..] */
+	bool member_match;       /* did the text character hit a set element? */
+
+	for ( ; *p; p++, t++)
+	{
+		/* Text exhausted: the rest of the pattern can only match if it is
+		   made of nothing but '*', each of which may stand for zero
+		   characters.  Skipping the whole run (not just one '*') keeps
+		   "a**" consistent with "a*" when the text is "a". */
+		if (!*t)
+		{
+			while (*p == '*')
+				p++;
+			return (*p == '\0') ? MATCH_VALID : MATCH_ABORT;
+		}
+		/* determine and react to pattern type */
+		switch (*p)
+		{
+		case '?':  /* single any character match */
+			break;
+		case '*':  /* multiple any character match */
+			return matche_after_star (p, t);
+
+			/* [..] construct, single member/exclusion character match */
+		case '[':
+			{
+				/* move to beginning of range */
+				p++;
+				/* check if this is a member match or exclusion match */
+				invert = false;
+				if (*p == '!' || *p == '^')
+				{
+					invert = true;
+					p++;
+				}
+				/* an empty set ("[]", "[!]", "[^]") is a malformed pattern */
+				if (*p == ']')
+					return MATCH_PATTERN;
+
+				/* test the text character against each element in turn,
+				   stopping at the first hit or at the closing bracket */
+				member_match = false;
+				while (*p != ']')
+				{
+					/* a '\' quotes the next character ('!', '^', '-', '\', ']') */
+					if (*p == '\\')
+						range_start = range_end = *++p;
+					else
+						range_start = range_end = *p;
+					/* if end of pattern then bad pattern (Missing ']') */
+					if (!*p)
+						return MATCH_PATTERN;
+					/* check for range bar */
+					if (*++p == '-')
+					{
+						/* get the range end */
+						range_end = *++p;
+						/* if end of pattern or construct then bad pattern */
+						if (range_end == '\0' || range_end == ']') return MATCH_PATTERN;
+						/* special character range end */
+						if (range_end == '\\')
+						{
+							range_end = *++p;
+							/* a '\' at the very end of the pattern is malformed */
+							if (!range_end) return MATCH_PATTERN;
+						}
+						/* move just beyond this range */
+						p++;
+					}
+					/* ranges may be written in either order (a-z or z-a):
+					   normalise so that range_start <= range_end */
+					if (range_end < range_start)
+					{
+						regchar_t tmp = range_start;
+						range_start = range_end;
+						range_end = tmp;
+					}
+					if (*t >= range_start && *t <= range_end)
+					{
+						member_match = true;
+						break;
+					}
+				}
+				/* a hit in an exclusion set, or no hit in a member set, fails */
+				if (invert == member_match)
+					return MATCH_RANGE;
+				/* a member hit stopped the scan part-way through the set, so
+				   step over the remaining elements up to the closing ']' */
+				if (member_match)
+				{
+					while (*p != ']')
+					{
+						/* bad pattern (Missing ']') */
+						if (!*p)
+							return MATCH_PATTERN;
+						/* a quoted character (possibly ']') is skipped as a pair */
+						if (*p == '\\')
+						{
+							p++;
+							/* a '\' at the very end of the pattern is malformed */
+							if (!*p)
+								return MATCH_PATTERN;
+						}
+						/* move to next pattern char */
+						p++;
+					}
+				}
+				break;
+			}
+		case '\\':   /* next character is quoted and must match exactly */
+			p++;
+			/* a '\' at the very end of the pattern is malformed */
+			if (!*p)
+				return MATCH_PATTERN;
+			/* fall through: the quoted character is compared literally */
+		default:
+			if (*p != *t)
+				return MATCH_LITERAL;
+		}
+	}
+	/* pattern consumed: it is a match only if the text is consumed too */
+	if (*t)
+		return MATCH_END;
+	return MATCH_VALID;
+}
+
+// Returns true when `string` matches the wildcard pattern `match` in full.
+// A null pattern accepts everything.  `string` is handed to Matche() as-is,
+// which always dereferences it, so it must not be null.
+bool Match(const regchar_t *match, const regchar_t *string)
+{
+	if (!match)
+		return true;   // no pattern: nothing to reject
+	return Matche(match, string) == MATCH_VALID;
+}
author	Jef <jef@targetspot.com>	2024-09-24 08:54:57 -0400
committer	Jef <jef@targetspot.com>	2024-09-24 08:54:57 -0400
commit	20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch)
tree	12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/nu/regexp.cpp
parent	537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff)
download	winamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz