aboutsummaryrefslogtreecommitdiff
path: root/Src/nu/regexp.cpp
diff options
context:
space:
mode:
authorJef <jef@targetspot.com>2024-09-24 08:54:57 -0400
committerJef <jef@targetspot.com>2024-09-24 08:54:57 -0400
commit20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch)
tree12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/nu/regexp.cpp
parent537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff)
downloadwinamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz
Initial community commit
Diffstat (limited to 'Src/nu/regexp.cpp')
-rw-r--r--Src/nu/regexp.cpp233
1 files changed, 233 insertions, 0 deletions
diff --git a/Src/nu/regexp.cpp b/Src/nu/regexp.cpp
new file mode 100644
index 00000000..6908a49a
--- /dev/null
+++ b/Src/nu/regexp.cpp
@@ -0,0 +1,233 @@
+#include "regexp.h"
+// TODO: make a little more multi-byte safe
+
+
+
+// regexp match functions
+
+// A match means the entire string TEXT is used up in matching.
+// In the pattern string:
+// `*' matches any sequence of characters (zero or more)
+// `?' matches any character
+// [SET] matches any character in the specified set,
+// [!SET] or [^SET] matches any character not in the specified set.
+
+// A set is composed of characters or ranges; a range looks like
+// character hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the
+// minimal set of characters allowed in the [..] pattern construct.
+// Other characters are allowed (ie. 8 bit characters) if your system
+// will support them.
+
+// To suppress the special syntactic significance of any of `[]*?!^-\',
+// and match the character exactly, precede it with a `\'.
+
+// Result codes returned by Matche() and matche_after_star().
+// Match() treats MATCH_VALID as the only successful outcome; every other
+// code names the reason the match was rejected.
+enum
+{
+    MATCH_VALID   = 1, // the pattern matched the text
+    MATCH_END     = 2, // pattern ran out before the text did
+    MATCH_ABORT   = 3, // text ran out before the pattern did
+    MATCH_RANGE   = 4, // text character rejected by a [..] construct
+    MATCH_LITERAL = 5, // text character differs from a literal pattern character
+    MATCH_PATTERN = 6, // the pattern itself is malformed
+};
+
+// Pattern validation codes: zero means the pattern is well formed, each
+// negative value names one kind of syntax error.
+// NOTE(review): nothing in the visible part of this file references these
+// values - confirm whether a pattern validator exists elsewhere.
+enum
+{
+    PATTERN_VALID =  0, // pattern is well formed
+    PATTERN_ESC   = -1, // escape character `\' is the last character of the pattern
+    PATTERN_RANGE = -2, // malformed range inside a [..] construct
+    PATTERN_CLOSE = -3, // [..] construct has no closing bracket
+    PATTERN_EMPTY = -4, // [..] construct is empty
+};
+
+int Matche(const regchar_t *p, const regchar_t *t);
+
+// TODO: make this multi-byte aware
+// Resolve a `*' wildcard.
+//
+// p points at a run of `*' / `?' wildcards in the pattern, t at the text
+// position reached so far.  The run is collapsed first, then every remaining
+// text position is tried as the place where the rest of the pattern resumes.
+//
+// Returns one of the MATCH_* codes:
+//   MATCH_VALID   - the rest of the pattern matched at some text position
+//   MATCH_ABORT   - the text ran out before a match was found
+//   MATCH_PATTERN - the pattern is malformed (trailing `\')
+int matche_after_star(const regchar_t *p, const regchar_t *t)
+{
+    // `register' is deliberately not used: the keyword is deprecated in
+    // C++11 and ill-formed from C++17 on.
+    int match = 0;
+    regchar_t nextp;
+    bool starts_set;
+
+    /* pass over the run of ? and * in the pattern */
+    while (*p == '?' || *p == '*')
+    {
+        /* each ? consumes exactly one text character; * consumes none here */
+        if (*p == '?')
+        {
+            /* if end of text then no match */
+            if (!*t++) return MATCH_ABORT;
+        }
+        /* move to next char in pattern */
+        p++;
+    }
+    /* if end of pattern we have matched regardless of text left */
+    if (!*p) return MATCH_VALID;
+
+    /* the next pattern element is either a [..] set or a (possibly escaped)
+       literal; remember which BEFORE unescaping so that an escaped `\['
+       is treated as the literal it is */
+    starts_set = (*p == '[');
+    nextp = *p;
+    if (nextp == '\\')
+    {
+        nextp = p[1];
+        /* a lone `\' at the end of the pattern is a bad pattern */
+        if (!nextp) return MATCH_PATTERN;
+    }
+
+    /* Continue until we run out of text or a definite result is seen */
+    do
+    {
+        /* only attempt the (recursive) match when it can possibly succeed:
+           the next literal equals the current text character, or the next
+           pattern element is a set whose membership Matche() must check */
+        if (starts_set || nextp == *t) match = Matche(p, t);
+        /* if the end of text is reached then no match; otherwise advance */
+        if (!*t++) match = MATCH_ABORT;
+    }
+    while (match != MATCH_VALID && match != MATCH_ABORT && match != MATCH_PATTERN);
+
+    return match;
+}
+
+
+// Match the whole of text t against wildcard pattern p.
+//
+// Pattern syntax: `*' (any sequence, possibly empty), `?' (any single
+// character), [SET] / [!SET] / [^SET] (member / non-member of a set of
+// characters and a-z style ranges), and `\' to quote the next character.
+//
+// Returns one of the MATCH_* codes:
+//   MATCH_VALID   - pattern and text were both fully consumed
+//   MATCH_END     - pattern ended while text remained
+//   MATCH_ABORT   - text ended while pattern remained
+//   MATCH_RANGE   - a [..] construct rejected the text character
+//   MATCH_LITERAL - a literal pattern character differed from the text
+//   MATCH_PATTERN - the pattern is malformed
+//
+// Neither pointer may be NULL.  `*' is handled by matche_after_star(), which
+// calls back into this function.
+int Matche(const regchar_t *p, const regchar_t *t)
+{
+    regchar_t range_start, range_end; /* start and end in range */
+
+    bool invert;       /* is this [..] or [!..] */
+    bool member_match; /* have I matched the [..] construct? */
+    bool loop;         /* should I keep scanning the [..] construct? */
+
+    for ( ; *p; p++, t++)
+    {
+        /* if this is the end of the text then this is the end of the match */
+        if (!*t)
+        {
+            /* the remaining pattern may only consist of `*', each of which
+               matches the empty string; skip the whole run so that "a**"
+               matches "a" just like "a*" does */
+            while (*p == '*')
+                p++;
+            return (*p == '\0') ? MATCH_VALID : MATCH_ABORT;
+        }
+        /* determine and react to pattern type */
+        switch (*p)
+        {
+        case '?': /* single any character match */
+            break;
+
+        case '*': /* multiple any character match */
+            return matche_after_star(p, t);
+
+        /* [..] construct, single member/exclusion character match */
+        case '[':
+        {
+            /* move to beginning of range */
+            p++;
+            /* check if this is a member match or exclusion match */
+            invert = false;
+            if (*p == '!' || *p == '^')
+            {
+                invert = true;
+                p++;
+            }
+            /* a closing bracket right here means an empty set: bad pattern */
+            if (*p == ']')
+                return MATCH_PATTERN;
+
+            member_match = false;
+            loop = true;
+            while (loop)
+            {
+                /* if end of construct then loop is done */
+                if (*p == ']')
+                {
+                    loop = false;
+                    continue;
+                }
+                /* a `\' quotes the next character ('!', '^', '-', '\', ']') */
+                if (*p == '\\')
+                    range_start = range_end = *++p;
+                else
+                    range_start = range_end = *p;
+                /* if end of pattern then bad pattern (missing ']') */
+                if (!*p)
+                    return MATCH_PATTERN;
+                /* check for range bar */
+                if (*++p == '-')
+                {
+                    /* get the range end */
+                    range_end = *++p;
+                    /* if end of pattern or construct then bad pattern */
+                    if (range_end == '\0' || range_end == ']') return MATCH_PATTERN;
+                    /* quoted range end */
+                    if (range_end == '\\')
+                    {
+                        range_end = *++p;
+                        /* a `\' at the end of the pattern is a bad pattern */
+                        if (!range_end) return MATCH_PATTERN;
+                    }
+                    /* move just beyond this range */
+                    p++;
+                }
+                /* ranges may be written in either order (a-z or z-a):
+                   normalise so that range_start <= range_end */
+                if (range_start > range_end)
+                {
+                    const regchar_t swap = range_start;
+                    range_start = range_end;
+                    range_end = swap;
+                }
+                /* if the text character is in range then match found */
+                if (*t >= range_start && *t <= range_end)
+                {
+                    member_match = true;
+                    loop = false;
+                }
+            }
+            /* a hit in an exclusion set, or no hit in a member set, fails */
+            if ((invert && member_match) || !(invert || member_match))
+                return MATCH_RANGE;
+            /* a member set that matched stopped scanning early: skip the
+               rest of the [..] construct so p ends up on its ']' */
+            if (member_match)
+            {
+                while (*p != ']')
+                {
+                    /* bad pattern (missing ']') */
+                    if (!*p)
+                        return MATCH_PATTERN;
+                    /* skip a quoted character as a unit */
+                    if (*p == '\\')
+                    {
+                        p++;
+                        /* a `\' at the end of the pattern is a bad pattern */
+                        if (!*p)
+                            return MATCH_PATTERN;
+                    }
+                    /* move to next pattern char */
+                    p++;
+                }
+            }
+            break;
+        }
+
+        case '\\': /* next character is quoted and must match exactly */
+            /* move pattern pointer to the quoted char */
+            p++;
+            /* a `\' at the end of the pattern is a bad pattern */
+            if (!*p)
+                return MATCH_PATTERN;
+            /* fall through: the quoted character is compared as a literal */
+
+        default: /* must match this character exactly */
+            if (*p != *t)
+                return MATCH_LITERAL;
+        }
+    }
+    /* pattern consumed: success only if the text was consumed as well */
+    return *t ? MATCH_END : MATCH_VALID;
+}
+
+// Convenience wrapper around Matche(): reports whether `string' as a whole
+// matches the wildcard pattern `match'.
+//
+// A NULL pattern matches everything.  A NULL string matches nothing (it is
+// rejected up front rather than dereferenced), unless the pattern is NULL
+// too.  Any failure code from Matche(), including a malformed pattern,
+// yields false.
+bool Match(const regchar_t *match, const regchar_t *string)
+{
+    if (!match)
+        return true;
+    if (!string)
+        return false;
+
+    return Matche(match, string) == MATCH_VALID;
+}