diff options
author | Jef <jef@targetspot.com> | 2024-09-24 08:54:57 -0400 |
---|---|---|
committer | Jef <jef@targetspot.com> | 2024-09-24 08:54:57 -0400 |
commit | 20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch) | |
tree | 12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/nu/regexp.cpp | |
parent | 537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff) | |
download | winamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz |
Initial community commit
Diffstat (limited to 'Src/nu/regexp.cpp')
-rw-r--r-- | Src/nu/regexp.cpp | 233 |
1 files changed, 233 insertions, 0 deletions
diff --git a/Src/nu/regexp.cpp b/Src/nu/regexp.cpp new file mode 100644 index 00000000..6908a49a --- /dev/null +++ b/Src/nu/regexp.cpp @@ -0,0 +1,233 @@ +#include "regexp.h" +// TODO: make a little more multi-byte safe + + + +// regexp match functions + +// A match means the entire string TEXT is used up in matching. +// In the pattern string: +// `*' matches any sequence of characters (zero or more) +// `?' matches any character +// [SET] matches any character in the specified set, +// [!SET] or [^SET] matches any character not in the specified set. + +// A set is composed of characters or ranges; a range looks like +// character hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the +// minimal set of characters allowed in the [..] pattern construct. +// Other characters are allowed (ie. 8 bit characters) if your system +// will support them. + +// To suppress the special syntactic significance of any of `[]*?!^-\', +// and match the character exactly, precede it with a `\'. + +enum { + MATCH_VALID = 1, /* valid match */ + MATCH_END, /* premature end of pattern string */ + MATCH_ABORT, /* premature end of text string */ + MATCH_RANGE, /* match failure on [..] construct */ + MATCH_LITERAL, /* match failure on literal match */ + MATCH_PATTERN, /* bad pattern */ +}; + +enum { + PATTERN_VALID = 0, /* valid pattern */ + PATTERN_ESC = -1, /* literal escape at end of pattern */ + PATTERN_RANGE = -2, /* malformed range in [..] construct */ + PATTERN_CLOSE = -3, /* no end bracket in [..] construct */ + PATTERN_EMPTY = -4, /* [..] contstruct is empty */ +}; + +int Matche(const regchar_t *p, const regchar_t *t); + +// TODO: make this multi-byte aware +int matche_after_star(const regchar_t *p, const regchar_t *t) +{ + register int match = 0; + register regchar_t nextp; + /* pass over existing ? and * in pattern */ + while ( *p == '?' || *p == '*' ) + { + /* take one char for each ? and + */ + if (*p == '?') + { + /* if end of text then no match */ + if (!*t++) return MATCH_ABORT; + } + /* move to next char in pattern */ + p++; + } + /* if end of pattern we have matched regardless of text left */ + if (!*p) return MATCH_VALID; + /* get the next character to match which must be a literal or '[' */ + nextp = *p; + if (nextp == '\\') + { + nextp = p[1]; + /* if end of text then we have a bad pattern */ + if (!nextp) return MATCH_PATTERN; + } + /* Continue until we run out of text or definite result seen */ + do + { + /* a precondition for matching is that the next character + in the pattern match the next character in the text or that + the next pattern char is the beginning of a range. Increment + text pointer as we go here */ + if (nextp == *t || nextp == '[') match = Matche(p, t); + /* if the end of text is reached then no match */ + if (!*t++) match = MATCH_ABORT; + } + while ( match != MATCH_VALID && match != MATCH_ABORT && match != MATCH_PATTERN); + /* return result */ + return match; +} + + +int Matche(const regchar_t *p, const regchar_t *t) +{ + regchar_t range_start, range_end; /* start and end in range */ + + bool invert; /* is this [..] or [!..] */ + bool member_match; /* have I matched the [..] construct? */ + bool loop; /* should I terminate? */ + + for ( ; *p; p++, t++) + { + /* if this is the end of the text then this is the end of the match */ + if (!*t) + { + return (*p == '*' && *++p == '\0') ? MATCH_VALID : MATCH_ABORT; + } + /* determine and react to pattern type */ + switch (*p) + { + case '?': /* single any character match */ + break; + case '*': /* multiple any character match */ + return matche_after_star (p, t); + + /* [..] construct, single member/exclusion character match */ + case '[': + { + /* move to beginning of range */ + p++; + /* check if this is a member match or exclusion match */ + invert = false; + if (*p == '!' || *p == '^') + { + invert = true; + p++; + } + /* if closing bracket here or at range start then we have a malformed pattern */ + if (*p == ']') + return MATCH_PATTERN; + + member_match = false; + loop = true; + while (loop) + { + /* if end of construct then loop is done */ + if (*p == ']') + { + loop = false; + continue; + } + /* matching a '!', '^', '-', '\' or a ']' */ + if (*p == '\\') + range_start = range_end = *++p; + else + range_start = range_end = *p; + /* if end of pattern then bad pattern (Missing ']') */ + if (!*p) + return MATCH_PATTERN; + /* check for range bar */ + if (*++p == '-') + { + /* get the range end */ + range_end = *++p; + /* if end of pattern or construct then bad pattern */ + if (range_end == '\0' || range_end == ']') return MATCH_PATTERN; + /* special character range end */ + if (range_end == '\\') + { + range_end = *++p; + /* if end of text then we have a bad pattern */ + if (!range_end) return MATCH_PATTERN; + } + /* move just beyond this range */ + p++; + } + /* if the text character is in range then match found. + make sure the range letters have the proper + relationship to one another before comparison */ + if (range_start < range_end) + { + if (*t >= range_start && *t <= range_end) + { + member_match = true; + loop = false; + } + } + else + { + if (*t >= range_end && *t <= range_start) + { + member_match = true; + loop = false; + } + } + } + /* if there was a match in an exclusion set then no match */ + /* if there was no match in a member set then no match */ + if ((invert && member_match) || !(invert || member_match)) + return MATCH_RANGE; + /* if this is not an exclusion then skip the rest of the [...] construct that already matched. */ + if (member_match) + { + while (p && *p != ']') + { + /* bad pattern (Missing ']') */ + if (!*p) + return MATCH_PATTERN; + /* skip exact match */ + if (*p == '\\') + { + p++; + /* if end of text then we have a bad pattern */ + if (!*p) + return MATCH_PATTERN; + } + /* move to next pattern char */ + p++; + } + } + break; + } + case '\\': /* next character is quoted and must match exactly */ + /* move pattern pointer to quoted char and fall through */ + p++; + /* if end of text then we have a bad pattern */ + if (!*p) + return MATCH_PATTERN; + /* must match this character exactly */ + default: + if (*p != *t) + return MATCH_LITERAL; + } + } + /* if end of text not reached then the pattern fails */ + if (*t) + return MATCH_END; + else return MATCH_VALID; +} + +bool Match(const regchar_t *match, const regchar_t *string) +{ + if (!match) + return true; + int error_type; + + error_type = Matche(match, string); + return (error_type == MATCH_VALID); +} |