aboutsummaryrefslogtreecommitdiff
path: root/sys/fmtio/strmatch.x
blob: ad16bef8bbdc2d72affc841bc37ce5cc6284ca66 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.

include	<ctype.h>
include <pattern.h>

.help strmatch, gstrmatch
.nf ________________________________________________________________________
STRMATCH -- Find the first occurrence of the string A in the string B.
If not found, return zero, else return the index of the first character
following the matched substring.

GSTRMATCH -- More general version of strmatch.  The indices of the
first and last characters matched are returned as arguments.  The function
value is the same as for STRMATCH.

STRMATCH recognizes the metacharacters BOL, EOL, ANY, WHITESPACE, IGNORECASE,
and MATCHCASE (BOL and EOL are special only as the first and last chars
in the pattern).  The null pattern matches any nonempty string (an empty
string is never matched).  Metacharacters can be escaped.
.endhelp ___________________________________________________________________


# STRMATCH -- Look for pattern PAT in string STR.  The function value is the
# index of the character immediately following the matched substring, or 0
# when the pattern is not found.  This is GSTRMATCH with the indices of the
# matched substring discarded.

int procedure strmatch (str, pat)

char	str[ARB]		# string to be searched
char	pat[ARB]		# pattern to be searched for

int	ifirst, ilast		# match limits from gstrmatch (not used)
int	next_ch
int	gstrmatch()

begin
	next_ch = gstrmatch (str, pat, ifirst, ilast)
	return (next_ch)
end


# GSTRMATCH -- Generalized string match.  Search string STR for pattern PAT.
# If a match occurs the indices of the first and last characters of the
# matched substring are returned in FIRST_CHAR and LAST_CHAR, and the function
# value is the index of the character following the match.  If there is no
# match the function value is 0, FIRST_CHAR is 1 and LAST_CHAR is not modified.
#
# The pattern is tried at each successive character offset in STR, or only at
# offset 1 if the pattern begins with CH_BOL.  Matching is greedy and does not
# backtrack within one attempt.
#
#   CH_WHITESPACE	skips zero or more whitespace characters
#   CH_ANY		skips one character; at a newline or at the end of
#			    the string nothing is skipped (the pattern char
#			    is still considered matched)
#   CH_IGNORECASE	following pattern chars match letters of either case
#   CH_MATCHCASE	following pattern chars must match exactly
#   CH_ESCAPE		the next pattern char is taken literally
#   CH_EOL		as the last pattern char, matches at a newline or at
#			    the end of the string; otherwise it is compared
#			    as an ordinary character
#
# An empty STR is never matched, not even by the null pattern, because no
# start offset is ever tried.

int procedure gstrmatch (str, pat, first_char, last_char)

char	pat[ARB], str[ARB]
int	first_char, last_char
bool	ignore_case, bolflag
char	ch, pch
int	i, ip, initial_pp, pp

begin
	bolflag = false
	first_char = 1
	initial_pp = 1

	if (pat[1] == CH_BOL) {			# match at beginning of line?
	    bolflag = true
	    initial_pp = 2
	}

	# Try to match pattern starting at each character offset in string.
	do ip = 1, ARB {
	    if (str[ip] == EOS)
		break
	    i = ip

	    # Every attempt starts out case sensitive.  The case mode set by
	    # CH_IGNORECASE in a previous, failed attempt must not carry over
	    # to the pattern characters preceding it in this attempt.
	    ignore_case = false

	    # Compare pattern to string str[ip].
	    for (pp=initial_pp;  pat[pp] != EOS;  pp=pp+1) {
		switch (pat[pp]) {
		case CH_WHITESPACE:
		    while (IS_WHITE (str[i]))
			i = i + 1
		case CH_ANY:
		    # Never step over the newline or the EOS; stepping over
		    # the EOS would extend the match beyond the string.
		    if (str[i] != '\n' && str[i] != EOS)
			i = i + 1
		case CH_IGNORECASE:
		    ignore_case = true
		case CH_MATCHCASE:
		    ignore_case = false

		default:
		    pch = pat[pp]
		    if (pch == CH_ESCAPE && pat[pp+1] != EOS) {
			# Escaped character: compare the next pattern char
			# literally.
			pp = pp + 1
			pch = pat[pp]
		    } else if (pch == CH_EOL)
			if (pat[pp+1] == EOS && (str[i]=='\n' || str[i]==EOS)) {
			    # Match at end of line.  A newline is included in
			    # the matched substring, the EOS is not.
			    first_char = ip
			    last_char = i
			    if (str[i] == EOS)
				last_char = last_char - 1
			    return (last_char + 1)
			}

		    # PCH is never EOS here, hence running into the end of
		    # the string always ends the attempt as a mismatch.
		    ch = str[i]
		    i = i + 1

		    # Compare ordinary characters.  The comparison is trivial
		    # unless case insensitivity is required.

		    if (ignore_case) {
			if (IS_UPPER (ch)) {
			    if (IS_UPPER (pch)) {
				if (pch != ch)
				    break
			    } else if (pch != TO_LOWER (ch))
				break
			} else if (IS_LOWER (ch)) {
			    if (IS_LOWER (pch)) {
				if (pch != ch)
				    break
			    } else if (pch != TO_UPPER (ch))
				break
			} else {
			    if (pch != ch)
				break
			}
		    } else {
			if (pch != ch)
			    break
		    }
		}
	    }

	    # If the above loop was exited before the end of the pattern
	    # was reached, the pattern did not match.  A pattern anchored
	    # with CH_BOL is tried at the first offset only.  Otherwise keep
	    # trying until the end of the string: a pattern ending in a zero
	    # width element such as CH_EOL can still match at a later offset
	    # even though this attempt failed on the last character.

	    if (pat[pp] == EOS) {
		first_char = ip
		last_char = i-1
		return (i)

	    } else if (bolflag)
		break
	}

	return (0)				# no match
end