aboutsummaryrefslogtreecommitdiff
path: root/pkg/utilities/nttools/tedit/substitute.x
blob: 963d2786138954e3397f62c20f07e00a9b5d6b98 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
.help substitute
.nf____________________________________________________________________________

This procedure searches for and replaces text patterns in a string.
The text patterns are passed to the procedure as arguments, so this
procedure can be used to perform a variety of text processing tasks.
The procedure has four arguments: a target pattern string (from), a
replacement pattern string (to), the string to be modified (str), and
a maximum length for this string (maxch). The syntax for the target
and replacement pattern strings largely follows that used in the
substitute command by the Unix text editors `ed' and `ex'. The pattern
consists of a sequence of ordinary characters, which match themselves,
and metacharacters, which match a set of characters. A metacharacter
can be matched as if it were an ordinary character by preceding it
with the escape character, `\'. For example, the escape character
itself is indicated in a pattern by `\\'. The metacharacters which can
be used in the target pattern are:

	beginning of string	^	end of string		$
	white space		#	escape character	\
	ignore case		{	end ignore case		}
	begin character class	[	end character class	]
	not, in char class	^	range, in char class	-
	one character		?	zero or more occurrences *
	begin tagged string	\(	end tagged string	\)


A set of characters is indicated in the target string by the character
class construct. For example, punctuation could be indicated by
`[,;.!]'.  A range of characters contiguous in the underlying
character set can be abbreviated by the range construct. For example,
`[a-z]' matches any lower case character. The complement of a
character set is indicated by making `^' the first character in a
class. For example, `[^0-9]' matches any non-digit. Repetition of a
character or character class is indicated by following it with the
`*' metacharacter. Thus, zero or more occurrences of a lower case
character is indicated by `[a-z]*'. The tagged string metacharacters
have no effect on the match, they only serve to identify portions of
the matched string for the replacement pattern. The metacharacters
which are used in the replacement pattern are the following:

	entire string		&	tagged string		\n
	capitalize		\u	upper case		\U
	lower case		\L	end case conversion	\e \E

The ditto metacharacter, `&', stands for the entire portion of the
string that was matched by the target pattern. The tag metacharacter,
`\n', stands for the n-th tagged string.  For example, `\1' indicates
the first tagged string and `\2' the second. The remaining
metacharacters affect the case of the output string. The
capitalization metacharacter only affects the immediately following
metacharacter, but the upper and lower case metacharacters must be
turned off explicitly with `\e' or `\E'. The following are a few
examples of the results that can be obtained with this subroutine:

	from			to			action
	----			--	       		------
	IRAF			SDAS			convert all mentions
							of IRAF to SDAS
	[a-z][A-Za-z]*		\u&			capitalize all words
	"\([^"]*\)"		'\1'			convert double quoted
							strings to single
							quoted strings
	\([^,]*\),\(?*\)	\2,\1			reverse two fields
							separated by commas

.endhelp_______________________________________________________________________

include <ctype.h>

define	DITTO		-1	# substitute matched expression
define	TAG		-2	# substitute tagged part of matched expression
define	CAP		-3	# capitalize next char
define	UCASE		-4	# convert to upper case
define	LCASE		-5	# convert to lower case
define	ENDCASE		-6	# end case conversion

define	CH_ESCAPE	'\\'
define	CH_DITTO	'&'
define	CH_LTAG		'('
define	CH_RTAG		')'
define	CH_INDEX	'%'

#* HISTORY *
#* B.Simon	08-Dec-87	First code
#* B.Simon	05-Jan-93	Modified for substitute command

# SUBSTITUTE -- Replace every match of the target pattern in a string
#
# The string is scanned from left to right. Wherever an anchored match of
# one or more characters begins, the expansion of the replacement pattern
# is written to the output in place of the matched characters; every other
# character is copied through unchanged. The result overwrites str. The
# function value is true if at least one replacement was made.

bool procedure substitute (from, to, str, maxch)

char	from[ARB]	# i: Target pattern
char	to[ARB]		# i: Replacement pattern
char	str[ARB]	# u: String to be modified
int	maxch		# i: Maximum length of string
#--
bool	found
int	patlen, ip, op, nmatch, last
pointer	sp, pat, sub, outbuf

int	pat_amatch()

begin
	# Work space: the two encoded patterns and the output string

	patlen = maxch + SZ_LINE

	call smark (sp)
	call salloc (pat, patlen, TY_CHAR)
	call salloc (sub, patlen, TY_CHAR)
	call salloc (outbuf, maxch, TY_CHAR)

	# Put both patterns into their encoded forms

	call code_pat (from, Memc[pat], patlen)
	call code_sub (to, Memc[sub], patlen)

	# ip is the next input character to examine, op the next free
	# position in the output string

	found = false
	op = 1
	ip = 1

	while (str[ip] != EOS) {
	    nmatch = pat_amatch (str, ip, Memc[pat])

	    if (nmatch <= 0) {
		# Nothing matched here: pass one character through
		if (op <= maxch) {
		    Memc[outbuf+op-1] = str[ip]
		    op = op + 1
		}
		ip = ip + 1

	    } else {
		# Expand the replacement for characters ip through last
		found = true
		last = ip + nmatch - 1
		call make_sub (Memc[pat], Memc[sub], str, ip, last,
			       Memc[outbuf], op, maxch)
		ip = last + 1
	    }
	}

	# Terminate the output and move it back over the input string

	Memc[outbuf+op-1] = EOS
	call strcpy (Memc[outbuf], str, maxch)

	call sfree (sp)
	return (found)

end

# CODE_PAT -- Encode the target pattern
#
# The target string is first rewritten into the syntax expected by the
# pattern encoder: the tag delimiters `\(' and `\)' each become the index
# character, and an index character already present in the target is
# escaped so that it stands for itself. The rewritten string is then
# passed to patmake, which fills in pat.
#
# An escape and the character it escapes are copied together, so that the
# escaped character is never looked at again as if it were unescaped
# (`\\(' is an escaped backslash followed by a parenthesis, and `\%' is
# a literal index character).

procedure code_pat (from, pat, maxch)

char	from[ARB]	# i: Target string
char	pat[ARB]	# o: Encoded target pattern
int	maxch		# i: Maximum length of pattern
#--
char	ch
int	ic, jc, nc
pointer	sp, temp

int	patmake()

begin
	# Allocate memory for temporary string

	call smark (sp)
	call salloc (temp, maxch, TY_CHAR)

	# ic indexes the target string, jc the next free position in the
	# rewritten string. Each pass through the loop leaves in ch the
	# last character to be written for the current input item.

	ic = 1
	jc = 1
	while (from[ic] != EOS) {
	    if (from[ic] == CH_ESCAPE) {
		if (from[ic+1] == CH_LTAG || from[ic+1] == CH_RTAG) {
		    # Tag delimiter: replace the pair by the index character
		    ch = CH_INDEX
		    ic = ic + 1

		} else if (from[ic+1] != EOS) {
		    # Any other escaped character: copy the escape and the
		    # character as a unit. If only one position is left the
		    # pair is dropped and the buffer is marked as full, so
		    # that the output never ends in a dangling escape.

		    if (jc < maxch) {
			Memc[temp+jc-1] = CH_ESCAPE
			jc = jc + 1
		    } else {
			jc = maxch + 1
		    }
		    ic = ic + 1
		    ch = from[ic]

		} else {
		    # Escape at the very end of the target: copy it as is
		    ch = from[ic]
		}

	    } else if (from[ic] == CH_INDEX) {
		# Unescaped index character: escape it, again as a unit

		if (jc < maxch) {
		    Memc[temp+jc-1] = CH_ESCAPE
		    jc = jc + 1
		} else {
		    jc = maxch + 1
		}
		ch = from[ic]

	    } else {
		ch = from[ic]
	    }

	    if (jc <= maxch) {
		Memc[temp+jc-1] = ch
		jc = jc + 1
	    }

	    ic = ic + 1
	}

	# Call the IRAF pattern encoder to encode the converted string.
	# NOTE(review): the status returned by patmake is not examined
	# here -- confirm how an encoding failure should be reported.

	Memc[temp+jc-1] = EOS
	nc = patmake (Memc[temp], pat, maxch)

	call sfree (sp)
end

# CODE_SUB -- Encode the replacement pattern
#
# Metacharacters in the replacement string are converted to the negative
# codes DITTO, TAG, CAP, UCASE, LCASE and ENDCASE. A TAG code is always
# followed by one character holding the tag number (1-9). Other escape
# sequences are converted to single characters and everything else is
# copied unchanged.
#
# `\0' is encoded as DITTO (the entire matched string). Storing a tag
# number of zero would place a zero character, which is the string
# terminator, in the middle of the encoded pattern.

procedure code_sub (to, sub, maxch)

char	to[ARB]		# i: Replacement string
char	sub[ARB]	# o: Encoded replacement pattern
int	maxch		# i: Maximum length of encoded pattern
#--
char	ch
int	ic, jc

int	cctoc()

begin
	# ic indexes the replacement string, jc the next free position in
	# the encoded pattern. Each pass through the loop leaves in ch the
	# last character to be written for the current input item.

	ic = 1
	jc = 1

	while (to[ic] != EOS) {
	    if (to[ic] == CH_DITTO) {
		ch = DITTO

	    } else if (to[ic] == CH_ESCAPE && to[ic+1] == EOS) {
		# An escape with nothing after it is kept as an ordinary
		# character instead of being handed to cctoc, so that the
		# scan can never be moved beyond the end of the string.
		ch = to[ic]

	    } else if (to[ic] == CH_ESCAPE) {
		switch (to[ic+1]) {
		case 'u':
		    ch = CAP
		    ic = ic + 1
		case 'U':
		    ch = UCASE
		    ic = ic + 1
		case 'L':
		    ch = LCASE
		    ic = ic + 1
		case 'e', 'E':
		    ch = ENDCASE
		    ic = ic + 1
		case '0':
		    ch = DITTO
		    ic = ic + 1
		default:
		    if (IS_DIGIT(to[ic+1])) {
			# The TAG code and the tag number are stored as a
			# unit. If only one position is left the pair is
			# dropped and the buffer is marked as full, so that
			# a TAG code is never left without its number.

			if (jc < maxch) {
			    sub[jc] = TAG
			    jc = jc + 1
			} else {
			    jc = maxch + 1
			}
			ch = TO_INTEG(to[ic+1])
			ic = ic + 1

		    } else if (cctoc (to, ic, ch) == 1) {
			# NOTE(review): cctoc is presumed to advance ic
			# past what it converted and to return the number
			# of characters used -- confirm. A count of one is
			# taken to mean that only the escape was used, so
			# the character after it is copied literally.
			ch = to[ic]

		    } else {
			# ch holds the converted character. Step back one
			# so that the increment at the bottom of the loop
			# lands on the first unread character.
			ic = ic - 1
		    }
		}

	    } else {
		ch = to[ic]
	    }

	    if (jc <= maxch) {
		sub[jc] = ch
		jc = jc + 1
	    }

	    ic = ic + 1
	}

	sub[jc] = EOS

end

# COPY_SUB -- Append a range of characters to the output string
#
# Characters first through last of str1 are added to str2 starting at
# position len, which is advanced for every character stored. Characters
# that do not fit within maxch are discarded. The case of each character
# is set according to caseflag: LCASE lowers it, UCASE and CAP raise it,
# any other value leaves it alone. CAP applies to one character only;
# once a character has been processed the flag is reset to ENDCASE.

procedure copy_sub (str1, first, last, caseflag, str2, len, maxch)

char	str1[ARB]	# i: Input string
int	first		# i: First character to be moved
int	last		# i: Last character to be moved
int	caseflag	# u: Case conversion flag
char	str2[ARB]	# u: Output string
int	len		# u: Length of output string
int	maxch		# i: Maximum length of output string
#--
char	ch
int	ip

begin
	do ip = first, last {
	    ch = str1[ip]

	    # Apply the case conversion currently in effect, if any

	    if (caseflag == LCASE) {
		if (IS_UPPER (ch))
		    ch = TO_LOWER (ch)

	    } else if (caseflag == UCASE || caseflag == CAP) {
		if (IS_LOWER (ch))
		    ch = TO_UPPER (ch)
	    }

	    # Store the character if there is still room for it

	    if (len <= maxch) {
		str2[len] = ch
		len = len + 1
	    }

	    # A pending capitalization is used up by the first character

	    if (caseflag == CAP)
		caseflag = ENDCASE
	}
end

# MAKE_SUB -- Substitute for the chars matched by the target pattern
#
# The encoded replacement pattern is read one item at a time. Case codes
# change the conversion applied to whatever is copied afterwards. A TAG
# code, together with the tag number stored after it, copies the part of
# the input located by patindex for that tag. DITTO copies the whole
# matched range first through last. Any other character is copied from
# the replacement pattern itself. All output is appended to out at oc.

procedure make_sub (pat, sub, in, first, last, out, oc, maxch)

char	pat[ARB]	# i: Target pattern
char	sub[ARB]	# i: Replacement pattern
char	in[ARB]		# i: Input string
int	first		# i: First character matched in input string
int	last		# i: Last character matched in input string
char	out[ARB]	# u: Output string
int	oc		# u: Last character in output string
int	maxch		# i: Maximum length of output string
#--
int	mode, ip, code, slot, start, stop

int	patindex()

begin
	# No case conversion is in effect at the start of each substitution

	mode = ENDCASE
	ip = 1

	while (sub[ip] != EOS) {
	    code = sub[ip]

	    if (code == ENDCASE || code == LCASE || code == UCASE || code == CAP) {
		# Case codes only change the mode; nothing is copied
		mode = code

	    } else if (code == TAG) {
		# The tag number follows the TAG code. Tag n uses index
		# slots 2n-1 (start of tag) and 2n (just past its end).
		ip = ip + 1
		slot = 2 * sub[ip] - 1
		start = patindex (pat, slot)
		stop = patindex (pat, slot+1) - 1
		call copy_sub (in, start, stop, mode, out, oc, maxch)

	    } else if (code == DITTO) {
		call copy_sub (in, first, last, mode, out, oc, maxch)

	    } else {
		# Ordinary character, taken from the replacement pattern
		call copy_sub (sub, ip, ip, mode, out, oc, maxch)
	    }

	    ip = ip + 1
	}
end