Diffstat (limited to 'unix/as.vax')
-rw-r--r--   unix/as.vax/README          |   34
-rw-r--r--   unix/as.vax/aaddks.s        |   40
-rw-r--r--   unix/as.vax/aadds.s         |   42
-rw-r--r--   unix/as.vax/aclr.s          |   64
-rw-r--r--   unix/as.vax/aluir.s         |   54
-rw-r--r--   unix/as.vax/aluis.s         |   56
-rw-r--r--   unix/as.vax/amapr.s         |   82
-rw-r--r--   unix/as.vax/amaps.s         |   86
-rw-r--r--   unix/as.vax/amov.s          |   94
-rw-r--r--   unix/as.vax/awsur.s         |   44
-rw-r--r--   unix/as.vax/awsus.s         |   47
-rw-r--r--   unix/as.vax/bitfields.s     |   42
-rw-r--r--   unix/as.vax/bytmov.s        |   80
-rw-r--r--   unix/as.vax/cyboow.s        |   93
-rw-r--r--   unix/as.vax/ieeed.s         |  182
-rw-r--r--   unix/as.vax/ieeer.s         |  153
-rw-r--r--   unix/as.vax/ishift.s        |   57
-rw-r--r--   unix/as.vax/zsvjmp.s        |   35
-rw-r--r--   unix/as.vax/zsvjmp.s.ORIG   |   55
19 files changed, 1340 insertions, 0 deletions
diff --git a/unix/as.vax/README b/unix/as.vax/README
new file mode 100644
index 00000000..58b5c87f
--- /dev/null
+++ b/unix/as.vax/README
@@ -0,0 +1,34 @@
+AS - Assembler Sources
+
+This directory contains those (non-kernel) files which it has proven
+desirable to optimize in assembler.  Most of these routines are not
+required for the operation of the system, although considerable gains
+in speed may be possible in some cases.  If the autogeneration routines
+cannot find a particular assembler file, the portable routine will
+automatically be used instead.
+
+The following assembler files are required:
+
+        zsvjmp.s                (libos.a)
+
+The following should normally be optimized in assembler, particularly if the
+machine has special memory move or bitfield instructions:
+
+        aclr.s          clear a block of memory
+        amov.s          move a block of memory
+        bitpak.s        write into a bit field
+        bitupk.s        read from a bit field
+        bytmov.s        a variant on amov.s
+        ishift.s        bit shift, also iand, ior (used by NCAR/graphics)
+
+The following can be omitted without significant penalty:
+
+        aaddks.s
+        aadds.s
+        aluir.s
+        aluis.s
+        amapr.s
+        amaps.s
+        awsur.s
+        awsus.s
+        cyboow.s
diff --git a/unix/as.vax/aaddks.s b/unix/as.vax/aaddks.s
new file mode 100644
index 00000000..8000beb0
--- /dev/null
+++ b/unix/as.vax/aaddks.s
@@ -0,0 +1,40 @@
+        .data   0
+        .set    LWM1,0xfc0
+        .data   2
+        .data   1
+        .data   0
+        .globl  _aaddks_
+        .data   2
+v.2:
+        .space  4
+        .set    v.1,v.2
+
+        .stabs  "aaddks.f",0x64,0,0,0
+        .text
+        .globl  _aaddks_
+        .set    LF1,12
+_aaddks_:
+        .word   LWM1
+        subl2   $LF1,sp
+        jbr     L12
+        .align  1
+L12:
+        moval   v.1,r11
+        movl    *16(ap),-4(fp)
+        subl2   $2,4(ap)
+        movl    *16(ap),-8(fp)
+        subl2   $2,12(ap)
+        movl    *16(ap),-12(fp)
+        movl    v.2-v.1(r11),r10
+        movl    4(ap),r9
+        movl    8(ap),r8
+        movl    12(ap),r7
+        movl    -12(fp),r6
+        movl    $1,r10
+        cmpl    r6,r10
+        jlss    L20
+L21:
+        addw3   (r9)[r10],(r8),(r7)[r10]
+        aobleq  r6,r10,L21
+L20:
+        ret
diff --git a/unix/as.vax/aadds.s b/unix/as.vax/aadds.s
new file mode 100644
index 00000000..42315155
--- /dev/null
+++ b/unix/as.vax/aadds.s
@@ -0,0 +1,42 @@
+        .data   0
+        .set    LWM1,0xfc0
+        .data   2
+        .data   1
+        .data   0
+        .globl  _aadds_
+        .data   2
+v.2:
+        .space  4
+        .set    v.1,v.2
+
+        .stabs  "aadds.f",0x64,0,0,0
+        .text
+        .globl  _aadds_
+        .set    LF1,16
+_aadds_:
+        .word   LWM1
+        subl2   $LF1,sp
+        jbr     L12
+        .align  1
+L12:
+        moval   v.1,r11
+        movl    *16(ap),-4(fp)
+        subl2   $2,4(ap)
+        movl    *16(ap),-8(fp)
+        subl2   $2,8(ap)
+        movl    *16(ap),-12(fp)
+        subl2   $2,12(ap)
+        movl    *16(ap),-16(fp)
+        movl    v.2-v.1(r11),r10
+        movl    4(ap),r9
+        movl    8(ap),r8
+        movl    12(ap),r7
+        movl    -16(fp),r6
+        movl    $1,r10
+        cmpl    r6,r10
+        jlss    L21
+L22:
+        addw3   (r9)[r10],(r8)[r10],(r7)[r10]
+        aobleq  r6,r10,L22
+L21:
+        ret
diff --git a/unix/as.vax/aclr.s b/unix/as.vax/aclr.s
new file mode 100644
index 00000000..5cbb1617
--- /dev/null
+++ b/unix/as.vax/aclr.s
@@ -0,0 +1,64 @@
+# ACLR -- Zero a block of memory.
+
+        .set    MASK, 07400
+        .set    A, 4
+        .set    NPIX, 8
+
+.data
+LZB:
+        .quad   0
+        .quad   0
+        .quad   0
+        .quad   0
+        .quad   0
+        .quad   0
+        .quad   0
+        .quad   0
+        .align  2
+.text
+        .globl  _aclrb_                 # aclr_ (a, npix)
+        .globl  _aclrc_
+        .globl  _aclrs_
+        .globl  _aclri_
+        .globl  _aclrl_
+        .globl  _aclrr_
+        .globl  _aclrd_
+        .globl  _aclrx_
+_aclrb_:
+        .word   MASK
+        movl    *NPIX(ap), r11
+        jbr     L10
+_aclrc_:
+_aclrs_:
+        .word   MASK
+        mull3   $2, *NPIX(ap), r11
+        jbr     L10
+_aclri_:
+_aclrl_:
+_aclrr_:
+        .word   MASK
+        mull3   $4, *NPIX(ap), r11
+        jbr     L10
+_aclrd_:
+_aclrx_:
+        .word   MASK
+        mull3   $8, *NPIX(ap), r11
+L10:
+        jleq    L20
+        moval   LZB, r8
+        movl    A(ap), r9
+        ashl    $-6, r11, r10
+        bleq    L12
+
+        # Clear successive 64 byte blocks.
+L11:
+        movc3   $64, (r8), (r9)
+        addl2   $64, r9
+        sobgtr  r10, L11
+L12:
+        # Clear the remaining bytes.
+
+        bicl2   $-64, r11
+        movc3   r11, (r8), (r9)
+L20:
+        ret
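
The block-clear strategy above reads more easily in C.  The sketch below
(aclr_sketch is an illustrative name, not the portable IRAF routine the
README refers to) mirrors the assembler exactly: whole 64-byte blocks are
copied from a zero buffer, then the remainder left after masking out the
low six bits.

    #include <string.h>

    /* Clear nbytes at a: 64-byte blocks first (the L11 loop), then the
     * remainder (bicl2 $-64 keeps nbytes & 63). zeros plays the role
     * of the LZB buffer the movc3 instructions copy from. */
    void aclr_sketch(char *a, long nbytes)
    {
        static const char zeros[64];        /* like LZB, all zero */
        long nblocks;

        if (nbytes <= 0)                    /* jleq L20 */
            return;
        for (nblocks = nbytes >> 6; nblocks > 0; nblocks--) {
            memcpy(a, zeros, 64);           /* movc3 $64 */
            a += 64;
        }
        memcpy(a, zeros, (size_t)(nbytes & 63));
    }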
diff --git a/unix/as.vax/aluir.s b/unix/as.vax/aluir.s
new file mode 100644
index 00000000..30a37d0d
--- /dev/null
+++ b/unix/as.vax/aluir.s
@@ -0,0 +1,54 @@
+# ALUIR -- Look up and interpolate a vector of type real onto a real grid.
+# [OBSOLETE - NO LONGER USED.  5/27/87]
+
+        .data   0
+        .align  2
+        .text
+        .globl  _aluir_
+
+        .set    A, 4
+        .set    B, 8
+        .set    X, 12
+        .set    NPIX, 16
+
+        # ALUIR (a, b, x, npix)
+        #
+        # left = int (x[i])
+        # tau = x[i] - left
+        # b[i] = (a[left] * (1-tau)) + (a[left+1] * tau)
+        #
+        # registers:
+        #       r0      max_b
+        #       r1      a
+        #       r2      b
+        #       r3      x
+        #       r4      x[i], tau
+        #       r5      left
+        #       r6
+
+_aluir_:
+        .word   0374                    # save r2-r7
+        subl3   $4, A(ap), r1
+        movl    B(ap), r2
+        movl    X(ap), r3
+        mull3   $4, *NPIX(ap), r0
+        addl2   r2, r0
+L1:
+        movf    (r3)+, r4               # get X into r4
+        cvtfl   r4, r5                  # r5 = left
+        cvtlf   r5, r6
+        subf2   r6, r4                  # r4 = tau = (x[i] - left)
+
+        movf    (r1)[r5], r6
+        mulf3   r4, r6, r7
+        subf2   r7, r6                  # r6 = (a[left] * (1-tau))
+
+        incl    r5
+        mulf3   r4, (r1)[r5], r7        # r7 = (a[left+1] * tau)
+
+        addf3   r6, r7, (r2)+           # output result to B
+
+        cmpl    r2, r0
+        blssu   L1
+
+        ret
diff --git a/unix/as.vax/aluis.s b/unix/as.vax/aluis.s
new file mode 100644
index 00000000..0fe54e26
--- /dev/null
+++ b/unix/as.vax/aluis.s
@@ -0,0 +1,56 @@
+# ALUIS -- Look up and interpolate a vector of type short onto a real grid.
+# [OBSOLETE - NO LONGER USED.  5/27/87]
+
+        .data   0
+        .align  2
+        .text
+        .globl  _aluis_
+
+        .set    A, 4
+        .set    B, 8
+        .set    X, 12
+        .set    NPIX, 16
+
+        # ALUIS (a, b, x, npix)
+        #
+        # left = int (x[i])
+        # tau = x[i] - left
+        # b[i] = (a[left] * (1-tau)) + (a[left+1] * tau)
+        #
+        # registers:
+        #       r0      max_b
+        #       r1      a
+        #       r2      b
+        #       r3      x
+        #       r4      x[i], tau
+        #       r5      left
+        #       r6
+
+_aluis_:
+        .word   0374                    # save r2-r7
+        subl3   $2, A(ap), r1
+        movl    B(ap), r2
+        movl    X(ap), r3
+        mull3   $2, *NPIX(ap), r0
+        addl2   r2, r0
+L1:
+        movf    (r3)+, r4               # get X into r4
+        cvtfl   r4, r5                  # r5 = left
+        cvtlf   r5, r6
+        subf2   r6, r4                  # r4 = tau = (x[i] - left)
+
+        cvtwf   (r1)[r5], r6
+        mulf3   r4, r6, r7
+        subf2   r7, r6                  # r6 = (a[left] * (1-tau))
+
+        incl    r5
+        cvtwf   (r1)[r5], r7
+        mulf2   r4, r7                  # r7 = (a[left+1] * tau)
+
+        addf2   r6, r7
+        cvtfw   r7, (r2)+               # output result to B
+
+        cmpl    r2, r0
+        blssu   L1
+
+        ret
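
Both interpolation routines implement the same recurrence; a C restatement
of the real case may make the register comments easier to follow.  Indexing
here is zero-based and the name is illustrative (the assembler pre-decrements
the array base with subl3 to absorb Fortran's one-based subscripts).

    /* b[i] = a[left]*(1-tau) + a[left+1]*tau, evaluated the way the
     * assembler does: a[left] - a[left]*tau, then add a[left+1]*tau. */
    void aluir_sketch(const float a[], float b[], const float x[], int npix)
    {
        int i;
        for (i = 0; i < npix; i++) {
            int   left = (int) x[i];            /* cvtfl: truncate */
            float tau  = x[i] - (float) left;   /* fractional part */

            b[i] = (a[left] - a[left] * tau) + a[left + 1] * tau;
        }
    }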
diff --git a/unix/as.vax/amapr.s b/unix/as.vax/amapr.s
new file mode 100644
index 00000000..5ba41092
--- /dev/null
+++ b/unix/as.vax/amapr.s
@@ -0,0 +1,82 @@
+# AMAPR -- Linear transformation, type real.  The range of pixel values
+# A1 to A2 is mapped into the range B1 to B2 using a linear transformation.
+# Values less than A1 or greater than A2 are mapped into the values B1 and
+# B2 upon output.
+
+        .data   0
+
+        .set    A, 4
+        .set    B, 8
+        .set    NPIX, 12
+        .set    A1, 16
+        .set    A2, 20
+        .set    B1, 24
+        .set    B2, 28
+
+        .align  2
+        .globl  _amapr_
+        .text
+
+        # AMAPR (a, b, npix, a1, a2, b1, b2)
+        #
+        # scalar = real (b2 - b1) / real (a2 - a1)
+        # minout = min (b1, b2)
+        # maxout = max (b1, b2)
+        #
+        # do i = 1, npix
+        #     b[i] = max(minout, min(maxout,
+        #         PIXEL((a[i] - a1) * scalar) + b1))
+        #
+        # Registers:
+        #       r0      last_a
+        #       r1      a
+        #       r2      b
+        #       r3      scalar
+        #       r4      minout
+        #       r5      maxout
+        #       r6      a1
+        #       r7      b1
+
+_amapr_:
+        .word   01774                   # save r2-r9
+        movl    A(ap), r1
+        movl    B(ap), r2
+        mull3   $4, *NPIX(ap), r0
+        addl2   r1, r0
+        movf    *A1(ap), r6
+        movf    *B1(ap), r7
+        movf    *A2(ap), r8
+        movf    *B2(ap), r9
+
+        subf3   r7, r9, r3              # r3 = (b2 - b1) / (a2 - a1)
+        subf3   r6, r8, r4
+        divf2   r4, r3
+
+        cmpf    r7, r9                  # b1 <= b2?
+        bleq    L1
+        movf    r9, r4                  # no, min=b2, max=b1
+        movf    r7, r5
+        jbr     L2
+L1:     movf    r7, r4                  # yes, min=b1, max=b2
+        movf    r9, r5
+L2:
+        subf3   r6, (r1)+, r8           # r8 = a[i] - a1
+        mulf2   r3, r8                  # (..) * scalar
+        addf2   r7, r8                  # (..) + b1
+
+        cmpf    r8, r4                  # r8 < minout?
+        bgtr    L3
+        movf    r4, (r2)+
+        jbr     L5
+L3:
+        cmpf    r8, r5                  # r8 > maxout?
+        blss    L4
+        movf    r5, (r2)+
+        jbr     L5
+L4:
+        movf    r8, (r2)+               # new value in range
+L5:
+        cmpl    r1, r0
+        blssu   L2                      # loop again
+
+        ret
diff --git a/unix/as.vax/amaps.s b/unix/as.vax/amaps.s
new file mode 100644
index 00000000..8f7664ea
--- /dev/null
+++ b/unix/as.vax/amaps.s
@@ -0,0 +1,86 @@
+# AMAPS -- Linear transformation, type short.  The range of pixel values
+# A1 to A2 is mapped into the range B1 to B2 using a linear transformation.
+# Values less than A1 or greater than A2 are mapped into the values B1 and
+# B2 upon output.
+
+        .data   0
+
+        .set    A, 4
+        .set    B, 8
+        .set    NPIX, 12
+        .set    A1, 16
+        .set    A2, 20
+        .set    B1, 24
+        .set    B2, 28
+
+        .align  2
+        .globl  _amaps_
+        .text
+
+        # AMAPS (a, b, npix, a1, a2, b1, b2)
+        #
+        # scalar = real (b2 - b1) / real (a2 - a1)
+        # minout = min (b1, b2)
+        # maxout = max (b1, b2)
+        #
+        # do i = 1, npix
+        #     b[i] = max(minout, min(maxout,
+        #         PIXEL((a[i] - a1) * scalar) + b1))
+        #
+        # Registers:
+        #       r0      last_a
+        #       r1      a
+        #       r2      b
+        #       r3      scalar
+        #       r4      minout
+        #       r5      maxout
+        #       r6      a1
+        #       r7      b1
+
+_amaps_:
+        .word   01774                   # save r2-r9
+        movl    A(ap), r1
+        movl    B(ap), r2
+        mull3   $2, *NPIX(ap), r0
+        addl2   r1, r0
+        movw    *A1(ap), r6
+        movw    *B1(ap), r7
+        movw    *A2(ap), r8
+        movw    *B2(ap), r9
+
+        subw3   r7, r9, r3              # r3 = (b2 - b1) / (a2 - a1)
+        cvtwf   r3, r3
+        subw3   r6, r8, r4
+        cvtwf   r4, r4
+        divf2   r4, r3
+
+        cmpw    r7, r9                  # b1 <= b2?
+        bleq    L1
+        movw    r9, r4                  # no, min=b2, max=b1
+        movw    r7, r5
+        jbr     L2
+L1:     movw    r7, r4                  # yes, min=b1, max=b2
+        movw    r9, r5
+L2:
+        subw3   r6, (r1)+, r8           # r8 = a[i] - a1
+        cvtwf   r8, r8
+        mulf2   r3, r8                  # (..) * scalar
+        cvtfw   r8, r8
+        addw2   r7, r8                  # (..) + b1
+
+        cmpw    r8, r4                  # r8 < minout?
+        bgtr    L3
+        movw    r4, (r2)+
+        jbr     L5
+L3:
+        cmpw    r8, r5                  # r8 > maxout?
+        blss    L4
+        movw    r5, (r2)+
+        jbr     L5
+L4:
+        movw    r8, (r2)+               # new value in range
+L5:
+        cmpl    r1, r0
+        blssu   L2                      # loop again
+
+        ret
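
The clamped linear mapping that AMAPR and AMAPS share, sketched in C for
the real case (illustrative name; the short version differs only in the
word/float conversions around the multiply):

    /* Scale the input range [a1,a2] onto [b1,b2], then clamp each
     * output to the minout/maxout bounds computed up front. */
    void amapr_sketch(const float a[], float b[], int npix,
                      float a1, float a2, float b1, float b2)
    {
        float scalar = (b2 - b1) / (a2 - a1);
        float minout = (b1 <= b2) ? b1 : b2;
        float maxout = (b1 <= b2) ? b2 : b1;
        int i;

        for (i = 0; i < npix; i++) {
            float v = (a[i] - a1) * scalar + b1;
            if (v < minout)      b[i] = minout;
            else if (v > maxout) b[i] = maxout;
            else                 b[i] = v;      /* value in range */
        }
    }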
diff --git a/unix/as.vax/amov.s b/unix/as.vax/amov.s
new file mode 100644
index 00000000..61784aac
--- /dev/null
+++ b/unix/as.vax/amov.s
@@ -0,0 +1,94 @@
+# AMOV -- Move a block of data from one area of memory to another.  The
+# move is carried out (using the MOVC instruction) in such a way that
+# data is not destroyed, regardless of whether or not the input and output
+# arrays overlap.  Note that the move is not data dependent (floating
+# point data is not special).
+
+        .set    MASK, 07400
+        .set    A, 4
+        .set    B, 8
+        .set    NPIX, 12
+        .set    MAXBLK, 0177777
+
+        .align  2
+.text
+        .globl  _amovc_                 # amov_ (a, b, npix)
+        .globl  _amovs_
+        .globl  _amovi_
+        .globl  _amovl_
+        .globl  _amovr_
+        .globl  _amovd_
+        .globl  _amovx_
+_amovc_:
+_amovs_:
+        .word   MASK
+        movl    $2, r11                 # r11 = size of pixel
+        jbr     L10
+_amovi_:
+_amovl_:
+_amovr_:
+        .word   MASK
+        movl    $4, r11
+        jbr     L10
+_amovd_:
+_amovx_:
+        .word   MASK
+        movl    $8, r11
+
+        # Compute source and destination addresses and the number of bytes to
+        # be moved.  If nbytes=0 or the source and destination are the same
+        # then we are done.  If nbytes is greater than a single MOVC3 can
+        # accommodate then we must branch to the more complicated code below,
+        # otherwise we call MOVC3 and return.
+
+L10:    mull3   r11, *NPIX(ap), r10     # nbytes
+        jleq    L20
+        movl    A(ap), r8               # fwa of A array
+        movl    B(ap), r9               # fwa of B array
+        cmpl    r8, r9
+        jeql    L20                     # A, B same array
+        cmpl    r10, $MAXBLK            # too large for single movc3?
+        jgtr    L30
+        movc3   r10, (r8), (r9)
+L20:
+        ret
+L30:
+        # Since the array is larger than a single MOVC3 instruction can
+        # accommodate we must do the move in segments of size MAXBLK.  Since
+        # multiple moves are needed we cannot leave it up to MOVC3 to make
+        # sure that the move is nondestructive.  If the destination is to
+        # the left (lower address) of the source then the move is necessarily
+        # nondestructive.  If to the right then the move is potentially
+        # destructive, and we must solve the problem by moving the high
+        # segments first.
+
+        movl    $MAXBLK, r11
+        cmpl    r8, r9
+        jlssu   L50
+L40:    # move high to low
+        cmpl    r10, $MAXBLK
+        jgtr    L41
+        movl    r10, r11
+L41:
+        movc3   r11, (r8), (r9)
+        addl2   r11, r8
+        addl2   r11, r9
+        subl2   r11, r10
+        jgtr    L40
+
+        ret
+L50:    # move low to high
+        addl2   r10, r8
+        addl2   r10, r9
+L60:
+        cmpl    r10, $MAXBLK
+        jgtr    L61
+        movl    r10, r11
+L61:
+        subl2   r11, r8
+        subl2   r11, r9
+        movc3   r11, (r8), (r9)
+        subl2   r11, r10
+        jgtr    L60
+
+        ret
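
A C sketch of the segmented move logic, with memmove() standing in for a
single MOVC3 (which is itself overlap-safe); what the assembler must add is
the choice of segment order once more than one MOVC3 is needed.  The name
is illustrative and the pointer comparison assumes a flat address space.

    #include <string.h>

    #define MAXBLK 65535L   /* 0177777: the 16-bit movc3 length limit */

    void amov_sketch(const char *a, char *b, long nbytes)
    {
        long n;

        if (nbytes <= 0 || a == b)
            return;
        if (b < a) {                        /* dest below source: safe */
            while (nbytes > 0) {
                n = (nbytes > MAXBLK) ? MAXBLK : nbytes;
                memmove(b, a, (size_t) n);
                a += n;  b += n;  nbytes -= n;
            }
        } else {                            /* dest above: high segments first */
            a += nbytes;  b += nbytes;
            while (nbytes > 0) {
                n = (nbytes > MAXBLK) ? MAXBLK : nbytes;
                a -= n;  b -= n;
                memmove(b, a, (size_t) n);
                nbytes -= n;
            }
        }
    }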
diff --git a/unix/as.vax/awsur.s b/unix/as.vax/awsur.s
new file mode 100644
index 00000000..a1a97e16
--- /dev/null
+++ b/unix/as.vax/awsur.s
@@ -0,0 +1,44 @@
+# AWSUR -- Weighted sum of two type real vectors.
+
+        .data   0
+        .globl  _awsur_
+        .align  2
+        .text
+
+        .set    A, 4
+        .set    B, 8
+        .set    C, 12
+        .set    NPIX, 16
+        .set    W1, 20
+        .set    W2, 24
+
+        # AWSUR (a, b, c, npix, w1, w2)
+        #
+        # registers:
+        #       r0      max_a
+        #       r1      a
+        #       r2      b
+        #       r3      c
+        #       r4      w1 (real)
+        #       r5      w2 (real)
+
+_awsur_:
+        .word   0374
+        movl    A(ap), r1
+        movl    B(ap), r2
+        movl    C(ap), r3
+        mull3   $4, *NPIX(ap), r0
+        addl2   r1, r0
+        movf    *W1(ap), r4
+        movf    *W2(ap), r5
+
+        # c[i] = a[i] * w1 + b[i] * w2
+L1:
+        mulf3   (r1)+, r4, r6
+        mulf3   (r2)+, r5, r7
+        addf3   r6, r7, (r3)+
+
+        cmpl    r1, r0
+        blssu   L1
+
+        ret
diff --git a/unix/as.vax/awsus.s b/unix/as.vax/awsus.s
new file mode 100644
index 00000000..5ad9bb78
--- /dev/null
+++ b/unix/as.vax/awsus.s
@@ -0,0 +1,47 @@
+# AWSUS -- Weighted sum of two type short vectors.
+
+        .data   0
+        .globl  _awsus_
+        .align  2
+        .text
+
+        .set    A, 4
+        .set    B, 8
+        .set    C, 12
+        .set    NPIX, 16
+        .set    W1, 20
+        .set    W2, 24
+
+        # AWSUS (a, b, c, npix, w1, w2)
+        #
+        # registers:
+        #       r0      max_a
+        #       r1      a
+        #       r2      b
+        #       r3      c
+        #       r4      w1 (real)
+        #       r5      w2 (real)
+
+_awsus_:
+        .word   0374
+        movl    A(ap), r1
+        movl    B(ap), r2
+        movl    C(ap), r3
+        mull3   $2, *NPIX(ap), r0
+        addl2   r1, r0
+        movf    *W1(ap), r4
+        movf    *W2(ap), r5
+
+        # c[i] = a[i] * w1 + b[i] * w2
+L1:
+        cvtwf   (r1)+, r6
+        mulf2   r4, r6
+        cvtwf   (r2)+, r7
+        mulf2   r5, r7
+        addf2   r6, r7
+        cvtfw   r7, (r3)+
+
+        cmpl    r1, r0
+        blssu   L1
+
+        ret
diff --git a/unix/as.vax/bitfields.s b/unix/as.vax/bitfields.s
new file mode 100644
index 00000000..5e28cd3d
--- /dev/null
+++ b/unix/as.vax/bitfields.s
@@ -0,0 +1,42 @@
+# BITFIELDS -- Routines for inserting bitfields into, and extracting
+# bitfields from, arrays of integers.
+
+# BITPAK -- Pack an integer into a bitfield of an array.  Set all nbits
+# bits regardless of the value of the integer.
+
+        .text
+        .align  1
+        .globl  _bitpak_
+
+        # bitpak (intval, array, offset, nbits)
+        .set    INTVAL, 4
+        .set    ARRAY, 8
+        .set    OFFSET, 12              # one-indexed bit offset
+        .set    NBITS, 16
+
+_bitpak_:
+        .word   0x0
+
+        subl3   $1, *OFFSET(ap), r1
+        insv    *INTVAL(ap), r1, *NBITS(ap), *ARRAY(ap)
+        ret
+        .data
+
+# BITUPK -- Unpack a bitfield from an array and return as the function
+# value, an integer.  Do not sign extend.
+
+        .text
+        .align  1
+        .globl  _bitupk_
+
+        # bitupk (array, offset, nbits)
+        .set    ARRAY, 4
+        .set    OFFSET, 8               # one-indexed bit offset
+        .set    NBITS, 12
+
+_bitupk_:
+        .word   0x0
+
+        subl3   $1, *OFFSET(ap), r1
+        extzv   r1, *NBITS(ap), *ARRAY(ap), r0
+        ret
+        .data
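
In C terms the two entry points reduce to a masked deposit and a masked
extract.  The sketch below handles only fields that do not cross a 32-bit
word boundary (the VAX insv/extzv also handle the crossing case); offsets
are one-based as in the Fortran interface, and the names are illustrative.

    /* bitupk: extract nbits starting at bit offset (one-based), with
     * zero extension, like extzv. */
    unsigned bitupk_sketch(const unsigned arr[], int offset, int nbits)
    {
        int pos = offset - 1;                        /* subl3 $1 */
        unsigned mask = (nbits >= 32) ? ~0u : (1u << nbits) - 1;
        return (arr[pos / 32] >> (pos % 32)) & mask;
    }

    /* bitpak: deposit the low nbits of intval at the same position,
     * setting all nbits bits, like insv. */
    void bitpak_sketch(unsigned intval, unsigned arr[], int offset, int nbits)
    {
        int pos = offset - 1, shift = pos % 32;
        unsigned mask = ((nbits >= 32) ? ~0u : (1u << nbits) - 1) << shift;
        arr[pos / 32] = (arr[pos / 32] & ~mask) | ((intval << shift) & mask);
    }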
diff --git a/unix/as.vax/bytmov.s b/unix/as.vax/bytmov.s
new file mode 100644
index 00000000..64acc299
--- /dev/null
+++ b/unix/as.vax/bytmov.s
@@ -0,0 +1,80 @@
+# BYTMOV -- Move a block of data from one area of memory to another.  The
+# move is carried out (using the MOVC instruction) in such a way that
+# data is not destroyed, regardless of whether or not the input and output
+# arrays overlap.
+
+        .set    MASK, 07400
+
+        # bytmov (a, aoff, b, boff, nbytes)
+        .set    A, 4
+        .set    AOFF, 8
+        .set    B, 12
+        .set    BOFF, 16
+        .set    NBYTES, 20
+        .set    MAXBLK, 0177777
+
+        .align  2
+.text
+        .globl  _bytmov_
+_bytmov_:
+        .word   MASK
+
+        # Compute source and destination addresses and the number of bytes to
+        # be moved.  If nbytes=0 or the source and destination are the same
+        # then we are done.  If nbytes is greater than a single MOVC3 can
+        # accommodate then we must branch to the more complicated code below,
+        # otherwise we call MOVC3 and return.
+
+        movl    *NBYTES(ap), r10        # nbytes
+        jleq    L20
+        addl3   A(ap), *AOFF(ap), r8    # fwa of A array
+        decl    r8                      # allow for one-indexing
+        addl3   B(ap), *BOFF(ap), r9    # fwa of B array
+        decl    r9                      # allow for one-indexing
+        cmpl    r8, r9
+        jeql    L20                     # A, B same array
+        cmpl    r10, $MAXBLK            # too large for single movc3?
+        jgtr    L30
+        movc3   r10, (r8), (r9)
+L20:
+        ret
+L30:
+        # Since the array is larger than a single MOVC3 instruction can
+        # accommodate we must do the move in segments of size MAXBLK.  Since
+        # multiple moves are needed we cannot leave it up to MOVC3 to make
+        # sure that the move is nondestructive.  If the destination is to
+        # the left (lower address) of the source then the move is necessarily
+        # nondestructive.  If to the right then the move is potentially
+        # destructive, and we must solve the problem by moving the high
+        # segments first.
+
+        movl    $MAXBLK, r11
+        cmpl    r8, r9
+        jlssu   L50
+L40:    # move high to low
+        cmpl    r10, $MAXBLK
+        jgtr    L41
+        movl    r10, r11
+L41:
+        movc3   r11, (r8), (r9)
+        addl2   r11, r8
+        addl2   r11, r9
+        subl2   r11, r10
+        jgtr    L40
+
+        ret
+L50:    # move low to high
+        addl2   r10, r8
+        addl2   r10, r9
+L60:
+        cmpl    r10, $MAXBLK
+        jgtr    L61
+        movl    r10, r11
+L61:
+        subl2   r11, r8
+        subl2   r11, r9
+        movc3   r11, (r8), (r9)
+        subl2   r11, r10
+        jgtr    L60
+
+        ret
diff --git a/unix/as.vax/cyboow.s b/unix/as.vax/cyboow.s
new file mode 100644
index 00000000..cafec5a2
--- /dev/null
+++ b/unix/as.vax/cyboow.s
@@ -0,0 +1,93 @@
+# CYBOOW, CYBOEW -- Order the bits in an odd or even indexed 60-bit Cyber word.
+# The operation may not be performed in-place.  The offsets and sizes of the
+# bit segments which must be moved are as follows:
+#
+#      --> Odd Words <--          --> Even Words <--
+#      [from]  [to]  [nbits]      [from]  [to]  [nbits]
+#         1     53      8           -3     57      4
+#         9     45      8            5     49      8
+#        17     37      8           13     41      8
+#        25     29      8           21     33      8
+#        33     21      8           29     25      8
+#        41     13      8           37     17      8
+#        49      5      8           45      9      8
+#        61      1      4           53      1      8
+#
+# Input bit-offsets must be a multiple of the Cyber word size, i.e., 1, 61,
+# 121, etc.  An output word may begin at any bit-offset.
+
+.globl  _cyboow_
+.globl  _cyboew_
+
+        .set    IN, 4
+        .set    INBIT, 8
+        .set    OUT, 12
+        .set    OUTBIT, 16
+
+        .data
+        .align  2
+W:      .long   0                       # temp space for output word
+        .long   0
+
+        .text
+        .align  1
+
+
+# CYBOOW -- Order odd cyber word.  After swapping the first 8 bytes of IN the
+# ordered 60-bit Cyber word is in bits 5-64 of the temporary storage area at W.
+
+_cyboow_:                               # (in, inbit, out, outbit)
+        .word   0x4
+
+        subl3   $1, *INBIT(ap), r0      # bit offset into IN
+        ashl    $-3, r0, r0
+        addl2   IN(ap), r0              # input base address
+
+        addl3   $8, $W, r1              # swap bytes into W temporary
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+
+        movl    OUT(ap), r0             # output base address
+        subl3   $1, *OUTBIT(ap), r1     # bit offset into OUT
+        extzv   $4, $30, W, r2
+        insv    r2, r1, $30, (r0)       # put first 30 bits
+        extzv   $34, $30, W, r2
+        addl2   $30, r1
+        insv    r2, r1, $30, (r0)       # put second 30 bits
+        ret
+
+# CYBOEW -- Order even cyber word.  After swapping the 8 bytes the ordered
+# Cyber word will be found in bits 1-60 of the temporary storage area at W.
+
+_cyboew_:                               # (in, inbit, out, outbit)
+        .word   0x4
+        subl3   $5, *INBIT(ap), r0      # bit offset into IN
+        ashl    $-3, r0, r0
+        addl2   IN(ap), r0              # input base address
+
+        addl3   $8, $W, r1              # swap bytes into W temporary
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+        movb    (r0)+, -(r1)
+
+        movl    OUT(ap), r0             # output base address
+        subl3   $1, *OUTBIT(ap), r1     # bit offset into OUT
+        movl    W, r2
+        insv    r2, r1, $32, (r0)       # put first 32 bits
+        extzv   $32, $30, W, r2
+        addl2   $32, r1
+        insv    r2, r1, $28, (r0)       # put remaining 28 bits
+
+        ret
+        .data
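
For the odd-word case, the eight movb's amount to a byte reversal into W,
after which the ordered 60-bit word sits in bits 5-64 of the temporary, as
the comment above says.  A C sketch under that reading (returning a 64-bit
value in place of the two 30-bit insv's into the output bit offset):

    #include <stdint.h>

    /* Reverse the 8 input bytes (the movb loop); read as a little-endian
     * quadword this is just the big-endian value of the byte stream.
     * One right shift then drops the 4 pad bits below the Cyber word. */
    uint64_t cyboow_sketch(const unsigned char in[8])
    {
        uint64_t w = 0;
        int i;
        for (i = 0; i < 8; i++)
            w = (w << 8) | in[i];       /* byte-reversed into W */
        return w >> 4;                  /* bits 5-64 hold the word */
    }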
diff --git a/unix/as.vax/ieeed.s b/unix/as.vax/ieeed.s
new file mode 100644
index 00000000..ec550592
--- /dev/null
+++ b/unix/as.vax/ieeed.s
@@ -0,0 +1,182 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+#
+# IEEED.S -- IEEE double to VAX double floating conversions.
+#
+# ieepakd (x)                           # scalar, vax->ieee
+# ieeupkd (x)                           # scalar, ieee->vax
+# ieevpakd (native, ieee, nelem)        # vector, vax->ieee
+# ieevupkd (ieee, native, nelem)        # vector, ieee->vax
+# ieesnand (NaN)                        # set VAX NaN value
+# ieegnand (NaN)                        # get VAX NaN value
+# ieemapd (mapin, mapout)               # enable NaN mapping
+# ieestatd (nin, nout)                  # get num NaN values mapped
+# ieezstatd ()                          # zero NaN counters
+#
+# These routines convert between the VAX and IEEE double floating formats,
+# operating upon a single value or an array of values.  +/- zero is converted
+# to zero.  When converting IEEE to VAX, underflow maps to zero, and exponent
+# overflow and NaN input values map to the value set by IEESNAND (default 0).
+# These routines are functionally equivalent to the semi-portable versions of
+# the IRAF ieee/native floating conversion routines in osb$ieeed.x.
+# TODO - Add a function callback option for processing NaN values.
+
+        .data
+vaxnan: .quad   0
+nanin:  .long   0
+nanout: .long   0
+mapin:  .long   1       # enable input NaN mapping by default for VAX
+mapout: .long   0
+
+        .text
+        .align  1
+        .globl  _ieepad_
+        .globl  _ieevpd_
+        .globl  _ieeupd_
+        .globl  _ieevud_
+        .globl  _ieesnd_
+        .globl  _ieegnd_
+        .globl  _ieemad_
+        .globl  _ieestd_
+        .globl  _ieezsd_
+
+_ieepad_:                       # IEEPAKD (X)
+        .word   0x3c
+        movl    4(ap), r4       # data addr -> r4
+        movl    r4, r5          # output clobbers input
+        jsb     cvt_vax_ieee    # convert value
+        ret
+_ieevpd_:                       # IEEVPAKD (VAX, IEEE, NELEM)
+        .word   0x7c
+        movl    4(ap), r4       # input vector -> r4
+        movl    8(ap), r5       # output vector -> r5
+        movl    *12(ap), r6     # loop counter
+L1:     jsb     cvt_vax_ieee    # convert one value
+        sobgtr  r6, L1          # loop
+        ret
+_ieeupd_:                       # IEEUPKD (X)
+        .word   0x3c
+        movl    4(ap), r4       # data addr -> r4
+        movl    r4, r5          # output clobbers input
+        jsb     cvt_ieee_vax    # convert value
+        ret
+_ieevud_:                       # IEEVUPKD (IEEE, VAX, NELEM)
+        .word   0x7c
+        movl    4(ap), r4       # input vector -> r4
+        movl    8(ap), r5       # output vector -> r5
+        movl    *12(ap), r6     # loop counter
+L2:     jsb     cvt_ieee_vax    # convert one value
+        sobgtr  r6, L2          # loop
+        ret
+_ieesnd_:                       # IEESNAND (VAXNAN)
+        .word   0x0
+        movq    *4(ap), vaxnan
+        clrl    nanin
+        clrl    nanout
+        ret
+_ieegnd_:                       # IEEGNAND (VAXNAN)
+        .word   0x0
+        movq    vaxnan, *4(ap)
+        ret
+_ieemad_:                       # IEEMAPD (MAPIN, MAPOUT)
+        .word   0x0
+        movl    *4(ap), mapin
+        movl    *8(ap), mapout
+        ret
+_ieestd_:                       # IEESTATD (NIN, NOUT)
+        .word   0x0
+        movl    nanin, *4(ap)
+        movl    nanout, *8(ap)
+        ret
+_ieezsd_:                       # IEEZSTATD ()
+        .word   0x0
+        clrl    nanin
+        clrl    nanout
+        ret
+
+cvt_vax_ieee:                   # R4=in, R5=out
+        movl    (r4)+, r1       # get vax double
+        movl    (r4)+, r0       # get vax double
+
+        tstl    mapout          # map NaNs on output?
+        beql    L4              # no, just output value
+        cmpl    r0, vaxnan      # yes, check if reserved value
+        bneq    L4              # no, just output value
+        cmpl    r1, vaxnan+4    # yes, check if reserved value
+        bneq    L4              # no, just output value
+        clrl    r0              # generate IEEE NaN value
+        clrl    r1              # generate IEEE NaN value
+        insv    $2047, $20, $11, r1     # insert NaN exponent (2047)
+        incl    nanout          # increment counter
+        jbr     L5
+L4:
+        rotl    $16, r0, r0     # swap words -> r0
+        rotl    $16, r1, r1     # swap words -> r1
+        extzv   $23, $8, r1, r2 # 8 bit exponent -> r2
+        beql    L6              # branch if zero exponent
+        extzv   $2, $1, r0, r3  # get round bit -> r3
+        ashq    $-3, r0, r0     # shift 64 data bits by 3
+        addw2   $(1024-130), r2 # adjust exponent bias
+        insv    r2, $20, $11, r1        # insert new exponent
+        blbc    r3, L5          # branch if round bit clear
+        incl    r0              # round low longword
+        adwc    $0, r1          # carry to high longword
+L5:
+        movl    sp, r3          # r3 points to input byte
+        pushl   r1              # push r1 on stack
+        pushl   r0              # push r0 on stack
+        movb    -(r3), (r5)+    # output quadword, swapped
+        movb    -(r3), (r5)+
+        movb    -(r3), (r5)+
+        movb    -(r3), (r5)+
+        movb    -(r3), (r5)+
+        movb    -(r3), (r5)+
+        movb    -(r3), (r5)+
+        movb    -(r3), (r5)+
+        addl2   $8, sp          # pop stack
+        rsb                     # all done
+L6:
+        clrl    r0              # return all 64 bits zero
+        clrl    r1
+        jbr     L5
+
+cvt_ieee_vax:                   # R4=in, R5=out
+        movb    (r4)+, -(sp)    # byte swap quadword onto stack
+        movb    (r4)+, -(sp)
+        movb    (r4)+, -(sp)
+        movb    (r4)+, -(sp)
+        movb    (r4)+, -(sp)
+        movb    (r4)+, -(sp)
+        movb    (r4)+, -(sp)
+        movb    (r4)+, -(sp)
+
+        movl    (sp)+, r0       # pop low bits
+        movl    (sp)+, r1       # pop high bits
+        extzv   $20, $11, r1, r2        # exponent -> r2
+        beql    L10             # zero exponent
+        tstl    mapin           # map NaNs on input?
+        beql    L9              # no, don't check value
+        cmpl    r2, $2047       # NaN double has exponent 2047
+        beql    L11             # yes, output vaxnan
+L9:
+        extzv   $31, $1, r1, r3 # save sign bit
+        ashq    $3, r0, r0      # shift 64 bits left 3 bits
+        subw2   $(1024-130), r2 # adjust exponent bias
+        bleq    L10             # return zero if underflow
+        cmpw    r2, $256        # compare with max VAX exponent
+        bgeq    L11             # return VAX-NaN if overflow
+        insv    r2, $23, $8, r1 # insert VAX-D exponent
+        insv    r3, $31, $1, r1 # restore sign bit
+
+        rotl    $16, r1, (r5)+  # output VAX double
+        rotl    $16, r0, (r5)+  # output VAX double
+        rsb
+L10:
+        clrl    (r5)+           # return all 64 bits zero
+        clrl    (r5)+
+        rsb
+L11:
+        moval   vaxnan, r3      # return VAX equiv. of NaN
+        movl    (r3)+, (r5)+
+        movl    (r3)+, (r5)+
+        incl    nanin
+        rsb
diff --git a/unix/as.vax/ieeer.s b/unix/as.vax/ieeer.s
new file mode 100644
index 00000000..789712aa
--- /dev/null
+++ b/unix/as.vax/ieeer.s
@@ -0,0 +1,153 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+#
+# IEEER.S -- IEEE real to VAX single precision floating conversions.
+#
+# ieepakr (x)                           # scalar, vax->ieee
+# ieeupkr (x)                           # scalar, ieee->vax
+# ieevpakr (native, ieee, nelem)        # vector, vax->ieee
+# ieevupkr (ieee, native, nelem)        # vector, ieee->vax
+# ieesnanr (NaN)                        # set VAX NaN value
+# ieegnanr (NaN)                        # get VAX NaN value
+# ieemapr (mapin, mapout)               # enable NaN mapping
+# ieestatr (nin, nout)                  # get num NaN values mapped
+# ieezstatr ()                          # zero NaN counters
+#
+# These routines convert between the VAX and IEEE real floating formats,
+# operating upon a single value or an array of values.  +/- zero is converted
+# to zero.  When converting IEEE to VAX, underflow maps to zero, and exponent
+# overflow and NaN input values map to the value set by IEESNANR (default 0).
+# These routines are functionally equivalent to the semi-portable versions of
+# the IRAF ieee/native floating conversion routines in osb$ieeer.x.
+# TODO - Add a function callback option for processing NaN values.
+
+        .data
+vaxnan: .long   0
+nanin:  .long   0
+nanout: .long   0
+mapin:  .long   1       # enable input NaN mapping by default for VAX
+mapout: .long   0
+
+        .text
+        .align  1
+        .globl  _ieepar_
+        .globl  _ieevpr_
+        .globl  _ieeupr_
+        .globl  _ieevur_
+        .globl  _ieesnr_
+        .globl  _ieegnr_
+        .globl  _ieemar_
+        .globl  _ieestr_
+        .globl  _ieezsr_
+
+_ieepar_:                       # IEEPAKR (X)
+        .word   0x0c
+        movl    4(ap), r2       # data addr -> r2
+        movl    r2, r3          # output clobbers input
+        jsb     cvt_vax_ieee    # convert value
+        ret
+_ieevpr_:                       # IEEVPAKR (VAX, IEEE, NELEM)
+        .word   0x1c
+        movl    4(ap), r2       # input vector -> r2
+        movl    8(ap), r3       # output vector -> r3
+        movl    *12(ap), r4     # loop counter
+L1:     jsb     cvt_vax_ieee    # convert one value
+        sobgtr  r4, L1          # loop
+        ret
+_ieeupr_:                       # IEEUPKR (X)
+        .word   0x0c
+        movl    4(ap), r2       # data addr -> r2
+        movl    r2, r3          # output clobbers input
+        jsb     cvt_ieee_vax    # convert value
+        ret
+_ieevur_:                       # IEEVUPKR (IEEE, VAX, NELEM)
+        .word   0x1c
+        movl    4(ap), r2       # input vector -> r2
+        movl    8(ap), r3       # output vector -> r3
+        movl    *12(ap), r4     # loop counter
+L2:     jsb     cvt_ieee_vax    # convert one value
+        sobgtr  r4, L2          # loop
+        ret
+_ieesnr_:                       # IEESNANR (VAXNAN)
+        .word   0x0
+        movl    *4(ap), vaxnan
+        clrl    nanin
+        clrl    nanout
+        ret
+_ieegnr_:                       # IEEGNANR (VAXNAN)
+        .word   0x0
+        movl    vaxnan, *4(ap)
+        ret
+_ieemar_:                       # IEEMAPR (MAPIN, MAPOUT)
+        .word   0x0
+        movl    *4(ap), mapin
+        movl    *8(ap), mapout
+        ret
+_ieestr_:                       # IEESTATR (NIN, NOUT)
+        .word   0x0
+        movl    nanin, *4(ap)
+        movl    nanout, *8(ap)
+        ret
+_ieezsr_:                       # IEEZSTATR ()
+        .word   0x0
+        clrl    nanin
+        clrl    nanout
+        ret
+
+cvt_vax_ieee:                   # R2=in, R3=out
+        movl    (r2)+, r0       # vax value -> r0
+
+        tstl    mapout          # map NaNs on output?
+        beql    L4              # no, just output value
+        cmpl    r0, vaxnan      # yes, check if reserved value
+        bneq    L4              # no, just output value
+        clrl    r0              # generate IEEE NaN value
+        insv    $255, $23, $8, r0       # insert NaN exponent (255)
+        incl    nanout          # increment counter
+        jbr     L5
+L4:
+        rotl    $16, r0, r0     # swap words -> r0
+        extzv   $23, $8, r0, r1 # 8 bit exponent -> r1
+        beql    L6              # branch if zero exponent
+        subw2   $2, r1          # adjust exponent bias
+        bleq    L6              # return zero if underflow
+        insv    r1, $23, $8, r0 # insert new exponent
+L5:
+        movl    sp, r1          # r1 points to input byte
+        pushl   r0              # push r0 on stack
+        movb    -(r1), (r3)+    # output longword, swapped
+        movb    -(r1), (r3)+
+        movb    -(r1), (r3)+
+        movb    -(r1), (r3)+
+        tstl    (sp)+           # pop stack
+        rsb                     # all done
+L6:
+        clrl    r0              # return all 32 bits zero
+        jbr     L5
+
+cvt_ieee_vax:                   # R2=in, R3=out
+        movb    (r2)+, -(sp)    # byte swap longword onto stack
+        movb    (r2)+, -(sp)
+        movb    (r2)+, -(sp)
+        movb    (r2)+, -(sp)
+        movl    (sp)+, r0       # pop swapped value -> r0
+        extzv   $23, $8, r0, r1 # exponent -> r1
+        beql    L10             # zero exponent
+        tstl    mapin           # map NaNs on input?
+        beql    L9              # no, don't check value
+        cmpl    r1, $255        # NaN has exponent 255
+        beql    L11             # yes, output vaxnan
+L9:
+        addw2   $2, r1          # adjust exponent bias
+        cmpw    r1, $256        # compare with max VAX exponent
+        bgeq    L11             # return VAX-NaN if overflow
+        insv    r1, $23, $8, r0 # insert VAX-F exponent
+        rotl    $16, r0, (r3)+  # output VAX value
+        rsb
+L10:
+        clrl    (r3)+           # return all 32 bits zero
+        rsb
+L11:
+        moval   vaxnan, r1      # return VAX equiv. of NaN
+        movl    (r1)+, (r3)+
+        incl    nanin
+        rsb
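
The single-precision unpack direction is compact enough to restate in C.
IEEE single and VAX-F share the same sign/exponent/fraction field layout;
a VAX-F exponent is the IEEE exponent plus 2, and the 32-bit value is
stored with its two 16-bit words swapped.  This sketch assumes NaN mapping
is always on and folds in the byte swap; the name and flattened control
flow are illustrative, not the routine itself.

    #include <stdint.h>

    uint32_t ieee_to_vaxf_sketch(const unsigned char in[4], uint32_t vaxnan)
    {
        /* byte swap longword (the movb loop onto the stack) */
        uint32_t v = ((uint32_t) in[0] << 24) | ((uint32_t) in[1] << 16)
                   | ((uint32_t) in[2] << 8)  |  (uint32_t) in[3];
        uint32_t e = (v >> 23) & 0xff;          /* extzv $23, $8 */

        if (e == 0)                             /* zero (or denormal) */
            return 0;
        if (e == 255)                           /* IEEE NaN/Inf */
            return vaxnan;
        e += 2;                                 /* adjust exponent bias */
        if (e >= 256)                           /* VAX exponent overflow */
            return vaxnan;
        v = (v & ~(0xffu << 23)) | (e << 23);   /* insv new exponent */
        return (v << 16) | (v >> 16);           /* rotl $16: swap words */
    }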
diff --git a/unix/as.vax/ishift.s b/unix/as.vax/ishift.s
new file mode 100644
index 00000000..1319556f
--- /dev/null
+++ b/unix/as.vax/ishift.s
@@ -0,0 +1,57 @@
+# IAND, IOR, ISHIFT -- Bitwise boolean integer functions for the NCAR
+# package.  The shift function must rotate the bits left and around
+# if the nbits to shift argument is positive.
+
+        .data                           # Bitwise boolean AND
+        .text
+        .align  1
+        .globl  _iand_
+_iand_:
+        .word   L12
+        jbr     L14
+L15:
+        mcoml   *8(ap),r0
+        bicl3   r0,*4(ap),r0
+        ret
+        ret
+        .set    L12,0x0
+L14:
+        jbr     L15
+
+
+        .data                           # Bitwise boolean OR
+        .text
+        .align  1
+        .globl  _ior_
+_ior_:
+        .word   L17
+        jbr     L19
+L20:
+        bisl3   *8(ap),*4(ap),r0
+        ret
+        ret
+        .set    L17,0x0
+L19:
+        jbr     L20
+
+
+        .data                           # Bitwise SHIFT
+        .text
+        .align  1
+        .globl  _ishift_
+_ishift_:
+        .word   L22
+        jbr     L24
+L25:
+        movl    *8(ap),r11
+        jlss    L26
+        rotl    r11,*4(ap),r0           # left rotate longword
+        ret
+L26:
+        ashl    r11,*4(ap),r0           # right shift with sign extension
+        ret
+        ret
+        .set    L22,0x800
+L24:
+        jbr     L25
+        .data
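
ISHIFT's contract, restated in C (illustrative name): positive counts
rotate left around the 32-bit word, negative counts shift right with sign
extension, matching the rotl and ashl paths above.

    int ishift_sketch(int val, int nbits)
    {
        unsigned u = (unsigned) val;

        if (nbits >= 0) {                   /* rotl: wrap around left */
            nbits &= 31;
            return (int)(nbits ? (u << nbits) | (u >> (32 - nbits)) : u);
        }
        /* >> of a negative int is implementation-defined in C, but on
         * the usual two's-complement targets it sign-extends like ashl
         * with a negative count. */
        return val >> -nbits;
    }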
diff --git a/unix/as.vax/zsvjmp.s b/unix/as.vax/zsvjmp.s
new file mode 100644
index 00000000..f4664dac
--- /dev/null
+++ b/unix/as.vax/zsvjmp.s
@@ -0,0 +1,35 @@
+# ZSVJMP, ZDOJMP -- Set up a jump (non-local goto) by saving the processor
+# registers in the buffer jmpbuf.  A subsequent call to ZDOJMP restores
+# the registers, effecting a call in the context of the procedure which
+# originally called ZSVJMP, but with the new status code.  These are Fortran
+# callable procedures.
+
+        .globl  _zsvjmp_
+
+        # The following has nothing to do with ZSVJMP, and is included here
+        # only because this assembler module is loaded with every process.
+        # This code sets the value of the symbol MEM (the Mem common) to zero,
+        # setting the origin for IRAF pointers to zero rather than some
+        # arbitrary value, and ensuring that the MEM common is aligned for
+        # all datatypes as well as page aligned.  A further advantage is that
+        # references to NULL pointers will cause a memory violation.
+
+        .globl  _mem_
+        .set    _mem_, 0
+
+        .set    JMPBUF, 4
+        .set    STATUS, 8
+
+        # The strategy here is to build on the services provided by the C
+        # setjmp/longjmp.  Note that we cannot do this by writing a C function
+        # which calls setjmp, because the procedure which calls setjmp cannot
+        # return before the longjmp is executed (we want to return to the
+        # caller of the routine containing the setjmp call, not the routine
+        # itself).
+
+        .align  1
+_zsvjmp_:                               # CALL ZSVJMP (JMPBUF, STATUS)
+        .word   0x0
+        movl    STATUS(ap), *JMPBUF(ap) # jmp_buf[0] = addr of status variable
+        clrl    *STATUS(ap)             # return zero status
+        addl2   $4, JMPBUF(ap)          # skip first cell of jmp_buf
+        movl    $1, (ap)                # SETJMP (JMP_BUF)
+        jmp     _setjmp+2               # let setjmp do the rest.
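
For reference, the ZDOJMP half of this protocol can be written in portable
C, since only ZSVJMP needs to run setjmp in its caller's frame (which is
what the final "jmp _setjmp+2", skipping setjmp's entry mask, arranges).
A sketch under assumed types; the actual zdojmp for this port lives
elsewhere and may differ.

    #include <setjmp.h>

    /* jmp_buf cell 0 holds the address of the caller's status variable,
     * which is why zsvjmp stores STATUS there before handing the
     * remaining cells to setjmp. */
    typedef struct {
        int    *status;     /* address of caller's status variable */
        jmp_buf env;        /* filled in by setjmp */
    } zjmp_t;

    void zdojmp_sketch(zjmp_t *buf, const int *status)
    {
        *buf->status = (*status == 0) ? 1 : *status;  /* never resume with 0 */
        longjmp(buf->env, 1);
    }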
diff --git a/unix/as.vax/zsvjmp.s.ORIG b/unix/as.vax/zsvjmp.s.ORIG
new file mode 100644
index 00000000..59911970
--- /dev/null
+++ b/unix/as.vax/zsvjmp.s.ORIG
@@ -0,0 +1,55 @@
+# ZSVJMP, ZDOJMP -- Set up a jump (non-local goto) by saving the processor
+# registers in the buffer jmpbuf.  A subsequent call to ZDOJMP restores
+# the registers, effecting a call in the context of the procedure which
+# originally called ZSVJMP, but with the new status code.  These are Fortran
+# callable procedures.
+
+        .globl  _zsvjmp_
+        .globl  _zdojmp_
+
+        # The following has nothing to do with ZSVJMP, and is included here
+        # only because this assembler module is loaded with every process.
+        # This code sets the value of the symbol MEM (the Mem common) to zero,
+        # setting the origin for IRAF pointers to zero rather than some
+        # arbitrary value, and ensuring that the MEM common is aligned for
+        # all datatypes as well as page aligned.  A further advantage is that
+        # references to NULL pointers will cause a memory violation.
+
+        .globl  _mem_
+        .set    _mem_, 0
+
+        .set    JMPBUF, 4
+        .set    STATUS, 8
+
+        .align  1
+_zsvjmp_:                               # set up jump
+        .word   0x0
+        movl    JMPBUF(ap), r0
+        movl    STATUS(ap), (r0)+       # save address of status variable
+        movq    r6, (r0)+
+        movq    r8, (r0)+
+        movq    r10, (r0)+
+        movq    8(fp), (r0)+            # ap, fp
+        movab   12(ap), (r0)+           # sp
+        movl    16(fp), (r0)            # saved pc
+        clrl    *STATUS(ap)
+        clrl    r0
+        ret
+
+        .align  1
+_zdojmp_:                               # do jump (return again from zsvjmp)
+        .word   0x0
+        movl    JMPBUF(ap), r1
+        movl    (r1)+, r0               # get address of status variable
+        movl    *STATUS(ap), (r0)       # put new status there
+        movq    (r1)+, r6
+        movq    (r1)+, r8
+        movq    (r1)+, r10
+        movq    (r1)+, r12
+        movl    (r1)+, sp
+        tstl    (r0)                    # must not return status=0
+        bneq    L1
+        movzbl  $1, (r0)
+L1:
+        movl    (r0), r0
+        jmp     *(r1)