path: root/unix/as.vax
author     Joseph Hunkeler <jhunkeler@gmail.com>  2015-07-08 20:46:52 -0400
committer  Joseph Hunkeler <jhunkeler@gmail.com>  2015-07-08 20:46:52 -0400
commit     fa080de7afc95aa1c19a6e6fc0e0708ced2eadc4 (patch)
tree       bdda434976bc09c864f2e4fa6f16ba1952b1e555 /unix/as.vax
Initial commit
Diffstat (limited to 'unix/as.vax')
-rw-r--r--  unix/as.vax/README          34
-rw-r--r--  unix/as.vax/aaddks.s        40
-rw-r--r--  unix/as.vax/aadds.s         42
-rw-r--r--  unix/as.vax/aclr.s          64
-rw-r--r--  unix/as.vax/aluir.s         54
-rw-r--r--  unix/as.vax/aluis.s         56
-rw-r--r--  unix/as.vax/amapr.s         82
-rw-r--r--  unix/as.vax/amaps.s         86
-rw-r--r--  unix/as.vax/amov.s          94
-rw-r--r--  unix/as.vax/awsur.s         44
-rw-r--r--  unix/as.vax/awsus.s         47
-rw-r--r--  unix/as.vax/bitfields.s     42
-rw-r--r--  unix/as.vax/bytmov.s        80
-rw-r--r--  unix/as.vax/cyboow.s        93
-rw-r--r--  unix/as.vax/ieeed.s        182
-rw-r--r--  unix/as.vax/ieeer.s        153
-rw-r--r--  unix/as.vax/ishift.s        57
-rw-r--r--  unix/as.vax/zsvjmp.s        35
-rw-r--r--  unix/as.vax/zsvjmp.s.ORIG   55
19 files changed, 1340 insertions, 0 deletions
diff --git a/unix/as.vax/README b/unix/as.vax/README
new file mode 100644
index 00000000..58b5c87f
--- /dev/null
+++ b/unix/as.vax/README
@@ -0,0 +1,34 @@
+AS - Assembler Sources
+
+This directory contains the (non-kernel) files which it has proven
+desirable to optimize in assembler. Most of these routines are not
+required for the operation of the system, although considerable gains
+in speed are possible in some cases. If the autogeneration routines
+cannot find a particular assembler file, the portable routine is
+used automatically instead.
+
+The following assembler files are required:
+
+ zsvjmp.s (libos.a)
+
+The following should normally be optimized in assembler, particularly if the
+machine has special memory move or bitfield instructions:
+
+ aclr.s clear a block of memory
+ amov.s move a block of memory
+ bitpak.s write into a bit field
+ bitupk.s read from a bit field
+ bytmov.s a variant on amov.s
+ ishift.s bit shift, also iand, ior (used by NCAR/graphics)
+
+The following can be omitted without significant penalty:
+
+ aaddks.s
+ aadds.s
+ aluir.s
+ aluis.s
+ amapr.s
+ amaps.s
+ awsur.s
+ awsus.s
+ cyboow.s
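For reference, here is a minimal C sketch of what one of the optional routines computes, using aadds as the example. The real portable fallback is generated from the SPP sources; this signature is only an assumption based on the Fortran calling convention (all arguments by reference, trailing underscore).

    /* c[i] = a[i] + b[i] for short vectors; illustrative only. */
    void
    aadds_(short *a, short *b, short *c, int *npix)
    {
        for (int i = 0; i < *npix; i++)
            c[i] = a[i] + b[i];     /* the addw3 in the assembler loop */
    }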
diff --git a/unix/as.vax/aaddks.s b/unix/as.vax/aaddks.s
new file mode 100644
index 00000000..8000beb0
--- /dev/null
+++ b/unix/as.vax/aaddks.s
@@ -0,0 +1,40 @@
+ .data 0
+ .set LWM1,0xfc0
+ .data 2
+ .data 1
+ .data 0
+ .globl _aaddks_
+ .data 2
+v.2:
+ .space 4
+ .set v.1,v.2
+
+ .stabs "aaddks.f",0x64,0,0,0
+ .text
+ .globl _aaddks_
+ .set LF1,12
+_aaddks_:
+ .word LWM1
+ subl2 $LF1,sp
+ jbr L12
+ .align 1
+L12:
+ moval v.1,r11
+ movl *16(ap),-4(fp)
+ subl2 $2,4(ap)
+ movl *16(ap),-8(fp)
+ subl2 $2,12(ap)
+ movl *16(ap),-12(fp)
+ movl v.2-v.1(r11),r10
+ movl 4(ap),r9
+ movl 8(ap),r8
+ movl 12(ap),r7
+ movl -12(fp),r6
+ movl $1,r10
+ cmpl r6,r10
+ jlss L20
+L21:
+ addw3 (r9)[r10],(r8),(r7)[r10]
+ aobleq r6,r10,L21
+L20:
+ ret
diff --git a/unix/as.vax/aadds.s b/unix/as.vax/aadds.s
new file mode 100644
index 00000000..42315155
--- /dev/null
+++ b/unix/as.vax/aadds.s
@@ -0,0 +1,42 @@
+ .data 0
+ .set LWM1,0xfc0
+ .data 2
+ .data 1
+ .data 0
+ .globl _aadds_
+ .data 2
+v.2:
+ .space 4
+ .set v.1,v.2
+
+ .stabs "aadds.f",0x64,0,0,0
+ .text
+ .globl _aadds_
+ .set LF1,16
+_aadds_:
+ .word LWM1
+ subl2 $LF1,sp
+ jbr L12
+ .align 1
+L12:
+ moval v.1,r11
+ movl *16(ap),-4(fp)
+ subl2 $2,4(ap)
+ movl *16(ap),-8(fp)
+ subl2 $2,8(ap)
+ movl *16(ap),-12(fp)
+ subl2 $2,12(ap)
+ movl *16(ap),-16(fp)
+ movl v.2-v.1(r11),r10
+ movl 4(ap),r9
+ movl 8(ap),r8
+ movl 12(ap),r7
+ movl -16(fp),r6
+ movl $1,r10
+ cmpl r6,r10
+ jlss L21
+L22:
+ addw3 (r9)[r10],(r8)[r10],(r7)[r10]
+ aobleq r6,r10,L22
+L21:
+ ret
diff --git a/unix/as.vax/aclr.s b/unix/as.vax/aclr.s
new file mode 100644
index 00000000..5cbb1617
--- /dev/null
+++ b/unix/as.vax/aclr.s
@@ -0,0 +1,64 @@
+# ACLR -- Zero a block of memory.
+
+ .set MASK, 07400
+ .set A, 4
+ .set NPIX, 8
+
+.data
+LZB:
+ .quad 0
+ .quad 0
+ .quad 0
+ .quad 0
+ .quad 0
+ .quad 0
+ .quad 0
+ .quad 0
+ .align 2
+.text
+ .globl _aclrb_ # aclr_ (a, npix)
+ .globl _aclrc_
+ .globl _aclrs_
+ .globl _aclri_
+ .globl _aclrl_
+ .globl _aclrr_
+ .globl _aclrd_
+ .globl _aclrx_
+_aclrb_:
+ .word MASK
+ movl *NPIX(ap), r11
+ jbr L10
+_aclrc_:
+_aclrs_:
+ .word MASK
+ mull3 $2, *NPIX(ap), r11
+ jbr L10
+_aclri_:
+_aclrl_:
+_aclrr_:
+ .word MASK
+ mull3 $4, *NPIX(ap), r11
+ jbr L10
+_aclrd_:
+_aclrx_:
+ .word MASK
+ mull3 $8, *NPIX(ap), r11
+L10:
+ jleq L20
+ moval LZB, r8
+ movl A(ap), r9
+ ashl $-6, r11, r10
+ bleq L12
+
+ # Clear successive 64 byte blocks.
+L11:
+ movc3 $64, (r8), (r9)
+ addl2 $64, r9
+ sobgtr r10, L11
+L12:
+ # Clear the remaining bytes.
+
+ bicl2 $-64, r11
+ movc3 r11, (r8), (r9)
+L20:
+ ret
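The aclr entry points differ only in element size; the movc3 block copy from a zeroed buffer is, in effect, a memset. A hedged C equivalent of the whole family (the signature is assumed, for illustration):

    #include <string.h>

    /* Zero npix elements of elsize bytes, as _aclrb_.._aclrx_ do for
     * element sizes 1, 2, 4 and 8. */
    void aclr(void *a, long npix, size_t elsize)
    {
        memset(a, 0, (size_t) npix * elsize);
    }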
diff --git a/unix/as.vax/aluir.s b/unix/as.vax/aluir.s
new file mode 100644
index 00000000..30a37d0d
--- /dev/null
+++ b/unix/as.vax/aluir.s
@@ -0,0 +1,54 @@
+# ALUIR -- Look up and interpolate a vector of type real onto a real grid.
+# [OBSOLETE - NO LONGER USED. 5/27/87]
+
+ .data 0
+ .align 2
+ .text
+ .globl _aluir_
+
+ .set A, 4
+ .set B, 8
+ .set X, 12
+ .set NPIX, 16
+
+ # ALUIR (a, b, x, npix)
+ #
+ # left = int (x[i])
+ # tau = x[i] - left
+ # b[i] = (a[left] * (1-tau)) + (a[left+1] * tau)
+ #
+ # registers:
+ # r0 max_b
+ # r1 a
+ # r2 b
+ # r3 x
+ # r4 x[i], tau
+ # r5 left
+ # r6
+
+_aluir_:
+ .word 0374 # save r2-r7
+ subl3 $4, A(ap), r1
+ movl B(ap), r2
+ movl X(ap), r3
+ mull3 $4, *NPIX(ap), r0
+ addl2 r2, r0
+L1:
+ movf (r3)+, r4 # get X into r4
+ cvtfl r4, r5 # r5 = left
+ cvtlf r5, r6
+ subf2 r6, r4 # r4 = tau = (x[i] - left)
+
+ movf (r1)[r5], r6
+ mulf3 r4, r6, r7
+ subf2 r7, r6 # r6 = (a[left] * (1-tau))
+
+ incl r5
+ mulf3 r4, (r1)[r5], r7 # r7 = (a[left+1] * tau)
+
+ addf3 r6, r7, (r2)+ # output result to B
+
+ cmpl r2, r0
+ blssu L1
+
+ ret
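The pseudocode in the header translates almost line for line into C. A sketch, assuming Fortran one-based indexing in X (hence the a - 1 base adjustment, mirroring the subl3 $4 above):

    /* Linear interpolation lookup, type real: b[i] is interpolated
     * from a[] at position x[i].  Illustrative rendering of _aluir_. */
    void aluir(float *a, float *b, float *x, int *npix)
    {
        float *a0 = a - 1;              /* one-based left indices */
        for (int i = 0; i < *npix; i++) {
            int   left = (int) x[i];    /* cvtfl: truncate to integer */
            float tau  = x[i] - (float) left;
            b[i] = a0[left] * (1.0f - tau) + a0[left + 1] * tau;
        }
    }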
diff --git a/unix/as.vax/aluis.s b/unix/as.vax/aluis.s
new file mode 100644
index 00000000..0fe54e26
--- /dev/null
+++ b/unix/as.vax/aluis.s
@@ -0,0 +1,56 @@
+# ALUIS -- Look up and interpolate a vector of type short onto a real grid.
+# [OBSOLETE - NO LONGER USED. 5/27/87]
+
+ .data 0
+ .align 2
+ .text
+ .globl _aluis_
+
+ .set A, 4
+ .set B, 8
+ .set X, 12
+ .set NPIX, 16
+
+ # ALUIS (a, b, x, npix)
+ #
+ # left = int (x[i])
+ # tau = x[i] - left
+ # b[i] = (a[left] * (1-tau)) + (a[left+1] * tau)
+ #
+ # registers:
+ # r0 max_b
+ # r1 a
+ # r2 b
+ # r3 x
+ # r4 x[i], tau
+ # r5 left
+ # r6
+
+_aluis_:
+ .word 0374 # save r2-r7
+ subl3 $2, A(ap), r1
+ movl B(ap), r2
+ movl X(ap), r3
+ mull3 $2, *NPIX(ap), r0
+ addl2 r2, r0
+L1:
+ movf (r3)+, r4 # get X into r4
+ cvtfl r4, r5 # r5 = left
+ cvtlf r5, r6
+ subf2 r6, r4 # r4 = tau = (x[i] - left)
+
+ cvtwf (r1)[r5], r6
+ mulf3 r4, r6, r7
+ subf2 r7, r6 # r6 = (a[left] * (1-tau))
+
+ incl r5
+ cvtwf (r1)[r5], r7
+ mulf2 r4, r7 # r7 = (a[left+1] * tau)
+
+ addf2 r6, r7
+ cvtfw r7, (r2)+ # output result to B
+
+ cmpl r2, r0
+ blssu L1
+
+ ret
diff --git a/unix/as.vax/amapr.s b/unix/as.vax/amapr.s
new file mode 100644
index 00000000..5ba41092
--- /dev/null
+++ b/unix/as.vax/amapr.s
@@ -0,0 +1,82 @@
+# AMAPR -- Linear transformation, type real. The range of pixel values
+# A1 to A2 is mapped into the range B1 to B2 using a linear transformation.
+# Values less than A1 or greater than A2 are mapped to the values B1 and
+# B2, respectively, upon output.
+
+ .data 0
+
+ .set A, 4
+ .set B, 8
+ .set NPIX, 12
+ .set A1, 16
+ .set A2, 20
+ .set B1, 24
+ .set B2, 28
+
+ .align 2
+ .globl _amapr_
+ .text
+
+ # AMAPR (a, b, npix, a1, a2, b1, b2)
+ #
+ # scalar = real (b2 - b1) / real (a2 - a1)
+ # minout = min (b1, b2)
+ # maxout = max (b1, b2)
+ #
+ # do i = 1, npix
+ # b[i] = max(minout, min(maxout,
+ # PIXEL((a[i] - a1) * scalar) + b1))
+ #
+ # Registers:
+ # r0 last_a
+ # r1 a
+ # r2 b
+ # r3 scalar
+ # r4 minout
+ # r5 maxout
+ # r6 a1
+ # r7 b1
+
+_amapr_:
+ .word 01774 # save r2-r9
+ movl A(ap), r1
+ movl B(ap), r2
+ mull3 $4, *NPIX(ap), r0
+ addl2 r1, r0
+ movf *A1(ap), r6
+ movf *B1(ap), r7
+ movf *A2(ap), r8
+ movf *B2(ap), r9
+
+ subf3 r7, r9, r3 # r3 = (b2 - b1) / (a2 - a1)
+ subf3 r6, r8, r4
+ divf2 r4, r3
+
+ cmpf r7, r9 # b1 <= b2
+ bleq L1
+ movf r9, r4 # no, min=b2, max=b1
+ movf r7, r5
+ jbr L2
+L1: movf r7, r4 # yes, min=b1, max=b2
+ movf r9, r5
+L2:
+ subf3 r6, (r1)+, r8 # r8 = a[i] - a1
+ mulf2 r3, r8 # (..) * scalar
+ addf2 r7, r8 # (..) + b1
+
+ cmpf r8, r4 # r8 < minout?
+ bgtr L3
+ movf r4, (r2)+
+ jbr L5
+L3:
+ cmpf r8, r5 # r8 > maxout?
+ blss L4
+ movf r5, (r2)+
+ jbr L5
+L4:
+ movf r8, (r2)+ # new value in range
+L5:
+ cmpl r1, r0
+ blssu L2 # loop again
+
+ ret
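As a cross-check on the register comments, the same clamped linear mapping in C (signature assumed, Fortran by-reference arguments):

    /* b[i] = clamp((a[i] - a1) * scalar + b1, minout, maxout), with
     * scalar and the clamp limits computed once, as in the preamble. */
    void amapr(float *a, float *b, int *npix,
               float *a1, float *a2, float *b1, float *b2)
    {
        float scalar = (*b2 - *b1) / (*a2 - *a1);
        float minout = (*b1 <= *b2) ? *b1 : *b2;
        float maxout = (*b1 <= *b2) ? *b2 : *b1;

        for (int i = 0; i < *npix; i++) {
            float v = (a[i] - *a1) * scalar + *b1;
            b[i] = (v < minout) ? minout : (v > maxout) ? maxout : v;
        }
    }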
diff --git a/unix/as.vax/amaps.s b/unix/as.vax/amaps.s
new file mode 100644
index 00000000..8f7664ea
--- /dev/null
+++ b/unix/as.vax/amaps.s
@@ -0,0 +1,86 @@
+# AMAPS -- Linear transformation, type short. The range of pixel values
+# A1 to A2 is mapped into the range B1 to B2 using a linear transformation.
+# Values less than A1 or greater than A2 are mapped to the values B1 and
+# B2, respectively, upon output.
+
+ .data 0
+
+ .set A, 4
+ .set B, 8
+ .set NPIX, 12
+ .set A1, 16
+ .set A2, 20
+ .set B1, 24
+ .set B2, 28
+
+ .align 2
+ .globl _amaps_
+ .text
+
+ # AMAPS (a, b, npix, a1, a2, b1, b2)
+ #
+ # scalar = real (b2 - b1) / real (a2 - a1)
+ # minout = min (b1, b2)
+ # maxout = max (b1, b2)
+ #
+ # do i = 1, npix
+ # b[i] = max(minout, min(maxout,
+ # PIXEL((a[i] - a1) * scalar) + b1))
+ #
+ # Registers:
+ # r0 last_a
+ # r1 a
+ # r2 b
+ # r3 scalar
+ # r4 minout
+ # r5 maxout
+ # r6 a1
+ # r7 b1
+
+_amaps_:
+ .word 01774 # save r2-r9
+ movl A(ap), r1
+ movl B(ap), r2
+ mull3 $2, *NPIX(ap), r0
+ addl2 r1, r0
+ movw *A1(ap), r6
+ movw *B1(ap), r7
+ movw *A2(ap), r8
+ movw *B2(ap), r9
+
+ subw3 r7, r9, r3 # r3 = (b2 - b1) / (a2 - a1)
+ cvtwf r3, r3
+ subw3 r6, r8, r4
+ cvtwf r4, r4
+ divf2 r4, r3
+
+ cmpw r7, r9 # b1 <= b2
+ bleq L1
+ movw r9, r4 # no, min=b2, max=b1
+ movw r7, r5
+ jbr L2
+L1: movw r7, r4 # yes, min=b1, max=b2
+ movw r9, r5
+L2:
+ subw3 r6, (r1)+, r8 # r8 = a[i] - a1
+ cvtwf r8, r8
+ mulf2 r3, r8 # (..) * scalar
+ cvtfw r8, r8
+ addw2 r7, r8 # (..) + b1
+
+ cmpw r8, r4 # r8 < minout?
+ bgtr L3
+ movw r4, (r2)+
+ jbr L5
+L3:
+ cmpw r8, r5 # r8 > maxout?
+ blss L4
+ movw r5, (r2)+
+ jbr L5
+L4:
+ movw r8, (r2)+ # new value in range
+L5:
+ cmpl r1, r0
+ blssu L2 # loop again
+
+ ret
diff --git a/unix/as.vax/amov.s b/unix/as.vax/amov.s
new file mode 100644
index 00000000..61784aac
--- /dev/null
+++ b/unix/as.vax/amov.s
@@ -0,0 +1,94 @@
+# AMOV -- Move a block of data from one area of memory to another. The
+# move is carried out (using the MOVC instruction) in such a way that
+# data is not destroyed, regardless of whether or not the input and output
+# arrays overlap. Note that the move is not data dependent (floating
+# point data is not special).
+
+ .set MASK, 07400
+ .set A, 4
+ .set B, 8
+ .set NPIX, 12
+ .set MAXBLK, 0177777
+
+ .align 2
+.text
+ .globl _amovc_ # amov_ (a, b, npix)
+ .globl _amovs_
+ .globl _amovi_
+ .globl _amovl_
+ .globl _amovr_
+ .globl _amovd_
+ .globl _amovx_
+_amovc_:
+_amovs_:
+ .word MASK
+ movl $2, r11 # r11 = size of pixel
+ jbr L10
+_amovi_:
+_amovl_:
+_amovr_:
+ .word MASK
+ movl $4, r11
+ jbr L10
+_amovd_:
+_amovx_:
+ .word MASK
+ movl $8, r11
+
+ # Compute source and destination addresses and the number of bytes to
+	# be moved. If nbytes=0 or the source and destination are the same
+	# then we are done. If nbytes is greater than a single MOVC3 can
+	# accommodate then we must branch to the more complicated code below,
+ # otherwise we call MOVC3 and return.
+
+L10: mull3 r11, *NPIX(ap), r10 # nbytes
+ jleq L20
+ movl A(ap), r8 # fwa of A array
+ movl B(ap), r9 # fwa of B array
+ cmpl r8, r9
+ jeql L20 # A, B same array
+ cmpl r10, $MAXBLK # too large for single movc3?
+ jgtr L30
+ movc3 r10, (r8), (r9)
+L20:
+ ret
+L30:
+	# Since the array is larger than a single MOVC3 instruction can
+	# accommodate we must do the move in segments of size MAXBLK. Since
+	# multiple moves are needed we cannot leave it up to MOVC3 to make
+	# sure that the move is nondestructive. If the destination is to
+	# the left (lower address) of the source then the move is necessarily
+	# nondestructive. If to the right then the move is potentially
+	# destructive, and we must solve the problem by moving the high
+	# segments first.
+
+ movl $MAXBLK, r11
+ cmpl r8, r9
+ jlssu L50
+L40: # move high to low
+ cmpl r10, $MAXBLK
+ jgtr L41
+ movl r10, r11
+L41:
+ movc3 r11, (r8), (r9)
+ addl2 r11, r8
+ addl2 r11, r9
+ subl2 r11, r10
+ jgtr L40
+
+ ret
+L50: # move low to high
+ addl2 r10, r8
+ addl2 r10, r9
+L60:
+ cmpl r10, $MAXBLK
+ jgtr L61
+ movl r10, r11
+L61:
+ subl2 r11, r8
+ subl2 r11, r9
+ movc3 r11, (r8), (r9)
+ subl2 r11, r10
+ jgtr L60
+
+ ret
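The overlap analysis above (destination left of source: copy low to high; destination right: copy high segments first) is precisely the contract of memmove(3), so a portable amov for the 4-byte case reduces to a single call. A sketch, signature assumed:

    #include <string.h>

    /* Overlap-safe block move of npix 4-byte elements, as _amovi_. */
    void amovi(int *a, int *b, int *npix)
    {
        memmove(b, a, (size_t) *npix * sizeof(int));
    }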
diff --git a/unix/as.vax/awsur.s b/unix/as.vax/awsur.s
new file mode 100644
index 00000000..a1a97e16
--- /dev/null
+++ b/unix/as.vax/awsur.s
@@ -0,0 +1,44 @@
+# AWSUR -- Weighted sum of two type real vectors.
+
+ .data 0
+ .globl _awsur_
+ .align 2
+ .text
+
+ .set A, 4
+ .set B, 8
+ .set C, 12
+ .set NPIX, 16
+ .set W1, 20
+ .set W2, 24
+
+ # AWSUR (a, b, c, npix, w1, w2)
+ #
+ # registers:
+ # r0 max_a
+ # r1 a
+ # r2 b
+ # r3 c
+ # r4 w1 (real)
+ # r5 w2 (real)
+
+_awsur_:
+ .word 0374
+ movl A(ap), r1
+ movl B(ap), r2
+ movl C(ap), r3
+ mull3 $4, *NPIX(ap), r0
+ addl2 r1, r0
+ movf *W1(ap), r4
+ movf *W2(ap), r5
+
+ # c[i] = a[i] * w1 + b[i] * w2
+L1:
+ mulf3 (r1)+, r4, r6
+ mulf3 (r2)+, r5, r7
+ addf3 r6, r7, (r3)+
+
+ cmpl r1, r0
+ blssu L1
+
+ ret
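The loop body is the whole routine; in C the weighted sum reads as follows (assumed by-reference signature, mirroring the Fortran convention):

    /* c[i] = a[i]*w1 + b[i]*w2, type real; sketch of _awsur_. */
    void awsur(float *a, float *b, float *c, int *npix,
               float *w1, float *w2)
    {
        for (int i = 0; i < *npix; i++)
            c[i] = a[i] * *w1 + b[i] * *w2;
    }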
diff --git a/unix/as.vax/awsus.s b/unix/as.vax/awsus.s
new file mode 100644
index 00000000..5ad9bb78
--- /dev/null
+++ b/unix/as.vax/awsus.s
@@ -0,0 +1,47 @@
+# AWSUS -- Weighted sum of two type short vectors.
+
+ .data 0
+ .globl _awsus_
+ .align 2
+ .text
+
+ .set A, 4
+ .set B, 8
+ .set C, 12
+ .set NPIX, 16
+ .set W1, 20
+ .set W2, 24
+
+ # AWSUS (a, b, c, npix, w1, w2)
+ #
+ # registers:
+ # r0 max_a
+ # r1 a
+ # r2 b
+ # r3 c
+ # r4 w1 (real)
+ # r5 w2 (real)
+
+_awsus_:
+ .word 0374
+ movl A(ap), r1
+ movl B(ap), r2
+ movl C(ap), r3
+ mull3 $2, *NPIX(ap), r0
+ addl2 r1, r0
+ movf *W1(ap), r4
+ movf *W2(ap), r5
+
+ # c[i] = a[i] * w1 + b[i] * w2
+L1:
+ cvtwf (r1)+, r6
+ mulf2 r4, r6
+ cvtwf (r2)+, r7
+ mulf2 r5, r7
+ addf2 r6, r7
+ cvtfw r7, (r3)+
+
+ cmpl r1, r0
+ blssu L1
+
+ ret
diff --git a/unix/as.vax/bitfields.s b/unix/as.vax/bitfields.s
new file mode 100644
index 00000000..5e28cd3d
--- /dev/null
+++ b/unix/as.vax/bitfields.s
@@ -0,0 +1,42 @@
+# BITFIELDS -- Routines for inserting and extracting bitfields into integers.
+
+# BITPAK -- Pack an integer into a bitfield of an array. Set all nbits
+# bits regardless of the value of the integer.
+
+ .text
+ .align 1
+ .globl _bitpak_
+
+ # bitpak (intval, array, offset, nbits)
+ .set INTVAL, 4
+ .set ARRAY, 8
+ .set OFFSET, 12 # one-indexed bit offset
+ .set NBITS, 16
+
+_bitpak_:
+ .word 0x0
+
+ subl3 $1, *OFFSET(ap), r1
+ insv *INTVAL(ap), r1, *NBITS(ap), *ARRAY(ap)
+ ret
+ .data
+
+# BITUPK -- Unpack a bitfield from an array and return as the function
+# value, an integer. Do not sign extend.
+
+ .text
+ .align 1
+ .globl _bitupk_
+
+ # bitupk (array, offset, nbits)
+ .set ARRAY, 4
+ .set OFFSET, 8 # one-indexed bit offset
+ .set NBITS, 12
+
+_bitupk_:
+ .word 0x0
+
+ subl3 $1, *OFFSET(ap), r1
+ extzv r1, *NBITS(ap), *ARRAY(ap), r0
+ ret
+ .data
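insv and extzv take an arbitrary bit offset into memory, which C has no direct analogue for. A sketch of the same semantics through an 8-byte window (assumed by-value signatures, fields of at most 32 bits, little-endian layout; note the window may read past the last array element, a simplification the VAX instructions avoid):

    #include <stdint.h>
    #include <string.h>

    /* Read nbits at a one-indexed bit offset, zero-extended (bitupk). */
    int bitupk(void *array, int offset, int nbits)
    {
        uint64_t w;
        int bit = offset - 1;               /* one-indexed, as in the .s */
        memcpy(&w, (char *) array + bit / 8, sizeof w);
        return (int) ((w >> (bit % 8)) & ((1ull << nbits) - 1));
    }

    /* Overwrite nbits at a one-indexed bit offset with intval (bitpak). */
    void bitpak(int intval, void *array, int offset, int nbits)
    {
        uint64_t w, mask = (1ull << nbits) - 1;
        int bit = offset - 1;
        memcpy(&w, (char *) array + bit / 8, sizeof w);
        w = (w & ~(mask << (bit % 8)))
          | (((uint64_t) intval & mask) << (bit % 8));
        memcpy((char *) array + bit / 8, &w, sizeof w);
    }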
diff --git a/unix/as.vax/bytmov.s b/unix/as.vax/bytmov.s
new file mode 100644
index 00000000..64acc299
--- /dev/null
+++ b/unix/as.vax/bytmov.s
@@ -0,0 +1,80 @@
+# BYTMOV -- Move a block of data from one area of memory to another. The
+# move is carried out (using the MOVC instruction) in such a way that
+# data is not destroyed, regardless of whether or not the input and output
+# arrays overlap.
+
+ .set MASK, 07400
+
+ # bytmov (a, aoff, b, boff, nbytes)
+ .set A, 4
+ .set AOFF, 8
+ .set B, 12
+ .set BOFF, 16
+ .set NBYTES, 20
+ .set MAXBLK, 0177777
+
+ .align 2
+.text
+ .globl _bytmov_
+_bytmov_:
+ .word MASK
+
+ # Compute source and destination addresses and the number of bytes to
+	# be moved. If nbytes=0 or the source and destination are the same
+	# then we are done. If nbytes is greater than a single MOVC3 can
+	# accommodate then we must branch to the more complicated code below,
+ # otherwise we call MOVC3 and return.
+
+ movl *NBYTES(ap), r10 # nbytes
+ jleq L20
+ addl3 A(ap), *AOFF(ap), r8 # fwa of A array
+ decl r8 # allow for one-indexing
+ addl3 B(ap), *BOFF(ap), r9 # fwa of B array
+ decl r9 # allow for one-indexing
+ cmpl r8, r9
+ jeql L20 # A, B same array
+ cmpl r10, $MAXBLK # too large for single movc3?
+ jgtr L30
+ movc3 r10, (r8), (r9)
+L20:
+ ret
+L30:
+	# Since the array is larger than a single MOVC3 instruction can
+	# accommodate we must do the move in segments of size MAXBLK. Since
+	# multiple moves are needed we cannot leave it up to MOVC3 to make
+	# sure that the move is nondestructive. If the destination is to
+	# the left (lower address) of the source then the move is necessarily
+	# nondestructive. If to the right then the move is potentially
+	# destructive, and we must solve the problem by moving the high
+	# segments first.
+
+ movl $MAXBLK, r11
+ cmpl r8, r9
+ jlssu L50
+L40: # move high to low
+ cmpl r10, $MAXBLK
+ jgtr L41
+ movl r10, r11
+L41:
+ movc3 r11, (r8), (r9)
+ addl2 r11, r8
+ addl2 r11, r9
+ subl2 r11, r10
+ jgtr L40
+
+ ret
+L50: # move low to high
+ addl2 r10, r8
+ addl2 r10, r9
+L60:
+ cmpl r10, $MAXBLK
+ jgtr L61
+ movl r10, r11
+L61:
+ subl2 r11, r8
+ subl2 r11, r9
+ movc3 r11, (r8), (r9)
+ subl2 r11, r10
+ jgtr L60
+
+ ret
diff --git a/unix/as.vax/cyboow.s b/unix/as.vax/cyboow.s
new file mode 100644
index 00000000..cafec5a2
--- /dev/null
+++ b/unix/as.vax/cyboow.s
@@ -0,0 +1,93 @@
+# CYBOOW, CYBOEW -- Order the bits in an odd or even indexed 60-bit Cyber word.
+# The operation may not be performed in-place. The offsets and sizes of the
+# bit segments which must be moved are as follows:
+#
+# --> Odd Words <-- --> Even Words <--
+# [from] [to] [nbits]
+# 1 53 8 -3 57 4
+# 9 45 8 5 49 8
+# 17 37 8 13 41 8
+# 25 29 8 21 33 8
+# 33 21 8 29 25 8
+# 41 13 8 37 17 8
+# 49 5 8 45 9 8
+# 61 1 4 53 1 8
+#
+# Input bit-offsets must be a multiple of the Cyber word size, i.e., 1, 61,
+# 121, etc. An output word may begin at any bit-offset.
+
+.globl _cyboow_
+.globl _cyboew_
+
+ .set IN, 4
+ .set INBIT, 8
+ .set OUT, 12
+ .set OUTBIT, 16
+
+ .data
+ .align 2
+W: .long 0 # temp space for output word
+ .long 0
+
+ .text
+ .align 1
+
+
+# CYBOOW -- Order odd cyber word. After swapping the first 8 bytes of IN the
+# ordered 60-bit Cyber word is in bits 5-64 of the temporary storage area at W.
+
+_cyboow_: # (in, inbit, out, outbit)
+ .word 0x4
+
+ subl3 $1, *INBIT(ap), r0 # bit offset into IN
+ ashl $-3, r0, r0
+ addl2 IN(ap), r0 # input base address
+
+ addl3 $8, $W, r1 # swap bytes into W temporary
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+
+ movl OUT(ap), r0 # output base address
+ subl3 $1, *OUTBIT(ap), r1 # bit offset into OUT
+ extzv $4, $30, W, r2
+ insv r2, r1, $30, (r0) # put first 30 bits
+ extzv $34, $30, W, r2
+ addl2 $30, r1
+ insv r2, r1, $30, (r0) # put second 30 bits
+ ret
+
+# CYBOEW -- Order even cyber word. After swapping the 8 bytes the ordered
+# Cyber word will be found in bits 1-60 of the temporary storage area at W.
+
+_cyboew_: # (in, inbit, out, outbit)
+ .word 0x4
+ subl3 $5, *INBIT(ap), r0 # bit offset into IN
+ ashl $-3, r0, r0
+ addl2 IN(ap), r0 # input base address
+
+ addl3 $8, $W, r1 # swap bytes into W temporary
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+ movb (r0)+, -(r1)
+
+ movl OUT(ap), r0 # output base address
+ subl3 $1, *OUTBIT(ap), r1 # bit offset into OUT
+ movl W, r2
+ insv r2, r1, $32, (r0) # put first 32 bits
+ extzv $32, $30, W, r2
+ addl2 $32, r1
+ insv r2, r1, $28, (r0) # put remaining 28 bits
+
+ ret
+ .data
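The odd-word table above can be read as a data-driven loop. A sketch built on the bitupk/bitpak sketches given after bitfields.s (the assembler instead byte-swaps the quadword and issues two insv instructions, which is far cheaper; this literal reading of the table is illustrative only):

    extern int  bitupk(void *array, int offset, int nbits);
    extern void bitpak(int intval, void *array, int offset, int nbits);

    /* The [from] [to] [nbits] rows of the odd-word table, one-indexed. */
    static const struct { int from, to, nbits; } oddseg[] = {
        { 1, 53, 8}, { 9, 45, 8}, {17, 37, 8}, {25, 29, 8},
        {33, 21, 8}, {41, 13, 8}, {49,  5, 8}, {61,  1, 4},
    };

    /* Reorder one odd Cyber word; not in-place, as noted above. */
    void cyboow_sketch(void *in, int inbit, void *out, int outbit)
    {
        for (int i = 0; i < 8; i++)
            bitpak(bitupk(in,  inbit  + oddseg[i].from - 1, oddseg[i].nbits),
                   out, outbit + oddseg[i].to   - 1, oddseg[i].nbits);
    }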
diff --git a/unix/as.vax/ieeed.s b/unix/as.vax/ieeed.s
new file mode 100644
index 00000000..ec550592
--- /dev/null
+++ b/unix/as.vax/ieeed.s
@@ -0,0 +1,182 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+#
+# IEEED.S -- IEEE double to VAX double floating conversions.
+#
+# ieepakd (x) # scalar, vax->ieee
+# ieeupkd (x) # scalar, ieee->vax
+# ieevpakd (native, ieee, nelem) # vector, vax->ieee
+# ieevupkd (ieee, native, nelem) # vector, ieee->vax
+# ieesnand (NaN) # set VAX NaN value
+# ieegnand (NaN) # get VAX NaN value
+# ieemapd (mapin, mapout) # enable NaN mapping
+# ieestatd (nin, nout) # get num NaN values mapped
+# ieezstatd () # zero NaN counters
+#
+# These routines convert between the VAX and IEEE double floating formats,
+# operating upon a single value or an array of values. +/- zero is converted
+# to zero. When converting IEEE to VAX, underflow maps to zero, and exponent
+# overflow and NaN input values map to the value set by IEESNAND (default 0).
+# These routines are functionally equivalent to the semi-portable versions of
+# the IRAF ieee/native floating conversion routines in osb$ieeed.x.
+# TODO - Add a function callback option for processing NaN values.
+
+ .data
+vaxnan: .quad 0
+nanin: .long 0
+nanout: .long 0
+mapin: .long 1 # enable input NaN mapping by default for VAX
+mapout: .long 0
+
+ .text
+ .align 1
+ .globl _ieepad_
+ .globl _ieevpd_
+ .globl _ieeupd_
+ .globl _ieevud_
+ .globl _ieesnd_
+ .globl _ieegnd_
+ .globl _ieemad_
+ .globl _ieestd_
+ .globl _ieezsd_
+
+_ieepad_: # IEEPAKD (X)
+ .word 0x3c
+ movl 4(ap), r4 # data addr -> r4
+ movl r4, r5 # output clobbers input
+ jsb cvt_vax_ieee # convert value
+ ret
+_ieevpd_: # IEEVPAKD (VAX, IEEE, NELEM)
+ .word 0x7c
+ movl 4(ap), r4 # input vector -> r4
+ movl 8(ap), r5 # output vector -> r5
+ movl *12(ap), r6 # loop counter
+L1: jsb cvt_vax_ieee # convert one value
+ sobgtr r6, L1 # loop
+ ret
+_ieeupd_: # IEEUPKD (X)
+ .word 0x3c
+ movl 4(ap), r4 # data addr -> r4
+ movl r4, r5 # output clobbers input
+ jsb cvt_ieee_vax # convert value
+ ret
+_ieevud_: # IEEVUPKD (IEEE, VAX, NELEM)
+ .word 0x7c
+ movl 4(ap), r4 # input vector -> r4
+ movl 8(ap), r5 # output vector -> r5
+ movl *12(ap), r6 # loop counter
+L2: jsb cvt_ieee_vax # convert one value
+ sobgtr r6, L2 # loop
+ ret
+_ieesnd_: # IEESNAND (VAXNAN)
+ .word 0x0
+ movq *4(ap), vaxnan
+ clrl nanin
+ clrl nanout
+ ret
+_ieegnd_: # IEEGNAND (VAXNAN)
+ .word 0x0
+ movq vaxnan, *4(ap)
+ ret
+_ieemad_: # IEEMAPD (MAPIN, MAPOUT)
+ .word 0x0
+ movl *4(ap), mapin
+ movl *8(ap), mapout
+ ret
+_ieestd_: # IEESTATD (NIN, NOUT)
+ .word 0x0
+ movl nanin, *4(ap)
+ movl nanout, *8(ap)
+ ret
+_ieezsd_: # IEEZSTATD ()
+ .word 0x0
+ clrl nanin
+ clrl nanout
+ ret
+
+cvt_vax_ieee: # R4=in, R5=out
+ movl (r4)+, r1 # get vax double
+ movl (r4)+, r0 # get vax double
+
+ tstl mapout # map NaNs on output?
+ beql L4 # no, just output value
+ cmpl r0, vaxnan # yes, check if reserved value
+ bneq L4 # no, just output value
+ cmpl r1, vaxnan+4 # yes, check if reserved value
+ bneq L4 # no, just output value
+ clrl r0 # generate IEEE NaN value
+ clrl r1 # generate IEEE NaN value
+ insv $2047, $20, $11, r1 # insert NaN exponent (2047)
+ incl nanout # increment counter
+ jbr L5
+L4:
+ rotl $16, r0, r0 # swap words -> r0
+ rotl $16, r1, r1 # swap words -> r1
+ extzv $23, $8, r1, r2 # 8 bit exponent -> r2
+ beql L6 # branch if zero exponent
+ extzv $2, $1, r0, r3 # get round bit -> r3
+ ashq $-3, r0, r0 # shift 64 data bits by 3
+ addw2 $(1024-130), r2 # adjust exponent bias
+ insv r2, $20, $11, r1 # insert new exponent
+ blbc r3, L5 # branch if round bit clear
+ incl r0 # round low longword
+ adwc $0, r1 # carry to high longword
+L5:
+ movl sp, r3 # r3 points to input byte
+ pushl r1 # push r1 on stack
+ pushl r0 # push r0 on stack
+ movb -(r3), (r5)+ # output quadword, swapped
+ movb -(r3), (r5)+
+ movb -(r3), (r5)+
+ movb -(r3), (r5)+
+ movb -(r3), (r5)+
+ movb -(r3), (r5)+
+ movb -(r3), (r5)+
+ movb -(r3), (r5)+
+ addl2 $8, sp # pop stack
+ rsb # all done
+L6:
+ clrl r0 # return all 64 bits zero
+ clrl r1
+ jbr L5
+
+cvt_ieee_vax: # R4=in, R5=out
+ movb (r4)+, -(sp) # byte swap quadword onto stack
+ movb (r4)+, -(sp)
+ movb (r4)+, -(sp)
+ movb (r4)+, -(sp)
+ movb (r4)+, -(sp)
+ movb (r4)+, -(sp)
+ movb (r4)+, -(sp)
+ movb (r4)+, -(sp)
+
+ movl (sp)+, r0 # pop low bits
+ movl (sp)+, r1 # pop high bits
+ extzv $20, $11, r1, r2 # exponent -> r2
+ beql L10 # zero exponent
+ tstl mapin # map NaNs on input?
+ beql L9 # no, don't check value
+ cmpl r2, $2047 # NaN double has exponent 2047
+ beql L11 # yes, output vaxnan
+L9:
+ extzv $31, $1, r1, r3 # save sign bit
+ ashq $3, r0, r0 # shift 64 bits left 3 bits
+ subw2 $(1024-130), r2 # adjust exponent bias
+ bleq L10 # return zero if underflow
+ cmpw r2, $256 # compare with max VAX exponent
+ bgeq L11 # return VAX-NaN if overflow
+ insv r2, $23, $8, r1 # insert VAX-D exponent
+ insv r3, $31, $1, r1 # restore sign bit
+
+ rotl $16, r1, (r5)+ # output VAX double
+ rotl $16, r0, (r5)+ # output VAX double
+ rsb
+L10:
+ clrl (r5)+ # return all 64 bits zero
+ clrl (r5)+
+ rsb
+L11:
+ moval vaxnan, r3 # return VAX equiv. of NaN
+ movl (r3)+, (r5)+
+ movl (r3)+, (r5)+
+ incl nanin
+ rsb
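A hedged usage sketch of the NaN-mapping interface from C, calling the short six-character entry points defined above by reference, Fortran-style. It links only against this module (i.e., on a VAX), and the buffer contents here are placeholders:

    #include <stdio.h>
    #include <string.h>

    extern void ieesnd_(double *vaxnan);            /* IEESNAND */
    extern void ieemad_(int *mapin, int *mapout);   /* IEEMAPD  */
    extern void ieevud_(double *ieee, double *vax, int *nelem); /* IEEVUPKD */
    extern void ieestd_(int *nin, int *nout);       /* IEESTATD */

    int main(void)
    {
        double nan_sub = -9999.0, ieee[64], vax[64];
        int on = 1, nelem = 64, nin, nout;

        memset(ieee, 0, sizeof ieee);   /* placeholder input data */
        ieesnd_(&nan_sub);              /* NaN/overflow inputs -> -9999.0 */
        ieemad_(&on, &on);              /* map NaNs on input and output */
        ieevud_(ieee, vax, &nelem);     /* bulk IEEE -> VAX conversion */
        ieestd_(&nin, &nout);           /* counters zeroed by ieesnd_ */
        printf("NaNs mapped: in=%d out=%d\n", nin, nout);
        return 0;
    }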
diff --git a/unix/as.vax/ieeer.s b/unix/as.vax/ieeer.s
new file mode 100644
index 00000000..789712aa
--- /dev/null
+++ b/unix/as.vax/ieeer.s
@@ -0,0 +1,153 @@
+# Copyright(c) 1986 Association of Universities for Research in Astronomy Inc.
+#
+# IEEER.S -- IEEE real to VAX single precision floating conversions.
+#
+# ieepakr (x) # scalar, vax->ieee
+# ieeupkr (x) # scalar, ieee->vax
+# ieevpakr (native, ieee, nelem) # vector, vax->ieee
+# ieevupkr (ieee, native, nelem) # vector, ieee->vax
+# ieesnanr (NaN) # set VAX NaN value
+# ieegnanr (NaN) # get VAX NaN value
+# ieemapr (mapin, mapout) # enable NaN mapping
+# ieestatr (nin, nout) # get num NaN values mapped
+# ieezstatr () # zero NaN counters
+#
+# These routines convert between the VAX and IEEE real floating formats,
+# operating upon a single value or an array of values. +/- zero is converted
+# to zero. When converting IEEE to VAX, underflow maps to zero, and exponent
+# overflow and NaN input values map to the value set by IEESNANR (default 0).
+# These routines are functionally equivalent to the semi-portable versions of
+# the IRAF ieee/native floating conversion routines in osb$ieeer.x.
+# TODO - Add a function callback option for processing NaN values.
+
+ .data
+vaxnan: .long 0
+nanin: .long 0
+nanout: .long 0
+mapin: .long 1 # enable input NaN mapping by default for VAX
+mapout: .long 0
+
+ .text
+ .align 1
+ .globl _ieepar_
+ .globl _ieevpr_
+ .globl _ieeupr_
+ .globl _ieevur_
+ .globl _ieesnr_
+ .globl _ieegnr_
+ .globl _ieemar_
+ .globl _ieestr_
+ .globl _ieezsr_
+
+_ieepar_: # IEEPAKR (X)
+ .word 0x0c
+ movl 4(ap), r2 # data addr -> r2
+ movl r2, r3 # output clobbers input
+ jsb cvt_vax_ieee # convert value
+ ret
+_ieevpr_: # IEEVPAKR (VAX, IEEE, NELEM)
+ .word 0x1c
+ movl 4(ap), r2 # input vector -> r2
+ movl 8(ap), r3 # output vector -> r3
+ movl *12(ap), r4 # loop counter
+L1: jsb cvt_vax_ieee # convert one value
+ sobgtr r4, L1 # loop
+ ret
+_ieeupr_: # IEEUPKR (X)
+ .word 0x0c
+ movl 4(ap), r2 # data addr -> r2
+ movl r2, r3 # output clobbers input
+ jsb cvt_ieee_vax # convert value
+ ret
+_ieevur_: # IEEVUPKR (IEEE, VAX, NELEM)
+ .word 0x1c
+ movl 4(ap), r2 # input vector -> r2
+ movl 8(ap), r3 # output vector -> r3
+ movl *12(ap), r4 # loop counter
+L2: jsb cvt_ieee_vax # convert one value
+ sobgtr r4, L2 # loop
+ ret
+_ieesnr_: # IEESNANR (VAXNAN)
+ .word 0x0
+ movl *4(ap), vaxnan
+ clrl nanin
+ clrl nanout
+ ret
+_ieegnr_: # IEEGNANR (VAXNAN)
+ .word 0x0
+ movl vaxnan, *4(ap)
+ ret
+_ieemar_: # IEEMAPR (MAPIN, MAPOUT)
+ .word 0x0
+ movl *4(ap), mapin
+ movl *8(ap), mapout
+ ret
+_ieestr_: # IEESTATR (NIN, NOUT)
+ .word 0x0
+ movl nanin, *4(ap)
+ movl nanout, *8(ap)
+ ret
+_ieezsr_: # IEEZSTATR ()
+ .word 0x0
+ clrl nanin
+ clrl nanout
+ ret
+
+cvt_vax_ieee: # R2=in, R3=out
+ movl (r2)+, r0 # vax value -> r0
+
+ tstl mapout # map NaNs on output?
+ beql L4 # no, just output value
+ cmpl r0, vaxnan # yes, check if reserved value
+ bneq L4 # no, just output value
+ clrl r0 # generate IEEE NaN value
+ insv $255, $23, $8, r0 # insert NaN exponent (255)
+ incl nanout # increment counter
+ jbr L5
+L4:
+ rotl $16, r0, r0 # swap words -> r0
+ extzv $23, $8, r0, r1 # 8 bit exponent -> r1
+ beql L6 # branch if zero exponent
+ subw2 $2, r1 # adjust exponent bias
+ bleq L6 # return zero if underflow
+ insv r1, $23, $8, r0 # insert new exponent
+L5:
+	movl	sp, r1			# r1 points to input byte
+ pushl r0 # push r0 on stack
+ movb -(r1), (r3)+ # output longword, swapped
+ movb -(r1), (r3)+
+ movb -(r1), (r3)+
+ movb -(r1), (r3)+
+ tstl (sp)+ # pop stack
+ rsb # all done
+L6:
+ clrl r0 # return all 32 bits zero
+ jbr L5
+
+cvt_ieee_vax: # R2=in, R3=out
+ movb (r2)+, -(sp) # byte swap longword onto stack
+ movb (r2)+, -(sp)
+ movb (r2)+, -(sp)
+ movb (r2)+, -(sp)
+ movl (sp)+, r0 # pop swapped value -> r0
+ extzv $23, $8, r0, r1 # exponent -> r1
+ beql L10 # zero exponent
+ tstl mapin # map NaNs on input?
+ beql L9 # no, don't check value
+ cmpl r1, $255 # NaN has exponent 255
+ beql L11 # yes, output vaxnan
+L9:
+ addw2 $2, r1 # adjust exponent bias
+ cmpw r1, $256 # compare with max VAX exponent
+ bgeq L11 # return VAX-NaN if overflow
+	insv	r1, $23, $8, r0		# insert VAX-F exponent
+ rotl $16, r0, (r3)+ # output VAX value
+ rsb
+L10:
+ clrl (r3)+ # return all 32 bits zero
+ rsb
+L11:
+ moval vaxnan, r1 # return VAX equiv. of NaN
+ movl (r1)+, (r3)+
+ incl nanin
+ rsb
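The heart of cvt_ieee_vax is a word swap plus an exponent bias adjustment of +2, since VAX F and IEEE single share the same sign/exponent/fraction field widths. A C sketch of the normal-number path only (no NaN, overflow, or denormal handling; the input is the same big-endian byte stream the assembler swaps):

    #include <stdint.h>

    /* IEEE single (big-endian bytes) -> VAX F bit pattern; sketch. */
    uint32_t ieee_to_vaxf(const uint8_t in[4])
    {
        uint32_t v = (uint32_t) in[0] << 24 | (uint32_t) in[1] << 16
                   | (uint32_t) in[2] << 8  | (uint32_t) in[3];
        uint32_t exp = (v >> 23) & 0xff;
        if (exp == 0)
            return 0;                           /* +/-0, denormal -> 0 */
        v = (v & ~(0xffu << 23)) | ((exp + 2) << 23);   /* rebias +2 */
        return (v << 16) | (v >> 16);           /* VAX word order */
    }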
diff --git a/unix/as.vax/ishift.s b/unix/as.vax/ishift.s
new file mode 100644
index 00000000..1319556f
--- /dev/null
+++ b/unix/as.vax/ishift.s
@@ -0,0 +1,57 @@
+# IAND, IOR, ISHIFT -- Bitwise boolean integer functions for the NCAR
+# package. The shift function rotates the bits left, end-around, when the
+# shift count is positive, and shifts right with sign extension when negative.
+
+ .data # Bitwise boolean AND
+ .text
+ .align 1
+ .globl _iand_
+_iand_:
+ .word L12
+ jbr L14
+L15:
+ mcoml *8(ap),r0
+ bicl3 r0,*4(ap),r0
+ ret
+ ret
+ .set L12,0x0
+L14:
+ jbr L15
+
+
+ .data # Bitwise boolean OR
+ .text
+ .align 1
+ .globl _ior_
+_ior_:
+ .word L17
+ jbr L19
+L20:
+ bisl3 *8(ap),*4(ap),r0
+ ret
+ ret
+ .set L17,0x0
+L19:
+ jbr L20
+
+
+ .data # Bitwise SHIFT
+ .text
+ .align 1
+ .globl _ishift_
+_ishift_:
+ .word L22
+ jbr L24
+L25:
+ movl *8(ap),r11
+ jlss L26
+ rotl r11,*4(ap),r0 # left rotate longword
+ ret
+L26:
+ ashl r11,*4(ap),r0 # right shift with sign extension
+ ret
+ ret
+ .set L22,0x800
+L24:
+ jbr L25
+ .data
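ISHIFT's two paths, an end-around left rotate for a positive count and an arithmetic right shift for a negative one, read as follows in C (assumed by-value signature; the assembler takes both arguments by reference):

    #include <stdint.h>

    int ishift(int val, int n)
    {
        uint32_t u = (uint32_t) val;
        if (n >= 0) {
            n &= 31;                    /* keep the rotate count in 0..31 */
            return (int) ((u << n) | (n ? u >> (32 - n) : 0));
        }
        return val >> -n;   /* arithmetic on most compilers, like ashl */
    }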
diff --git a/unix/as.vax/zsvjmp.s b/unix/as.vax/zsvjmp.s
new file mode 100644
index 00000000..f4664dac
--- /dev/null
+++ b/unix/as.vax/zsvjmp.s
@@ -0,0 +1,35 @@
+# ZSVJMP, ZDOJMP -- Set up a jump (non-local goto) by saving the processor
+# registers in the buffer jmpbuf. A subsequent call to ZDOJMP restores
+# the registers, effecting a call in the context of the procedure which
+# originally called ZSVJMP, but with the new status code. These are Fortran
+# callable procedures.
+
+ .globl _zsvjmp_
+
+ # The following has nothing to do with ZSVJMP, and is included here
+ # only because this assembler module is loaded with every process.
+ # This code sets the value of the symbol MEM (the Mem common) to zero,
+ # setting the origin for IRAF pointers to zero rather than some
+ # arbitrary value, and ensuring that the MEM common is aligned for
+ # all datatypes as well as page aligned. A further advantage is that
+ # references to NULL pointers will cause a memory violation.
+
+ .globl _mem_
+ .set _mem_, 0
+
+ .set JMPBUF, 4
+ .set STATUS, 8
+
+ # The strategy here is to build on the services provided by the C
+ # setjmp/longjmp. Note that we cannot do this by writing a C function
+ # which calls setjmp, because the procedure which calls setjmp cannot
+	# return before the longjmp is executed (we want to return to the caller
+	# of the routine containing the setjmp call, not the routine itself).
+
+ .align 1
+_zsvjmp_: # CALL ZSVJMP (JMPBUF, STATUS)
+ .word 0x0
+ movl STATUS(ap),*JMPBUF(ap) # jmp_buf[0] = addr of status variable
+ clrl *STATUS(ap) # return zero status
+ addl2 $4, JMPBUF(ap) # skip first cell of jmp_buf
+ movl $1, (ap) # SETJMP (JMP_BUF)
+ jmp _setjmp+2 # let setjmp do the rest.
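The calling contract the comments describe, sketched from C: zsvjmp_ returns twice, first with status 0 and again with whatever status a later zdojmp_ supplies (zdojmp_ is the companion routine described in the header; the jump buffer length is OS-dependent, and 16 longwords here is only a placeholder):

    #include <stdio.h>

    extern void zsvjmp_(int *jmpbuf, int *status);
    extern void zdojmp_(int *jmpbuf, int *status);

    static int jmpbuf[16];      /* assumed LEN_JUMPBUF */

    static void fail(void) { int code = 42; zdojmp_(jmpbuf, &code); }

    int main(void)
    {
        int status;
        zsvjmp_(jmpbuf, &status);   /* status = 0 now, 42 after zdojmp_ */
        if (status == 0)
            fail();
        else
            printf("resumed with status %d\n", status);
        return 0;
    }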
diff --git a/unix/as.vax/zsvjmp.s.ORIG b/unix/as.vax/zsvjmp.s.ORIG
new file mode 100644
index 00000000..59911970
--- /dev/null
+++ b/unix/as.vax/zsvjmp.s.ORIG
@@ -0,0 +1,55 @@
+# ZSVJMP, ZDOJMP -- Set up a jump (non-local goto) by saving the processor
+# registers in the buffer jmpbuf. A subsequent call to ZDOJMP restores
+# the registers, effecting a call in the context of the procedure which
+# originally called ZSVJMP, but with the new status code. These are Fortran
+# callable procedures.
+
+ .globl _zsvjmp_
+ .globl _zdojmp_
+
+ # The following has nothing to do with ZSVJMP, and is included here
+ # only because this assembler module is loaded with every process.
+ # This code sets the value of the symbol MEM (the Mem common) to zero,
+ # setting the origin for IRAF pointers to zero rather than some
+ # arbitrary value, and ensuring that the MEM common is aligned for
+ # all datatypes as well as page aligned. A further advantage is that
+ # references to NULL pointers will cause a memory violation.
+
+ .globl _mem_
+ .set _mem_, 0
+
+ .set JMPBUF, 4
+ .set STATUS, 8
+
+ .align 1
+_zsvjmp_: # set up jump
+ .word 0x0
+ movl JMPBUF(ap), r0
+ movl STATUS(ap), (r0)+ # save address of status variable
+ movq r6, (r0)+
+ movq r8, (r0)+
+ movq r10, (r0)+
+ movq 8(fp), (r0)+ # ap, fp
+ movab 12(ap), (r0)+ # sp
+ movl 16(fp), (r0) # saved pc
+ clrl *STATUS(ap)
+ clrl r0
+ ret
+
+ .align 1
+_zdojmp_: # do jump (return again from zsvjmp)
+ .word 0x0
+ movl JMPBUF(ap), r1
+ movl (r1)+, r0 # get address of status variable
+ movl *STATUS(ap), (r0) # put new status there
+ movq (r1)+, r6
+ movq (r1)+, r8
+ movq (r1)+, r10
+ movq (r1)+, r12
+ movl (r1)+, sp
+ tstl (r0) # must not return status=0
+ bneq L1
+ movzbl $1, (r0)
+L1:
+ movl (r0), r0
+ jmp *(r1)