14 files changed, 1463 insertions, 0 deletions
diff --git a/math/llsq/original_f/bndacc.f b/math/llsq/original_f/bndacc.f
new file mode 100644
index 00000000..80b9914c
--- /dev/null
+++ b/math/llsq/original_f/bndacc.f
@@ -0,0 +1,74 @@
+      subroutine bndacc (g,mdg,nb,ip,ir,mt,jt)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1973 jun 12
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c	   sequential algorithm for banded least squares problem..
+c	   accumulation phase.	    for solution phase use bndsol.
+c
+c     the calling program must set ir=1 and ip=1 before the first call
+c     to bndacc for a new case.
+c
+c     the second subscript of g( ) must be dimensioned at least
+c     nb+1 in the calling program.
+      dimension g(mdg,1)
+      zero=0.
+c
+c	       alg. steps 1-4 are performed external to this subroutine.
+c
+      nbp1=nb+1
+      if (mt.le.0) return
+c					      alg. step 5
+      if (jt.eq.ip) go to 70
+c					      alg. steps 6-7
+      if (jt.le.ir) go to 30
+c					      alg. steps 8-9
+      do 10 i=1,mt
+	ig1=jt+mt-i
+	ig2=ir+mt-i
+	do 10 j=1,nbp1
+   10	g(ig1,j)=g(ig2,j)
+c					      alg. step 10
+      ie=jt-ir
+      do 20 i=1,ie
+	ig=ir+i-1
+	do 20 j=1,nbp1
+   20	g(ig,j)=zero
+c					      alg. step 11
+      ir=jt
+c					      alg. step 12
+   30 mu=min0(nb-1,ir-ip-1)
+      if (mu.eq.0) go to 60
+c					      alg. step 13
+      do 50 l=1,mu
+c					      alg. step 14
+	k=min0(l,jt-ip)
+c					      alg. step 15
+	lp1=l+1
+	ig=ip+l
+	do 40 i=lp1,nb
+	  jg=i-k
+   40	  g(ig,jg)=g(ig,i)
+c					      alg. step 16
+	do 50 i=1,k
+	jg=nbp1-i
+   50	g(ig,jg)=zero
+c					      alg. step 17
+   60 ip=jt
+c					      alg. steps 18-19
+   70 mh=ir+mt-ip
+      kh=min0(nbp1,mh)
+c					      alg. step 20
+      do 80 i=1,kh
+   80	call h12 (1,i,max0(i+1,ir-ip+1),mh,g(ip,i),1,rho,
+     1		  g(ip,i+1),1,mdg,nbp1-i)
+c					      alg. step 21
+      ir=ip+kh
+c					      alg. step 22
+      if (kh.lt.nbp1) go to 100
+c					      alg. step 23
+      do 90 i=1,nb
+   90	g(ir-1,i)=zero
+c					      alg. step 24
+  100 continue
+c					      alg. step 25
+      return
+      end
diff --git a/math/llsq/original_f/bndsol.f b/math/llsq/original_f/bndsol.f
new file mode 100644
index 00000000..05dd35d7
--- /dev/null
+++ b/math/llsq/original_f/bndsol.f
@@ -0,0 +1,70 @@
+      subroutine bndsol (mode,g,mdg,nb,ip,ir,x,n,rnorm)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1973 jun 12
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c	   sequential solution of a banded least squares problem..
+c	   solution phase.   for the accumulation phase use bndacc.
+c
+c     mode = 1	   solve r*x=y	 where r and y are in the g( ) array
+c		   and x will be stored in the x( ) array.
+c	     2	   solve (r**t)*x=y   where r is in g( ),
+c		   y is initially in x( ), and x replaces y in x( ),
+c	     3	   solve r*x=y	 where r is in g( ).
+c		   y is initially in x( ), and x replaces y in x( ).
+c
+c     the second subscript of g( ) must be dimensioned at least
+c     nb+1 in the calling program.
+      dimension g(mdg,1),x(n)
+      zero=0.
+c
+      rnorm=zero
+      go to (10,90,50), mode
+c				    ********************* mode = 1
+c				    algc step 26
+   10	   do 20 j=1,n
+   20	   x(j)=g(j,nb+1)
+      rsq=zero
+      np1=n+1
+      irm1=ir-1
+      if (np1.gt.irm1) go to 40
+	   do 30 j=np1,irm1
+   30	   rsq=rsq+g(j,nb+1)**2
+      rnorm=sqrt(rsq)
+   40 continue
+c				    ********************* mode = 3
+c				    alg. step 27
+   50	   do 80 ii=1,n
+	   i=n+1-ii
+c				    alg. step 28
+	   s=zero
+	   l=max0(0,i-ip)
+c				    alg. step 29
+	   if (i.eq.n) go to 70
+c				    alg. step 30
+	   ie=min0(n+1-i,nb)
+		do 60 j=2,ie
+		jg=j+l
+		ix=i-1+j
+   60		s=s+g(i,jg)*x(ix)
+c				    alg. step 31
+   70	   if (g(i,l+1)) 80,130,80
+   80	   x(i)=(x(i)-s)/g(i,l+1)
+c				    alg. step 32
+      return
+c				    ********************* mode = 2
+   90	   do 120 j=1,n
+	   s=zero
+	   if (j.eq.1) go to 110
+	   i1=max0(1,j-nb+1)
+	   i2=j-1
+		do 100 i=i1,i2
+		l=j-i+1+max0(0,i-ip)
+  100		s=s+x(i)*g(i,l)
+  110	   l=max0(0,j-ip)
+	   if (g(j,l+1)) 120,130,120
+  120	   x(j)=(x(j)-s)/g(j,l+1)
+      return
+c
+  130 write (6,140) mode,i,j,l
+      stop
+  140 format (30h0zero diagonal term in bndsol.,12h mode,i,j,l=,4i6)
+      end
diff --git a/math/llsq/original_f/diff.f b/math/llsq/original_f/diff.f
new file mode 100644
index 00000000..79dacb92
--- /dev/null
+++ b/math/llsq/original_f/diff.f
@@ -0,0 +1,6 @@
+      function diff(x,y)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1973 june 7
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+      diff=x-y
+      return
+      end
diff --git a/math/llsq/original_f/g1.f b/math/llsq/original_f/g1.f
new file mode 100644
index 00000000..5d44c50b
--- /dev/null
+++ b/math/llsq/original_f/g1.f
@@ -0,0 +1,33 @@
+      subroutine g1 (a,b,cos,sin,sig)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1973 jun 12
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c
+c
+c     compute orthogonal rotation matrix..
+c     compute.. matrix	 (c, s) so that (c, s)(a) = (sqrt(a**2+b**2))
+c			 (-s,c) 	(-s,c)(b)   (	0	   )
+c     compute sig = sqrt(a**2+b**2)
+c	 sig is computed last to allow for the possibility that
+c	 sig may be in the same location as a or b .
+c
+      zero=0.
+      one=1.
+      if (abs(a).le.abs(b)) go to 10
+      xr=b/a
+      yr=sqrt(one+xr**2)
+      cos=sign(one/yr,a)
+      sin=cos*xr
+      sig=abs(a)*yr
+      return
+   10 if (b) 20,30,20
+   20 xr=a/b
+      yr=sqrt(one+xr**2)
+      sin=sign(one/yr,b)
+      cos=sin*xr
+      sig=abs(b)*yr
+      return
+   30 sig=zero
+      cos=zero
+      sin=one
+      return
+      end
diff --git a/math/llsq/original_f/g2.f b/math/llsq/original_f/g2.f
new file mode 100644
index 00000000..6f01a582
--- /dev/null
+++ b/math/llsq/original_f/g2.f
@@ -0,0 +1,9 @@
+      subroutine g2    (cos,sin,x,y)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1972 dec 15
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c	   apply the rotation computed by g1 to (x,y).
+      xr=cos*x+sin*y
+      y=-sin*x+cos*y
+      x=xr
+      return
+      end
diff --git a/math/llsq/original_f/gen.f b/math/llsq/original_f/gen.f
new file mode 100644
index 00000000..98181a93
--- /dev/null
+++ b/math/llsq/original_f/gen.f
@@ -0,0 +1,28 @@
+      function	 gen(anoise)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1972 dec 15
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c	   generate numbers for construction of test cases.
+      if (anoise) 10,30,20
+   10 mi=891
+      mj=457
+      i=5
+      j=7
+      aj=0.
+      gen=0.
+      return
+c
+c     the sequence of values of j  is bounded between 1 and 996
+c     if initial j = 1,2,3,4,5,6,7,8, or 9, the period is 332
+   20 j=j*mj
+      j=j-997*(j/997)
+      aj=j-498
+c     the sequence of values of i  is bounded between 1 and 999
+c     if initial i = 1,2,3,6,7, or 9,  the period will be 50
+c     if initial i = 4 or 8   the period will be 25
+c     if initial i = 5	      the period will be 10
+   30 i=i*mi
+      i=i-1000*(i/1000)
+      ai=i-500
+      gen=ai+aj*anoise
+      return
+      end
diff --git a/math/llsq/original_f/h12.f b/math/llsq/original_f/h12.f
new file mode 100644
index 00000000..81daa629
--- /dev/null
+++ b/math/llsq/original_f/h12.f
@@ -0,0 +1,80 @@
+c     subroutine h12 (mode,lpivot,l1,m,u,iue,up,c,ice,icv,ncv)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1973 jun 12
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c
+c     construction and/or application of a single
+c     householder transformation..     q = i + u*(u**t)/b
+c
+c     mode    = 1 or 2	 to select algorithm  h1  or  h2 .
+c     lpivot is the index of the pivot element.
+c     l1,m   if l1 .le. m   the transformation will be constructed to
+c	     zero elements indexed from l1 through m.	if l1 gt. m
+c	     the subroutine does an identity transformation.
+c     u(),iue,up    on entry to h1 u() contains the pivot vector.
+c		    iue is the storage increment between elements.
+c					on exit from h1 u() and up
+c		    contain quantities defining the vector u of the
+c		    householder transformation.   on entry to h2 u()
+c		    and up should contain quantities previously computed
+c		    by h1.  these will not be modified by h2.
+c     c()    on entry to h1 or h2 c() contains a matrix which will be
+c	     regarded as a set of vectors to which the householder
+c	     transformation is to be applied.  on exit c() contains the
+c	     set of transformed vectors.
+c     ice    storage increment between elements of vectors in c().
+c     icv    storage increment between vectors in c().
+c     ncv    number of vectors in c() to be transformed. if ncv .le. 0
+c	     no operations will be done on c().
+c
+      subroutine h12 (mode,lpivot,l1,m,u,iue,up,c,ice,icv,ncv)
+      dimension u(iue,m), c(1)
+      double precision sm,b
+      one=1.
+c
+      if (0.ge.lpivot.or.lpivot.ge.l1.or.l1.gt.m) return
+      cl=abs(u(1,lpivot))
+      if (mode.eq.2) go to 60
+c			     ****** construct the transformation. ******
+	  do 10 j=l1,m
+   10	  cl=amax1(abs(u(1,j)),cl)
+      if (cl) 130,130,20
+   20 clinv=one/cl
+      sm=(dble(u(1,lpivot))*clinv)**2
+	  do 30 j=l1,m
+   30	  sm=sm+(dble(u(1,j))*clinv)**2
+c			       convert dble. prec. sm to sngl. prec. sm1
+      sm1=sm
+      cl=cl*sqrt(sm1)
+      if (u(1,lpivot)) 50,50,40
+   40 cl=-cl
+   50 up=u(1,lpivot)-cl
+      u(1,lpivot)=cl
+      go to 70
+c	     ****** apply the transformation  i+u*(u**t)/b  to c. ******
+c
+   60 if (cl) 130,130,70
+   70 if (ncv.le.0) return
+      b=dble(up)*u(1,lpivot)
+c			b  must be nonpositive here.  if b = 0., return.
+c
+      if (b) 80,130,130
+   80 b=one/b
+      i2=1-icv+ice*(lpivot-1)
+      incr=ice*(l1-lpivot)
+	  do 120 j=1,ncv
+	  i2=i2+icv
+	  i3=i2+incr
+	  i4=i3
+	  sm=c(i2)*dble(up)
+	      do 90 i=l1,m
+	      sm=sm+c(i3)*dble(u(1,i))
+   90	      i3=i3+ice
+	  if (sm) 100,120,100
+  100	  sm=sm*b
+	  c(i2)=c(i2)+sm*dble(up)
+	      do 110 i=l1,m
+	      c(i4)=c(i4)+sm*dble(u(1,i))
+  110	      i4=i4+ice
+  120	  continue
+  130 return
+      end
diff --git a/math/llsq/original_f/hfti.f b/math/llsq/original_f/hfti.f
new file mode 100644
index 00000000..62dcebe4
--- /dev/null
+++ b/math/llsq/original_f/hfti.f
@@ -0,0 +1,136 @@
+      subroutine hfti (a,mda,m,n,b,mdb,nb,tau,krank,rnorm,h,g,ip)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1973 jun 12
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c	   solve least squares problem using algorithm, hfti.
+c
+      dimension a(mda,n),b(mdb,nb),h(n),g(n),rnorm(nb)
+      integer ip(n)
+      double precision sm,dzero
+      szero=0.
+      dzero=0.d0
+      factor=0.001
+c
+      k=0
+      ldiag=min0(m,n)
+      if (ldiag.le.0) go to 270
+	  do 80 j=1,ldiag
+	  if (j.eq.1) go to 20
+c
+c     update squared column lengths and find lmax
+c    ..
+	  lmax=j
+	      do 10 l=j,n
+	      h(l)=h(l)-a(j-1,l)**2
+	      if (h(l).gt.h(lmax)) lmax=l
+   10	      continue
+	  if(diff(hmax+factor*h(lmax),hmax)) 20,20,50
+c
+c     compute squared column lengths and find lmax
+c    ..
+   20	  lmax=j
+	      do 40 l=j,n
+	      h(l)=0.
+		  do 30 i=j,m
+   30		  h(l)=h(l)+a(i,l)**2
+	      if (h(l).gt.h(lmax)) lmax=l
+   40	      continue
+	  hmax=h(lmax)
+c    ..
+c     lmax has been determined
+c
+c     do column interchanges if needed.
+c    ..
+   50	  continue
+	  ip(j)=lmax
+	  if (ip(j).eq.j) go to 70
+	      do 60 i=1,m
+	      tmp=a(i,j)
+	      a(i,j)=a(i,lmax)
+   60	      a(i,lmax)=tmp
+	  h(lmax)=h(j)
+c
+c     compute the j-th transformation and apply it to a and b.
+c    ..
+   70	  call h12 (1,j,j+1,m,a(1,j),1,h(j),a(1,j+1),1,mda,n-j)
+   80	  call h12 (2,j,j+1,m,a(1,j),1,h(j),b,1,mdb,nb)
+c
+c     determine the pseudorank, k, using the tolerance, tau.
+c    ..
+	  do 90 j=1,ldiag
+	  if (abs(a(j,j)).le.tau) go to 100
+   90	  continue
+      k=ldiag
+      go to 110
+  100 k=j-1
+  110 kp1=k+1
+c
+c     compute the norms of the residual vectors.
+c
+      if (nb.le.0) go to 140
+	  do 130 jb=1,nb
+	  tmp=szero
+	  if (kp1.gt.m) go to 130
+	      do 120 i=kp1,m
+  120	      tmp=tmp+b(i,jb)**2
+  130	  rnorm(jb)=sqrt(tmp)
+  140 continue
+c					    special for pseudorank = 0
+      if (k.gt.0) go to 160
+      if (nb.le.0) go to 270
+	  do 150 jb=1,nb
+	      do 150 i=1,n
+  150	      b(i,jb)=szero
+      go to 270
+c
+c     if the pseudorank is less than n compute householder
+c     decomposition of first k rows.
+c    ..
+  160 if (k.eq.n) go to 180
+	  do 170 ii=1,k
+	  i=kp1-ii
+  170	  call h12 (1,i,kp1,n,a(i,1),mda,g(i),a,mda,1,i-1)
+  180 continue
+c
+c
+      if (nb.le.0) go to 270
+	  do 260 jb=1,nb
+c
+c     solve the k by k triangular system.
+c    ..
+	      do 210 l=1,k
+	      sm=dzero
+	      i=kp1-l
+	      if (i.eq.k) go to 200
+	      ip1=i+1
+		  do 190 j=ip1,k
+  190		  sm=sm+a(i,j)*dble(b(j,jb))
+  200	      sm1=sm
+  210	      b(i,jb)=(b(i,jb)-sm1)/a(i,i)
+c
+c     complete computation of solution vector.
+c    ..
+	  if (k.eq.n) go to 240
+	      do 220 j=kp1,n
+  220	      b(j,jb)=szero
+	      do 230 i=1,k
+  230	      call h12 (2,i,kp1,n,a(i,1),mda,g(i),b(1,jb),1,mdb,1)
+c
+c      re-order the solution vector to compensate for the
+c      column interchanges.
+c    ..
+  240	      do 250 jj=1,ldiag
+	      j=ldiag+1-jj
+	      if (ip(j).eq.j) go to 250
+	      l=ip(j)
+	      tmp=b(l,jb)
+	      b(l,jb)=b(j,jb)
+	      b(j,jb)=tmp
+  250	      continue
+  260	  continue
+c    ..
+c     the solution vectors, x, are now
+c     in the first  n  rows of the array b(,).
+c
+  270 krank=k
+      return
+      end
diff --git a/math/llsq/original_f/ldp.f b/math/llsq/original_f/ldp.f
new file mode 100644
index 00000000..65d4c77e
--- /dev/null
+++ b/math/llsq/original_f/ldp.f
@@ -0,0 +1,79 @@
+      subroutine ldp (g,mdg,m,n,h,x,xnorm,w,index,mode)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1974 mar 1
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c
+c	     **********  least distance programming  **********
+c
+      integer index(m)
+      dimension g(mdg,n), h(m), x(n), w(1)
+      zero=0.
+      one=1.
+      if (n.le.0) go to 120
+	  do 10 j=1,n
+   10	  x(j)=zero
+      xnorm=zero
+      if (m.le.0) go to 110
+c
+c     the declared dimension of w() must be at least (n+1)*(m+2)+2*m.
+c
+c      first (n+1)*m locs of w()   =  matrix e for problem nnls.
+c	next	 n+1 locs of w()   =  vector f for problem nnls.
+c	next	 n+1 locs of w()   =  vector z for problem nnls.
+c	next	   m locs of w()   =  vector y for problem nnls.
+c	next	   m locs of w()   =  vector wdual for problem nnls.
+c     copy g**t into first n rows and m columns of e.
+c     copy h**t into row n+1 of e.
+c
+      iw=0
+	  do 30 j=1,m
+	      do 20 i=1,n
+	      iw=iw+1
+   20	      w(iw)=g(j,i)
+	  iw=iw+1
+   30	  w(iw)=h(j)
+      if=iw+1
+c				 store n zeros followed by a one into f.
+	  do 40 i=1,n
+	  iw=iw+1
+   40	  w(iw)=zero
+      w(iw+1)=one
+c
+      np1=n+1
+      iz=iw+2
+      iy=iz+np1
+      iwdual=iy+m
+c
+      call nnls (w,np1,np1,m,w(if),w(iy),rnorm,w(iwdual),w(iz),index,
+     *		 mode)
+c		       use the following return if unsuccessful in nnls.
+      if (mode.ne.1) return
+      if (rnorm) 130,130,50
+   50 fac=one
+      iw=iy-1
+	  do 60 i=1,m
+	  iw=iw+1
+c				here we are using the solution vector y.
+   60	  fac=fac-h(i)*w(iw)
+c
+      if (diff(one+fac,one)) 130,130,70
+   70 fac=one/fac
+	  do 90 j=1,n
+	  iw=iy-1
+	      do 80 i=1,m
+	      iw=iw+1
+c				here we are using the solution vector y.
+   80	      x(j)=x(j)+g(i,j)*w(iw)
+   90	  x(j)=x(j)*fac
+	  do 100 j=1,n
+  100	  xnorm=xnorm+x(j)**2
+      xnorm=sqrt(xnorm)
+c			      successful return.
+  110 mode=1
+      return
+c			      error return.	  n .le. 0.
+  120 mode=2
+      return
+c			      returning with constraints not compatible.
+  130 mode=4
+      return
+      end
diff --git a/math/llsq/original_f/nnls.f b/math/llsq/original_f/nnls.f
new file mode 100644
index 00000000..fb5aa767
--- /dev/null
+++ b/math/llsq/original_f/nnls.f
@@ -0,0 +1,278 @@
+c     subroutine nnls  (a,mda,m,n,b,x,rnorm,w,zz,index,mode)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1973 june 15
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c
+c	  **********   nonnegative least squares   **********
+c
+c     given an m by n matrix, a, and an m-vector, b,  compute an
+c     n-vector, x, which solves the least squares problem
+c
+c		       a * x = b  subject to x .ge. 0
+c
+c     a(),mda,m,n     mda is the first dimensioning parameter for the
+c		      array, a().   on entry a() contains the m by n
+c		      matrix, a.	   on exit a() contains
+c		      the product matrix, q*a , where q is an
+c		      m by m orthogonal matrix generated implicitly by
+c		      this subroutine.
+c     b()     on entry b() contains the m-vector, b.   on exit b() con-
+c	      tains q*b.
+c     x()     on entry x() need not be initialized.  on exit x() will
+c	      contain the solution vector.
+c     rnorm   on exit rnorm contains the euclidean norm of the
+c	      residual vector.
+c     w()     an n-array of working space.  on exit w() will contain
+c	      the dual solution vector.   w will satisfy w(i) = 0.
+c	      for all i in set p  and w(i) .le. 0. for all i in set z
+c     zz()     an m-array of working space.
+c     index()	  an integer working array of length at least n.
+c		  on exit the contents of this array define the sets
+c		  p and z as follows..
+c
+c		  index(1)   thru index(nsetp) = set p.
+c		  index(iz1) thru index(iz2)   = set z.
+c		  iz1 = nsetp + 1 = npp1
+c		  iz2 = n
+c     mode    this is a success-failure flag with the following
+c	      meanings.
+c	      1     the solution has been computed successfully.
+c	      2     the dimensions of the problem are bad.
+c		    either m .le. 0 or n .le. 0.
+c	      3    iteration count exceeded.  more than 3*n iterations.
+c
+      subroutine nnls (a,mda,m,n,b,x,rnorm,w,zz,index,mode)
+      dimension a(mda,n), b(m), x(n), w(n), zz(m)
+      integer index(n)
+      zero=0.
+      one=1.
+      two=2.
+      factor=0.01
+c
+      mode=1
+      if (m.gt.0.and.n.gt.0) go to 10
+      mode=2
+      return
+   10 iter=0
+      itmax=3*n
+c
+c		     initialize the arrays index() and x().
+c
+	  do 20 i=1,n
+	  x(i)=zero
+   20	  index(i)=i
+c
+      iz2=n
+      iz1=1
+      nsetp=0
+      npp1=1
+c			      ******  main loop begins here  ******
+   30 continue
+c		   quit if all coefficients are already in the solution.
+c			 or if m cols of a have been triangularized.
+c
+      if (iz1.gt.iz2.or.nsetp.ge.m) go to 350
+c
+c	  compute components of the dual (negative gradient) vector w().
+c
+	  do 50 iz=iz1,iz2
+	  j=index(iz)
+	  sm=zero
+	      do 40 l=npp1,m
+   40	      sm=sm+a(l,j)*b(l)
+   50	  w(j)=sm
+c				    find largest positive w(j).
+   60 wmax=zero
+	  do 70 iz=iz1,iz2
+	  j=index(iz)
+	  if (w(j).le.wmax) go to 70
+	  wmax=w(j)
+	  izmax=iz
+   70	  continue
+c
+c	      if wmax .le. 0. go to termination.
+c	      this indicates satisfaction of the kuhn-tucker conditions.
+c
+      if (wmax) 350,350,80
+   80 iz=izmax
+      j=index(iz)
+c
+c     the sign of w(j) is ok for j to be moved to set p.
+c     begin the transformation and check new diagonal element to avoid
+c     near linear dependence.
+c
+      asave=a(npp1,j)
+      call h12 (1,npp1,npp1+1,m,a(1,j),1,up,dummy,1,1,0)
+      unorm=zero
+      if (nsetp.eq.0) go to 100
+	  do 90 l=1,nsetp
+   90	  unorm=unorm+a(l,j)**2
+  100 unorm=sqrt(unorm)
+      if (diff(unorm+abs(a(npp1,j))*factor,unorm)) 130,130,110
+c
+c     col j is sufficiently independent.  copy b into zz, update zz and
+c   > solve for ztest ( = proposed new value for x(j) ).
+c
+  110	  do 120 l=1,m
+  120	  zz(l)=b(l)
+      call h12 (2,npp1,npp1+1,m,a(1,j),1,up,zz,1,1,1)
+      ztest=zz(npp1)/a(npp1,j)
+c
+c				      see if ztest is positive
+c     reject j as a candidate to be moved from set z to set p.
+c     restore a(npp1,j), set w(j)=0., and loop back to test dual
+c
+      if (ztest) 130,130,140
+c
+c     coeffs again.
+c
+  130 a(npp1,j)=asave
+      w(j)=zero
+      go to 60
+c
+c     the index  j=index(iz)  has been selected to be moved from
+c     set z to set p.	 update b,  update indices,  apply householder
+c     transformations to cols in new set z,  zero subdiagonal elts in
+c     col j,  set w(j)=0.
+c
+  140	  do 150 l=1,m
+  150	  b(l)=zz(l)
+c
+      index(iz)=index(iz1)
+      index(iz1)=j
+      iz1=iz1+1
+      nsetp=npp1
+      npp1=npp1+1
+c
+      if (iz1.gt.iz2) go to 170
+	  do 160 jz=iz1,iz2
+	  jj=index(jz)
+  160	  call h12 (2,nsetp,npp1,m,a(1,j),1,up,a(1,jj),1,mda,1)
+  170 continue
+c
+      if (nsetp.eq.m) go to 190
+	  do 180 l=npp1,m
+  180	  a(l,j)=zero
+  190 continue
+c
+      w(j)=zero
+c				 solve the triangular system.
+c				 store the solution temporarily in zz().
+      assign 200 to next
+      go to 400
+  200 continue
+c
+c			******	secondary loop begins here ******
+c
+c			   iteration counter.
+c
+  210 iter=iter+1
+      if (iter.le.itmax) go to 220
+      mode=3
+      write (6,440)
+      go to 350
+  220 continue
+c
+c		     see if all new constrained coeffs are feasible.
+c				   if not compute alpha.
+c
+      alpha=two
+	  do 240 ip=1,nsetp
+	  l=index(ip)
+	  if (zz(ip)) 230,230,240
+c
+  230	  t=-x(l)/(zz(ip)-x(l))
+	  if (alpha.le.t) go to 240
+	  alpha=t
+	  jj=ip
+  240	  continue
+c
+c	   if all new constrained coeffs are feasible then alpha will
+c	   still = 2.	 if so exit from secondary loop to main loop.
+c
+      if (alpha.eq.two) go to 330
+c
+c	   otherwise use alpha which will be between 0. and 1. to
+c	   interpolate between the old x and the new zz.
+c
+	  do 250 ip=1,nsetp
+	  l=index(ip)
+  250	  x(l)=x(l)+alpha*(zz(ip)-x(l))
+c
+c	 modify a and b and the index arrays to move coefficient i
+c	 from set p to set z.
+c
+      i=index(jj)
+  260 x(i)=zero
+c
+      if (jj.eq.nsetp) go to 290
+      jj=jj+1
+	  do 280 j=jj,nsetp
+	  ii=index(j)
+	  index(j-1)=ii
+	  call g1 (a(j-1,ii),a(j,ii),cc,ss,a(j-1,ii))
+	  a(j,ii)=zero
+	      do 270 l=1,n
+	      if (l.ne.ii) call g2 (cc,ss,a(j-1,l),a(j,l))
+  270	      continue
+  280	  call g2 (cc,ss,b(j-1),b(j))
+  290 npp1=nsetp
+      nsetp=nsetp-1
+      iz1=iz1-1
+      index(iz1)=i
+c
+c	 see if the remaining coeffs in set p are feasible.  they should
+c	 be because of the way alpha was determined.
+c	 if any are infeasible it is due to round-off error.  any
+c	 that are nonpositive will be set to zero
+c	 and moved from set p to set z.
+c
+	  do 300 jj=1,nsetp
+	  i=index(jj)
+	  if (x(i)) 260,260,300
+  300	  continue
+c
+c	  copy b( ) into zz( ).  then solve again and loop back.
+c
+
+	  do 310 i=1,m
+  310	  zz(i)=b(i)
+      assign 320 to next
+      go to 400
+  320 continue
+      go to 210
+c		       ******  end of secondary loop  ******
+c
+  330	  do 340 ip=1,nsetp
+	  i=index(ip)
+  340	  x(i)=zz(ip)
+c	 all new coeffs are positive.  loop back to beginning.
+      go to 30
+c
+c			 ******  end of main loop  ******
+c
+c			 come to here for termination.
+c		      compute the norm of the final residual vector.
+c
+  350 sm=zero
+      if (npp1.gt.m) go to 370
+	  do 360 i=npp1,m
+  360	  sm=sm+b(i)**2
+      go to 390
+  370	  do 380 j=1,n
+  380	  w(j)=zero
+  390 rnorm=sqrt(sm)
+      return
+c
+c     the following block of code is used as an internal subroutine
+c     to solve the triangular system, putting the solution in zz().
+c
+  400	  do 430 l=1,nsetp
+	  ip=nsetp+1-l
+	  if (l.eq.1) go to 420
+	      do 410 ii=1,ip
+  410	      zz(ii)=zz(ii)-a(ii,jj)*zz(ip+1)
+  420	  jj=index(ip)
+  430	  zz(ip)=zz(ip)/a(ip,jj)
+      go to next, (200,320)
+  440 format (35h0 nnls quitting on iteration count.)
+      end
diff --git a/math/llsq/original_f/qrbd.f b/math/llsq/original_f/qrbd.f
new file mode 100644
index 00000000..d9a12a24
--- /dev/null
+++ b/math/llsq/original_f/qrbd.f
@@ -0,0 +1,208 @@
+c     subroutine qrbd (ipass,q,e,nn,v,mdv,nrv,c,mdc,ncc)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1973 jun 12
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c	   qr algorithm for singular values of a bidiagonal matrix.
+c
+c     the bidiagonal matrix
+c
+c			(q1,e2,0...    )
+c			(   q2,e3,0... )
+c		 d=	(	.      )
+c			(	  .   0)
+c			(	    .en)
+c			(	   0,qn)
+c
+c		  is pre and post multiplied by
+c		  elementary rotation matrices
+c		  ri and pi so that
+c
+c		  rk...r1*d*p1**(t)...pk**(t) = diag(s1,...,sn)
+c
+c		  to within working accuracy.
+c
+c  1. ei and qi occupy e(i) and q(i) as input.
+c
+c  2. rm...r1*c replaces 'c' in storage as output.
+c
+c  3. v*p1**(t)...pm**(t) replaces 'v' in storage as output.
+c
+c  4. si occupies q(i) as output.
+c
+c  5. the si's are nonincreasing and nonnegative.
+c
+c     this code is based on the paper and 'algol' code..
+c ref..
+c  1. reinsch,c.h. and golub,g.h. 'singular value decomposition
+c     and least squares solutions' (numer. math.), vol. 14,(1970).
+c
+      subroutine qrbd (ipass,q,e,nn,v,mdv,nrv,c,mdc,ncc)
+      logical wntv ,havers,fail
+      dimension q(nn),e(nn),v(mdv,nn),c(mdc,ncc)
+      zero=0.
+      one=1.
+      two=2.
+c
+      n=nn
+      ipass=1
+      if (n.le.0) return
+      n10=10*n
+      wntv=nrv.gt.0
+      havers=ncc.gt.0
+      fail=.false.
+      nqrs=0
+      e(1)=zero
+      dnorm=zero
+	   do 10 j=1,n
+   10	   dnorm=amax1(abs(q(j))+abs(e(j)),dnorm)
+	   do 200 kk=1,n
+	   k=n+1-kk
+c
+c     test for splitting or rank deficiencies..
+c	  first make test for last diagonal term, q(k), being small.
+   20	    if(k.eq.1) go to 50
+	    if(diff(dnorm+q(k),dnorm)) 50,25,50
+c
+c     since q(k) is small we will make a special pass to
+c     transform e(k) to zero.
+c
+   25	   cs=zero
+	   sn=-one
+		do 40 ii=2,k
+		i=k+1-ii
+		f=-sn*e(i+1)
+		e(i+1)=cs*e(i+1)
+		call g1 (q(i),f,cs,sn,q(i))
+c	  transformation constructed to zero position (i,k).
+c
+		if (.not.wntv) go to 40
+		     do 30 j=1,nrv
+   30		     call g2 (cs,sn,v(j,i),v(j,k))
+c	       accumulate rt. transformations in v.
+c
+   40		continue
+c
+c	  the matrix is now bidiagonal, and of lower order
+c	  since e(k) .eq. zero..
+c
+   50		do 60 ll=1,k
+		l=k+1-ll
+		if(diff(dnorm+e(l),dnorm)) 55,100,55
+   55		if(diff(dnorm+q(l-1),dnorm)) 60,70,60
+   60		continue
+c     this loop can't complete since e(1) = zero.
+c
+	   go to 100
+c
+c	  cancellation of e(l), l.gt.1.
+   70	   cs=zero
+	   sn=-one
+		do 90 i=l,k
+		f=-sn*e(i)
+		e(i)=cs*e(i)
+		if(diff(dnorm+f,dnorm)) 75,100,75
+   75		call g1 (q(i),f,cs,sn,q(i))
+		if (.not.havers) go to 90
+		     do 80 j=1,ncc
+   80		     call g2 (cs,sn,c(i,j),c(l-1,j))
+   90		continue
+c
+c	  test for convergence..
+  100	   z=q(k)
+	   if (l.eq.k) go to 170
+c
+c	  shift from bottom 2 by 2 minor of b**(t)*b.
+	   x=q(l)
+	   y=q(k-1)
+	   g=e(k-1)
+	   h=e(k)
+	   f=((y-z)*(y+z)+(g-h)*(g+h))/(two*h*y)
+	   g=sqrt(one+f**2)
+	   if (f.lt.zero) go to 110
+	   t=f+g
+	   go to 120
+  110	   t=f-g
+  120	   f=((x-z)*(x+z)+h*(y/t-h))/x
+c
+c	  next qr sweep..
+	   cs=one
+	   sn=one
+	   lp1=l+1
+		do 160 i=lp1,k
+		g=e(i)
+		y=q(i)
+		h=sn*g
+		g=cs*g
+		call g1 (f,h,cs,sn,e(i-1))
+		f=x*cs+g*sn
+		g=-x*sn+g*cs
+		h=y*sn
+		y=y*cs
+		if (.not.wntv) go to 140
+c
+c	       accumulate rotations (from the right) in 'v'
+		     do 130 j=1,nrv
+  130		     call g2 (cs,sn,v(j,i-1),v(j,i))
+  140		call g1 (f,h,cs,sn,q(i-1))
+		f=cs*g+sn*y
+		x=-sn*g+cs*y
+		if (.not.havers) go to 160
+		     do 150 j=1,ncc
+  150		     call g2 (cs,sn,c(i-1,j),c(i,j))
+c	       apply rotations from the left to
+c	       right hand sides in 'c'..
+c
+  160		continue
+	   e(l)=zero
+	   e(k)=f
+	   q(k)=x
+	   nqrs=nqrs+1
+	   if (nqrs.le.n10) go to 20
+c	   return to 'test for splitting'.
+c
+	   fail=.true.
+c     ..
+c     cutoff for convergence failure. 'nqrs' will be 2*n usually.
+  170	   if (z.ge.zero) go to 190
+	   q(k)=-z
+	   if (.not.wntv) go to 190
+		do 180 j=1,nrv
+  180		v(j,k)=-v(j,k)
+  190	   continue
+c	  convergence. q(k) is made nonnegative..
+c
+  200	   continue
+      if (n.eq.1) return
+	   do 210 i=2,n
+	   if (q(i).gt.q(i-1)) go to 220
+  210	   continue
+      if (fail) ipass=2
+      return
+c     ..
+c     every singular value is in order..
+  220	   do 270 i=2,n
+	   t=q(i-1)
+	   k=i-1
+		do 230 j=i,n
+		if (t.ge.q(j)) go to 230
+		t=q(j)
+		k=j
+  230		continue
+	   if (k.eq.i-1) go to 270
+	   q(k)=q(i-1)
+	   q(i-1)=t
+	   if (.not.havers) go to 250
+		do 240 j=1,ncc
+		t=c(i-1,j)
+		c(i-1,j)=c(k,j)
+  240		c(k,j)=t
+  250	   if (.not.wntv) go to 270
+		do 260 j=1,nrv
+		t=v(j,i-1)
+		v(j,i-1)=v(j,k)
+  260		v(j,k)=t
+  270	   continue
+c	  end of ordering algorithm.
+c
+      if (fail) ipass=2
+      return
+      end
diff --git a/math/llsq/original_f/sfeout.f b/math/llsq/original_f/sfeout.f
new file mode 100644
index 00000000..fb439e40
--- /dev/null
+++ b/math/llsq/original_f/sfeout.f
@@ -0,0 +1,64 @@
+      subroutine mfeout (a,mda,m,n,names,mode)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1973 jun 12
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c	   subroutine for matrix output with labeling.
+c
+c     a( )	   matrix to be output
+c		   mda	   first dimension of a array
+c		   m	     no. of rows in a matrix
+c		   n	     no. of cols in a matrix
+c     names()	   array of names.  if names(1) = 1h , the rest
+c		   of the names() array will be ignored.
+c     mode	   =1	for   4p8f15.0	format	for v matrix.
+c		   =2	for   8e15.8  format  for candidate solutions.
+c
+      dimension    a(mda,01)
+      integer names(m),ihead(2)
+      logical	notblk
+      data  maxcol/8/, iblank/1h /,ihead(1)/4h col/,ihead(2)/4hsoln/
+c
+      notblk=names(1).ne.iblank
+      if (m.le.0.or.n.le.0) return
+c
+      if (mode.eq.2) go to 10
+      write (6,70)
+      go to 20
+   10 write (6,80)
+   20 continue
+c
+      nblock=n/maxcol
+      last=n-nblock*maxcol
+      ncol=maxcol
+      j1=1
+c
+c			     main loop starts here
+c
+   30 if (nblock.gt.0) go to 40
+      if (last.le.0) return
+      ncol=last
+      last=0
+c
+   40 j2=j1+ncol-1
+      write (6,90) (ihead(mode),j,j=j1,j2)
+c
+	   do 60 i=1,m
+	   name=iblank
+	   if (notblk) name=names(i)
+c
+	   if (mode.eq.2) go to 50
+	   write (6,100) i,name,(a(i,j),j=j1,j2)
+	   go to 60
+   50	   write (6,110) i,name,(a(i,j),j=j1,j2)
+   60	   continue
+c
+      j1=j1+maxcol
+      nblock=nblock-1
+      go to 30
+c
+   70 format (45h0v-matrix of the singular value decomposition,
+     * 8h of a*d./47h (elements of v scaled up by a factor of 10**4))
+   80 format (35h0sequence of candidate solutions, x)
+   90 format (1h0,11x,8(6x,a4,i4,1x)/1x)
+  100 format (1x,i3,1x,a6,1x,4p8f15.0)
+  110 format (1x,i3,1x,a6,1x,8e15.8)
+      end
diff --git a/math/llsq/original_f/sva.f b/math/llsq/original_f/sva.f
new file mode 100644
index 00000000..469a90ef
--- /dev/null
+++ b/math/llsq/original_f/sva.f
@@ -0,0 +1,193 @@
+      subroutine sva (a,mda,m,n,mdata,b,sing,names,iscale,d)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1973 jun 12
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c		singular value analysis printout
+c
+c     iscale	   set by user to 1, 2, or 3 to select column scaling
+c		   option.
+c		   1   subr will use identity scaling and ignore the d()
+c		       array.
+c		   2   subr will scale nonzero cols to have unit euclide
+c		       length and will store reciprocal lengths of
+c		       original nonzero cols in d().
+c		   3   user supplies col scale factors in d(). subr
+c		       will mult col j by d(j) and remove the scaling
+c		       from the soln at the end.
+c
+      dimension  a(mda,n), b(m), sing(01),d(n)
+c     sing(3*n)
+      dimension names(n)
+      logical test
+      double precision	 sb, dzero
+      dzero=0.d0
+      one=1.
+      zero=0.
+      if (m.le.0 .or. n.le.0) return
+      np1=n+1
+      write (6,270)
+      write (6,260) m,n,mdata
+      go to (60,10,10), iscale
+c
+c     apply column scaling to a
+c
+   10	   do 50 j=1,n
+	   a1=d(j)
+	   go to (20,20,40), iscale
+   20	   sb=dzero
+		do 30 i=1,m
+   30		sb=sb+dble(a(i,j))**2
+	   a1=dsqrt(sb)
+	   if (a1.eq.zero) a1=one
+	   a1=one/a1
+	   d(j)=a1
+   40		do 50 i=1,m
+   50		a(i,j)=a(i,j)*a1
+      write (6,280) iscale,(d(j),j=1,n)
+      go to 70
+   60 continue
+      write (6,290)
+   70 continue
+c
+c     obtain  sing. value decomp. of scaled matrix.
+c
+c**********************************************************
+      call svdrs (a,mda,m,n,b,1,1,sing)
+c**********************************************************
+c
+c     print the v matrix.
+c
+      call mfeout (a,mda,n,n,names,1)
+      if (iscale.eq.1) go to 90
+c
+c     replace v by d*v in the array a()
+c
+	   do 80 i=1,n
+		do 80 j=1,n
+   80		a(i,j)=d(i)*a(i,j)
+c
+c     g  now in  b array.  v now in a array.
+c
+c
+c     obtain summary output.
+c
+   90 continue
+      write (6,220)
+c
+c     compute cumulative sums of squares of components of
+c     g and store them in sing(i), i=minmn+1,...,2*minmn+1
+c
+      sb=dzero
+      minmn=min0(m,n)
+      minmn1=minmn+1
+      if (m.eq.minmn) go to 110
+	   do 100 i=minmn1,m
+  100	   sb=sb+dble(b(i))**2
+  110 sing(2*minmn+1)=sb
+	   do 120 jj=1,minmn
+	   j=minmn+1-jj
+	   sb=sb+dble(b(j))**2
+	   js=minmn+j
+  120	   sing(js)=sb
+      a3=sing(minmn+1)
+      a4=sqrt(a3/float(max0(1,mdata)))
+      write (6,230) a3,a4
+c
+      nsol=0
+c
+c
+c
+	   do 160 k=1,minmn
+	   if (sing(k).eq.zero) go to 130
+	   nsol=k
+	   pi=b(k)/sing(k)
+	   a1=one/sing(k)
+	   a2=b(k)**2
+	   a3=sing(minmn1+k)
+	   a4=sqrt(a3/float(max0(1,mdata-k)))
+	   test=sing(k).ge.100..or.sing(k).lt..001
+	   if (test) write (6,240) k,sing(k),pi,a1,b(k),a2,a3,a4
+	   if (.not.test) write (6,250) k,sing(k),pi,a1,b(k),a2,a3,a4
+	   go to 140
+  130	   write (6,240) k,sing(k)
+	   pi=zero
+  140		do 150 i=1,n
+		a(i,k)=a(i,k)*pi
+  150		if (k.gt.1) a(i,k)=a(i,k)+a(i,k-1)
+  160	   continue
+c
+c     compute and print values of ynorm and rnorm.
+c
+      write (6,300)
+      j=0
+      ysq=zero
+      go to 180
+  170 j=j+1
+      ysq=ysq+(b(j)/sing(j))**2
+  180 ynorm=sqrt(ysq)
+      js=minmn1+j
+      rnorm=sqrt(sing(js))
+      yl=-1000.
+      if (ynorm.gt.0.) yl=alog10(ynorm)
+      rl=-1000.
+      if (rnorm.gt.0.) rl=alog10(rnorm)
+      write (6,310) j,ynorm,rnorm,yl,rl
+      if (j.lt.nsol) go to 170
+c
+c     compute values of xnorm and rnorm for a sequence of values of
+c     the levenberg-marquardt parameter.
+c
+      if (sing(1).eq.zero) go to 210
+      el=alog10(sing(1))+one
+      el2=alog10(sing(nsol))-one
+      del=(el2-el)/20.
+      ten=10.
+      aln10=alog(ten)
+      write (6,320)
+	   do 200 ie=1,21
+c     compute	     alamb=10.**el
+	   alamb=exp(aln10*el)
+	   ys=0.
+	   js=minmn1+nsol
+	   rs=sing(js)
+		do 190 i=1,minmn
+		sl=sing(i)**2+alamb**2
+		ys=ys+(b(i)*sing(i)/sl)**2
+		rs=rs+(b(i)*(alamb**2)/sl)**2
+  190		continue
+	   ynorm=sqrt(ys)
+	   rnorm=sqrt(rs)
+	   rl=-1000.
+	   if (rnorm.gt.zero) rl=alog10(rnorm)
+	   yl=-1000.
+	   if (ynorm.gt.zero) yl=alog10(ynorm)
+	   write (6,330) alamb,ynorm,rnorm,el,yl,rl
+	   el=el+del
+  200	   continue
+c
+c     print candidate solutions.
+c
+  210 if (nsol.ge.1) call mfeout (a,mda,n,nsol,names,2)
+      return
+  220 format (42h0 index  sing. value       p coef         ,48hrecip. s.
+     1v.             g coef            g**2  ,39h            c.s.s.
+     2    n.s.r.c.s.s.)
+  230 format (1h ,5x,1h0,88x,1pe15.4,1pe17.4)
+  240 format (1h ,i6,e12.4,1p(e15.4,4x,e15.4,4x,e15.4,4x,e15.4,4x,e15.4,
+     12x,e15.4))
+  250 format (1h ,i6,f12.4,1p(e15.4,4x,e15.4,4x,e15.4,4x,e15.4,4x,e15.4,
+     12x,e15.4))
+  260 format (5h0m = ,i6,8h,   n = ,i4,12h,   mdata = ,i8)
+  270 format (45h0singular value analysis of the least squares,42h probl
+     1em,  a*x=b,  scaled as  (a*d)*y=b  .)
+  280 format (19h0scaling option no.,i2,18h.  d is a diagonal,46h matrix
+     1 with the following diagonal elements../(5x,10e12.4))
+  290 format (50h0scaling option no. 1.   d is the identity matrix./1x)
+  300 format (6h0index,13x,28h         ynorm         rnorm,14x,28h  log1
+     10(ynorm)  log10(rnorm)/1x)
+  310 format (1h ,i4,14x,2e14.5,14x,2f14.5)
+  320 format (54h0norms of solution and residual vectors for a range of,
+     144h values of the levenberg-marquardt parameter,9h, lambda./1h0,4x
+     2,42h        lambda         ynorm         rnorm,42h log10(lambda)
+     3log10(ynorm)  log10(rnorm))
+  330 format (5x,3e14.5,3f14.5)
+      end
diff --git a/math/llsq/original_f/svdrs.f b/math/llsq/original_f/svdrs.f
new file mode 100644
index 00000000..66e488dc
--- /dev/null
+++ b/math/llsq/original_f/svdrs.f
@@ -0,0 +1,205 @@
+c     subroutine svdrs (a,mda,mm,nn,b,mdb,nb,s)
+c     c.l.lawson and r.j.hanson, jet propulsion laboratory, 1974 mar 1
+c     to appear in 'solving least squares problems', prentice-hall, 1974
+c	   singular value decomposition also treating right side vector.
+c
+c     the array s occupies 3*n cells.
+c     a occupies m*n cells
+c     b occupies m*nb cells.
+c
+c     special singular value decomposition subroutine.
+c     we have the m x n matrix a and the system a*x=b to solve.
+c     either m .ge. n  or  m .lt. n is permitted.
+c			the singular value decomposition
+c     a = u*s*v**(t) is made in such a way that one gets
+c	 (1) the matrix v in the first n rows and columns of a.
+c	 (2) the diagonal matrix of ordered singular values in
+c	     the first n cells of the array s(ip), ip .ge. 3*n.
+c	 (3) the matrix product u**(t)*b=g gets placed back in b.
+c	 (4) the user must complete the solution and do his own
+c	     singular value analysis.
+c     *******
+c     give special
+c     treatment to rows and columns which are entirely zero.  this
+c     causes certain zero sing. vals. to appear as exact zeros rather
+c     than as about eta times the largest sing. val.   it similarly
+c     cleans up the associated columns of u and v.
+c     method..
+c      1. exchange cols of a to pack nonzero cols to the left.
+c	  set n = no. of nonzero cols.
+c	  use locations a(1,nn),a(1,nn-1),...,a(1,n+1) to record the
+c	  col permutations.
+c      2. exchange rows of a to pack nonzero rows to the top.
+c	  quit packing if find n nonzero rows.	make same row exchanges
+c	  in b.  set m so that all nonzero rows of the permuted a
+c	  are in first m rows.	if m .le. n then all m rows are
+c	  nonzero.  if m .gt. n then	  the first n rows are known
+c	  to be nonzero,and rows n+1 thru m may be zero or nonzero.
+c      3. apply original algorithm to the m by n problem.
+c      4. move permutation record from a(,) to s(i),i=n+1,...,nn.
+c      5. build v up from  n by n  to  nn by nn  by placing ones on
+c	  the diagonal and zeros elsewhere.  this is only partly done
+c	  explicitly.  it is completed during step 6.
+c      6. exchange rows of v to compensate for col exchanges of step 2.
+c      7. place zeros in  s(i),i=n+1,nn  to represent zero sing vals.
+c
+      subroutine svdrs (a,mda,mm,nn,b,mdb,nb,s)
+      dimension a(mda,nn),b(mdb,nb),s(nn,3)
+      zero=0.
+      one=1.
+c
+c			      begin.. special for zero rows and cols.
+c
+c			      pack the nonzero cols to the left
+c
+      n=nn
+      if (n.le.0.or.mm.le.0) return
+      j=n
+   10 continue
+	  do 20 i=1,mm
+	  if (a(i,j)) 50,20,50
+   20	  continue
+c
+c	  col j  is zero. exchange it with col n.
+c
+      if (j.eq.n) go to 40
+	  do 30 i=1,mm
+   30	  a(i,j)=a(i,n)
+   40 continue
+      a(1,n)=j
+      n=n-1
+   50 continue
+      j=j-1
+      if (j.ge.1) go to 10
+c			      if n=0 then a is entirely zero and svd
+c			      computation can be skipped
+      ns=0
+      if (n.eq.0) go to 240
+c			      pack nonzero rows to the top
+c			      quit packing if find n nonzero rows
+      i=1
+      m=mm
+   60 if (i.gt.n.or.i.ge.m) go to 150
+      if (a(i,i)) 90,70,90
+   70	  do 80 j=1,n
+	  if (a(i,j)) 90,80,90
+   80	  continue
+      go to 100
+   90 i=i+1
+      go to 60
+c			      row i is zero
+c			      exchange rows i and m
+  100 if(nb.le.0) go to 115
+	  do 110 j=1,nb
+	  t=b(i,j)
+	  b(i,j)=b(m,j)
+  110	  b(m,j)=t
+  115	  do 120 j=1,n
+  120	  a(i,j)=a(m,j)
+      if (m.gt.n) go to 140
+	  do 130 j=1,n
+  130	  a(m,j)=zero
+  140 continue
+c			      exchange is finished
+      m=m-1
+      go to 60
+c
+  150 continue
+c			      end.. special for zero rows and columns
+c			      begin.. svd algorithm
+c     method..
+c     (1)     reduce the matrix to upper bidiagonal form with
+c     householder transformations.
+c	   h(n)...h(1)aq(1)...q(n-2) = (d**t,0)**t
+c     where d is upper bidiagonal.
+c
+c     (2)     apply h(n)...h(1) to b.  here h(n)...h(1)*b replaces b
+c     in storage.
+c
+c     (3)     the matrix product w= q(1)...q(n-2) overwrites the first
+c     n rows of a in storage.
+c
+c     (4)     an svd for d is computed.  here k rotations ri and pi are
+c     computed so that
+c	   rk...r1*d*p1**(t)...pk**(t) = diag(s1,...,sm)
+c     to working accuracy.  the si are nonnegative and nonincreasing.
+c     here rk...r1*b overwrites b in storage while
+c     a*p1**(t)...pk**(t)  overwrites a in storage.
+c
+c     (5)     it follows that,with the proper definitions,
+c     u**(t)*b overwrites b, while v overwrites the first n row and
+c     columns of a.
+c
+      l=min0(m,n)
+c	      the following loop reduces a to upper bidiagonal and
+c	      also applies the premultiplying transformations to b.
+c
+	  do 170 j=1,l
+	  if (j.ge.m) go to 160
+	  call h12 (1,j,j+1,m,a(1,j),1,t,a(1,j+1),1,mda,n-j)
+	  call h12 (2,j,j+1,m,a(1,j),1,t,b,1,mdb,nb)
+  160	  if (j.ge.n-1) go to 170
+	  call h12 (1,j+1,j+2,n,a(j,1),mda,s(j,3),a(j+1,1),mda,1,m-j)
+  170	  continue
+c
+c     copy the bidiagonal matrix into the array s() for qrbd.
+c
+      if (n.eq.1) go to 190
+	  do 180 j=2,n
+	  s(j,1)=a(j,j)
+  180	  s(j,2)=a(j-1,j)
+  190 s(1,1)=a(1,1)
+c
+      ns=n
+      if (m.ge.n) go to 200
+      ns=m+1
+      s(ns,1)=zero
+      s(ns,2)=a(m,m+1)
+  200 continue
+c
+c     construct the explicit n by n product matrix, w=q1*q2*...*ql*i
+c     in the array a().
+c
+	  do 230 k=1,n
+	  i=n+1-k
+	  if(i.gt.min0(m,n-2)) go to 210
+	  call h12 (2,i+1,i+2,n,a(i,1),mda,s(i,3),a(1,i+1),1,mda,n-i)
+  210	      do 220 j=1,n
+  220	      a(i,j)=zero
+  230	  a(i,i)=one
+c
+c	   compute the svd of the bidiagonal matrix
+c
+      call qrbd (ipass,s(1,1),s(1,2),ns,a,mda,n,b,mdb,nb)
+c
+      go to (240,310), ipass
+  240 continue
+      if (ns.ge.n) go to 260
+      nsp1=ns+1
+	  do 250 j=nsp1,n
+  250	  s(j,1)=zero
+  260 continue
+      if (n.eq.nn) return
+      np1=n+1
+c				   move record of permutations
+c				   and store zeros
+	  do 280 j=np1,nn
+	  s(j,1)=a(1,j)
+	      do 270 i=1,n
+  270	      a(i,j)=zero
+  280	  continue
+c			      permute rows and set zero singular values.
+	  do 300 k=np1,nn
+	  i=s(k,1)
+	  s(k,1)=zero
+	      do 290 j=1,nn
+	      a(k,j)=a(i,j)
+  290	      a(i,j)=zero
+	  a(i,k)=one
+  300	  continue
+c			      end.. special for zero rows and columns
+      return
+  310 write (6,320)
+      stop
+  320 format (49h convergence failure in qr bidiagonal svd routine)
+      end