feat(modules): migrate to go modules and bump go version 1.14.4

- migrate to go module - bump go version 1.14.4 Signed-off-by: prateekpandey14 <prateek.pandey@mayadata.io>
2026-02-02 15:45:13 +01:00 · 2020-06-05 19:25:46 +05:30 · 2020-06-05 19:25:46 +05:30 · fa76b346a0
commit fa76b346a0
parent f5ae3ff476
837 changed files with 104140 additions and 158314 deletions
--- a/vendor/gonum.org/v1/gonum/lapack/.gitignore
+++ b/vendor/gonum.org/v1/gonum/lapack/.gitignore
--- a/vendor/gonum.org/v1/gonum/lapack/README.md
+++ b/vendor/gonum.org/v1/gonum/lapack/README.md
@ -0,0 +1,28 @@
+Gonum LAPACK [![GoDoc](https://godoc.org/gonum.org/v1/gonum/lapack?status.svg)](https://godoc.org/gonum.org/v1/gonum/lapack)
+======
+
+A collection of packages to provide LAPACK functionality for the Go programming
+language (http://golang.org). This provides a partial implementation in native Go
+and a wrapper using cgo to a C-based implementation.
+
+## Installation
+
+```
+  go get gonum.org/v1/gonum/lapack/...
+```
+
+## Packages
+
+### lapack
+
+Defines the LAPACK API based on http://www.netlib.org/lapack/lapacke.html
+
+### lapack/gonum
+
+Go implementation of the LAPACK API (incomplete, implements the `float64` API).
+
+### lapack/lapack64
+
+Wrappers for an implementation of the double (i.e., `float64`) precision real parts of
+the LAPACK API.
+
--- a/vendor/gonum.org/v1/gonum/lapack/doc.go
+++ b/vendor/gonum.org/v1/gonum/lapack/doc.go
@ -0,0 +1,6 @@
+// Copyright ©2018 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package lapack provides interfaces for the LAPACK linear algebra standard.
+package lapack // import "gonum.org/v1/gonum/lapack"
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dbdsqr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dbdsqr.go
@ -0,0 +1,505 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dbdsqr performs a singular value decomposition of a real n×n bidiagonal matrix.
+//
+// The SVD of the bidiagonal matrix B is
+//  B = Q * S * P^T
+// where S is a diagonal matrix of singular values, Q is an orthogonal matrix of
+// left singular vectors, and P is an orthogonal matrix of right singular vectors.
+//
+// Q and P are only computed if requested. If left singular vectors are requested,
+// this routine returns U * Q instead of Q, and if right singular vectors are
+// requested P^T * VT is returned instead of P^T.
+//
+// Frequently Dbdsqr is used in conjunction with Dgebrd which reduces a general
+// matrix A into bidiagonal form. In this case, the SVD of A is
+//  A = (U * Q) * S * (P^T * VT)
+//
+// This routine may also compute Q^T * C.
+//
+// d and e contain the elements of the bidiagonal matrix B. d must have length at
+// least n, and e must have length at least n-1. Dbdsqr will panic if there is
+// insufficient length. On exit, d contains the singular values of B in decreasing
+// order.
+//
+// VT is a matrix of size n×ncvt whose elements are stored in vt. The elements
+// of vt are modified to contain P^T * VT on exit. VT is not used if ncvt == 0.
+//
+// U is a matrix of size nru×n whose elements are stored in u. The elements
+// of u are modified to contain U * Q on exit. U is not used if nru == 0.
+//
+// C is a matrix of size n×ncc whose elements are stored in c. The elements
+// of c are modified to contain Q^T * C on exit. C is not used if ncc == 0.
+//
+// work contains temporary storage and must have length at least 4*(n-1). Dbdsqr
+// will panic if there is insufficient working memory.
+//
+// Dbdsqr returns whether the decomposition was successful.
+//
+// Dbdsqr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dbdsqr(uplo blas.Uplo, n, ncvt, nru, ncc int, d, e, vt []float64, ldvt int, u []float64, ldu int, c []float64, ldc int, work []float64) (ok bool) {
+	switch {
+	case uplo != blas.Upper && uplo != blas.Lower:
+		panic(badUplo)
+	case n < 0:
+		panic(nLT0)
+	case ncvt < 0:
+		panic(ncvtLT0)
+	case nru < 0:
+		panic(nruLT0)
+	case ncc < 0:
+		panic(nccLT0)
+	case ldvt < max(1, ncvt):
+		panic(badLdVT)
+	case (ldu < max(1, n) && nru > 0) || (ldu < 1 && nru == 0):
+		panic(badLdU)
+	case ldc < max(1, ncc):
+		panic(badLdC)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		return true
+	}
+
+	if len(vt) < (n-1)*ldvt+ncvt && ncvt != 0 {
+		panic(shortVT)
+	}
+	if len(u) < (nru-1)*ldu+n && nru != 0 {
+		panic(shortU)
+	}
+	if len(c) < (n-1)*ldc+ncc && ncc != 0 {
+		panic(shortC)
+	}
+	if len(d) < n {
+		panic(shortD)
+	}
+	if len(e) < n-1 {
+		panic(shortE)
+	}
+	if len(work) < 4*(n-1) {
+		panic(shortWork)
+	}
+
+	var info int
+	bi := blas64.Implementation()
+	const maxIter = 6
+
+	if n != 1 {
+		// If the singular vectors do not need to be computed, use qd algorithm.
+		if !(ncvt > 0 || nru > 0 || ncc > 0) {
+			info = impl.Dlasq1(n, d, e, work)
+			// If info is 2, dqds did not finish, so fall through and try the iteration below.
+			if info != 2 {
+				return info == 0
+			}
+		}
+		nm1 := n - 1
+		nm12 := nm1 + nm1
+		nm13 := nm12 + nm1
+		idir := 0
+
+		eps := dlamchE
+		unfl := dlamchS
+		lower := uplo == blas.Lower
+		var cs, sn, r float64
+		if lower {
+			for i := 0; i < n-1; i++ {
+				cs, sn, r = impl.Dlartg(d[i], e[i])
+				d[i] = r
+				e[i] = sn * d[i+1]
+				d[i+1] *= cs
+				work[i] = cs
+				work[nm1+i] = sn
+			}
+			if nru > 0 {
+				impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, n, work, work[n-1:], u, ldu)
+			}
+			if ncc > 0 {
+				impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, n, ncc, work, work[n-1:], c, ldc)
+			}
+		}
+		// Compute singular values to a relative accuracy of tol. If tol is negative
+		// the values will be computed to an absolute accuracy of math.Abs(tol) * norm(B).
+		tolmul := math.Max(10, math.Min(100, math.Pow(eps, -1.0/8)))
+		tol := tolmul * eps
+		var smax float64
+		for i := 0; i < n; i++ {
+			smax = math.Max(smax, math.Abs(d[i]))
+		}
+		for i := 0; i < n-1; i++ {
+			smax = math.Max(smax, math.Abs(e[i]))
+		}
+
+		var sminl float64
+		var thresh float64
+		if tol >= 0 {
+			sminoa := math.Abs(d[0])
+			if sminoa != 0 {
+				mu := sminoa
+				for i := 1; i < n; i++ {
+					mu = math.Abs(d[i]) * (mu / (mu + math.Abs(e[i-1])))
+					sminoa = math.Min(sminoa, mu)
+					if sminoa == 0 {
+						break
+					}
+				}
+			}
+			sminoa = sminoa / math.Sqrt(float64(n))
+			thresh = math.Max(tol*sminoa, float64(maxIter*n*n)*unfl)
+		} else {
+			thresh = math.Max(math.Abs(tol)*smax, float64(maxIter*n*n)*unfl)
+		}
+		// Prepare for the main iteration loop for the singular values.
+		maxIt := maxIter * n * n
+		iter := 0
+		oldl2 := -1
+		oldm := -1
+		// d[m-1] is the last element of the unconverged part of the matrix.
+		m := n
+
+	Outer:
+		for m > 1 {
+			if iter > maxIt {
+				info = 0
+				for i := 0; i < n-1; i++ {
+					if e[i] != 0 {
+						info++
+					}
+				}
+				return info == 0
+			}
+			// Find diagonal block of matrix to work on.
+			if tol < 0 && math.Abs(d[m-1]) <= thresh {
+				d[m-1] = 0
+			}
+			smax = math.Abs(d[m-1])
+			smin := smax
+			var l2 int
+			var broke bool
+			for l3 := 0; l3 < m-1; l3++ {
+				l2 = m - l3 - 2
+				abss := math.Abs(d[l2])
+				abse := math.Abs(e[l2])
+				if tol < 0 && abss <= thresh {
+					d[l2] = 0
+				}
+				if abse <= thresh {
+					broke = true
+					break
+				}
+				smin = math.Min(smin, abss)
+				smax = math.Max(math.Max(smax, abss), abse)
+			}
+			if broke {
+				e[l2] = 0
+				if l2 == m-2 {
+					// Convergence of bottom singular value, return to top.
+					m--
+					continue
+				}
+				l2++
+			} else {
+				l2 = 0
+			}
+			// e[l2] through e[m-2] are nonzero, e[l2-1] is zero.
+			if l2 == m-2 {
+				// Handle 2×2 block separately.
+				var sinr, cosr, sinl, cosl float64
+				d[m-1], d[m-2], sinr, cosr, sinl, cosl = impl.Dlasv2(d[m-2], e[m-2], d[m-1])
+				e[m-2] = 0
+				if ncvt > 0 {
+					bi.Drot(ncvt, vt[(m-2)*ldvt:], 1, vt[(m-1)*ldvt:], 1, cosr, sinr)
+				}
+				if nru > 0 {
+					bi.Drot(nru, u[m-2:], ldu, u[m-1:], ldu, cosl, sinl)
+				}
+				if ncc > 0 {
+					bi.Drot(ncc, c[(m-2)*ldc:], 1, c[(m-1)*ldc:], 1, cosl, sinl)
+				}
+				m -= 2
+				continue
+			}
+			// If working on a new submatrix, choose shift direction from larger end
+			// diagonal element toward smaller.
+			if l2 > oldm-1 || m-1 < oldl2 {
+				if math.Abs(d[l2]) >= math.Abs(d[m-1]) {
+					idir = 1
+				} else {
+					idir = 2
+				}
+			}
+			// Apply convergence tests.
+			// TODO(btracey): There is a lot of similar looking code here. See
+			// if there is a better way to de-duplicate.
+			if idir == 1 {
+				// Run convergence test in forward direction.
+				// First apply standard test to bottom of matrix.
+				if math.Abs(e[m-2]) <= math.Abs(tol)*math.Abs(d[m-1]) || (tol < 0 && math.Abs(e[m-2]) <= thresh) {
+					e[m-2] = 0
+					continue
+				}
+				if tol >= 0 {
+					// If relative accuracy desired, apply convergence criterion forward.
+					mu := math.Abs(d[l2])
+					sminl = mu
+					for l3 := l2; l3 < m-1; l3++ {
+						if math.Abs(e[l3]) <= tol*mu {
+							e[l3] = 0
+							continue Outer
+						}
+						mu = math.Abs(d[l3+1]) * (mu / (mu + math.Abs(e[l3])))
+						sminl = math.Min(sminl, mu)
+					}
+				}
+			} else {
+				// Run convergence test in backward direction.
+				// First apply standard test to top of matrix.
+				if math.Abs(e[l2]) <= math.Abs(tol)*math.Abs(d[l2]) || (tol < 0 && math.Abs(e[l2]) <= thresh) {
+					e[l2] = 0
+					continue
+				}
+				if tol >= 0 {
+					// If relative accuracy desired, apply convergence criterion backward.
+					mu := math.Abs(d[m-1])
+					sminl = mu
+					for l3 := m - 2; l3 >= l2; l3-- {
+						if math.Abs(e[l3]) <= tol*mu {
+							e[l3] = 0
+							continue Outer
+						}
+						mu = math.Abs(d[l3]) * (mu / (mu + math.Abs(e[l3])))
+						sminl = math.Min(sminl, mu)
+					}
+				}
+			}
+			oldl2 = l2
+			oldm = m
+			// Compute shift. First, test if shifting would ruin relative accuracy,
+			// and if so set the shift to zero.
+			var shift float64
+			if tol >= 0 && float64(n)*tol*(sminl/smax) <= math.Max(eps, (1.0/100)*tol) {
+				shift = 0
+			} else {
+				var sl2 float64
+				if idir == 1 {
+					sl2 = math.Abs(d[l2])
+					shift, _ = impl.Dlas2(d[m-2], e[m-2], d[m-1])
+				} else {
+					sl2 = math.Abs(d[m-1])
+					shift, _ = impl.Dlas2(d[l2], e[l2], d[l2+1])
+				}
+				// Test if shift is negligible
+				if sl2 > 0 {
+					if (shift/sl2)*(shift/sl2) < eps {
+						shift = 0
+					}
+				}
+			}
+			iter += m - l2 + 1
+			// If no shift, do simplified QR iteration.
+			if shift == 0 {
+				if idir == 1 {
+					cs := 1.0
+					oldcs := 1.0
+					var sn, r, oldsn float64
+					for i := l2; i < m-1; i++ {
+						cs, sn, r = impl.Dlartg(d[i]*cs, e[i])
+						if i > l2 {
+							e[i-1] = oldsn * r
+						}
+						oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i+1]*sn)
+						work[i-l2] = cs
+						work[i-l2+nm1] = sn
+						work[i-l2+nm12] = oldcs
+						work[i-l2+nm13] = oldsn
+					}
+					h := d[m-1] * cs
+					d[m-1] = h * oldcs
+					e[m-2] = h * oldsn
+					if ncvt > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt)
+					}
+					if nru > 0 {
+						impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu)
+					}
+					if ncc > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc)
+					}
+					if math.Abs(e[m-2]) < thresh { // NOTE(review): the three sibling branches test <= thresh; confirm that < is intended here.
+						e[m-2] = 0
+					}
+				} else {
+					cs := 1.0
+					oldcs := 1.0
+					var sn, r, oldsn float64
+					for i := m - 1; i >= l2+1; i-- {
+						cs, sn, r = impl.Dlartg(d[i]*cs, e[i-1])
+						if i < m-1 {
+							e[i] = oldsn * r
+						}
+						oldcs, oldsn, d[i] = impl.Dlartg(oldcs*r, d[i-1]*sn)
+						work[i-l2-1] = cs
+						work[i-l2+nm1-1] = -sn
+						work[i-l2+nm12-1] = oldcs
+						work[i-l2+nm13-1] = -oldsn
+					}
+					h := d[l2] * cs
+					d[l2] = h * oldcs
+					e[l2] = h * oldsn
+					if ncvt > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt)
+					}
+					if nru > 0 {
+						impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu)
+					}
+					if ncc > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc)
+					}
+					if math.Abs(e[l2]) <= thresh {
+						e[l2] = 0
+					}
+				}
+			} else {
+				// Use nonzero shift.
+				if idir == 1 {
+					// Chase bulge from top to bottom. Save cosines and sines for
+					// later singular vector updates.
+					f := (math.Abs(d[l2]) - shift) * (math.Copysign(1, d[l2]) + shift/d[l2])
+					g := e[l2]
+					var cosl, sinl float64
+					for i := l2; i < m-1; i++ {
+						cosr, sinr, r := impl.Dlartg(f, g)
+						if i > l2 {
+							e[i-1] = r
+						}
+						f = cosr*d[i] + sinr*e[i]
+						e[i] = cosr*e[i] - sinr*d[i]
+						g = sinr * d[i+1]
+						d[i+1] *= cosr
+						cosl, sinl, r = impl.Dlartg(f, g)
+						d[i] = r
+						f = cosl*e[i] + sinl*d[i+1]
+						d[i+1] = cosl*d[i+1] - sinl*e[i]
+						if i < m-2 {
+							g = sinl * e[i+1]
+							e[i+1] = cosl * e[i+1]
+						}
+						work[i-l2] = cosr
+						work[i-l2+nm1] = sinr
+						work[i-l2+nm12] = cosl
+						work[i-l2+nm13] = sinl
+					}
+					e[m-2] = f
+					if ncvt > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncvt, work, work[n-1:], vt[l2*ldvt:], ldvt)
+					}
+					if nru > 0 {
+						impl.Dlasr(blas.Right, lapack.Variable, lapack.Forward, nru, m-l2, work[nm12:], work[nm13:], u[l2:], ldu)
+					}
+					if ncc > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Forward, m-l2, ncc, work[nm12:], work[nm13:], c[l2*ldc:], ldc)
+					}
+					if math.Abs(e[m-2]) <= thresh {
+						e[m-2] = 0
+					}
+				} else {
+					// Chase bulge from bottom to top. Save cosines and sines for
+					// later singular vector updates.
+					f := (math.Abs(d[m-1]) - shift) * (math.Copysign(1, d[m-1]) + shift/d[m-1])
+					g := e[m-2]
+					for i := m - 1; i > l2; i-- {
+						cosr, sinr, r := impl.Dlartg(f, g)
+						if i < m-1 {
+							e[i] = r
+						}
+						f = cosr*d[i] + sinr*e[i-1]
+						e[i-1] = cosr*e[i-1] - sinr*d[i]
+						g = sinr * d[i-1]
+						d[i-1] *= cosr
+						cosl, sinl, r := impl.Dlartg(f, g)
+						d[i] = r
+						f = cosl*e[i-1] + sinl*d[i-1]
+						d[i-1] = cosl*d[i-1] - sinl*e[i-1]
+						if i > l2+1 {
+							g = sinl * e[i-2]
+							e[i-2] *= cosl
+						}
+						work[i-l2-1] = cosr
+						work[i-l2+nm1-1] = -sinr
+						work[i-l2+nm12-1] = cosl
+						work[i-l2+nm13-1] = -sinl
+					}
+					e[l2] = f
+					if math.Abs(e[l2]) <= thresh {
+						e[l2] = 0
+					}
+					if ncvt > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncvt, work[nm12:], work[nm13:], vt[l2*ldvt:], ldvt)
+					}
+					if nru > 0 {
+						impl.Dlasr(blas.Right, lapack.Variable, lapack.Backward, nru, m-l2, work, work[n-1:], u[l2:], ldu)
+					}
+					if ncc > 0 {
+						impl.Dlasr(blas.Left, lapack.Variable, lapack.Backward, m-l2, ncc, work, work[n-1:], c[l2*ldc:], ldc)
+					}
+				}
+			}
+		}
+	}
+
+	// All singular values converged, make them positive.
+	for i := 0; i < n; i++ {
+		if d[i] < 0 {
+			d[i] *= -1
+			if ncvt > 0 {
+				bi.Dscal(ncvt, -1, vt[i*ldvt:], 1)
+			}
+		}
+	}
+
+	// Sort the singular values in decreasing order.
+	for i := 0; i < n-1; i++ {
+		isub := 0
+		smin := d[0]
+		for j := 1; j < n-i; j++ {
+			if d[j] <= smin {
+				isub = j
+				smin = d[j]
+			}
+		}
+		if isub != n-i { // NOTE(review): isub <= n-i-1 here, so this is always true; presumably isub != n-i-1 was meant (a self-swap is harmless) — confirm.
+			// Swap singular values and vectors.
+			d[isub] = d[n-i-1]
+			d[n-i-1] = smin
+			if ncvt > 0 {
+				bi.Dswap(ncvt, vt[isub*ldvt:], 1, vt[(n-i-1)*ldvt:], 1)
+			}
+			if nru > 0 {
+				bi.Dswap(nru, u[isub:], ldu, u[n-i-1:], ldu)
+			}
+			if ncc > 0 {
+				bi.Dswap(ncc, c[isub*ldc:], 1, c[(n-i-1)*ldc:], 1)
+			}
+		}
+	}
+	info = 0
+	for i := 0; i < n-1; i++ {
+		if e[i] != 0 {
+			info++
+		}
+	}
+	return info == 0
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgebak.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebak.go
@ -0,0 +1,89 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dgebak updates an n×m matrix V as
+//  V = P D V,        if side == lapack.EVRight,
+//  V = P D^{-1} V,   if side == lapack.EVLeft,
+// where P and D are n×n permutation and scaling matrices, respectively,
+// implicitly represented by job, scale, ilo and ihi as returned by Dgebal.
+//
+// Typically, columns of the matrix V contain the right or left (determined by
+// side) eigenvectors of the balanced matrix output by Dgebal, and Dgebak forms
+// the eigenvectors of the original matrix.
+//
+// Dgebak is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgebak(job lapack.BalanceJob, side lapack.EVSide, n, ilo, ihi int, scale []float64, m int, v []float64, ldv int) {
+	switch {
+	case job != lapack.BalanceNone && job != lapack.Permute && job != lapack.Scale && job != lapack.PermuteScale:
+		panic(badBalanceJob)
+	case side != lapack.EVLeft && side != lapack.EVRight:
+		panic(badEVSide)
+	case n < 0:
+		panic(nLT0)
+	case ilo < 0 || max(0, n-1) < ilo:
+		panic(badIlo)
+	case ihi < min(ilo, n-1) || n <= ihi:
+		panic(badIhi)
+	case m < 0:
+		panic(mLT0)
+	case ldv < max(1, m):
+		panic(badLdV)
+	}
+
+	// Quick return if possible.
+	if n == 0 || m == 0 {
+		return
+	}
+
+	if len(scale) < n {
+		panic(shortScale)
+	}
+	if len(v) < (n-1)*ldv+m {
+		panic(shortV)
+	}
+
+	// Nothing to undo if no balancing was requested.
+	if job == lapack.BalanceNone {
+		return
+	}
+
+	bi := blas64.Implementation()
+	if ilo != ihi && job != lapack.Permute {
+		// Backward balance: rescale rows ilo..ihi of V by scale[i] or by its reciprocal.
+		if side == lapack.EVRight {
+			for i := ilo; i <= ihi; i++ {
+				bi.Dscal(m, scale[i], v[i*ldv:], 1)
+			}
+		} else {
+			for i := ilo; i <= ihi; i++ {
+				bi.Dscal(m, 1/scale[i], v[i*ldv:], 1)
+			}
+		}
+	}
+	if job == lapack.Scale {
+		return
+	}
+	// Backward permutation: swap row i of V with row int(scale[i]) for every i outside ilo..ihi.
+	for i := ilo - 1; i >= 0; i-- {
+		k := int(scale[i])
+		if k == i {
+			continue
+		}
+		bi.Dswap(m, v[i*ldv:], 1, v[k*ldv:], 1)
+	}
+	for i := ihi + 1; i < n; i++ {
+		k := int(scale[i])
+		if k == i {
+			continue
+		}
+		bi.Dswap(m, v[i*ldv:], 1, v[k*ldv:], 1)
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgebal.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebal.go
@ -0,0 +1,239 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dgebal balances an n×n matrix A. Balancing consists of two stages, permuting
+// and scaling. Both steps are optional and depend on the value of job.
+//
+// Permuting consists of applying a permutation matrix P such that the matrix
+// that results from P^T*A*P takes the upper block triangular form
+//            [ T1  X  Y  ]
+//  P^T A P = [  0  B  Z  ],
+//            [  0  0  T2 ]
+// where T1 and T2 are upper triangular matrices and B contains at least one
+// nonzero off-diagonal element in each row and column. The indices ilo and ihi
+// mark the starting and ending columns of the submatrix B. The eigenvalues of A
+// isolated in the first 0 to ilo-1 and last ihi+1 to n-1 elements on the
+// diagonal can be read off without any roundoff error.
+//
+// Scaling consists of applying a diagonal similarity transformation D such that
+// D^{-1}*B*D has the 1-norm of each row and its corresponding column nearly
+// equal. The output matrix is
+//  [ T1     X*D          Y    ]
+//  [  0  inv(D)*B*D  inv(D)*Z ].
+//  [  0      0           T2   ]
+// Scaling may reduce the 1-norm of the matrix, and improve the accuracy of
+// the computed eigenvalues and/or eigenvectors.
+//
+// job specifies the operations that will be performed on A.
+// If job is lapack.BalanceNone, Dgebal sets scale[i] = 1 for all i and returns ilo=0, ihi=n-1.
+// If job is lapack.Permute, only permuting will be done.
+// If job is lapack.Scale, only scaling will be done.
+// If job is lapack.PermuteScale, both permuting and scaling will be done.
+//
+// On return, if job is lapack.Permute or lapack.PermuteScale, it will hold that
+//  A[i,j] == 0,   for i > j and j ∈ {0, ..., ilo-1, ihi+1, ..., n-1}.
+// If job is lapack.BalanceNone or lapack.Scale, or if n == 0, it will hold that
+//  ilo == 0 and ihi == n-1.
+//
+// On return, scale will contain information about the permutations and scaling
+// factors applied to A. If π(j) denotes the index of the column interchanged
+// with column j, and D[j,j] denotes the scaling factor applied to column j,
+// then
+//  scale[j] == π(j),     for j ∈ {0, ..., ilo-1, ihi+1, ..., n-1},
+//           == D[j,j],   for j ∈ {ilo, ..., ihi}.
+// scale must have length equal to n, otherwise Dgebal will panic.
+//
+// Dgebal is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgebal(job lapack.BalanceJob, n int, a []float64, lda int, scale []float64) (ilo, ihi int) {
+	switch {
+	case job != lapack.BalanceNone && job != lapack.Permute && job != lapack.Scale && job != lapack.PermuteScale:
+		panic(badBalanceJob)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	ilo = 0
+	ihi = n - 1
+
+	if n == 0 {
+		return ilo, ihi
+	}
+
+	if len(scale) != n {
+		panic(shortScale)
+	}
+
+	if job == lapack.BalanceNone {
+		for i := range scale {
+			scale[i] = 1
+		}
+		return ilo, ihi
+	}
+
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+
+	bi := blas64.Implementation()
+	swapped := true
+
+	if job == lapack.Scale {
+		goto scaling // Scale-only job: skip the permutation stage below.
+	}
+
+	// Permutation to isolate eigenvalues if possible.
+	//
+	// Search for rows isolating an eigenvalue and push them down.
+	for swapped {
+		swapped = false
+	rows:
+		for i := ihi; i >= 0; i-- {
+			for j := 0; j <= ihi; j++ {
+				if i == j {
+					continue
+				}
+				if a[i*lda+j] != 0 {
+					continue rows
+				}
+			}
+			// Row i has only zero off-diagonal elements in the
+			// block A[ilo:ihi+1,ilo:ihi+1].
+			scale[ihi] = float64(i)
+			if i != ihi {
+				bi.Dswap(ihi+1, a[i:], lda, a[ihi:], lda)
+				bi.Dswap(n, a[i*lda:], 1, a[ihi*lda:], 1)
+			}
+			if ihi == 0 {
+				scale[0] = 1
+				return ilo, ihi
+			}
+			ihi--
+			swapped = true
+			break
+		}
+	}
+	// Search for columns isolating an eigenvalue and push them left.
+	swapped = true
+	for swapped {
+		swapped = false
+	columns:
+		for j := ilo; j <= ihi; j++ {
+			for i := ilo; i <= ihi; i++ {
+				if i == j {
+					continue
+				}
+				if a[i*lda+j] != 0 {
+					continue columns
+				}
+			}
+			// Column j has only zero off-diagonal elements in the
+			// block A[ilo:ihi+1,ilo:ihi+1].
+			scale[ilo] = float64(j)
+			if j != ilo {
+				bi.Dswap(ihi+1, a[j:], lda, a[ilo:], lda)
+				bi.Dswap(n-ilo, a[j*lda+ilo:], 1, a[ilo*lda+ilo:], 1)
+			}
+			swapped = true
+			ilo++
+			break
+		}
+	}
+
+scaling:
+	for i := ilo; i <= ihi; i++ {
+		scale[i] = 1
+	}
+
+	if job == lapack.Permute {
+		return ilo, ihi
+	}
+
+	// Balance the submatrix in rows ilo to ihi.
+
+	const (
+		// sclfac should be a power of 2 to avoid roundoff errors.
+		// Elements of scale are restricted to powers of sclfac,
+		// therefore the matrix will be only nearly balanced.
+		sclfac = 2
+		// factor determines the minimum reduction of the row and column
+		// norms that is considered non-negligible. It must be less than 1.
+		factor = 0.95
+	)
+	sfmin1 := dlamchS / dlamchP
+	sfmax1 := 1 / sfmin1
+	sfmin2 := sfmin1 * sclfac
+	sfmax2 := 1 / sfmin2
+
+	// Iterative loop for norm reduction; repeats until a full pass rescales nothing.
+	var conv bool
+	for !conv {
+		conv = true
+		for i := ilo; i <= ihi; i++ {
+			c := bi.Dnrm2(ihi-ilo+1, a[ilo*lda+i:], lda) // NOTE(review): c and r come from Dnrm2 on column i and row i of the ilo..ihi block, while the doc above speaks of the 1-norm.
+			r := bi.Dnrm2(ihi-ilo+1, a[i*lda+ilo:], 1)
+			ica := bi.Idamax(ihi+1, a[i:], lda)
+			ca := math.Abs(a[ica*lda+i])
+			ira := bi.Idamax(n-ilo, a[i*lda+ilo:], 1)
+			ra := math.Abs(a[i*lda+ilo+ira])
+
+			// Guard against zero c or r due to underflow.
+			if c == 0 || r == 0 {
+				continue
+			}
+			g := r / sclfac
+			f := 1.0
+			s := c + r
+			for c < g && math.Max(f, math.Max(c, ca)) < sfmax2 && math.Min(r, math.Min(g, ra)) > sfmin2 {
+				if math.IsNaN(c + f + ca + r + g + ra) {
+					// Panic if NaN to avoid infinite loop.
+					panic("lapack: NaN")
+				}
+				f *= sclfac
+				c *= sclfac
+				ca *= sclfac
+				g /= sclfac
+				r /= sclfac
+				ra /= sclfac
+			}
+			g = c / sclfac
+			for r <= g && math.Max(r, ra) < sfmax2 && math.Min(math.Min(f, c), math.Min(g, ca)) > sfmin2 {
+				f /= sclfac
+				c /= sclfac
+				ca /= sclfac
+				g /= sclfac
+				r *= sclfac
+				ra *= sclfac
+			}
+
+			if c+r >= factor*s {
+				// Reduction would be negligible.
+				continue
+			}
+			if f < 1 && scale[i] < 1 && f*scale[i] <= sfmin1 {
+				continue
+			}
+			if f > 1 && scale[i] > 1 && scale[i] >= sfmax1/f {
+				continue
+			}
+
+			// Now balance: accumulate f into scale[i], divide row i by f and multiply column i by f.
+			scale[i] *= f
+			bi.Dscal(n-ilo, 1/f, a[i*lda+ilo:], 1)
+			bi.Dscal(ihi+1, f, a[i:], lda)
+			conv = false
+		}
+	}
+	return ilo, ihi
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgebd2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebd2.go
@ -0,0 +1,86 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dgebd2 reduces an m×n matrix A to upper or lower bidiagonal form by an orthogonal
+// transformation.
+//  Q^T * A * P = B
+// If m >= n, B is upper bidiagonal, otherwise B is lower bidiagonal.
+// d receives the diagonal of B and must have length at least min(m,n).
+// e receives the off-diagonal of B and must have length at least min(m,n)-1.
+//
+// Dgebd2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgebd2(m, n int, a []float64, lda int, d, e, tauQ, tauP, work []float64) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	minmn := min(m, n)
+	if minmn == 0 {
+		return
+	}
+
+	switch {
+	case len(d) < minmn:
+		panic(shortD)
+	case len(e) < minmn-1:
+		panic(shortE)
+	case len(tauQ) < minmn:
+		panic(shortTauQ)
+	case len(tauP) < minmn:
+		panic(shortTauP)
+	case len(work) < max(m, n):
+		panic(shortWork)
+	}
+
+	if m >= n {
+		for i := 0; i < n; i++ {
+			a[i*lda+i], tauQ[i] = impl.Dlarfg(m-i, a[i*lda+i], a[min(i+1, m-1)*lda+i:], lda)
+			d[i] = a[i*lda+i]
+			a[i*lda+i] = 1
+			// Apply H_i to A[i:m, i+1:n] from the left.
+			if i < n-1 {
+				impl.Dlarf(blas.Left, m-i, n-i-1, a[i*lda+i:], lda, tauQ[i], a[i*lda+i+1:], lda, work)
+			}
+			a[i*lda+i] = d[i] // Restore the diagonal entry that was temporarily set to 1.
+			if i < n-1 {
+				a[i*lda+i+1], tauP[i] = impl.Dlarfg(n-i-1, a[i*lda+i+1], a[i*lda+min(i+2, n-1):], 1)
+				e[i] = a[i*lda+i+1]
+				a[i*lda+i+1] = 1
+				impl.Dlarf(blas.Right, m-i-1, n-i-1, a[i*lda+i+1:], 1, tauP[i], a[(i+1)*lda+i+1:], lda, work)
+				a[i*lda+i+1] = e[i] // Restore the off-diagonal entry that was temporarily set to 1.
+			} else {
+				tauP[i] = 0
+			}
+		}
+		return
+	}
+	for i := 0; i < m; i++ {
+		a[i*lda+i], tauP[i] = impl.Dlarfg(n-i, a[i*lda+i], a[i*lda+min(i+1, n-1):], 1)
+		d[i] = a[i*lda+i]
+		a[i*lda+i] = 1
+		if i < m-1 {
+			impl.Dlarf(blas.Right, m-i-1, n-i, a[i*lda+i:], 1, tauP[i], a[(i+1)*lda+i:], lda, work)
+		}
+		a[i*lda+i] = d[i] // Restore the diagonal entry that was temporarily set to 1.
+		if i < m-1 {
+			a[(i+1)*lda+i], tauQ[i] = impl.Dlarfg(m-i-1, a[(i+1)*lda+i], a[min(i+2, m-1)*lda+i:], lda)
+			e[i] = a[(i+1)*lda+i]
+			a[(i+1)*lda+i] = 1
+			impl.Dlarf(blas.Left, m-i-1, n-i-1, a[(i+1)*lda+i:], lda, tauQ[i], a[(i+1)*lda+i+1:], lda, work)
+			a[(i+1)*lda+i] = e[i] // Restore the off-diagonal entry that was temporarily set to 1.
+		} else {
+			tauQ[i] = 0
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgebrd.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgebrd.go
@ -0,0 +1,161 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dgebrd reduces a general m×n matrix A to upper or lower bidiagonal form B by
+// an orthogonal transformation:
+//  Q^T * A * P = B.
+// The diagonal elements of B are stored in d and the off-diagonal elements are stored
+// in e. These are additionally stored along the diagonal of A and the off-diagonal
+// of A. If m >= n B is an upper-bidiagonal matrix, and if m < n B is a
+// lower-bidiagonal matrix.
+//
+// The remaining elements of A store the data needed to construct Q and P.
+// The matrices Q and P are products of elementary reflectors
+//  if m >= n, Q = H_0 * H_1 * ... * H_{n-1},
+//             P = G_0 * G_1 * ... * G_{n-2},
+//  if m < n,  Q = H_0 * H_1 * ... * H_{m-2},
+//             P = G_0 * G_1 * ... * G_{m-1},
+// where
+//  H_i = I - tauQ[i] * v_i * v_i^T,
+//  G_i = I - tauP[i] * u_i * u_i^T.
+//
+// As an example, on exit the entries of A when m = 6, and n = 5
+//  [ d   e  u1  u1  u1]
+//  [v1   d   e  u2  u2]
+//  [v1  v2   d   e  u3]
+//  [v1  v2  v3   d   e]
+//  [v1  v2  v3  v4   d]
+//  [v1  v2  v3  v4  v5]
+// and when m = 5, n = 6
+//  [ d  u1  u1  u1  u1  u1]
+//  [ e   d  u2  u2  u2  u2]
+//  [v1   e   d  u3  u3  u3]
+//  [v1  v2   e   d  u4  u4]
+//  [v1  v2  v3   e   d  u5]
+//
+// d, tauQ, and tauP must all have length at least min(m,n), and e must have
+// length at least min(m,n) - 1, unless lwork is -1 when there is no check except
+// for work which must have a length of at least one.
+//
+// work is temporary storage, and lwork specifies the usable memory length.
+// lwork must be at least max(1,m,n) or be -1, otherwise this function will panic.
+// Dgebrd is a blocked decomposition, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Dgebrd,
+// the optimal work length will be stored into work[0].
+//
+// Dgebrd is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgebrd(m, n int, a []float64, lda int, d, e, tauQ, tauP, work []float64, lwork int) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case lwork < max(1, max(m, n)) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	minmn := min(m, n)
+	if minmn == 0 {
+		work[0] = 1
+		return
+	}
+
+	nb := impl.Ilaenv(1, "DGEBRD", " ", m, n, -1, -1)
+	lwkopt := (m + n) * nb
+	if lwork == -1 {
+		work[0] = float64(lwkopt)
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(d) < minmn:
+		panic(shortD)
+	case len(e) < minmn-1:
+		panic(shortE)
+	case len(tauQ) < minmn:
+		panic(shortTauQ)
+	case len(tauP) < minmn:
+		panic(shortTauP)
+	}
+
+	nx := minmn
+	ws := max(m, n)
+	if 1 < nb && nb < minmn {
+		// At least one blocked operation can be done.
+		// Get the crossover point nx.
+		nx = max(nb, impl.Ilaenv(3, "DGEBRD", " ", m, n, -1, -1))
+		// Determine when to switch from blocked to unblocked code.
+		if nx < minmn {
+			// At least one blocked operation will be done.
+			ws = (m + n) * nb
+			if lwork < ws {
+				// Not enough work space for the optimal nb,
+				// consider using a smaller block size.
+				nbmin := impl.Ilaenv(2, "DGEBRD", " ", m, n, -1, -1)
+				if lwork >= (m+n)*nbmin {
+					// Enough work space for minimum block size.
+					nb = lwork / (m + n)
+				} else {
+					nb = minmn
+					nx = minmn
+				}
+			}
+		}
+	}
+	bi := blas64.Implementation()
+	ldworkx := nb
+	ldworky := nb
+	var i int
+	for i = 0; i < minmn-nx; i += nb {
+		// Reduce rows and columns i:i+nb to bidiagonal form and return
+		// the matrices X and Y which are needed to update the unreduced
+		// part of the matrix.
+		// X is stored in the first m rows of work, Y in the rows after it.
+		x := work[:m*ldworkx]
+		y := work[m*ldworkx:]
+		impl.Dlabrd(m-i, n-i, nb, a[i*lda+i:], lda,
+			d[i:], e[i:], tauQ[i:], tauP[i:],
+			x, ldworkx, y, ldworky)
+
+		// Update the trailing submatrix A[i+nb:m,i+nb:n], using an update
+		// of the form  A = A - V*Y^T - X*U^T.
+		bi.Dgemm(blas.NoTrans, blas.Trans, m-i-nb, n-i-nb, nb,
+			-1, a[(i+nb)*lda+i:], lda, y[nb*ldworky:], ldworky,
+			1, a[(i+nb)*lda+i+nb:], lda)
+
+		bi.Dgemm(blas.NoTrans, blas.NoTrans, m-i-nb, n-i-nb, nb,
+			-1, x[nb*ldworkx:], ldworkx, a[i*lda+i+nb:], lda,
+			1, a[(i+nb)*lda+i+nb:], lda)
+
+		// Copy diagonal and off-diagonal elements of B back into A.
+		if m >= n {
+			for j := i; j < i+nb; j++ {
+				a[j*lda+j] = d[j]
+				a[j*lda+j+1] = e[j]
+			}
+		} else {
+			for j := i; j < i+nb; j++ {
+				a[j*lda+j] = d[j]
+				a[(j+1)*lda+j] = e[j]
+			}
+		}
+	}
+	// Use unblocked code to reduce the remainder of the matrix.
+	impl.Dgebd2(m-i, n-i, a[i*lda+i:], lda, d[i:], e[i:], tauQ[i:], tauP[i:], work)
+	work[0] = float64(ws)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgecon.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgecon.go
@ -0,0 +1,92 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dgecon returns an estimate of the reciprocal of the condition number of the
+// n×n matrix A, computed from the LU decomposition of A. The estimate is for
+// the 1-norm condition number if norm == lapack.MaxColumnSum and for the
+// ∞-norm condition number if norm == lapack.MaxRowSum. For other values of
+// norm Dgecon will panic.
+//
+// a must contain the LU decomposition of A as computed by Dgetrf, and anorm
+// must be the corresponding 1-norm or ∞-norm of the original matrix A.
+//
+// work and iwork are temporary storage. work must have length at least 4*n
+// and iwork must have length at least n, otherwise Dgecon will panic.
+//
+// Dgecon returns 1 if n == 0 and 0 if anorm == 0.
+func (impl Implementation) Dgecon(norm lapack.MatrixNorm, n int, a []float64, lda int, anorm float64, work []float64, iwork []int) float64 {
+	if norm != lapack.MaxColumnSum && norm != lapack.MaxRowSum {
+		panic(badNorm)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if lda < max(1, n) {
+		panic(badLdA)
+	}
+
+	// Nothing to estimate for an empty matrix.
+	if n == 0 {
+		return 1
+	}
+
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+	if len(work) < 4*n {
+		panic(shortWork)
+	}
+	if len(iwork) < n {
+		panic(shortIWork)
+	}
+
+	// A zero norm yields a zero reciprocal condition number.
+	if anorm == 0 {
+		return 0
+	}
+
+	// directKase is the value of kase returned by Dlacn2 for which the
+	// untransposed triangular solves are performed.
+	directKase := 2
+	if norm == lapack.MaxColumnSum {
+		directKase = 1
+	}
+
+	var (
+		ainvnm float64
+		kase   int
+		normin bool // False only for the first pair of triangular solves.
+		isave  [3]int
+	)
+	bi := blas64.Implementation()
+	for {
+		ainvnm, kase = impl.Dlacn2(n, work[n:], work, iwork, ainvnm, kase, &isave)
+		if kase == 0 {
+			// Dlacn2 has finished; ainvnm is the final estimate.
+			break
+		}
+
+		var scaleL, scaleU float64
+		if kase == directKase {
+			// Solve with L, then with U.
+			scaleL = impl.Dlatrs(blas.Lower, blas.NoTrans, blas.Unit, normin, n, a, lda, work, work[2*n:])
+			scaleU = impl.Dlatrs(blas.Upper, blas.NoTrans, blas.NonUnit, normin, n, a, lda, work, work[3*n:])
+		} else {
+			// Solve with U^T, then with L^T.
+			scaleU = impl.Dlatrs(blas.Upper, blas.Trans, blas.NonUnit, normin, n, a, lda, work, work[3*n:])
+			scaleL = impl.Dlatrs(blas.Lower, blas.Trans, blas.Unit, normin, n, a, lda, work, work[2*n:])
+		}
+		normin = true
+
+		scale := scaleL * scaleU
+		if scale == 1 {
+			continue
+		}
+		// Divide work by scale unless scale is zero or too small relative
+		// to the largest element of work, in which case give up and
+		// return a zero estimate.
+		imax := bi.Idamax(n, work, 1)
+		if scale == 0 || scale < math.Abs(work[imax])*dlamchS {
+			return 0
+		}
+		impl.Drscl(n, scale, work, 1)
+	}
+
+	if ainvnm == 0 {
+		return 0
+	}
+	return (1 / ainvnm) / anorm
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgeev.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeev.go
@ -0,0 +1,279 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dgeev computes the eigenvalues and, optionally, the left and/or right
+// eigenvectors for an n×n real nonsymmetric matrix A.
+//
+// The right eigenvector v_j of A corresponding to an eigenvalue λ_j
+// is defined by
+//  A v_j = λ_j v_j,
+// and the left eigenvector u_j corresponding to an eigenvalue λ_j is defined by
+//  u_j^H A = λ_j u_j^H,
+// where u_j^H is the conjugate transpose of u_j.
+//
+// On return, A will be overwritten and the left and right eigenvectors will be
+// stored, respectively, in the columns of the n×n matrices VL and VR in the
+// same order as their eigenvalues. If the j-th eigenvalue is real, then
+//  u_j = VL[:,j],
+//  v_j = VR[:,j],
+// and if it is not real, then j and j+1 form a complex conjugate pair and the
+// eigenvectors can be recovered as
+//  u_j     = VL[:,j] + i*VL[:,j+1],
+//  u_{j+1} = VL[:,j] - i*VL[:,j+1],
+//  v_j     = VR[:,j] + i*VR[:,j+1],
+//  v_{j+1} = VR[:,j] - i*VR[:,j+1],
+// where i is the imaginary unit. The computed eigenvectors are normalized to
+// have Euclidean norm equal to 1 and largest component real.
+//
+// Left eigenvectors will be computed only if jobvl == lapack.LeftEVCompute,
+// otherwise jobvl must be lapack.LeftEVNone.
+// Right eigenvectors will be computed only if jobvr == lapack.RightEVCompute,
+// otherwise jobvr must be lapack.RightEVNone.
+// For other values of jobvl and jobvr Dgeev will panic.
+//
+// wr and wi contain the real and imaginary parts, respectively, of the computed
+// eigenvalues. Complex conjugate pairs of eigenvalues appear consecutively with
+// the eigenvalue having the positive imaginary part first.
+// wr and wi must have length n, and Dgeev will panic otherwise.
+//
+// work must have length at least max(1,lwork) and lwork must be at least
+// max(1,4*n) if the left or right eigenvectors are computed, and at least
+// max(1,3*n) if no eigenvectors are computed, otherwise Dgeev will panic. For
+// good performance, lwork must generally be larger. On return, the optimal
+// value of lwork will be stored in work[0].
+//
+// If lwork == -1, instead of performing Dgeev, the function only calculates the
+// optimal value of lwork and stores it into work[0].
+//
+// On return, first is the index of the first valid eigenvalue. If first == 0,
+// all eigenvalues and eigenvectors have been computed. If first is positive,
+// Dgeev failed to compute all the eigenvalues, no eigenvectors have been
+// computed and wr[first:] and wi[first:] contain those eigenvalues which have
+// converged.
+func (impl Implementation) Dgeev(jobvl lapack.LeftEVJob, jobvr lapack.RightEVJob, n int, a []float64, lda int, wr, wi []float64, vl []float64, ldvl int, vr []float64, ldvr int, work []float64, lwork int) (first int) {
+	wantvl := jobvl == lapack.LeftEVCompute
+	wantvr := jobvr == lapack.RightEVCompute
+	var minwrk int
+	if wantvl || wantvr {
+		minwrk = max(1, 4*n)
+	} else {
+		minwrk = max(1, 3*n)
+	}
+	switch {
+	case jobvl != lapack.LeftEVCompute && jobvl != lapack.LeftEVNone:
+		panic(badLeftEVJob)
+	case jobvr != lapack.RightEVCompute && jobvr != lapack.RightEVNone:
+		panic(badRightEVJob)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldvl < 1 || (ldvl < n && wantvl):
+		panic(badLdVL)
+	case ldvr < 1 || (ldvr < n && wantvr):
+		panic(badLdVR)
+	case lwork < minwrk && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		// work[0] is always written, also for a workspace query with
+		// lwork == -1, so at least one element is required.
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		work[0] = 1
+		return 0
+	}
+
+	// Compute the optimal workspace size by querying the routines that are
+	// called below.
+	maxwrk := 2*n + n*impl.Ilaenv(1, "DGEHRD", " ", n, 1, n, 0)
+	if wantvl || wantvr {
+		maxwrk = max(maxwrk, 2*n+(n-1)*impl.Ilaenv(1, "DORGHR", " ", n, 1, n, -1))
+		impl.Dhseqr(lapack.EigenvaluesAndSchur, lapack.SchurOrig, n, 0, n-1,
+			a, lda, wr, wi, nil, n, work, -1)
+		maxwrk = max(maxwrk, max(n+1, n+int(work[0])))
+		side := lapack.EVLeft
+		if wantvr {
+			side = lapack.EVRight
+		}
+		impl.Dtrevc3(side, lapack.EVAllMulQ, nil, n, a, lda, vl, ldvl, vr, ldvr,
+			n, work, -1)
+		maxwrk = max(maxwrk, n+int(work[0]))
+		maxwrk = max(maxwrk, 4*n)
+	} else {
+		impl.Dhseqr(lapack.EigenvaluesOnly, lapack.SchurNone, n, 0, n-1,
+			a, lda, wr, wi, vr, ldvr, work, -1)
+		maxwrk = max(maxwrk, max(n+1, n+int(work[0])))
+	}
+	maxwrk = max(maxwrk, minwrk)
+
+	if lwork == -1 {
+		work[0] = float64(maxwrk)
+		return 0
+	}
+
+	switch {
+	case len(a) < (n-1)*lda+n:
+		panic(shortA)
+	case len(wr) != n:
+		panic(badLenWr)
+	case len(wi) != n:
+		panic(badLenWi)
+	case len(vl) < (n-1)*ldvl+n && wantvl:
+		panic(shortVL)
+	case len(vr) < (n-1)*ldvr+n && wantvr:
+		panic(shortVR)
+	}
+
+	// Get machine constants.
+	smlnum := math.Sqrt(dlamchS) / dlamchP
+	bignum := 1 / smlnum
+
+	// Scale A if max element outside range [smlnum,bignum].
+	anrm := impl.Dlange(lapack.MaxAbs, n, n, a, lda, nil)
+	var scalea bool
+	var cscale float64
+	if 0 < anrm && anrm < smlnum {
+		scalea = true
+		cscale = smlnum
+	} else if anrm > bignum {
+		scalea = true
+		cscale = bignum
+	}
+	if scalea {
+		impl.Dlascl(lapack.General, 0, 0, anrm, cscale, n, n, a, lda)
+	}
+
+	// Balance the matrix. The first n elements of work hold the balancing
+	// information until it is undone by Dgebak below.
+	workbal := work[:n]
+	ilo, ihi := impl.Dgebal(lapack.PermuteScale, n, a, lda, workbal)
+
+	// Reduce to upper Hessenberg form. The n-1 elements of work following
+	// workbal hold tau, the remaining workspace starts at iwrk.
+	iwrk := 2 * n
+	tau := work[n : iwrk-1]
+	impl.Dgehrd(n, ilo, ihi, a, lda, tau, work[iwrk:], lwork-iwrk)
+
+	var side lapack.EVSide
+	if wantvl {
+		side = lapack.EVLeft
+		// Copy Householder vectors to VL.
+		impl.Dlacpy(blas.Lower, n, n, a, lda, vl, ldvl)
+		// Generate orthogonal matrix in VL.
+		impl.Dorghr(n, ilo, ihi, vl, ldvl, tau, work[iwrk:], lwork-iwrk)
+		// Perform QR iteration, accumulating Schur vectors in VL.
+		// tau is not used after Dorghr, so its storage is reused as
+		// workspace.
+		iwrk = n
+		first = impl.Dhseqr(lapack.EigenvaluesAndSchur, lapack.SchurOrig, n, ilo, ihi,
+			a, lda, wr, wi, vl, ldvl, work[iwrk:], lwork-iwrk)
+		if wantvr {
+			// Want left and right eigenvectors.
+			// Copy Schur vectors to VR.
+			side = lapack.EVBoth
+			impl.Dlacpy(blas.All, n, n, vl, ldvl, vr, ldvr)
+		}
+	} else if wantvr {
+		side = lapack.EVRight
+		// Copy Householder vectors to VR.
+		impl.Dlacpy(blas.Lower, n, n, a, lda, vr, ldvr)
+		// Generate orthogonal matrix in VR.
+		impl.Dorghr(n, ilo, ihi, vr, ldvr, tau, work[iwrk:], lwork-iwrk)
+		// Perform QR iteration, accumulating Schur vectors in VR.
+		// tau is not used after Dorghr, so its storage is reused as
+		// workspace.
+		iwrk = n
+		first = impl.Dhseqr(lapack.EigenvaluesAndSchur, lapack.SchurOrig, n, ilo, ihi,
+			a, lda, wr, wi, vr, ldvr, work[iwrk:], lwork-iwrk)
+	} else {
+		// Compute eigenvalues only.
+		iwrk = n
+		first = impl.Dhseqr(lapack.EigenvaluesOnly, lapack.SchurNone, n, ilo, ihi,
+			a, lda, wr, wi, nil, 1, work[iwrk:], lwork-iwrk)
+	}
+
+	if first > 0 {
+		// Dhseqr failed to compute all eigenvalues, skip the computation
+		// of eigenvectors.
+		if scalea {
+			// Undo scaling.
+			impl.Dlascl(lapack.General, 0, 0, cscale, anrm, n-first, 1, wr[first:], 1)
+			impl.Dlascl(lapack.General, 0, 0, cscale, anrm, n-first, 1, wi[first:], 1)
+			impl.Dlascl(lapack.General, 0, 0, cscale, anrm, ilo, 1, wr, 1)
+			impl.Dlascl(lapack.General, 0, 0, cscale, anrm, ilo, 1, wi, 1)
+		}
+		work[0] = float64(maxwrk)
+		return first
+	}
+
+	if wantvl || wantvr {
+		// Compute left and/or right eigenvectors.
+		impl.Dtrevc3(side, lapack.EVAllMulQ, nil, n,
+			a, lda, vl, ldvl, vr, ldvr, n, work[iwrk:], lwork-iwrk)
+	}
+	bi := blas64.Implementation()
+	if wantvl {
+		// Undo balancing of left eigenvectors.
+		impl.Dgebak(lapack.PermuteScale, lapack.EVLeft, n, ilo, ihi, workbal, n, vl, ldvl)
+		// Normalize left eigenvectors and make largest component real.
+		for i, wii := range wi {
+			if wii < 0 {
+				// Second eigenvalue of a complex conjugate pair, its
+				// columns were handled together with the first one.
+				continue
+			}
+			if wii == 0 {
+				// Real eigenvalue, the eigenvector is the single
+				// column i.
+				scl := 1 / bi.Dnrm2(n, vl[i:], ldvl)
+				bi.Dscal(n, scl, vl[i:], ldvl)
+				continue
+			}
+			// Complex eigenvalue, columns i and i+1 hold the real and
+			// imaginary parts of the eigenvector.
+			scl := 1 / impl.Dlapy2(bi.Dnrm2(n, vl[i:], ldvl), bi.Dnrm2(n, vl[i+1:], ldvl))
+			bi.Dscal(n, scl, vl[i:], ldvl)
+			bi.Dscal(n, scl, vl[i+1:], ldvl)
+			// Find the component with the largest squared modulus and
+			// rotate the eigenvector so that its imaginary part
+			// vanishes.
+			for k := 0; k < n; k++ {
+				vi := vl[k*ldvl+i]
+				vi1 := vl[k*ldvl+i+1]
+				work[iwrk+k] = vi*vi + vi1*vi1
+			}
+			k := bi.Idamax(n, work[iwrk:iwrk+n], 1)
+			cs, sn, _ := impl.Dlartg(vl[k*ldvl+i], vl[k*ldvl+i+1])
+			bi.Drot(n, vl[i:], ldvl, vl[i+1:], ldvl, cs, sn)
+			vl[k*ldvl+i+1] = 0
+		}
+	}
+	if wantvr {
+		// Undo balancing of right eigenvectors.
+		impl.Dgebak(lapack.PermuteScale, lapack.EVRight, n, ilo, ihi, workbal, n, vr, ldvr)
+		// Normalize right eigenvectors and make largest component real.
+		for i, wii := range wi {
+			if wii < 0 {
+				// Second eigenvalue of a complex conjugate pair, its
+				// columns were handled together with the first one.
+				continue
+			}
+			if wii == 0 {
+				// Real eigenvalue, the eigenvector is the single
+				// column i.
+				scl := 1 / bi.Dnrm2(n, vr[i:], ldvr)
+				bi.Dscal(n, scl, vr[i:], ldvr)
+				continue
+			}
+			// Complex eigenvalue, columns i and i+1 hold the real and
+			// imaginary parts of the eigenvector.
+			scl := 1 / impl.Dlapy2(bi.Dnrm2(n, vr[i:], ldvr), bi.Dnrm2(n, vr[i+1:], ldvr))
+			bi.Dscal(n, scl, vr[i:], ldvr)
+			bi.Dscal(n, scl, vr[i+1:], ldvr)
+			// Find the component with the largest squared modulus and
+			// rotate the eigenvector so that its imaginary part
+			// vanishes.
+			for k := 0; k < n; k++ {
+				vi := vr[k*ldvr+i]
+				vi1 := vr[k*ldvr+i+1]
+				work[iwrk+k] = vi*vi + vi1*vi1
+			}
+			k := bi.Idamax(n, work[iwrk:iwrk+n], 1)
+			cs, sn, _ := impl.Dlartg(vr[k*ldvr+i], vr[k*ldvr+i+1])
+			bi.Drot(n, vr[i:], ldvr, vr[i+1:], ldvr, cs, sn)
+			vr[k*ldvr+i+1] = 0
+		}
+	}
+
+	if scalea {
+		// Undo scaling.
+		impl.Dlascl(lapack.General, 0, 0, cscale, anrm, n-first, 1, wr[first:], 1)
+		impl.Dlascl(lapack.General, 0, 0, cscale, anrm, n-first, 1, wi[first:], 1)
+	}
+
+	work[0] = float64(maxwrk)
+	return first
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgehd2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgehd2.go
@ -0,0 +1,97 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dgehd2 reduces a block of a general n×n matrix A to upper Hessenberg form H
+// by an orthogonal similarity transformation Q^T * A * Q = H, applying one
+// elementary reflector at a time.
+//
+// The matrix Q is represented as a product of (ihi-ilo) elementary
+// reflectors
+//  Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
+// Each H_i has the form
+//  H_i = I - tau[i] * v * v^T
+// where v is a real vector with v[0:i+1] = 0, v[i+1] = 1 and v[ihi+1:n] = 0.
+// v[i+2:ihi+1] is stored on exit in A[i+2:ihi+1,i].
+//
+// On entry, a holds the n×n general matrix to be reduced. On return, the upper
+// triangle and the first subdiagonal of A hold the upper Hessenberg matrix H.
+// The elements below the first subdiagonal, together with the slice tau,
+// represent the orthogonal matrix Q as a product of elementary reflectors.
+//
+// The contents of A are illustrated by the following example, with n = 7, ilo =
+// 1 and ihi = 5.
+// On entry,
+//  [ a   a   a   a   a   a   a ]
+//  [     a   a   a   a   a   a ]
+//  [     a   a   a   a   a   a ]
+//  [     a   a   a   a   a   a ]
+//  [     a   a   a   a   a   a ]
+//  [     a   a   a   a   a   a ]
+//  [                         a ]
+// on return,
+//  [ a   a   h   h   h   h   a ]
+//  [     a   h   h   h   h   a ]
+//  [     h   h   h   h   h   h ]
+//  [     v1  h   h   h   h   h ]
+//  [     v1  v2  h   h   h   h ]
+//  [     v1  v2  v3  h   h   h ]
+//  [                         a ]
+// where a denotes an element of the original matrix A, h denotes a
+// modified element of the upper Hessenberg matrix H, and vi denotes an
+// element of the vector defining H_i.
+//
+// ilo and ihi select the block of A that is reduced to upper Hessenberg form.
+// It must hold that 0 <= ilo <= ihi <= max(0, n-1), otherwise Dgehd2 will
+// panic.
+//
+// On return, tau holds the scalar factors of the elementary reflectors. Its
+// length must be equal to n-1, otherwise Dgehd2 will panic.
+//
+// work is temporary storage of length at least n, otherwise Dgehd2 will panic.
+//
+// Dgehd2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgehd2(n, ilo, ihi int, a []float64, lda int, tau, work []float64) {
+	if n < 0 {
+		panic(nLT0)
+	}
+	if ilo < 0 || max(0, n-1) < ilo {
+		panic(badIlo)
+	}
+	if ihi < min(ilo, n-1) || n <= ihi {
+		panic(badIhi)
+	}
+	if lda < max(1, n) {
+		panic(badLdA)
+	}
+
+	// Nothing to do for an empty matrix.
+	if n == 0 {
+		return
+	}
+
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+	if len(tau) != n-1 {
+		panic(badLenTau)
+	}
+	if len(work) < n {
+		panic(shortWork)
+	}
+
+	for col := ilo; col < ihi; col++ {
+		// sub is the index of the subdiagonal element A[col+1,col] and
+		// nrefl is the order of the reflector H_col.
+		sub := (col+1)*lda + col
+		nrefl := ihi - col
+
+		// Generate the elementary reflector H_col that annihilates
+		// A[col+2:ihi+1,col].
+		beta, t := impl.Dlarfg(nrefl, a[sub], a[min(col+2, n-1)*lda+col:], lda)
+		tau[col] = t
+
+		// The reflector vector needs an explicit leading 1 while it is
+		// being applied.
+		a[sub] = 1
+
+		// Apply H_col from the right to A[0:ihi+1,col+1:ihi+1].
+		impl.Dlarf(blas.Right, ihi+1, nrefl, a[sub:], lda, t, a[col+1:], lda, work)
+
+		// Apply H_col from the left to A[col+1:ihi+1,col+1:n].
+		impl.Dlarf(blas.Left, nrefl, n-col-1, a[sub:], lda, t, a[sub+1:], lda, work)
+
+		// Store the subdiagonal element of H.
+		a[sub] = beta
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgehrd.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgehrd.go
@ -0,0 +1,194 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dgehrd reduces a block of a real n×n general matrix A to upper Hessenberg
+// form H by an orthogonal similarity transformation Q^T * A * Q = H.
+//
+// The matrix Q is represented as a product of (ihi-ilo) elementary
+// reflectors
+//  Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
+// Each H_i has the form
+//  H_i = I - tau[i] * v * v^T
+// where v is a real vector with v[0:i+1] = 0, v[i+1] = 1 and v[ihi+1:n] = 0.
+// v[i+2:ihi+1] is stored on exit in A[i+2:ihi+1,i].
+//
+// On entry, a contains the n×n general matrix to be reduced. On return, the
+// upper triangle and the first subdiagonal of A will be overwritten with the
+// upper Hessenberg matrix H, and the elements below the first subdiagonal, with
+// the slice tau, represent the orthogonal matrix Q as a product of elementary
+// reflectors.
+//
+// The contents of a are illustrated by the following example, with n = 7, ilo =
+// 1 and ihi = 5.
+// On entry,
+//  [ a   a   a   a   a   a   a ]
+//  [     a   a   a   a   a   a ]
+//  [     a   a   a   a   a   a ]
+//  [     a   a   a   a   a   a ]
+//  [     a   a   a   a   a   a ]
+//  [     a   a   a   a   a   a ]
+//  [                         a ]
+// on return,
+//  [ a   a   h   h   h   h   a ]
+//  [     a   h   h   h   h   a ]
+//  [     h   h   h   h   h   h ]
+//  [     v1  h   h   h   h   h ]
+//  [     v1  v2  h   h   h   h ]
+//  [     v1  v2  v3  h   h   h ]
+//  [                         a ]
+// where a denotes an element of the original matrix A, h denotes a
+// modified element of the upper Hessenberg matrix H, and vi denotes an
+// element of the vector defining H_i.
+//
+// ilo and ihi determine the block of A that will be reduced to upper Hessenberg
+// form. It must hold that 0 <= ilo <= ihi < n if n > 0, and ilo == 0 and ihi ==
+// -1 if n == 0, otherwise Dgehrd will panic.
+//
+// On return, tau will contain the scalar factors of the elementary reflectors.
+// Elements tau[:ilo] and tau[ihi:] will be set to zero. tau must have length
+// equal to n-1 if n > 0, otherwise Dgehrd will panic.
+//
+// work must have length at least max(1,lwork) and lwork must be at least
+// max(1,n), otherwise Dgehrd will panic. On return, work[0] contains the
+// optimal value of lwork.
+//
+// If lwork == -1, instead of performing Dgehrd, only the optimal value of lwork
+// will be stored in work[0].
+//
+// Dgehrd is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgehrd(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) {
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case ilo < 0 || max(0, n-1) < ilo:
+		panic(badIlo)
+	case ihi < min(ilo, n-1) || n <= ihi:
+		panic(badIhi)
+	case lda < max(1, n):
+		panic(badLdA)
+	case lwork < max(1, n) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		// work[0] is always written, also for a workspace query with
+		// lwork == -1, so at least one element is required.
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		work[0] = 1
+		return
+	}
+
+	const (
+		nbmax = 64
+		ldt   = nbmax + 1
+		tsize = ldt * nbmax
+	)
+	// Compute the workspace requirements.
+	nb := min(nbmax, impl.Ilaenv(1, "DGEHRD", " ", n, ilo, ihi, -1))
+	lwkopt := n*nb + tsize
+	if lwork == -1 {
+		work[0] = float64(lwkopt)
+		return
+	}
+
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+	if len(tau) != n-1 {
+		panic(badLenTau)
+	}
+
+	// Set tau[:ilo] and tau[ihi:] to zero.
+	for i := 0; i < ilo; i++ {
+		tau[i] = 0
+	}
+	for i := ihi; i < n-1; i++ {
+		tau[i] = 0
+	}
+
+	// Quick return if possible.
+	nh := ihi - ilo + 1
+	if nh <= 1 {
+		work[0] = 1
+		return
+	}
+
+	// Determine the block size.
+	nbmin := 2
+	var nx int
+	if 1 < nb && nb < nh {
+		// Determine when to cross over from blocked to unblocked code
+		// (last block is always handled by unblocked code).
+		nx = max(nb, impl.Ilaenv(3, "DGEHRD", " ", n, ilo, ihi, -1))
+		if nx < nh {
+			// Determine if workspace is large enough for blocked code.
+			if lwork < n*nb+tsize {
+				// Not enough workspace to use optimal nb:
+				// determine the minimum value of nb, and reduce
+				// nb or force use of unblocked code.
+				nbmin = max(2, impl.Ilaenv(2, "DGEHRD", " ", n, ilo, ihi, -1))
+				if lwork >= n*nbmin+tsize {
+					nb = (lwork - tsize) / n
+				} else {
+					nb = 1
+				}
+			}
+		}
+	}
+	ldwork := nb // work is used as an n×nb matrix.
+
+	var i int
+	if nb < nbmin || nh <= nb {
+		// Use unblocked code below.
+		i = ilo
+	} else {
+		// Use blocked code.
+		bi := blas64.Implementation()
+		iwt := n * nb // Size of the matrix Y and index where the matrix T starts in work.
+		for i = ilo; i < ihi-nx; i += nb {
+			ib := min(nb, ihi-i)
+
+			// Reduce columns [i:i+ib] to Hessenberg form, returning the
+			// matrices V and T of the block reflector H = I - V*T*V^T
+			// which performs the reduction, and also the matrix Y = A*V*T.
+			impl.Dlahr2(ihi+1, i+1, ib, a[i:], lda, tau[i:], work[iwt:], ldt, work, ldwork)
+
+			// Apply the block reflector H to A[:ihi+1,i+ib:ihi+1] from the
+			// right, computing  A := A - Y * V^T. V[i+ib,i+ib-1] must be set
+			// to 1.
+			ei := a[(i+ib)*lda+i+ib-1]
+			a[(i+ib)*lda+i+ib-1] = 1
+			bi.Dgemm(blas.NoTrans, blas.Trans, ihi+1, ihi-i-ib+1, ib,
+				-1, work, ldwork,
+				a[(i+ib)*lda+i:], lda,
+				1, a[i+ib:], lda)
+			a[(i+ib)*lda+i+ib-1] = ei
+
+			// Apply the block reflector H to A[0:i+1,i+1:i+ib-1] from the
+			// right.
+			bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, i+1, ib-1,
+				1, a[(i+1)*lda+i:], lda, work, ldwork)
+			for j := 0; j <= ib-2; j++ {
+				bi.Daxpy(i+1, -1, work[j:], ldwork, a[i+j+1:], lda)
+			}
+
+			// Apply the block reflector H to A[i+1:ihi+1,i+ib:n] from the
+			// left.
+			impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise,
+				ihi-i, n-i-ib, ib,
+				a[(i+1)*lda+i:], lda, work[iwt:], ldt, a[(i+1)*lda+i+ib:], lda, work, ldwork)
+		}
+	}
+	// Use unblocked code to reduce the rest of the matrix.
+	impl.Dgehd2(n, i, ihi, a, lda, tau, work)
+	work[0] = float64(lwkopt)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgelq2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgelq2.go
@ -0,0 +1,65 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dgelq2 computes the LQ factorization of the m×n matrix A using an unblocked
+// algorithm.
+//
+// In an LQ factorization, L is a lower triangular m×n matrix, and Q is an n×n
+// orthonormal matrix.
+//
+// On return, a holds the information needed to construct L and Q. The lower
+// triangle of a contains the matrix L, and the elements above the diagonal
+// contain the elementary reflectors. tau receives the scalar factors of the
+// reflectors. tau must have length at least k = min(m,n), otherwise Dgelq2 will
+// panic.
+//
+// See Dgeqr2 for a description of the elementary reflectors and orthonormal
+// matrix Q. Q is constructed as a product of these elementary reflectors,
+// Q = H_{k-1} * ... * H_1 * H_0.
+//
+// work is temporary storage of length at least m, otherwise Dgelq2 will panic.
+//
+// Dgelq2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgelq2(m, n int, a []float64, lda int, tau, work []float64) {
+	if m < 0 {
+		panic(mLT0)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if lda < max(1, n) {
+		panic(badLdA)
+	}
+
+	// Nothing to do if the matrix has no rows or no columns.
+	k := min(m, n)
+	if k == 0 {
+		return
+	}
+
+	if len(a) < (m-1)*lda+n {
+		panic(shortA)
+	}
+	if len(tau) < k {
+		panic(shortTau)
+	}
+	if len(work) < m {
+		panic(shortWork)
+	}
+
+	for i := 0; i < k; i++ {
+		// diag is the index of the diagonal element A[i,i].
+		diag := i*lda + i
+
+		// Generate the elementary reflector H_i from row i.
+		beta, t := impl.Dlarfg(n-i, a[diag], a[i*lda+min(i+1, n-1):], 1)
+		a[diag], tau[i] = beta, t
+		if i >= m-1 {
+			// There are no rows below row i to update.
+			continue
+		}
+
+		// Apply H_i from the right to A[i+1:m,i:n]. The reflector vector
+		// needs an explicit leading 1 while it is being applied.
+		a[diag] = 1
+		impl.Dlarf(blas.Right, m-i-1, n-i, a[diag:], 1, t, a[diag+lda:], lda, work)
+		a[diag] = beta
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgelqf.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgelqf.go
@ -0,0 +1,97 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dgelqf computes the LQ factorization of the m×n matrix A using a blocked
+// algorithm. The contents of a and tau on entry and on return are the same as
+// for Dgelq2, see its documentation for a description.
+//
+// tau must have length at least min(m,n), otherwise Dgelqf will panic.
+//
+// work is temporary storage and lwork specifies its usable length. lwork must
+// be at least max(1,m) and work must have length at least max(1,lwork),
+// otherwise Dgelqf will panic. The block size used by the factorization is
+// limited by the available workspace.
+//
+// If lwork == -1, instead of performing Dgelqf, only the optimal workspace
+// length will be stored in work[0].
+func (impl Implementation) Dgelqf(m, n int, a []float64, lda int, tau, work []float64, lwork int) {
+	if m < 0 {
+		panic(mLT0)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if lda < max(1, n) {
+		panic(badLdA)
+	}
+	if lwork < max(1, m) && lwork != -1 {
+		panic(badLWork)
+	}
+	if len(work) < max(1, lwork) {
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	k := min(m, n)
+	if k == 0 {
+		work[0] = 1
+		return
+	}
+
+	// Workspace query.
+	nb := impl.Ilaenv(1, "DGELQF", " ", m, n, -1, -1)
+	if lwork == -1 {
+		work[0] = float64(m * nb)
+		return
+	}
+
+	if len(a) < (m-1)*lda+n {
+		panic(shortA)
+	}
+	if len(tau) < k {
+		panic(shortTau)
+	}
+
+	// Choose the block size nb, the minimum block size nbmin and the
+	// crossover point nx from the machine parameters and from the amount of
+	// workspace that has been provided.
+	var (
+		nbmin = 2
+		nx    int
+		wsize = m
+	)
+	if 1 < nb && nb < k {
+		nx = max(0, impl.Ilaenv(3, "DGELQF", " ", m, n, -1, -1))
+		if nx < k {
+			wsize = m * nb
+			if lwork < wsize {
+				// Not enough workspace for the optimal nb, shrink it.
+				nb = lwork / m
+				nbmin = max(2, impl.Ilaenv(2, "DGELQF", " ", m, n, -1, -1))
+			}
+		}
+	}
+	ldwork := nb
+
+	// done counts the rows that have been factorized so far.
+	done := 0
+	if useBlocked := nbmin <= nb && nb < k && nx < k; useBlocked {
+		for ; done < k-nx; done += nb {
+			ib := min(k-done, nb)
+			panel := a[done*lda+done:]
+
+			// Factorize the current block of ib rows.
+			impl.Dgelq2(ib, n-done, panel, lda, tau[done:], work)
+			if done+ib >= m {
+				// No rows below the block are left to update.
+				continue
+			}
+
+			// Form the triangular factor of the block reflector and
+			// apply the block reflector to the rows below the block
+			// from the right.
+			impl.Dlarft(lapack.Forward, lapack.RowWise, n-done, ib,
+				panel, lda,
+				tau[done:],
+				work, ldwork)
+			impl.Dlarfb(blas.Right, blas.NoTrans, lapack.Forward, lapack.RowWise,
+				m-done-ib, n-done, ib,
+				panel, lda,
+				work, ldwork,
+				a[(done+ib)*lda+done:], lda,
+				work[ib*ldwork:], ldwork)
+		}
+	}
+
+	// Factorize the remaining rows with the unblocked algorithm.
+	if done < k {
+		impl.Dgelq2(m-done, n-done, a[done*lda+done:], lda, tau[done:], work)
+	}
+	work[0] = float64(wsize)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgels.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgels.go
@ -0,0 +1,219 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dgels finds a minimum-norm solution based on the matrices A and B using the
+// QR or LQ factorization. Dgels returns false if the matrix
+// A is singular, and true if this solution was successfully found.
+//
+// The minimization problem solved depends on the input parameters.
+//
+//  1. If m >= n and trans == blas.NoTrans, Dgels finds X such that || A*X - B||_2
+//     is minimized.
+//  2. If m < n and trans == blas.NoTrans, Dgels finds the minimum norm solution of
+//     A * X = B.
+//  3. If m >= n and trans == blas.Trans, Dgels finds the minimum norm solution of
+//     A^T * X = B.
+//  4. If m < n and trans == blas.Trans, Dgels finds X such that || A^T*X - B||_2
+//     is minimized.
+// Note that the least-squares solutions (cases 1 and 4) perform the minimization
+// per column of B. This is not the same as finding the minimum-norm matrix.
+//
+// trans must be blas.NoTrans, blas.Trans or blas.ConjTrans, otherwise Dgels
+// will panic. blas.ConjTrans is treated in the same way as blas.Trans.
+//
+// The matrix A is a general matrix of size m×n and is modified during this call.
+// The input matrix B is of size max(m,n)×nrhs, and serves two purposes. On entry,
+// the elements of b specify the input matrix B. B has size m×nrhs if
+// trans == blas.NoTrans, and n×nrhs if trans == blas.Trans. On exit, the
+// leading submatrix of b contains the solution vectors X. If trans == blas.NoTrans,
+// this submatrix is of size n×nrhs, and of size m×nrhs otherwise.
+//
+// work is temporary storage, and lwork specifies the usable memory length.
+// At minimum, lwork >= max(1, mn + max(mn,nrhs)) where mn = min(m,n), and work
+// must have length at least max(1,lwork), otherwise this function will panic.
+// A longer work will enable blocked algorithms to be called.
+// In the special case that lwork == -1, work[0] will be set to the optimal working
+// length.
+func (impl Implementation) Dgels(trans blas.Transpose, m, n, nrhs int, a []float64, lda int, b []float64, ldb int, work []float64, lwork int) bool {
+	mn := min(m, n)
+	minwrk := mn + max(mn, nrhs)
+	switch {
+	case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case nrhs < 0:
+		panic(nrhsLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldb < max(1, nrhs):
+		panic(badLdB)
+	case lwork < max(1, minwrk) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	if mn == 0 || nrhs == 0 {
+		impl.Dlaset(blas.All, max(m, n), nrhs, 0, 0, b, ldb)
+		work[0] = 1
+		return true
+	}
+
+	// Find optimal block size.
+	var nb int
+	if m >= n {
+		nb = impl.Ilaenv(1, "DGEQRF", " ", m, n, -1, -1)
+		if trans != blas.NoTrans {
+			nb = max(nb, impl.Ilaenv(1, "DORMQR", "LN", m, nrhs, n, -1))
+		} else {
+			nb = max(nb, impl.Ilaenv(1, "DORMQR", "LT", m, nrhs, n, -1))
+		}
+	} else {
+		nb = impl.Ilaenv(1, "DGELQF", " ", m, n, -1, -1)
+		if trans != blas.NoTrans {
+			nb = max(nb, impl.Ilaenv(1, "DORMLQ", "LT", n, nrhs, m, -1))
+		} else {
+			nb = max(nb, impl.Ilaenv(1, "DORMLQ", "LN", n, nrhs, m, -1))
+		}
+	}
+	wsize := max(1, mn+max(mn, nrhs)*nb)
+	work[0] = float64(wsize)
+
+	if lwork == -1 {
+		return true
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(b) < (max(m, n)-1)*ldb+nrhs:
+		panic(shortB)
+	}
+
+	// Scale the input matrices if they contain extreme values.
+	// iascl and ibscl record how A and B were scaled (0 for no scaling, 1 for
+	// scaling up to smlnum, 2 for scaling down to bignum) so that the
+	// scaling can be undone on the solution below.
+	smlnum := dlamchS / dlamchP
+	bignum := 1 / smlnum
+	anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil)
+	var iascl int
+	if anrm > 0 && anrm < smlnum {
+		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
+		iascl = 1
+	} else if anrm > bignum {
+		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
+		iascl = 2
+	} else if anrm == 0 {
+		// Matrix is all zeros.
+		impl.Dlaset(blas.All, max(m, n), nrhs, 0, 0, b, ldb)
+		return true
+	}
+	brow := m
+	if trans != blas.NoTrans {
+		brow = n
+	}
+	bnrm := impl.Dlange(lapack.MaxAbs, brow, nrhs, b, ldb, nil)
+	ibscl := 0
+	if bnrm > 0 && bnrm < smlnum {
+		impl.Dlascl(lapack.General, 0, 0, bnrm, smlnum, brow, nrhs, b, ldb)
+		ibscl = 1
+	} else if bnrm > bignum {
+		impl.Dlascl(lapack.General, 0, 0, bnrm, bignum, brow, nrhs, b, ldb)
+		ibscl = 2
+	}
+
+	// Solve the minimization problem using a QR or an LQ decomposition.
+	// The first mn elements of work hold the scalar factors of the
+	// elementary reflectors, the rest of work is used as workspace.
+	// scllen is the number of rows of the solution stored in b.
+	var scllen int
+	if m >= n {
+		impl.Dgeqrf(m, n, a, lda, work, work[mn:], lwork-mn)
+		if trans == blas.NoTrans {
+			impl.Dormqr(blas.Left, blas.Trans, m, nrhs, n,
+				a, lda,
+				work[:n],
+				b, ldb,
+				work[mn:], lwork-mn)
+			ok := impl.Dtrtrs(blas.Upper, blas.NoTrans, blas.NonUnit, n, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			scllen = n
+		} else {
+			ok := impl.Dtrtrs(blas.Upper, blas.Trans, blas.NonUnit, n, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			for i := n; i < m; i++ {
+				for j := 0; j < nrhs; j++ {
+					b[i*ldb+j] = 0
+				}
+			}
+			impl.Dormqr(blas.Left, blas.NoTrans, m, nrhs, n,
+				a, lda,
+				work[:n],
+				b, ldb,
+				work[mn:], lwork-mn)
+			scllen = m
+		}
+	} else {
+		impl.Dgelqf(m, n, a, lda, work, work[mn:], lwork-mn)
+		if trans == blas.NoTrans {
+			ok := impl.Dtrtrs(blas.Lower, blas.NoTrans, blas.NonUnit,
+				m, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			for i := m; i < n; i++ {
+				for j := 0; j < nrhs; j++ {
+					b[i*ldb+j] = 0
+				}
+			}
+			impl.Dormlq(blas.Left, blas.Trans, n, nrhs, m,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			scllen = n
+		} else {
+			impl.Dormlq(blas.Left, blas.NoTrans, n, nrhs, m,
+				a, lda,
+				work,
+				b, ldb,
+				work[mn:], lwork-mn)
+			ok := impl.Dtrtrs(blas.Lower, blas.Trans, blas.NonUnit,
+				m, nrhs,
+				a, lda,
+				b, ldb)
+			if !ok {
+				return false
+			}
+			// The solution occupies the leading m rows of b. Without
+			// this the rescaling below would act on zero rows.
+			scllen = m
+		}
+	}
+
+	// Adjust answer vector based on scaling.
+	if iascl == 1 {
+		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, scllen, nrhs, b, ldb)
+	}
+	if iascl == 2 {
+		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, scllen, nrhs, b, ldb)
+	}
+	if ibscl == 1 {
+		impl.Dlascl(lapack.General, 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb)
+	}
+	if ibscl == 2 {
+		impl.Dlascl(lapack.General, 0, 0, bignum, bnrm, scllen, nrhs, b, ldb)
+	}
+
+	// work[:mn] was overwritten with the reflector factors, so store the
+	// optimal workspace size again.
+	work[0] = float64(wsize)
+	return true
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgeql2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeql2.go
@ -0,0 +1,61 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dgeql2 computes the QL factorization of the m×n matrix A. That is, Dgeql2
+// computes Q and L such that
+//  A = Q * L
+// where Q is an m×m orthonormal matrix and L is a lower trapezoidal matrix.
+//
+// Q is represented as a product of elementary reflectors,
+//  Q = H_{k-1} * ... * H_1 * H_0
+// where k = min(m,n) and each H_i has the form
+//  H_i = I - tau[i] * v_i * v_i^T
+// Vector v_i has v[m-k+i+1:m] = 0, v[m-k+i] = 1, and v[:m-k+i+1] is stored on
+// exit in A[0:m-k+i-1, n-k+i].
+//
+// tau must have length at least min(m,n), and Dgeql2 will panic otherwise.
+//
+// work is temporary memory storage and must have length at least n.
+//
+// Dgeql2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgeql2(m, n int, a []float64, lda int, tau, work []float64) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	k := min(m, n)
+	if k == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	case len(work) < n:
+		panic(shortWork)
+	}
+
+	var aii float64
+	for i := k - 1; i >= 0; i-- {
+		// Generate elementary reflector H_i to annihilate A[0:m-k+i-1, n-k+i].
+		aii, tau[i] = impl.Dlarfg(m-k+i+1, a[(m-k+i)*lda+n-k+i], a[n-k+i:], lda)
+
+		// Apply H_i to A[0:m-k+i, 0:n-k+i-1] from the left.
+		a[(m-k+i)*lda+n-k+i] = 1
+		impl.Dlarf(blas.Left, m-k+i+1, n-k+i, a[n-k+i:], lda, tau[i], a, lda, work)
+		a[(m-k+i)*lda+n-k+i] = aii
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqp3.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqp3.go
@ -0,0 +1,186 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dgeqp3 computes a QR factorization with column pivoting of the
+// m×n matrix A: A*P = Q*R using Level 3 BLAS.
+//
+// The matrix Q is represented as a product of elementary reflectors
+//  Q = H_0 H_1 . . . H_{k-1}, where k = min(m,n).
+// Each H_i has the form
+//  H_i = I - tau * v * v^T
+// where tau is a real scalar and v is a real vector with v[0:i] = 0 and
+// v[i] = 1; v[i+1:m] is stored on exit in A[i+1:m, i], and tau in tau[i].
+//
+// jpvt specifies a column pivot to be applied to A. If
+// jpvt[j] is at least zero, the jth column of A is permuted
+// to the front of A*P (a leading column), if jpvt[j] is -1
+// the jth column of A is a free column. If jpvt[j] < -1 or
+// jpvt[j] >= n, Dgeqp3 will panic. On return, jpvt holds the
+// permutation that was applied; the jth column of A*P was the
+// jpvt[j] column of A. jpvt must have length n or Dgeqp3 will panic.
+//
+// tau holds the scalar factors of the elementary reflectors.
+// It must have length at least min(m, n), otherwise Dgeqp3 will panic.
+//
+// work must have length at least max(1,lwork), and lwork must be at least
+// 3*n+1 (or at least 1 if min(m,n) == 0), otherwise Dgeqp3 will panic. For
+// optimal performance lwork must be at least 2*n+(n+1)*nb, where nb is the
+// optimal blocksize. On return, work[0] will contain the optimal value of
+// lwork.
+//
+// If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork
+// will be stored in work[0].
+//
+// Dgeqp3 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) {
+	// Selectors passed as the first argument to Ilaenv.
+	const (
+		inb    = 1
+		inbmin = 2
+		ixover = 3
+	)
+
+	minmn := min(m, n)
+	// iws is the minimum accepted lwork; it is later raised to the
+	// workspace size reported back in work[0].
+	iws := 3*n + 1
+	if minmn == 0 {
+		iws = 1
+	}
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case lwork < iws && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	if minmn == 0 {
+		work[0] = 1
+		return
+	}
+
+	nb := impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1)
+	if lwork == -1 {
+		// Workspace query: report the size and do no work.
+		work[0] = float64(2*n + (n+1)*nb)
+		return
+	}
+
+	// The slice lengths are only validated once it is known that the
+	// factorization will actually be performed.
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(jpvt) != n:
+		panic(badLenJpvt)
+	case len(tau) < minmn:
+		panic(shortTau)
+	}
+
+	// Every entry of jpvt must be -1 (free) or a value in [0, n).
+	for _, v := range jpvt {
+		if v < -1 || n <= v {
+			panic(badJpvt)
+		}
+	}
+
+	bi := blas64.Implementation()
+
+	// Move initial columns up front.
+	// nfxd counts the columns that were marked as leading (jpvt[j] != -1).
+	var nfxd int
+	for j := 0; j < n; j++ {
+		if jpvt[j] == -1 {
+			// Free column: it stays where it is for now.
+			jpvt[j] = j
+			continue
+		}
+		if j != nfxd {
+			// Swap column j into position nfxd and record the exchange.
+			bi.Dswap(m, a[j:], lda, a[nfxd:], lda)
+			jpvt[j], jpvt[nfxd] = jpvt[nfxd], j
+		} else {
+			jpvt[j] = j
+		}
+		nfxd++
+	}
+
+	// Factorize nfxd columns.
+	//
+	// Compute the QR factorization of nfxd columns and update remaining columns.
+	if nfxd > 0 {
+		na := min(m, nfxd)
+		impl.Dgeqrf(m, na, a, lda, tau, work, lwork)
+		// Dgeqrf and Dormqr leave their workspace size in work[0].
+		iws = max(iws, int(work[0]))
+		if na < n {
+			impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda,
+				work, lwork)
+			iws = max(iws, int(work[0]))
+		}
+	}
+
+	// If the fixed columns already account for all min(m,n) reflectors there
+	// are no free columns left to factorize.
+	if nfxd >= minmn {
+		work[0] = float64(iws)
+		return
+	}
+
+	// Factorize free columns.
+	// sm, sn and sminmn are the dimensions of the trailing submatrix that
+	// remains after the nfxd leading columns.
+	sm := m - nfxd
+	sn := n - nfxd
+	sminmn := minmn - nfxd
+
+	// Determine the block size.
+	nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1)
+	nbmin := 2
+	nx := 0
+
+	if 1 < nb && nb < sminmn {
+		// Determine when to cross over from blocked to unblocked code.
+		nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1))
+
+		if nx < sminmn {
+			// Determine if workspace is large enough for blocked code.
+			minws := 2*sn + (sn+1)*nb
+			iws = max(iws, minws)
+			if lwork < minws {
+				// Not enough workspace to use optimal nb. Reduce
+				// nb and determine the minimum value of nb.
+				nb = (lwork - 2*sn) / (sn + 1)
+				nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1))
+			}
+		}
+	}
+
+	// Initialize partial column norms.
+	// For each free column j, work[j] and work[n+j] both start as the
+	// 2-norm of the column restricted to rows nfxd:m.
+	for j := nfxd; j < n; j++ {
+		work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda)
+		work[n+j] = work[j]
+	}
+	// j is the index of the next column to be factorized.
+	j := nfxd
+	if nbmin <= nb && nb < sminmn && nx < sminmn {
+		// Use blocked code initially.
+
+		// Compute factorization.
+		// fjb is the number of columns Dlaqps actually factorized in the
+		// current step; j advances by that amount.
+		var fjb int
+		for topbmn := minmn - nx; j < topbmn; j += fjb {
+			jb := min(nb, topbmn-j)
+
+			// Factorize jb columns among columns j:n.
+			// work[j:n] and work[j+n:2*n] carry the column norms, while
+			// the part of work from 2*n onwards is scratch space.
+			fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:],
+				work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb)
+		}
+	}
+
+	// Use unblocked code to factor the last or only block.
+	if j < minmn {
+		impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:],
+			work[j:n], work[j+n:2*n], work[2*n:])
+	}
+
+	work[0] = float64(iws)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqr2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqr2.go
@ -0,0 +1,76 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dgeqr2 computes a QR factorization of the m×n matrix A,
+//  A = Q * R
+// where Q is an m×m orthonormal matrix and R is an m×n upper triangular
+// matrix.
+//
+// On return, a is overwritten with the data needed to build Q and R: the
+// upper triangle of a holds R, and the elements strictly below the diagonal
+// hold the elementary reflectors. tau receives the scalar factors of the
+// reflectors and must have length at least min(m,n), otherwise Dgeqr2 will
+// panic.
+//
+// The vector v of the ith elementary reflector is given by
+//  v[j] = 0           j < i
+//  v[j] = 1           j == i
+//  v[j] = a[j*lda+i]  j > i
+// and the reflector itself is H_i = I - tau[i] * v * v^T.
+//
+// Q is the product of these reflectors,
+//  Q = H_0 * H_1 * ... * H_{k-1}
+// where k = min(m,n).
+//
+// work is used as temporary storage and must have length at least n,
+// otherwise Dgeqr2 will panic.
+//
+// Dgeqr2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgeqr2(m, n int, a []float64, lda int, tau, work []float64) {
+	// TODO(btracey): The elimination proceeds one column of a at a time.
+	// Re-arranging it could probably make better use of the row-major
+	// storage.
+
+	if m < 0 {
+		panic(mLT0)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if lda < max(1, n) {
+		panic(badLdA)
+	}
+	if len(work) < n {
+		panic(shortWork)
+	}
+
+	// Nothing to factorize for an empty matrix.
+	nrefl := min(m, n)
+	if nrefl == 0 {
+		return
+	}
+
+	if len(a) < (m-1)*lda+n {
+		panic(shortA)
+	}
+	if len(tau) < nrefl {
+		panic(shortTau)
+	}
+
+	for i := 0; i < nrefl; i++ {
+		diag := i*lda + i
+		// The reflector tail starts in the row below the diagonal. The
+		// row index is clamped to m-1 so that the slice expression stays
+		// in range when i is the last row.
+		tail := min(i+1, m-1)*lda + i
+
+		// Build H_i from column i, rows i through m-1.
+		a[diag], tau[i] = impl.Dlarfg(m-i, a[diag], a[tail:], lda)
+		if i >= n-1 {
+			// There are no columns to the right to update.
+			continue
+		}
+
+		// Apply H_i from the left to the trailing columns. The diagonal
+		// is temporarily set to 1 so that column i holds the complete
+		// reflector vector, and is restored afterwards.
+		saved := a[diag]
+		a[diag] = 1
+		impl.Dlarf(blas.Left, m-i, n-i-1,
+			a[diag:], lda,
+			tau[i],
+			a[diag+1:], lda,
+			work)
+		a[diag] = saved
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqrf.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgeqrf.go
@ -0,0 +1,108 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dgeqrf computes the QR factorization of the m×n matrix A using a blocked
+// algorithm. See the documentation for Dgeqr2 for a description of the
+// parameters at entry and exit.
+//
+// work is temporary storage, and lwork specifies the usable memory length.
+// The length of work must be at least max(1, lwork) and lwork must be -1
+// or at least max(1, n), otherwise this function will panic.
+// Dgeqrf is a blocked QR factorization, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Dgeqrf,
+// the optimal work length will be stored into work[0].
+// On return from a factorization, work[0] holds n if only unblocked code was
+// eligible and n*nb otherwise, where nb is the block size returned by Ilaenv.
+//
+// tau must have length at least min(m,n), and this function will panic otherwise.
+func (impl Implementation) Dgeqrf(m, n int, a []float64, lda int, tau, work []float64, lwork int) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case lwork < max(1, n) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	k := min(m, n)
+	if k == 0 {
+		work[0] = 1
+		return
+	}
+
+	// nb is the optimal blocksize, i.e. the number of columns transformed at a time.
+	nb := impl.Ilaenv(1, "DGEQRF", " ", m, n, -1, -1)
+	if lwork == -1 {
+		// Workspace query: report the size and do no work.
+		work[0] = float64(n * nb)
+		return
+	}
+
+	// a and tau are only validated once it is known that the factorization
+	// will actually be performed.
+	if len(a) < (m-1)*lda+n {
+		panic(shortA)
+	}
+	if len(tau) < k {
+		panic(shortTau)
+	}
+
+	nbmin := 2 // Minimal block size.
+	var nx int // Crossover point; stays 0 unless set in the if block below.
+	iws := n   // Workspace size reported in work[0] on return.
+	// Only consider blocked if the suggested block size is > 1 and the
+	// number of rows or columns is sufficiently large.
+	if 1 < nb && nb < k {
+		// nx is the block size at which the code switches from blocked
+		// to unblocked.
+		nx = max(0, impl.Ilaenv(3, "DGEQRF", " ", m, n, -1, -1))
+		if k > nx {
+			iws = n * nb
+			if lwork < iws {
+				// Not enough workspace to use the optimal block
+				// size. Get the minimum block size instead.
+				nb = lwork / n
+				nbmin = max(2, impl.Ilaenv(2, "DGEQRF", " ", m, n, -1, -1))
+			}
+		}
+	}
+
+	// Compute QR using a blocked algorithm.
+	// i is declared outside the loop because the unblocked clean-up below
+	// continues from wherever the blocked loop stopped (0 if it never ran).
+	var i int
+	if nbmin <= nb && nb < k && nx < k {
+		ldwork := nb
+		for i = 0; i < k-nx; i += nb {
+			ib := min(k-i, nb)
+			// Compute the QR factorization of the current block.
+			impl.Dgeqr2(m-i, ib, a[i*lda+i:], lda, tau[i:], work)
+			if i+ib < n {
+				// Form the triangular factor of the block reflector and apply H^T
+				// In Dlarft, work becomes the T matrix.
+				impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib,
+					a[i*lda+i:], lda,
+					tau[i:],
+					work, ldwork)
+				// The first ib*ldwork elements of work hold T; the rest
+				// is handed to Dlarfb as its scratch space.
+				impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise,
+					m-i, n-i-ib, ib,
+					a[i*lda+i:], lda,
+					work, ldwork,
+					a[i*lda+i+ib:], lda,
+					work[ib*ldwork:], ldwork)
+			}
+		}
+	}
+	// Call unblocked code on the remaining columns.
+	if i < k {
+		impl.Dgeqr2(m-i, n-i, a[i*lda+i:], lda, tau[i:], work)
+	}
+	work[0] = float64(iws)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgerq2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgerq2.go
@ -0,0 +1,68 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dgerq2 computes an RQ factorization of the m×n matrix A,
+//  A = R * Q.
+// On exit, if m <= n, the upper triangle of the subarray
+// A[0:m, n-m:n] contains the m×m upper triangular matrix R.
+// If m >= n, the elements on and above the (m-n)-th subdiagonal
+// contain the m×n upper trapezoidal matrix R.
+// The remaining elements, with tau, represent the
+// orthogonal matrix Q as a product of min(m,n) elementary
+// reflectors.
+//
+// The matrix Q is represented as a product of elementary reflectors
+//  Q = H_0 H_1 . . . H_{min(m,n)-1}.
+// Each H(i) has the form
+//  H_i = I - tau_i * v * v^T
+// where v is a vector with v[0:n-k+i-1] stored in A[m-k+i, 0:n-k+i-1],
+// v[n-k+i:n] = 0 and v[n-k+i] = 1.
+//
+// tau must have length min(m,n) and work must have length m, otherwise
+// Dgerq2 will panic.
+//
+// Dgerq2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgerq2(m, n int, a []float64, lda int, tau, work []float64) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case len(work) < m:
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	k := min(m, n)
+	if k == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	}
+
+	for i := k - 1; i >= 0; i-- {
+		// Generate elementary reflector H[i] to annihilate
+		// A[m-k+i, 0:n-k+i-1].
+		mki := m - k + i
+		nki := n - k + i
+		var aii float64
+		aii, tau[i] = impl.Dlarfg(nki+1, a[mki*lda+nki], a[mki*lda:], 1)
+
+		// Apply H[i] to A[0:m-k+i-1, 0:n-k+i] from the right.
+		a[mki*lda+nki] = 1
+		impl.Dlarf(blas.Right, mki, nki+1, a[mki*lda:], 1, tau[i], a, lda, work)
+		a[mki*lda+nki] = aii
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgerqf.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgerqf.go
@ -0,0 +1,129 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dgerqf computes an RQ factorization of the m×n matrix A,
+//  A = R * Q.
+// On exit, if m <= n, the upper triangle of the subarray
+// A[0:m, n-m:n] contains the m×m upper triangular matrix R.
+// If m >= n, the elements on and above the (m-n)-th subdiagonal
+// contain the m×n upper trapezoidal matrix R.
+// The remaining elements, with tau, represent the
+// orthogonal matrix Q as a product of min(m,n) elementary
+// reflectors.
+//
+// With k = min(m,n), the matrix Q is represented as a product of
+// elementary reflectors
+//  Q = H_0 H_1 . . . H_{k-1}.
+// Each H_i has the form
+//  H_i = I - tau[i] * v * v^T
+// where v is a vector with v[n-k+i+1:n] = 0 and v[n-k+i] = 1, and
+// v[0:n-k+i] is stored on exit in A[m-k+i, 0:n-k+i].
+//
+// tau must have length min(m,n), work must have length at least
+// max(1, lwork), and lwork must be -1 or at least max(1, m), otherwise
+// Dgerqf will panic.
+// On exit, work[0] will contain the optimal length for work.
+//
+// If lwork == -1, the factorization is not performed and only the optimal
+// work length is stored in work[0].
+//
+// Dgerqf is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dgerqf(m, n int, a []float64, lda int, tau, work []float64, lwork int) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case lwork < max(1, m) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	k := min(m, n)
+	if k == 0 {
+		work[0] = 1
+		return
+	}
+
+	// nb is the block size suggested by Ilaenv.
+	nb := impl.Ilaenv(1, "DGERQF", " ", m, n, -1, -1)
+	if lwork == -1 {
+		// Workspace query: report the size and do no work.
+		work[0] = float64(m * nb)
+		return
+	}
+
+	// a and tau are only validated once it is known that the factorization
+	// will actually be performed. Note that tau must have length exactly k.
+	if len(a) < (m-1)*lda+n {
+		panic(shortA)
+	}
+	if len(tau) != k {
+		panic(badLenTau)
+	}
+
+	nbmin := 2 // Minimal block size.
+	nx := 1    // Crossover point between blocked and unblocked code.
+	iws := m   // Workspace size reported in work[0] on return.
+	var ldwork int
+	if 1 < nb && nb < k {
+		// Determine when to cross over from blocked to unblocked code.
+		nx = max(0, impl.Ilaenv(3, "DGERQF", " ", m, n, -1, -1))
+		if nx < k {
+			// Determine whether workspace is large enough for blocked code.
+			iws = m * nb
+			if lwork < iws {
+				// Not enough workspace to use optimal nb. Reduce
+				// nb and determine the minimum value of nb.
+				nb = lwork / m
+				nbmin = max(2, impl.Ilaenv(2, "DGERQF", " ", m, n, -1, -1))
+			}
+			ldwork = nb
+		}
+	}
+
+	// mu and nu are the dimensions of the leading block that is left for
+	// the unblocked code after the blocked part has run.
+	var mu, nu int
+	if nbmin <= nb && nb < k && nx < k {
+		// Use blocked code initially.
+		// The last kk rows are handled by the block method.
+		ki := ((k - nx - 1) / nb) * nb
+		kk := min(k, ki+nb)
+
+		// i is declared outside the loop because its value after the loop
+		// is needed to compute mu and nu.
+		var i int
+		for i = k - kk + ki; i >= k-kk; i -= nb {
+			ib := min(k-i, nb)
+
+			// Compute the RQ factorization of the current block
+			// A[m-k+i:m-k+i+ib-1, 0:n-k+i+ib-1].
+			impl.Dgerq2(ib, n-k+i+ib, a[(m-k+i)*lda:], lda, tau[i:], work)
+			if m-k+i > 0 {
+				// Form the triangular factor of the block reflector
+				// H = H_{i+ib-1} . . . H_{i+1} H_i.
+				impl.Dlarft(lapack.Backward, lapack.RowWise,
+					n-k+i+ib, ib, a[(m-k+i)*lda:], lda, tau[i:],
+					work, ldwork)
+
+				// Apply H to A[0:m-k+i-1, 0:n-k+i+ib-1] from the right.
+				// The first ib*ldwork elements of work hold the
+				// triangular factor; the rest is scratch for Dlarfb.
+				impl.Dlarfb(blas.Right, blas.NoTrans, lapack.Backward, lapack.RowWise,
+					m-k+i, n-k+i+ib, ib, a[(m-k+i)*lda:], lda,
+					work, ldwork,
+					a, lda,
+					work[ib*ldwork:], ldwork)
+			}
+		}
+		// The loop has decremented i once past the last processed block,
+		// so i+nb is the index at which that block started.
+		mu = m - k + i + nb
+		nu = n - k + i + nb
+	} else {
+		mu = m
+		nu = n
+	}
+
+	// Use unblocked code to factor the last or only block.
+	if mu > 0 && nu > 0 {
+		impl.Dgerq2(mu, nu, a, lda, tau, work)
+	}
+	work[0] = float64(iws)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgesvd.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgesvd.go
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgetf2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetf2.go
@ -0,0 +1,84 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dgetf2 computes the LU decomposition of the m×n matrix A using an
+// unblocked algorithm with partial (row) pivoting.
+// The LU decomposition is a factorization of A into
+//  A = P * L * U
+// where P is a permutation matrix, L is a unit lower triangular matrix, and
+// U is a (usually) non-unit upper triangular matrix. On exit, L and U are stored
+// in place into a.
+//
+// ipiv is a permutation vector. It indicates that row i of the matrix was
+// exchanged with row ipiv[i]. ipiv must have length exactly min(m,n),
+// otherwise Dgetf2 will panic. ipiv is zero-indexed.
+//
+// Dgetf2 returns false if an exactly zero pivot was encountered, that is, if
+// the computed U is exactly singular, and true otherwise. The LU
+// decomposition will be computed regardless of the singularity of A, but
+// division by zero will occur if false is returned and the result is used
+// to solve a system of equations.
+//
+// Dgetf2 panics if m or n is negative, if lda < max(1,n), or, when
+// min(m,n) > 0, if a is too short for an m×n matrix with stride lda.
+//
+// Dgetf2 is an internal routine. It is exported for testing purposes.
+func (Implementation) Dgetf2(m, n int, a []float64, lda int, ipiv []int) (ok bool) {
+	mn := min(m, n)
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if mn == 0 {
+		return true
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(ipiv) != mn:
+		panic(badLenIpiv)
+	}
+
+	bi := blas64.Implementation()
+
+	// Pivots smaller in magnitude than sfmin are not inverted; the column
+	// is divided element by element instead.
+	sfmin := dlamchS
+	ok = true
+	for j := 0; j < mn; j++ {
+		// Find a pivot and test for singularity.
+		jp := j + bi.Idamax(m-j, a[j*lda+j:], lda)
+		ipiv[j] = jp
+		if a[jp*lda+j] == 0 {
+			ok = false
+		} else {
+			// Swap the rows if necessary.
+			if jp != j {
+				bi.Dswap(n, a[j*lda:], 1, a[jp*lda:], 1)
+			}
+			// Scale the part of column j below the diagonal by the
+			// pivot to obtain the multipliers of L.
+			if j < m-1 {
+				aj := a[j*lda+j]
+				if math.Abs(aj) >= sfmin {
+					bi.Dscal(m-j-1, 1/aj, a[(j+1)*lda+j:], lda)
+				} else {
+					// Divide every sub-diagonal element of column j
+					// by the pivot, one row at a time.
+					for i := j + 1; i < m; i++ {
+						a[i*lda+j] /= aj
+					}
+				}
+			}
+		}
+		if j < mn-1 {
+			// Rank-one update of the trailing submatrix.
+			bi.Dger(m-j-1, n-j-1, -1, a[(j+1)*lda+j:], lda, a[j*lda+j+1:], 1, a[(j+1)*lda+j+1:], lda)
+		}
+	}
+	return ok
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgetrf.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetrf.go
@ -0,0 +1,85 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dgetrf computes the LU decomposition of the m×n matrix A using a blocked
+// algorithm with partial (row) pivoting.
+// The LU decomposition is a factorization of A into
+//  A = P * L * U
+// where P is a permutation matrix, L is a unit lower triangular matrix, and
+// U is a (usually) non-unit upper triangular matrix. On exit, L and U are stored
+// in place into a.
+//
+// ipiv is a permutation vector. It indicates that row i of the matrix was
+// exchanged with row ipiv[i]. ipiv must have length exactly min(m,n),
+// otherwise Dgetrf will panic. ipiv is zero-indexed.
+//
+// Dgetrf returns false if any call to Dgetf2 reported failure, and true
+// otherwise. The LU decomposition will be computed regardless of the
+// singularity of A, but division by zero will occur if false is returned
+// and the result is used to solve a system of equations.
+func (impl Implementation) Dgetrf(m, n int, a []float64, lda int, ipiv []int) (ok bool) {
+	mn := min(m, n)
+	if m < 0 {
+		panic(mLT0)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if lda < max(1, n) {
+		panic(badLdA)
+	}
+
+	// Nothing to factorize for an empty matrix.
+	if mn == 0 {
+		return true
+	}
+
+	if len(a) < (m-1)*lda+n {
+		panic(shortA)
+	}
+	if len(ipiv) != mn {
+		panic(badLenIpiv)
+	}
+
+	bi := blas64.Implementation()
+
+	nb := impl.Ilaenv(1, "DGETRF", " ", m, n, -1, -1)
+	if nb <= 1 || mn <= nb {
+		// The matrix is too small for blocking to pay off.
+		return impl.Dgetf2(m, n, a, lda, ipiv)
+	}
+
+	ok = true
+	for j := 0; j < mn; j += nb {
+		jb := min(mn-j, nb)
+		next := j + jb // First row/column index after the current panel.
+
+		// Factorize the panel made of the diagonal and sub-diagonal
+		// blocks.
+		if !impl.Dgetf2(m-j, jb, a[j*lda+j:], lda, ipiv[j:next]) {
+			ok = false
+		}
+
+		// Dgetf2 reported pivots relative to the panel; shift them so
+		// that they index rows of the whole matrix.
+		lastPiv := min(m-1, next-1)
+		for i := j; i <= lastPiv; i++ {
+			ipiv[i] += j
+		}
+
+		// Apply the row interchanges to the columns left of the panel.
+		impl.Dlaswp(j, a, lda, j, next-1, ipiv[:next], 1)
+		if next >= n {
+			continue
+		}
+
+		// Apply the row interchanges to the columns right of the panel.
+		impl.Dlaswp(n-next, a[next:], lda, j, next-1, ipiv[:next], 1)
+
+		// Compute the block row of U.
+		bi.Dtrsm(blas.Left, blas.Lower, blas.NoTrans, blas.Unit,
+			jb, n-next, 1,
+			a[j*lda+j:], lda,
+			a[j*lda+next:], lda)
+		if next >= m {
+			continue
+		}
+
+		// Update the trailing submatrix.
+		bi.Dgemm(blas.NoTrans, blas.NoTrans, m-next, n-next, jb, -1,
+			a[next*lda+j:], lda,
+			a[j*lda+next:], lda,
+			1, a[next*lda+next:], lda)
+	}
+	return ok
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgetri.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetri.go
@ -0,0 +1,116 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dgetri computes the inverse of the n×n matrix A using the LU factorization
+// computed by Dgetrf. On entry, a contains the PLU decomposition of A as
+// computed by Dgetrf and on exit contains the inverse of the original matrix.
+//
+// ipiv holds the pivot indices as computed by Dgetrf. It must have length n,
+// otherwise Dgetri will panic.
+//
+// Dgetri will not perform the inversion if the matrix is singular, and returns
+// a boolean indicating whether the inversion was successful.
+//
+// work is temporary storage, and lwork specifies the usable memory length.
+// The length of work must be at least max(1, lwork) and lwork must be -1 or
+// at least max(1, n), otherwise this function will panic.
+// Dgetri is a blocked inversion, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Dgetri,
+// the optimal work length will be stored into work[0].
+func (impl Implementation) Dgetri(n int, a []float64, lda int, ipiv []int, work []float64, lwork int) (ok bool) {
+	// iws starts as the minimum accepted lwork and is later raised to the
+	// workspace size reported back in work[0].
+	iws := max(1, n)
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case lwork < iws && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return for an empty matrix.
+	if n == 0 {
+		work[0] = 1
+		return true
+	}
+
+	nb := impl.Ilaenv(1, "DGETRI", " ", n, -1, -1, -1)
+	if lwork == -1 {
+		// Workspace query: report the size and do no work.
+		work[0] = float64(n * nb)
+		return true
+	}
+
+	// a and ipiv are only validated once it is known that the inversion
+	// will actually be attempted.
+	switch {
+	case len(a) < (n-1)*lda+n:
+		panic(shortA)
+	case len(ipiv) != n:
+		panic(badLenIpiv)
+	}
+
+	// Form inv(U).
+	// If Dtrtri reports failure the inversion is abandoned; work[0] is not
+	// updated in that case.
+	ok = impl.Dtrtri(blas.Upper, blas.NonUnit, n, a, lda)
+	if !ok {
+		return false
+	}
+
+	nbmin := 2
+	if 1 < nb && nb < n {
+		iws = max(n*nb, 1)
+		if lwork < iws {
+			// Not enough workspace for the suggested block size:
+			// shrink nb to what fits and look up the minimum block size.
+			nb = lwork / n
+			nbmin = max(2, impl.Ilaenv(2, "DGETRI", " ", n, -1, -1, -1))
+		}
+	}
+	// In the blocked code work is addressed as a matrix with row stride nb.
+	ldwork := nb
+
+	bi := blas64.Implementation()
+	// Solve the equation inv(A)*L = inv(U) for inv(A).
+	// TODO(btracey): Replace this with a more row-major oriented algorithm.
+	if nb < nbmin || n <= nb {
+		// Unblocked code.
+		// Columns are processed from last to first.
+		for j := n - 1; j >= 0; j-- {
+			for i := j + 1; i < n; i++ {
+				// Copy current column of L to work and replace with zeros.
+				work[i] = a[i*lda+j]
+				a[i*lda+j] = 0
+			}
+			// Compute current column of inv(A).
+			if j < n-1 {
+				bi.Dgemv(blas.NoTrans, n, n-j-1, -1, a[(j+1):], lda, work[(j+1):], 1, 1, a[j:], lda)
+			}
+		}
+	} else {
+		// Blocked code.
+		// nn is the starting column of the last block; block columns are
+		// processed from last to first.
+		nn := ((n - 1) / nb) * nb
+		for j := nn; j >= 0; j -= nb {
+			jb := min(nb, n-j)
+			// Copy current block column of L to work and replace
+			// with zeros.
+			for jj := j; jj < j+jb; jj++ {
+				for i := jj + 1; i < n; i++ {
+					work[i*ldwork+(jj-j)] = a[i*lda+jj]
+					a[i*lda+jj] = 0
+				}
+			}
+			// Compute current block column of inv(A).
+			if j+jb < n {
+				bi.Dgemm(blas.NoTrans, blas.NoTrans, n, jb, n-j-jb, -1, a[(j+jb):], lda, work[(j+jb)*ldwork:], ldwork, 1, a[j:], lda)
+			}
+			bi.Dtrsm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, n, jb, 1, work[j*ldwork:], ldwork, a[j:], lda)
+		}
+	}
+	// Apply column interchanges.
+	// The swaps recorded in ipiv are applied to columns, in reverse order.
+	for j := n - 2; j >= 0; j-- {
+		jp := ipiv[j]
+		if jp != j {
+			bi.Dswap(n, a[j:], lda, a[jp:], lda)
+		}
+	}
+	work[0] = float64(iws)
+	return true
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dgetrs.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dgetrs.go
@ -0,0 +1,72 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dgetrs solves a system of linear equations with an n×n matrix A using the
+// LU factorization computed by Dgetrf. The system of equations solved is
+//  A * X = B    if trans == blas.NoTrans
+//  A^T * X = B  if trans == blas.Trans or trans == blas.ConjTrans
+// A is a general n×n matrix with stride lda. B is a general matrix of size
+// n×nrhs with stride ldb.
+//
+// On entry b contains the elements of the matrix B. On exit, b contains the
+// elements of X, the solution to the system of equations.
+//
+// a and ipiv contain the LU factorization of A and the permutation indices as
+// computed by Dgetrf. ipiv is zero-indexed and must have length n, otherwise
+// Dgetrs will panic.
+func (impl Implementation) Dgetrs(trans blas.Transpose, n, nrhs int, a []float64, lda int, ipiv []int, b []float64, ldb int) {
+	if !(trans == blas.NoTrans || trans == blas.Trans || trans == blas.ConjTrans) {
+		panic(badTrans)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if nrhs < 0 {
+		panic(nrhsLT0)
+	}
+	if lda < max(1, n) {
+		panic(badLdA)
+	}
+	if ldb < max(1, nrhs) {
+		panic(badLdB)
+	}
+
+	// Nothing to solve for an empty system or without right-hand sides.
+	if n == 0 || nrhs == 0 {
+		return
+	}
+
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+	if len(b) < (n-1)*ldb+nrhs {
+		panic(shortB)
+	}
+	if len(ipiv) != n {
+		panic(badLenIpiv)
+	}
+
+	bi := blas64.Implementation()
+
+	if trans != blas.NoTrans {
+		// Solve A^T * X = B.
+		// First solve U^T * X = B, then L^T * X = B, both in place in b,
+		// and finally apply the row interchanges in reverse order.
+		bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit,
+			n, nrhs, 1, a, lda, b, ldb)
+		bi.Dtrsm(blas.Left, blas.Lower, blas.Trans, blas.Unit,
+			n, nrhs, 1, a, lda, b, ldb)
+		impl.Dlaswp(nrhs, b, ldb, 0, n-1, ipiv, -1)
+		return
+	}
+
+	// Solve A * X = B.
+	// Apply the row interchanges to b, then solve L * X = B followed by
+	// U * X = B, both in place in b.
+	impl.Dlaswp(nrhs, b, ldb, 0, n-1, ipiv, 1)
+	bi.Dtrsm(blas.Left, blas.Lower, blas.NoTrans, blas.Unit,
+		n, nrhs, 1, a, lda, b, ldb)
+	bi.Dtrsm(blas.Left, blas.Upper, blas.NoTrans, blas.NonUnit,
+		n, nrhs, 1, a, lda, b, ldb)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dggsvd3.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dggsvd3.go
@ -0,0 +1,242 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dggsvd3 computes the generalized singular value decomposition (GSVD)
+// of an m×n matrix A and p×n matrix B:
+//  U^T*A*Q = D1*[ 0 R ]
+//
+//  V^T*B*Q = D2*[ 0 R ]
+// where U, V and Q are orthogonal matrices.
+//
+// Dggsvd3 returns k and l, the dimensions of the sub-blocks. k+l
+// is the effective numerical rank of the (m+p)×n matrix [ A^T B^T ]^T.
+// R is a (k+l)×(k+l) nonsingular upper triangular matrix, D1 and
+// D2 are m×(k+l) and p×(k+l) diagonal matrices and of the following
+// structures, respectively:
+//
+// If m-k-l >= 0,
+//
+//                    k  l
+//       D1 =     k [ I  0 ]
+//                l [ 0  C ]
+//            m-k-l [ 0  0 ]
+//
+//                  k  l
+//       D2 = l   [ 0  S ]
+//            p-l [ 0  0 ]
+//
+//               n-k-l  k    l
+//  [ 0 R ] = k [  0   R11  R12 ] k
+//            l [  0    0   R22 ] l
+//
+// where
+//
+//  C = diag( alpha_k, ... , alpha_{k+l} ),
+//  S = diag( beta_k,  ... , beta_{k+l} ),
+//  C^2 + S^2 = I.
+//
+// R is stored in
+//  A[0:k+l, n-k-l:n]
+// on exit.
+//
+// If m-k-l < 0,
+//
+//                 k m-k k+l-m
+//      D1 =   k [ I  0    0  ]
+//           m-k [ 0  C    0  ]
+//
+//                   k m-k k+l-m
+//      D2 =   m-k [ 0  S    0  ]
+//           k+l-m [ 0  0    I  ]
+//             p-l [ 0  0    0  ]
+//
+//                 n-k-l  k   m-k  k+l-m
+//  [ 0 R ] =    k [ 0    R11  R12  R13 ]
+//             m-k [ 0     0   R22  R23 ]
+//           k+l-m [ 0     0    0   R33 ]
+//
+// where
+//  C = diag( alpha_k, ... , alpha_m ),
+//  S = diag( beta_k,  ... , beta_m ),
+//  C^2 + S^2 = I.
+//
+//  R = [ R11 R12 R13 ] is stored in A[0:m, n-k-l:n]
+//      [  0  R22 R23 ]
+// and R33 is stored in
+//  B[m-k:l, n+m-k-l:n] on exit.
+//
+// Dggsvd3 computes C, S, R, and optionally the orthogonal transformation
+// matrices U, V and Q.
+//
+// jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior
+// is as follows
+//  jobU == lapack.GSVDU        Compute orthogonal matrix U
+//  jobU == lapack.GSVDNone     Do not compute orthogonal matrix.
+// The behavior is the same for jobV and jobQ with the exception that instead of
+// lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively.
+// The matrices U, V and Q must be m×m, p×p and n×n respectively unless the
+// relevant job parameter is lapack.GSVDNone.
+//
+// alpha and beta must have length n or Dggsvd3 will panic. On exit, alpha and
+// beta contain the generalized singular value pairs of A and B
+//   alpha[0:k] = 1,
+//   beta[0:k]  = 0,
+// if m-k-l >= 0,
+//   alpha[k:k+l] = diag(C),
+//   beta[k:k+l]  = diag(S),
+// if m-k-l < 0,
+//   alpha[k:m]= C, alpha[m:k+l]= 0
+//   beta[k:m] = S, beta[m:k+l] = 1.
+// if k+l < n,
+//   alpha[k+l:n] = 0 and
+//   beta[k+l:n]  = 0.
+//
+// On exit, iwork contains the permutation required to sort alpha descending.
+//
+// iwork must have length at least n (only the first n elements are used),
+// work must have length at least max(1, lwork), and lwork must be -1 or
+// greater than n, otherwise Dggsvd3 will panic. If lwork is -1, work[0] holds
+// the optimal lwork on return, but Dggsvd3 does not perform the GSVD.
+func (impl Implementation) Dggsvd3(jobU, jobV, jobQ lapack.GSVDJob, m, n, p int, a []float64, lda int, b []float64, ldb int, alpha, beta, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, work []float64, lwork int, iwork []int) (k, l int, ok bool) {
+	wantu := jobU == lapack.GSVDU
+	wantv := jobV == lapack.GSVDV
+	wantq := jobQ == lapack.GSVDQ
+	switch {
+	case !wantu && jobU != lapack.GSVDNone:
+		panic(badGSVDJob + "U")
+	case !wantv && jobV != lapack.GSVDNone:
+		panic(badGSVDJob + "V")
+	case !wantq && jobQ != lapack.GSVDNone:
+		panic(badGSVDJob + "Q")
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case p < 0:
+		panic(pLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldb < max(1, n):
+		panic(badLdB)
+	case ldu < 1, wantu && ldu < m:
+		panic(badLdU)
+	case ldv < 1, wantv && ldv < p:
+		panic(badLdV)
+	case ldq < 1, wantq && ldq < n:
+		panic(badLdQ)
+	case len(iwork) < n:
+		panic(shortWork)
+	case lwork < 1 && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Dggsvp3 insists on len(iwork) == n, whereas this routine accepts any
+	// iwork with at least n elements. Trim it so that a longer iwork does
+	// not trigger a panic in the calls below.
+	iwork = iwork[:n]
+
+	// Determine optimal work length.
+	impl.Dggsvp3(jobU, jobV, jobQ,
+		m, p, n,
+		a, lda,
+		b, ldb,
+		0, 0,
+		u, ldu,
+		v, ldv,
+		q, ldq,
+		iwork,
+		work, work, -1)
+	lwkopt := n + int(work[0])
+	lwkopt = max(lwkopt, 2*n)
+	lwkopt = max(lwkopt, 1)
+	work[0] = float64(lwkopt)
+	if lwork == -1 {
+		return 0, 0, true
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(b) < (p-1)*ldb+n:
+		panic(shortB)
+	case wantu && len(u) < (m-1)*ldu+m:
+		panic(shortU)
+	case wantv && len(v) < (p-1)*ldv+p:
+		panic(shortV)
+	case wantq && len(q) < (n-1)*ldq+n:
+		panic(shortQ)
+	case len(alpha) != n:
+		panic(badLenAlpha)
+	case len(beta) != n:
+		panic(badLenBeta)
+	case lwork <= n:
+		// The first n elements of work are handed to Dggsvp3 as tau
+		// and the remaining lwork-n as its workspace. Reject lwork <= n
+		// here; in particular lwork == n-1 would otherwise reach
+		// Dggsvp3 as lwork == -1 and be taken for a workspace query.
+		panic(badLWork)
+	}
+
+	// Compute the Frobenius norm of matrices A and B.
+	anorm := impl.Dlange(lapack.Frobenius, m, n, a, lda, nil)
+	bnorm := impl.Dlange(lapack.Frobenius, p, n, b, ldb, nil)
+
+	// Get machine precision and set up threshold for determining
+	// the effective numerical rank of the matrices A and B.
+	tola := float64(max(m, n)) * math.Max(anorm, dlamchS) * dlamchP
+	tolb := float64(max(p, n)) * math.Max(bnorm, dlamchS) * dlamchP
+
+	// Preprocessing. work[:n] serves as tau, work[n:] as workspace.
+	k, l = impl.Dggsvp3(jobU, jobV, jobQ,
+		m, p, n,
+		a, lda,
+		b, ldb,
+		tola, tolb,
+		u, ldu,
+		v, ldv,
+		q, ldq,
+		iwork,
+		work[:n], work[n:], lwork-n)
+
+	// Compute the GSVD of two upper "triangular" matrices.
+	_, ok = impl.Dtgsja(jobU, jobV, jobQ,
+		m, p, n,
+		k, l,
+		a, lda,
+		b, ldb,
+		tola, tolb,
+		alpha, beta,
+		u, ldu,
+		v, ldv,
+		q, ldq,
+		work)
+
+	// Sort the singular values and store the pivot indices in iwork
+	// Copy alpha to work, then sort alpha in work.
+	bi := blas64.Implementation()
+	bi.Dcopy(n, alpha, 1, work[:n], 1)
+	ibnd := min(l, m-k)
+	for i := 0; i < ibnd; i++ {
+		// Scan for largest alpha_{k+i}.
+		isub := i
+		smax := work[k+i]
+		for j := i + 1; j < ibnd; j++ {
+			if v := work[k+j]; v > smax {
+				isub = j
+				smax = v
+			}
+		}
+		if isub != i {
+			// Swap the largest remaining value into position k+i
+			// and record where it came from.
+			work[k+isub] = work[k+i]
+			work[k+i] = smax
+			iwork[k+i] = k + isub
+		} else {
+			iwork[k+i] = k + i
+		}
+	}
+
+	// The sort above used work as scratch; restore the optimal lwork.
+	work[0] = float64(lwkopt)
+
+	return k, l, ok
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dggsvp3.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dggsvp3.go
@ -0,0 +1,281 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dggsvp3 computes orthogonal matrices U, V and Q such that
+//
+//                  n-k-l  k    l
+//  U^T*A*Q =    k [ 0    A12  A13 ] if m-k-l >= 0;
+//               l [ 0     0   A23 ]
+//           m-k-l [ 0     0    0  ]
+//
+//                  n-k-l  k    l
+//  U^T*A*Q =    k [ 0    A12  A13 ] if m-k-l < 0;
+//             m-k [ 0     0   A23 ]
+//
+//                  n-k-l  k    l
+//  V^T*B*Q =    l [ 0     0   B13 ]
+//             p-l [ 0     0    0  ]
+//
+// where the k×k matrix A12 and l×l matrix B13 are non-singular
+// upper triangular. A23 is l×l upper triangular if m-k-l >= 0,
+// otherwise A23 is (m-k)×l upper trapezoidal.
+//
+// Dggsvp3 returns k and l, the dimensions of the sub-blocks. k+l
+// is the effective numerical rank of the (m+p)×n matrix [ A^T B^T ]^T.
+//
+// jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior
+// is as follows
+//  jobU == lapack.GSVDU        Compute orthogonal matrix U
+//  jobU == lapack.GSVDNone     Do not compute orthogonal matrix.
+// The behavior is the same for jobV and jobQ with the exception that instead of
+// lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively.
+// The matrices U, V and Q must be m×m, p×p and n×n respectively unless the
+// relevant job parameter is lapack.GSVDNone.
+//
+// tola and tolb are the thresholds used to determine the effective numerical
+// rank of A and B, respectively: a diagonal element of the pivoted QR factor
+// is counted towards the rank only if its absolute value exceeds the
+// threshold. Generally, they are set to
+//  tola = max(m, n)*norm(A)*eps,
+//  tolb = max(p, n)*norm(B)*eps.
+// Where eps is the machine epsilon.
+//
+// iwork must have length n, tau must have length at least n, work must have
+// length at least max(1, lwork), and lwork must be -1 or greater than zero,
+// otherwise Dggsvp3 will panic. If lwork is -1, no computation is performed
+// and work[0] holds the optimal lwork on return. On a normal return work[0]
+// is also set to the optimal lwork.
+//
+// Dggsvp3 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dggsvp3(jobU, jobV, jobQ lapack.GSVDJob, m, p, n int, a []float64, lda int, b []float64, ldb int, tola, tolb float64, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, iwork []int, tau, work []float64, lwork int) (k, l int) {
+	wantu := jobU == lapack.GSVDU
+	wantv := jobV == lapack.GSVDV
+	wantq := jobQ == lapack.GSVDQ
+	switch {
+	case !wantu && jobU != lapack.GSVDNone:
+		panic(badGSVDJob + "U")
+	case !wantv && jobV != lapack.GSVDNone:
+		panic(badGSVDJob + "V")
+	case !wantq && jobQ != lapack.GSVDNone:
+		panic(badGSVDJob + "Q")
+	case m < 0:
+		panic(mLT0)
+	case p < 0:
+		panic(pLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldb < max(1, n):
+		panic(badLdB)
+	case ldu < 1, wantu && ldu < m:
+		panic(badLdU)
+	case ldv < 1, wantv && ldv < p:
+		panic(badLdV)
+	case ldq < 1, wantq && ldq < n:
+		panic(badLdQ)
+	case len(iwork) != n:
+		panic(shortWork)
+	case lwork < 1 && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Determine the optimal workspace size from the two Dgeqp3
+	// factorizations and the problem dimensions.
+	var lwkopt int
+	impl.Dgeqp3(p, n, b, ldb, iwork, tau, work, -1)
+	lwkopt = int(work[0])
+	if wantv {
+		lwkopt = max(lwkopt, p)
+	}
+	lwkopt = max(lwkopt, min(n, p))
+	lwkopt = max(lwkopt, m)
+	if wantq {
+		lwkopt = max(lwkopt, n)
+	}
+	impl.Dgeqp3(m, n, a, lda, iwork, tau, work, -1)
+	lwkopt = max(lwkopt, int(work[0]))
+	lwkopt = max(1, lwkopt)
+	if lwork == -1 {
+		work[0] = float64(lwkopt)
+		return 0, 0
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(b) < (p-1)*ldb+n:
+		panic(shortB)
+	case wantu && len(u) < (m-1)*ldu+m:
+		panic(shortU)
+	case wantv && len(v) < (p-1)*ldv+p:
+		panic(shortV)
+	case wantq && len(q) < (n-1)*ldq+n:
+		panic(shortQ)
+	case len(tau) < n:
+		// tau check must come after lwkopt query since
+		// the Dggsvd3 call for lwkopt query may have
+		// lwork == -1, and tau is provided by work.
+		panic(shortTau)
+	}
+
+	const forward = true
+
+	// QR with column pivoting of B: B*P = V*[ S11 S12 ].
+	//                                       [  0   0  ]
+	for i := range iwork[:n] {
+		iwork[i] = 0
+	}
+	impl.Dgeqp3(p, n, b, ldb, iwork, tau, work, lwork)
+
+	// Update A := A*P.
+	impl.Dlapmt(forward, m, n, a, lda, iwork)
+
+	// Determine the effective rank of matrix B.
+	for i := 0; i < min(p, n); i++ {
+		if math.Abs(b[i*ldb+i]) > tolb {
+			l++
+		}
+	}
+
+	if wantv {
+		// Copy the details of V, and form V.
+		impl.Dlaset(blas.All, p, p, 0, 0, v, ldv)
+		if p > 1 {
+			impl.Dlacpy(blas.Lower, p-1, min(p, n), b[ldb:], ldb, v[ldv:], ldv)
+		}
+		impl.Dorg2r(p, p, min(p, n), v, ldv, tau, work)
+	}
+
+	// Clean up B: zero the strictly lower triangle of B[0:l, 0:l]
+	// and all of B[l:p, 0:n].
+	for i := 1; i < l; i++ {
+		r := b[i*ldb : i*ldb+i]
+		for j := range r {
+			r[j] = 0
+		}
+	}
+	if p > l {
+		impl.Dlaset(blas.All, p-l, n, 0, 0, b[l*ldb:], ldb)
+	}
+
+	if wantq {
+		// Set Q = I and update Q := Q*P.
+		impl.Dlaset(blas.All, n, n, 0, 1, q, ldq)
+		impl.Dlapmt(forward, n, n, q, ldq, iwork)
+	}
+
+	if p >= l && n != l {
+		// RQ factorization of [ S11 S12 ]: [ S11 S12 ] = [ 0 S12 ]*Z.
+		impl.Dgerq2(l, n, b, ldb, tau, work)
+
+		// Update A := A*Z^T.
+		impl.Dormr2(blas.Right, blas.Trans, m, n, l, b, ldb, tau, a, lda, work)
+
+		if wantq {
+			// Update Q := Q*Z^T.
+			impl.Dormr2(blas.Right, blas.Trans, n, n, l, b, ldb, tau, q, ldq, work)
+		}
+
+		// Clean up B.
+		impl.Dlaset(blas.All, l, n-l, 0, 0, b, ldb)
+		for i := 1; i < l; i++ {
+			r := b[i*ldb+n-l : i*ldb+i+n-l]
+			for j := range r {
+				r[j] = 0
+			}
+		}
+	}
+
+	// Let              N-L     L
+	//            A = [ A11    A12 ] M,
+	//
+	// then the following does the complete QR decomposition of A11:
+	//
+	//          A11 = U*[  0  T12 ]*P1^T.
+	//                  [  0   0  ]
+	for i := range iwork[:n-l] {
+		iwork[i] = 0
+	}
+	impl.Dgeqp3(m, n-l, a, lda, iwork[:n-l], tau, work, lwork)
+
+	// Determine the effective rank of A11.
+	for i := 0; i < min(m, n-l); i++ {
+		if math.Abs(a[i*lda+i]) > tola {
+			k++
+		}
+	}
+
+	// Update A12 := U^T*A12, where A12 = A[0:m, n-l:n].
+	impl.Dorm2r(blas.Left, blas.Trans, m, l, min(m, n-l), a, lda, tau, a[n-l:], lda, work)
+
+	if wantu {
+		// Copy the details of U, and form U.
+		impl.Dlaset(blas.All, m, m, 0, 0, u, ldu)
+		if m > 1 {
+			impl.Dlacpy(blas.Lower, m-1, min(m, n-l), a[lda:], lda, u[ldu:], ldu)
+		}
+		impl.Dorg2r(m, m, min(m, n-l), u, ldu, tau, work)
+	}
+
+	if wantq {
+		// Update Q[0:n, 0:n-l] := Q[0:n, 0:n-l]*P1.
+		impl.Dlapmt(forward, n, n-l, q, ldq, iwork[:n-l])
+	}
+
+	// Clean up A: set the strictly lower triangular part of
+	// A[0:k, 0:k] = 0, and A[k:m, 0:n-l] = 0.
+	for i := 1; i < k; i++ {
+		r := a[i*lda : i*lda+i]
+		for j := range r {
+			r[j] = 0
+		}
+	}
+	if m > k {
+		impl.Dlaset(blas.All, m-k, n-l, 0, 0, a[k*lda:], lda)
+	}
+
+	if n-l > k {
+		// RQ factorization of [ T11 T12 ] = [ 0 T12 ]*Z1.
+		impl.Dgerq2(k, n-l, a, lda, tau, work)
+
+		if wantq {
+			// Update Q[0:n, 0:n-l] := Q[0:n, 0:n-l]*Z1^T.
+			// Z1 comes from the RQ factorization above, so it must
+			// be applied with Dormr2, exactly as Z is applied after
+			// the RQ factorization of B earlier in this function.
+			impl.Dormr2(blas.Right, blas.Trans, n, n-l, k, a, lda, tau, q, ldq, work)
+		}
+
+		// Clean up A: zero A[0:k, 0:n-l-k] and the strictly lower
+		// triangle of the k×k block that starts at column n-k-l.
+		impl.Dlaset(blas.All, k, n-l-k, 0, 0, a, lda)
+		for i := 1; i < k; i++ {
+			r := a[i*lda+n-k-l : i*lda+i+n-k-l]
+			for j := range r {
+				r[j] = 0
+			}
+		}
+	}
+
+	if m > k {
+		// QR factorization of A[k:m, n-l:n].
+		impl.Dgeqr2(m-k, l, a[k*lda+n-l:], lda, tau, work)
+		if wantu {
+			// Update U[:, k:m] := U[:, k:m]*U1.
+			impl.Dorm2r(blas.Right, blas.NoTrans, m, m-k, min(m-k, l), a[k*lda+n-l:], lda, tau, u[k:], ldu, work)
+		}
+
+		// Clean up A: zero the entries below the diagonal of
+		// A[k:m, n-l:n].
+		for i := k + 1; i < m; i++ {
+			r := a[i*lda+n-l : i*lda+min(n-l+i-k, n)]
+			for j := range r {
+				r[j] = 0
+			}
+		}
+	}
+
+	work[0] = float64(lwkopt)
+	return k, l
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dhseqr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dhseqr.go
@ -0,0 +1,252 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dhseqr computes the eigenvalues of an n×n Hessenberg matrix H and,
+// optionally, the matrices T and Z from the Schur decomposition
+//  H = Z T Z^T,
+// where T is an n×n upper quasi-triangular matrix (the Schur form), and Z is
+// the n×n orthogonal matrix of Schur vectors.
+//
+// Optionally Z may be postmultiplied into an input orthogonal matrix Q so that
+// this routine can give the Schur factorization of a matrix A which has been
+// reduced to the Hessenberg form H by the orthogonal matrix Q:
+//  A = Q H Q^T = (QZ) T (QZ)^T.
+//
+// If job == lapack.EigenvaluesOnly, only the eigenvalues will be computed.
+// If job == lapack.EigenvaluesAndSchur, the eigenvalues and the Schur form T will
+// be computed.
+// For other values of job Dhseqr will panic.
+//
+// If compz == lapack.SchurNone, no Schur vectors will be computed and Z will not be
+// referenced.
+// If compz == lapack.SchurHess, on return Z will contain the matrix of Schur
+// vectors of H.
+// If compz == lapack.SchurOrig, on entry z is assumed to contain the orthogonal
+// matrix Q that is the identity except for the submatrix
+// Q[ilo:ihi+1,ilo:ihi+1]. On return z will be updated to the product Q*Z.
+// For other values of compz Dhseqr will panic.
+//
+// ilo and ihi determine the block of H on which Dhseqr operates. It is assumed
+// that H is already upper triangular in rows and columns [0:ilo] and [ihi+1:n],
+// although it will be only checked that the block is isolated, that is,
+//  ilo == 0   or H[ilo,ilo-1] == 0,
+//  ihi == n-1 or H[ihi+1,ihi] == 0,
+// and Dhseqr will panic otherwise. ilo and ihi are typically set by a previous
+// call to Dgebal, otherwise they should be set to 0 and n-1, respectively. It
+// must hold that
+//  0 <= ilo <= ihi < n,     if n > 0,
+//  ilo == 0 and ihi == -1,  if n == 0.
+//
+// wr and wi must have length at least n.
+//
+// work must have length at least lwork and lwork must be at least max(1,n)
+// otherwise Dhseqr will panic. The minimum lwork delivers very good and
+// sometimes optimal performance, although lwork as large as 11*n may be
+// required. On return, work[0] will contain the optimal value of lwork.
+//
+// If lwork is -1, instead of performing Dhseqr, the function only estimates the
+// optimal workspace size and stores it into work[0]. Neither h nor z are
+// accessed.
+//
+// unconverged indicates whether Dhseqr computed all the eigenvalues.
+//
+// If unconverged == 0, all the eigenvalues have been computed and their real
+// and imaginary parts will be stored on return in wr and wi, respectively. If
+// two eigenvalues are computed as a complex conjugate pair, they are stored in
+// consecutive elements of wr and wi, say the i-th and (i+1)th, with wi[i] > 0
+// and wi[i+1] < 0.
+//
+// If unconverged == 0 and job == lapack.EigenvaluesAndSchur, on return H will
+// contain the upper quasi-triangular matrix T from the Schur decomposition (the
+// Schur form). 2×2 diagonal blocks (corresponding to complex conjugate pairs of
+// eigenvalues) will be returned in standard form, with
+//  H[i,i] == H[i+1,i+1],
+// and
+//  H[i+1,i]*H[i,i+1] < 0.
+// The eigenvalues will be stored in wr and wi in the same order as on the
+// diagonal of the Schur form returned in H, with
+//  wr[i] = H[i,i],
+// and, if H[i:i+2,i:i+2] is a 2×2 diagonal block,
+//  wi[i]   = sqrt(-H[i+1,i]*H[i,i+1]),
+//  wi[i+1] = -wi[i].
+//
+// If unconverged == 0 and job == lapack.EigenvaluesOnly, the contents of h
+// on return is unspecified.
+//
+// If unconverged > 0, some eigenvalues have not converged, and the blocks
+// [0:ilo] and [unconverged:n] of wr and wi will contain those eigenvalues which
+// have been successfully computed. Failures are rare.
+//
+// If unconverged > 0 and job == lapack.EigenvaluesOnly, on return the
+// remaining unconverged eigenvalues are the eigenvalues of the upper Hessenberg
+// matrix H[ilo:unconverged,ilo:unconverged].
+//
+// If unconverged > 0 and job == lapack.EigenvaluesAndSchur, then on
+// return
+//  (initial H) U = U (final H),   (*)
+// where U is an orthogonal matrix. The final H is upper Hessenberg and
+// H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular.
+//
+// If unconverged > 0 and compz == lapack.SchurOrig, then on return
+//  (final Z) = (initial Z) U,
+// where U is the orthogonal matrix in (*) regardless of the value of job.
+//
+// If unconverged > 0 and compz == lapack.SchurHess, then on return
+//  (final Z) = U,
+// where U is the orthogonal matrix in (*) regardless of the value of job.
+//
+// References:
+//  [1] R. Byers. LAPACK 3.1 xHSEQR: Tuning and Implementation Notes on the
+//      Small Bulge Multi-Shift QR Algorithm with Aggressive Early Deflation.
+//      LAPACK Working Note 187 (2007)
+//      URL: http://www.netlib.org/lapack/lawnspdf/lawn187.pdf
+//  [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I:
+//      Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix
+//      Anal. Appl. 23(4) (2002), pp. 929—947
+//      URL: http://dx.doi.org/10.1137/S0895479801384573
+//  [3] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
+//      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973
+//      URL: http://dx.doi.org/10.1137/S0895479801384585
+//
+// Dhseqr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dhseqr(job lapack.SchurJob, compz lapack.SchurComp, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, z []float64, ldz int, work []float64, lwork int) (unconverged int) {
+	wantt := job == lapack.EigenvaluesAndSchur
+	wantz := compz == lapack.SchurHess || compz == lapack.SchurOrig
+
+	// Validate the scalar arguments and the workspace. The lengths of h,
+	// z, wr and wi are checked only after the quick returns below.
+	switch {
+	case job != lapack.EigenvaluesOnly && job != lapack.EigenvaluesAndSchur:
+		panic(badSchurJob)
+	case compz != lapack.SchurNone && compz != lapack.SchurHess && compz != lapack.SchurOrig:
+		panic(badSchurComp)
+	case n < 0:
+		panic(nLT0)
+	case ilo < 0 || max(0, n-1) < ilo:
+		panic(badIlo)
+	case ihi < min(ilo, n-1) || n <= ihi:
+		panic(badIhi)
+	case ldh < max(1, n):
+		panic(badLdH)
+	case ldz < 1, wantz && ldz < n:
+		panic(badLdZ)
+	case lwork < max(1, n) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		work[0] = 1
+		return 0
+	}
+
+	// Quick return in case of a workspace query.
+	if lwork == -1 {
+		// Ask Dlaqr04 for its workspace requirement and report at
+		// least n, the minimum lwork accepted above.
+		impl.Dlaqr04(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, ilo, ihi, z, ldz, work, -1, 1)
+		work[0] = math.Max(float64(n), work[0])
+		return 0
+	}
+
+	switch {
+	case len(h) < (n-1)*ldh+n:
+		panic(shortH)
+	case wantz && len(z) < (n-1)*ldz+n:
+		panic(shortZ)
+	case len(wr) < n:
+		panic(shortWr)
+	case len(wi) < n:
+		panic(shortWi)
+	}
+
+	const (
+		// Matrices of order ntiny or smaller must be processed by
+		// Dlahqr because of insufficient subdiagonal scratch space.
+		// This is a hard limit.
+		ntiny = 11
+
+		// nl is the size of a local workspace to help small matrices
+		// through a rare Dlahqr failure. nl > ntiny is required and
+		// nl <= nmin = Ilaenv(ispec=12,...) is recommended (the default
+		// value of nmin is 75). Using nl = 49 allows up to six
+		// simultaneous shifts and a 16×16 deflation window.
+		nl = 49
+	)
+
+	// Copy eigenvalues isolated by Dgebal: the diagonal entries of H
+	// outside the active block [ilo:ihi+1] are taken as real eigenvalues.
+	for i := 0; i < ilo; i++ {
+		wr[i] = h[i*ldh+i]
+		wi[i] = 0
+	}
+	for i := ihi + 1; i < n; i++ {
+		wr[i] = h[i*ldh+i]
+		wi[i] = 0
+	}
+
+	// Initialize Z to identity matrix if requested.
+	if compz == lapack.SchurHess {
+		impl.Dlaset(blas.All, n, n, 0, 1, z, ldz)
+	}
+
+	// Quick return if possible: a 1×1 active block is its own eigenvalue.
+	if ilo == ihi {
+		wr[ilo] = h[ilo*ldh+ilo]
+		wi[ilo] = 0
+		return 0
+	}
+
+	// Dlahqr/Dlaqr04 crossover point.
+	nmin := impl.Ilaenv(12, "DHSEQR", string(job)+string(compz), n, ilo, ihi, lwork)
+	nmin = max(ntiny, nmin)
+
+	if n > nmin {
+		// Dlaqr04 for big matrices.
+		unconverged = impl.Dlaqr04(wantt, wantz, n, ilo, ihi, h, ldh, wr[:ihi+1], wi[:ihi+1],
+			ilo, ihi, z, ldz, work, lwork, 1)
+	} else {
+		// Dlahqr for small matrices.
+		unconverged = impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr[:ihi+1], wi[:ihi+1],
+			ilo, ihi, z, ldz)
+		if unconverged > 0 {
+			// A rare Dlahqr failure! Dlaqr04 sometimes succeeds
+			// when Dlahqr fails.
+			// The retry passes the value reported by Dlahqr as the
+			// new upper index of the active block.
+			kbot := unconverged
+			if n >= nl {
+				// Larger matrices have enough subdiagonal
+				// scratch space to call Dlaqr04 directly.
+				unconverged = impl.Dlaqr04(wantt, wantz, n, ilo, kbot, h, ldh,
+					wr[:ihi+1], wi[:ihi+1], ilo, ihi, z, ldz, work, lwork, 1)
+			} else {
+				// Tiny matrices don't have enough subdiagonal
+				// scratch space to benefit from Dlaqr04. Hence,
+				// tiny matrices must be copied into a larger
+				// array before calling Dlaqr04.
+				// hl is an nl×nl scratch matrix with stride nl.
+				// H goes into its leading n×n block and Dlaset
+				// clears columns [n:nl] of every row; the rest
+				// is already zero because hl is zero-initialized.
+				var hl [nl * nl]float64
+				impl.Dlacpy(blas.All, n, n, h, ldh, hl[:], nl)
+				impl.Dlaset(blas.All, nl, nl-n, 0, 0, hl[n:], nl)
+				var workl [nl]float64
+				unconverged = impl.Dlaqr04(wantt, wantz, nl, ilo, kbot, hl[:], nl,
+					wr[:ihi+1], wi[:ihi+1], ilo, ihi, z, ldz, workl[:], nl, 1)
+				work[0] = workl[0]
+				// Copy the result back into h only when the
+				// caller will look at it.
+				if wantt || unconverged > 0 {
+					impl.Dlacpy(blas.All, n, n, hl[:], nl, h, ldh)
+				}
+			}
+		}
+	}
+	// Zero out under the first subdiagonal, if necessary.
+	if (wantt || unconverged > 0) && n > 2 {
+		impl.Dlaset(blas.Lower, n-2, n-2, 0, 0, h[2*ldh:], ldh)
+	}
+
+	// Report at least n, the minimum lwork this routine accepts.
+	work[0] = math.Max(float64(n), work[0])
+	return unconverged
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlabrd.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlabrd.go
@ -0,0 +1,173 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlabrd reduces the first nb rows and columns of a real general m×n matrix
+// A to upper or lower bidiagonal form by an orthogonal transformation
+//  Q^T * A * P
+// and computes the matrices X and Y needed to apply the transformation to
+// the unreduced part of A.
+//
+// If m >= n, A is reduced to upper bidiagonal form. On exit the elements on
+// and below the diagonal in the first nb columns hold the vectors v_i that
+// define Q, and the elements above the diagonal in the first nb rows hold the
+// vectors u_i that define P.
+// If m < n, A is reduced to lower bidiagonal form. On exit the elements below
+// the diagonal in the first nb columns hold the vectors v_i, and the elements
+// on and above the diagonal in the first nb rows hold the vectors u_i.
+//
+// The reduction to bidiagonal form is stored in d and e, where d are the diagonal
+// elements, and e are the off-diagonal elements.
+//
+// The matrices Q and P are products of elementary reflectors
+//  Q = H_0 * H_1 * ... * H_{nb-1}
+//  P = G_0 * G_1 * ... * G_{nb-1}
+// where
+//  H_i = I - tauQ[i] * v_i * v_i^T
+//  G_i = I - tauP[i] * u_i * u_i^T
+//
+// As an example, on exit the entries of A when m = 6, n = 5, and nb = 2
+//  [ 1   1  u1  u1  u1]
+//  [v1   1   1  u2  u2]
+//  [v1  v2   a   a   a]
+//  [v1  v2   a   a   a]
+//  [v1  v2   a   a   a]
+//  [v1  v2   a   a   a]
+// and when m = 5, n = 6, and nb = 2
+//  [ 1  u1  u1  u1  u1  u1]
+//  [ 1   1  u2  u2  u2  u2]
+//  [v1   1   a   a   a   a]
+//  [v1  v2   a   a   a   a]
+//  [v1  v2   a   a   a   a]
+//
+// The matrices X and Y are used together with the matrices U and V, whose
+// columns are the vectors u_i and v_i, to update the unreduced part of the
+// matrix as
+//  A := A - V*Y^T - X*U^T
+//
+// X is an m×nb matrix with stride ldx, Y is an n×nb matrix with stride ldy.
+// d, e, tauQ, and tauP must all have length at least nb, and nb must not
+// exceed m or n. Dlabrd will panic if these size constraints are violated.
+//
+// Dlabrd is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlabrd(m, n, nb int, a []float64, lda int, d, e, tauQ, tauP, x []float64, ldx int, y []float64, ldy int) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case nb < 0:
+		panic(nbLT0)
+	case nb > n:
+		panic(nbGTN)
+	case nb > m:
+		panic(nbGTM)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldx < max(1, nb):
+		panic(badLdX)
+	case ldy < max(1, nb):
+		panic(badLdY)
+	}
+
+	// Quick return if there is nothing to reduce.
+	if m == 0 || n == 0 || nb == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(d) < nb:
+		panic(shortD)
+	case len(e) < nb:
+		panic(shortE)
+	case len(tauQ) < nb:
+		panic(shortTauQ)
+	case len(tauP) < nb:
+		panic(shortTauP)
+	case len(x) < (m-1)*ldx+nb:
+		panic(shortX)
+	case len(y) < (n-1)*ldy+nb:
+		panic(shortY)
+	}
+
+	bi := blas64.Implementation()
+
+	if m >= n {
+		// Reduce to upper bidiagonal form.
+		for i := 0; i < nb; i++ {
+			// Update A[i:m, i].
+			bi.Dgemv(blas.NoTrans, m-i, i, -1, a[i*lda:], lda, y[i*ldy:], 1, 1, a[i*lda+i:], lda)
+			bi.Dgemv(blas.NoTrans, m-i, i, -1, x[i*ldx:], ldx, a[i:], lda, 1, a[i*lda+i:], lda)
+
+			// Generate reflection Q[i] to annihilate A[i+1:m, i].
+			a[i*lda+i], tauQ[i] = impl.Dlarfg(m-i, a[i*lda+i], a[min(i+1, m-1)*lda+i:], lda)
+			d[i] = a[i*lda+i]
+			if i < n-1 {
+				// Compute Y[i+1:n, i].
+				// The diagonal entry is overwritten with 1 so
+				// that A[i:m, i] can be used directly as v_i.
+				a[i*lda+i] = 1
+				bi.Dgemv(blas.Trans, m-i, n-i-1, 1, a[i*lda+i+1:], lda, a[i*lda+i:], lda, 0, y[(i+1)*ldy+i:], ldy)
+				bi.Dgemv(blas.Trans, m-i, i, 1, a[i*lda:], lda, a[i*lda+i:], lda, 0, y[i:], ldy)
+				bi.Dgemv(blas.NoTrans, n-i-1, i, -1, y[(i+1)*ldy:], ldy, y[i:], ldy, 1, y[(i+1)*ldy+i:], ldy)
+				bi.Dgemv(blas.Trans, m-i, i, 1, x[i*ldx:], ldx, a[i*lda+i:], lda, 0, y[i:], ldy)
+				bi.Dgemv(blas.Trans, i, n-i-1, -1, a[i+1:], lda, y[i:], ldy, 1, y[(i+1)*ldy+i:], ldy)
+				bi.Dscal(n-i-1, tauQ[i], y[(i+1)*ldy+i:], ldy)
+
+				// Update A[i, i+1:n].
+				bi.Dgemv(blas.NoTrans, n-i-1, i+1, -1, y[(i+1)*ldy:], ldy, a[i*lda:], 1, 1, a[i*lda+i+1:], 1)
+				bi.Dgemv(blas.Trans, i, n-i-1, -1, a[i+1:], lda, x[i*ldx:], 1, 1, a[i*lda+i+1:], 1)
+
+				// Generate reflection P[i] to annihilate A[i, i+2:n].
+				a[i*lda+i+1], tauP[i] = impl.Dlarfg(n-i-1, a[i*lda+i+1], a[i*lda+min(i+2, n-1):], 1)
+				e[i] = a[i*lda+i+1]
+				// As above, store 1 so that A[i, i+1:n] is u_i.
+				a[i*lda+i+1] = 1
+
+				// Compute X[i+1:m, i].
+				bi.Dgemv(blas.NoTrans, m-i-1, n-i-1, 1, a[(i+1)*lda+i+1:], lda, a[i*lda+i+1:], 1, 0, x[(i+1)*ldx+i:], ldx)
+				bi.Dgemv(blas.Trans, n-i-1, i+1, 1, y[(i+1)*ldy:], ldy, a[i*lda+i+1:], 1, 0, x[i:], ldx)
+				bi.Dgemv(blas.NoTrans, m-i-1, i+1, -1, a[(i+1)*lda:], lda, x[i:], ldx, 1, x[(i+1)*ldx+i:], ldx)
+				bi.Dgemv(blas.NoTrans, i, n-i-1, 1, a[i+1:], lda, a[i*lda+i+1:], 1, 0, x[i:], ldx)
+				bi.Dgemv(blas.NoTrans, m-i-1, i, -1, x[(i+1)*ldx:], ldx, x[i:], ldx, 1, x[(i+1)*ldx+i:], ldx)
+				bi.Dscal(m-i-1, tauP[i], x[(i+1)*ldx+i:], ldx)
+			}
+		}
+		return
+	}
+	// Reduce to lower bidiagonal form.
+	for i := 0; i < nb; i++ {
+		// Update A[i,i:n]
+		bi.Dgemv(blas.NoTrans, n-i, i, -1, y[i*ldy:], ldy, a[i*lda:], 1, 1, a[i*lda+i:], 1)
+		bi.Dgemv(blas.Trans, i, n-i, -1, a[i:], lda, x[i*ldx:], 1, 1, a[i*lda+i:], 1)
+
+		// Generate reflection P[i] to annihilate A[i, i+1:n]
+		a[i*lda+i], tauP[i] = impl.Dlarfg(n-i, a[i*lda+i], a[i*lda+min(i+1, n-1):], 1)
+		d[i] = a[i*lda+i]
+		if i < m-1 {
+			// Store 1 on the diagonal so that A[i, i:n] is u_i.
+			a[i*lda+i] = 1
+			// Compute X[i+1:m, i].
+			bi.Dgemv(blas.NoTrans, m-i-1, n-i, 1, a[(i+1)*lda+i:], lda, a[i*lda+i:], 1, 0, x[(i+1)*ldx+i:], ldx)
+			bi.Dgemv(blas.Trans, n-i, i, 1, y[i*ldy:], ldy, a[i*lda+i:], 1, 0, x[i:], ldx)
+			bi.Dgemv(blas.NoTrans, m-i-1, i, -1, a[(i+1)*lda:], lda, x[i:], ldx, 1, x[(i+1)*ldx+i:], ldx)
+			bi.Dgemv(blas.NoTrans, i, n-i, 1, a[i:], lda, a[i*lda+i:], 1, 0, x[i:], ldx)
+			bi.Dgemv(blas.NoTrans, m-i-1, i, -1, x[(i+1)*ldx:], ldx, x[i:], ldx, 1, x[(i+1)*ldx+i:], ldx)
+			bi.Dscal(m-i-1, tauP[i], x[(i+1)*ldx+i:], ldx)
+
+			// Update A[i+1:m, i].
+			bi.Dgemv(blas.NoTrans, m-i-1, i, -1, a[(i+1)*lda:], lda, y[i*ldy:], 1, 1, a[(i+1)*lda+i:], lda)
+			bi.Dgemv(blas.NoTrans, m-i-1, i+1, -1, x[(i+1)*ldx:], ldx, a[i:], lda, 1, a[(i+1)*lda+i:], lda)
+
+			// Generate reflection Q[i] to annihilate A[i+2:m, i].
+			a[(i+1)*lda+i], tauQ[i] = impl.Dlarfg(m-i-1, a[(i+1)*lda+i], a[min(i+2, m-1)*lda+i:], lda)
+			e[i] = a[(i+1)*lda+i]
+			// Store 1 on the subdiagonal so that A[i+1:m, i] is v_i.
+			a[(i+1)*lda+i] = 1
+
+			// Compute Y[i+1:n, i].
+			bi.Dgemv(blas.Trans, m-i-1, n-i-1, 1, a[(i+1)*lda+i+1:], lda, a[(i+1)*lda+i:], lda, 0, y[(i+1)*ldy+i:], ldy)
+			bi.Dgemv(blas.Trans, m-i-1, i, 1, a[(i+1)*lda:], lda, a[(i+1)*lda+i:], lda, 0, y[i:], ldy)
+			bi.Dgemv(blas.NoTrans, n-i-1, i, -1, y[(i+1)*ldy:], ldy, y[i:], ldy, 1, y[(i+1)*ldy+i:], ldy)
+			bi.Dgemv(blas.Trans, m-i-1, i+1, 1, x[(i+1)*ldx:], ldx, a[(i+1)*lda+i:], lda, 0, y[i:], ldy)
+			bi.Dgemv(blas.Trans, i+1, n-i-1, -1, a[i+1:], lda, y[i:], ldy, 1, y[(i+1)*ldy+i:], ldy)
+			bi.Dscal(n-i-1, tauQ[i], y[(i+1)*ldy+i:], ldy)
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlacn2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlacn2.go
@ -0,0 +1,134 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlacn2 estimates the 1-norm of an n×n matrix A using sequential updates with
+// matrix-vector products provided externally.
+//
+// Dlacn2 is called sequentially and it returns the value of est and kase to be
+// used on the next call.
+// On the initial call, kase must be 0.
+// In between calls, x must be overwritten by
+//  A * X    if kase was returned as 1,
+//  A^T * X  if kase was returned as 2,
+// and all other parameters must not be changed.
+// On the final return, kase is returned as 0, v contains A*W where W is a
+// vector, and est = norm(V)/norm(W) is a lower bound for the 1-norm of A.
+//
+// v, x, and isgn must all have length at least n and n must be at least 1,
+// otherwise Dlacn2 will panic. isave is used for temporary storage.
+//
+// Dlacn2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlacn2(n int, v, x []float64, isgn []int, est float64, kase int, isave *[3]int) (float64, int) {
+	switch {
+	case n < 1:
+		panic(nLT1)
+	case len(v) < n:
+		panic(shortV)
+	case len(x) < n:
+		panic(shortX)
+	case len(isgn) < n:
+		panic(shortIsgn)
+	case isave[0] < 0 || 5 < isave[0]:
+		panic(badIsave)
+	case isave[0] == 0 && kase != 0:
+		panic(badIsave)
+	}
+
+	const itmax = 5 // limit on the counter kept in isave[2]
+	bi := blas64.Implementation()
+
+	if kase == 0 {
+		for i := 0; i < n; i++ {
+			x[i] = 1 / float64(n)
+		}
+		kase = 1
+		isave[0] = 1 // stage to resume at on the next call
+		return est, kase
+	}
+	switch isave[0] {
+	case 1:
+		if n == 1 {
+			v[0] = x[0]
+			est = math.Abs(v[0])
+			kase = 0
+			return est, kase
+		}
+		est = bi.Dasum(n, x, 1)
+		for i := 0; i < n; i++ {
+			x[i] = math.Copysign(1, x[i])
+			isgn[i] = int(x[i])
+		}
+		kase = 2
+		isave[0] = 2
+		return est, kase
+	case 2:
+		isave[1] = bi.Idamax(n, x, 1)
+		isave[2] = 2
+		for i := 0; i < n; i++ {
+			x[i] = 0
+		}
+		x[isave[1]] = 1
+		kase = 1
+		isave[0] = 3
+		return est, kase
+	case 3:
+		bi.Dcopy(n, x, 1, v, 1)
+		estold := est
+		est = bi.Dasum(n, v, 1)
+		sameSigns := true
+		for i := 0; i < n; i++ {
+			if int(math.Copysign(1, x[i])) != isgn[i] {
+				sameSigns = false
+				break
+			}
+		}
+		if !sameSigns && est > estold {
+			for i := 0; i < n; i++ {
+				x[i] = math.Copysign(1, x[i])
+				isgn[i] = int(x[i])
+			}
+			kase = 2
+			isave[0] = 4
+			return est, kase
+		}
+	case 4:
+		jlast := isave[1]
+		isave[1] = bi.Idamax(n, x, 1)
+		if x[jlast] != math.Abs(x[isave[1]]) && isave[2] < itmax {
+			isave[2] += 1
+			for i := 0; i < n; i++ {
+				x[i] = 0
+			}
+			x[isave[1]] = 1
+			kase = 1
+			isave[0] = 3
+			return est, kase
+		}
+	case 5:
+		tmp := 2 * (bi.Dasum(n, x, 1)) / float64(3*n)
+		if tmp > est {
+			bi.Dcopy(n, x, 1, v, 1)
+			est = tmp
+		}
+		kase = 0
+		return est, kase
+	}
+	// Iteration complete. Final stage: request A*x for an alternating-sign x.
+	altsgn := 1.0
+	for i := 0; i < n; i++ {
+		x[i] = altsgn * (1 + float64(i)/float64(n-1))
+		altsgn *= -1
+	}
+	kase = 1
+	isave[0] = 5
+	return est, kase
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlacpy.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlacpy.go
@ -0,0 +1,59 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dlacpy copies the elements of A specified by uplo into B. Uplo can specify
+// a triangular portion with blas.Upper or blas.Lower, or can specify all of the
+// elements with blas.All.
+//
+// Dlacpy is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlacpy(uplo blas.Uplo, m, n int, a []float64, lda int, b []float64, ldb int) {
+	switch {
+	case uplo != blas.Upper && uplo != blas.Lower && uplo != blas.All:
+		panic(badUplo)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldb < max(1, n):
+		panic(badLdB)
+	}
+
+	if m == 0 || n == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(b) < (m-1)*ldb+n:
+		panic(shortB)
+	}
+
+	switch uplo {
+	case blas.Upper:
+		for i := 0; i < m; i++ {
+			for j := i; j < n; j++ {
+				b[i*ldb+j] = a[i*lda+j]
+			}
+		}
+	case blas.Lower:
+		for i := 0; i < m; i++ {
+			for j := 0; j < min(i+1, n); j++ {
+				b[i*ldb+j] = a[i*lda+j]
+			}
+		}
+	case blas.All:
+		for i := 0; i < m; i++ {
+			for j := 0; j < n; j++ {
+				b[i*ldb+j] = a[i*lda+j]
+			}
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlae2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlae2.go
@ -0,0 +1,49 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlae2 computes the eigenvalues of a 2×2 symmetric matrix
+//  [a b]
+//  [b c]
+// and returns the eigenvalue with the larger absolute value as rt1 and the
+// one with the smaller absolute value as rt2.
+//
+// Dlae2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlae2(a, b, c float64) (rt1, rt2 float64) {
+	sm := a + c
+	df := a - c
+	adf := math.Abs(df)
+	tb := b + b
+	ab := math.Abs(tb)
+	acmx := c
+	acmn := a
+	if math.Abs(a) > math.Abs(c) {
+		acmx = a
+		acmn = c
+	}
+	var rt float64 // rt = sqrt(adf*adf + ab*ab), scaled by the larger of adf and ab
+	if adf > ab {
+		rt = adf * math.Sqrt(1+(ab/adf)*(ab/adf))
+	} else if adf < ab {
+		rt = ab * math.Sqrt(1+(adf/ab)*(adf/ab))
+	} else {
+		rt = ab * math.Sqrt2
+	}
+	if sm < 0 {
+		rt1 = 0.5 * (sm - rt)
+		rt2 = (acmx/rt1)*acmn - (b/rt1)*b
+		return rt1, rt2
+	}
+	if sm > 0 {
+		rt1 = 0.5 * (sm + rt)
+		rt2 = (acmx/rt1)*acmn - (b/rt1)*b
+		return rt1, rt2
+	}
+	rt1 = 0.5 * rt // sm is neither negative nor positive: return +rt/2 and -rt/2
+	rt2 = -0.5 * rt
+	return rt1, rt2
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaev2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaev2.go
@ -0,0 +1,82 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlaev2 computes the eigendecomposition of a symmetric 2×2 matrix.
+// The matrix is given by
+//  [a b]
+//  [b c]
+// Dlaev2 returns rt1 and rt2, the eigenvalues of the matrix where |rt1| >= |rt2|,
+// and [cs1, sn1] which is the unit right eigenvector for rt1.
+//  [ cs1 sn1] [a b] [cs1 -sn1] = [rt1   0]
+//  [-sn1 cs1] [b c] [sn1  cs1]   [  0 rt2]
+//
+// Dlaev2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlaev2(a, b, c float64) (rt1, rt2, cs1, sn1 float64) {
+	sm := a + c
+	df := a - c
+	adf := math.Abs(df)
+	tb := b + b
+	ab := math.Abs(tb)
+	acmx := c
+	acmn := a
+	if math.Abs(a) > math.Abs(c) {
+		acmx = a
+		acmn = c
+	}
+	var rt float64 // rt = sqrt(adf*adf + ab*ab), scaled by the larger of adf and ab
+	if adf > ab {
+		rt = adf * math.Sqrt(1+(ab/adf)*(ab/adf))
+	} else if adf < ab {
+		rt = ab * math.Sqrt(1+(adf/ab)*(adf/ab))
+	} else {
+		rt = ab * math.Sqrt2
+	}
+	var sgn1 float64
+	if sm < 0 {
+		rt1 = 0.5 * (sm - rt)
+		sgn1 = -1
+		rt2 = (acmx/rt1)*acmn - (b/rt1)*b
+	} else if sm > 0 {
+		rt1 = 0.5 * (sm + rt)
+		sgn1 = 1
+		rt2 = (acmx/rt1)*acmn - (b/rt1)*b
+	} else {
+		rt1 = 0.5 * rt
+		rt2 = -0.5 * rt
+		sgn1 = 1
+	}
+	var cs, sgn2 float64
+	if df >= 0 {
+		cs = df + rt
+		sgn2 = 1
+	} else {
+		cs = df - rt
+		sgn2 = -1
+	}
+	acs := math.Abs(cs)
+	if acs > ab {
+		ct := -tb / cs
+		sn1 = 1 / math.Sqrt(1+ct*ct)
+		cs1 = ct * sn1
+	} else {
+		if ab == 0 {
+			cs1 = 1
+			sn1 = 0
+		} else {
+			tn := -cs / tb
+			cs1 = 1 / math.Sqrt(1+tn*tn)
+			sn1 = tn * cs1
+		}
+	}
+	if sgn1 == sgn2 {
+		tn := cs1
+		cs1 = -sn1
+		sn1 = tn
+	}
+	return rt1, rt2, cs1, sn1
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaexc.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaexc.go
@ -0,0 +1,269 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlaexc swaps two adjacent diagonal blocks of order 1 or 2 in an n×n upper
+// quasi-triangular matrix T by an orthogonal similarity transformation.
+//
+// T must be in Schur canonical form, that is, block upper triangular with 1×1
+// and 2×2 diagonal blocks; each 2×2 diagonal block has its diagonal elements
+// equal and its off-diagonal elements of opposite sign. On return, T will
+// contain the updated matrix again in Schur canonical form.
+//
+// If wantq is true, the transformation is accumulated in the n×n matrix Q,
+// otherwise Q is not referenced.
+//
+// j1 is the index of the first row of the first block. n1 and n2 are the order
+// of the first and second block, respectively.
+//
+// work must have length at least n, otherwise Dlaexc will panic.
+//
+// If ok is false, the transformed matrix T would be too far from Schur form.
+// The blocks are not swapped, and T and Q are not modified.
+//
+// If n1 and n2 are both equal to 1, Dlaexc will always return true.
+//
+// Dlaexc is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlaexc(wantq bool, n int, t []float64, ldt int, q []float64, ldq int, j1, n1, n2 int, work []float64) (ok bool) {
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case ldt < max(1, n):
+		panic(badLdT)
+	case wantq && ldq < max(1, n):
+		panic(badLdQ)
+	case j1 < 0 || n <= j1:
+		panic(badJ1)
+	case len(work) < n:
+		panic(shortWork)
+	case n1 < 0 || 2 < n1:
+		panic(badN1)
+	case n2 < 0 || 2 < n2:
+		panic(badN2)
+	}
+
+	if n == 0 || n1 == 0 || n2 == 0 {
+		return true
+	}
+
+	switch {
+	case len(t) < (n-1)*ldt+n:
+		panic(shortT)
+	case wantq && len(q) < (n-1)*ldq+n:
+		panic(shortQ)
+	}
+
+	if j1+n1 >= n {
+		// TODO(vladimir-ch): Reference LAPACK does this check whether
+		// the start of the second block is in the matrix T. It returns
+		// true if it is not and moreover it does not check whether the
+		// whole second block fits into T. This does not feel
+		// satisfactory. The only caller of Dlaexc is Dtrexc, so if the
+		// caller makes sure that this does not happen, we could be
+		// stricter here.
+		return true
+	}
+
+	j2 := j1 + 1
+	j3 := j1 + 2
+
+	bi := blas64.Implementation()
+
+	if n1 == 1 && n2 == 1 {
+		// Swap two 1×1 blocks.
+		t11 := t[j1*ldt+j1]
+		t22 := t[j2*ldt+j2]
+
+		// Determine the transformation to perform the interchange.
+		cs, sn, _ := impl.Dlartg(t[j1*ldt+j2], t22-t11)
+
+		// Apply transformation to the matrix T.
+		if n-j3 > 0 {
+			bi.Drot(n-j3, t[j1*ldt+j3:], 1, t[j2*ldt+j3:], 1, cs, sn)
+		}
+		if j1 > 0 {
+			bi.Drot(j1, t[j1:], ldt, t[j2:], ldt, cs, sn)
+		}
+
+		t[j1*ldt+j1] = t22
+		t[j2*ldt+j2] = t11
+
+		if wantq {
+			// Accumulate transformation in the matrix Q.
+			bi.Drot(n, q[j1:], ldq, q[j2:], ldq, cs, sn)
+		}
+
+		return true
+	}
+
+	// Swapping involves at least one 2×2 block.
+	//
+	// Copy the diagonal block of order n1+n2 to the local array d and
+	// compute its norm.
+	nd := n1 + n2
+	var d [16]float64
+	const ldd = 4
+	impl.Dlacpy(blas.All, nd, nd, t[j1*ldt+j1:], ldt, d[:], ldd)
+	dnorm := impl.Dlange(lapack.MaxAbs, nd, nd, d[:], ldd, work)
+
+	// Compute machine-dependent threshold for test for accepting swap.
+	eps := dlamchP
+	thresh := math.Max(10*eps*dnorm, dlamchS/eps)
+
+	// Solve T11*X - X*T22 = scale*T12 for X.
+	var x [4]float64
+	const ldx = 2
+	scale, _, _ := impl.Dlasy2(false, false, -1, n1, n2, d[:], ldd, d[n1*ldd+n1:], ldd, d[n1:], ldd, x[:], ldx)
+
+	// Swap the adjacent diagonal blocks.
+	switch {
+	case n1 == 1 && n2 == 2:
+		// Generate elementary reflector H so that
+		//  ( scale, X11, X12 ) H = ( 0, 0, * )
+		u := [3]float64{scale, x[0], 1}
+		_, tau := impl.Dlarfg(3, x[1], u[:2], 1)
+		t11 := t[j1*ldt+j1]
+
+		// Perform swap provisionally on diagonal block in d.
+		impl.Dlarfx(blas.Left, 3, 3, u[:], tau, d[:], ldd, work)
+		impl.Dlarfx(blas.Right, 3, 3, u[:], tau, d[:], ldd, work)
+
+		// Test whether to reject swap.
+		if math.Max(math.Abs(d[2*ldd]), math.Max(math.Abs(d[2*ldd+1]), math.Abs(d[2*ldd+2]-t11))) > thresh {
+			return false
+		}
+
+		// Accept swap: apply transformation to the entire matrix T.
+		impl.Dlarfx(blas.Left, 3, n-j1, u[:], tau, t[j1*ldt+j1:], ldt, work)
+		impl.Dlarfx(blas.Right, j2+1, 3, u[:], tau, t[j1:], ldt, work)
+
+		t[j3*ldt+j1] = 0
+		t[j3*ldt+j2] = 0
+		t[j3*ldt+j3] = t11
+
+		if wantq {
+			// Accumulate transformation in the matrix Q.
+			impl.Dlarfx(blas.Right, n, 3, u[:], tau, q[j1:], ldq, work)
+		}
+
+	case n1 == 2 && n2 == 1:
+		//  Generate elementary reflector H so that:
+		//   H (  -X11 ) = ( * )
+		//     (  -X21 ) = ( 0 )
+		//     ( scale ) = ( 0 )
+		u := [3]float64{1, -x[ldx], scale}
+		_, tau := impl.Dlarfg(3, -x[0], u[1:], 1)
+		t33 := t[j3*ldt+j3]
+
+		// Perform swap provisionally on diagonal block in D.
+		impl.Dlarfx(blas.Left, 3, 3, u[:], tau, d[:], ldd, work)
+		impl.Dlarfx(blas.Right, 3, 3, u[:], tau, d[:], ldd, work)
+
+		// Test whether to reject swap.
+		if math.Max(math.Abs(d[ldd]), math.Max(math.Abs(d[2*ldd]), math.Abs(d[0]-t33))) > thresh {
+			return false
+		}
+
+		// Accept swap: apply transformation to the entire matrix T.
+		impl.Dlarfx(blas.Right, j3+1, 3, u[:], tau, t[j1:], ldt, work)
+		impl.Dlarfx(blas.Left, 3, n-j1-1, u[:], tau, t[j1*ldt+j2:], ldt, work)
+
+		t[j1*ldt+j1] = t33
+		t[j2*ldt+j1] = 0
+		t[j3*ldt+j1] = 0
+
+		if wantq {
+			// Accumulate transformation in the matrix Q.
+			impl.Dlarfx(blas.Right, n, 3, u[:], tau, q[j1:], ldq, work)
+		}
+
+	default: // n1 == 2 && n2 == 2
+		// Generate elementary reflectors H_1 and H_2 so that:
+		//  H_2 H_1 (  -X11  -X12 ) = (  *  * )
+		//          (  -X21  -X22 )   (  0  * )
+		//          ( scale    0  )   (  0  0 )
+		//          (    0  scale )   (  0  0 )
+		u1 := [3]float64{1, -x[ldx], scale}
+		_, tau1 := impl.Dlarfg(3, -x[0], u1[1:], 1)
+
+		temp := -tau1 * (x[1] + u1[1]*x[ldx+1])
+		u2 := [3]float64{1, -temp * u1[2], scale}
+		_, tau2 := impl.Dlarfg(3, -temp*u1[1]-x[ldx+1], u2[1:], 1)
+
+		// Perform swap provisionally on diagonal block in D.
+		impl.Dlarfx(blas.Left, 3, 4, u1[:], tau1, d[:], ldd, work)
+		impl.Dlarfx(blas.Right, 4, 3, u1[:], tau1, d[:], ldd, work)
+		impl.Dlarfx(blas.Left, 3, 4, u2[:], tau2, d[ldd:], ldd, work)
+		impl.Dlarfx(blas.Right, 4, 3, u2[:], tau2, d[1:], ldd, work)
+
+		// Test whether to reject swap.
+		m1 := math.Max(math.Abs(d[2*ldd]), math.Abs(d[2*ldd+1]))
+		m2 := math.Max(math.Abs(d[3*ldd]), math.Abs(d[3*ldd+1]))
+		if math.Max(m1, m2) > thresh {
+			return false
+		}
+
+		// Accept swap: apply transformation to the entire matrix T.
+		j4 := j1 + 3
+		impl.Dlarfx(blas.Left, 3, n-j1, u1[:], tau1, t[j1*ldt+j1:], ldt, work)
+		impl.Dlarfx(blas.Right, j4+1, 3, u1[:], tau1, t[j1:], ldt, work)
+		impl.Dlarfx(blas.Left, 3, n-j1, u2[:], tau2, t[j2*ldt+j1:], ldt, work)
+		impl.Dlarfx(blas.Right, j4+1, 3, u2[:], tau2, t[j2:], ldt, work)
+
+		t[j3*ldt+j1] = 0
+		t[j3*ldt+j2] = 0
+		t[j4*ldt+j1] = 0
+		t[j4*ldt+j2] = 0
+
+		if wantq {
+			// Accumulate transformation in the matrix Q.
+			impl.Dlarfx(blas.Right, n, 3, u1[:], tau1, q[j1:], ldq, work)
+			impl.Dlarfx(blas.Right, n, 3, u2[:], tau2, q[j2:], ldq, work)
+		}
+	}
+
+	if n2 == 2 {
+		// Standardize new 2×2 block T11.
+		a, b := t[j1*ldt+j1], t[j1*ldt+j2]
+		c, d := t[j2*ldt+j1], t[j2*ldt+j2]
+		var cs, sn float64
+		t[j1*ldt+j1], t[j1*ldt+j2], t[j2*ldt+j1], t[j2*ldt+j2], _, _, _, _, cs, sn = impl.Dlanv2(a, b, c, d)
+		if n-j1-2 > 0 {
+			bi.Drot(n-j1-2, t[j1*ldt+j1+2:], 1, t[j2*ldt+j1+2:], 1, cs, sn)
+		}
+		if j1 > 0 {
+			bi.Drot(j1, t[j1:], ldt, t[j2:], ldt, cs, sn)
+		}
+		if wantq {
+			bi.Drot(n, q[j1:], ldq, q[j2:], ldq, cs, sn)
+		}
+	}
+	if n1 == 2 {
+		// Standardize new 2×2 block T22.
+		j3 := j1 + n2
+		j4 := j3 + 1
+		a, b := t[j3*ldt+j3], t[j3*ldt+j4]
+		c, d := t[j4*ldt+j3], t[j4*ldt+j4]
+		var cs, sn float64
+		t[j3*ldt+j3], t[j3*ldt+j4], t[j4*ldt+j3], t[j4*ldt+j4], _, _, _, _, cs, sn = impl.Dlanv2(a, b, c, d)
+		if n-j3-2 > 0 {
+			bi.Drot(n-j3-2, t[j3*ldt+j3+2:], 1, t[j4*ldt+j3+2:], 1, cs, sn)
+		}
+		bi.Drot(j3, t[j3:], ldt, t[j4:], ldt, cs, sn)
+		if wantq {
+			bi.Drot(n, q[j3:], ldq, q[j4:], ldq, cs, sn)
+		}
+	}
+
+	return true
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlags2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlags2.go
@ -0,0 +1,182 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlags2 computes 2-by-2 orthogonal matrices U, V and Q with the
+// triangles of A and B specified by upper.
+//
+// If upper is true
+//
+//  U^T*A*Q = U^T*[ a1 a2 ]*Q = [ x  0 ]
+//                [ 0  a3 ]     [ x  x ]
+// and
+//  V^T*B*Q = V^T*[ b1 b2 ]*Q = [ x  0 ]
+//                [ 0  b3 ]     [ x  x ]
+//
+// otherwise
+//
+//  U^T*A*Q = U^T*[ a1 0  ]*Q = [ x  x ]
+//                [ a2 a3 ]     [ 0  x ]
+// and
+//  V^T*B*Q = V^T*[ b1 0  ]*Q = [ x  x ]
+//                [ b2 b3 ]     [ 0  x ].
+//
+// The rows of the transformed A and B are parallel, where
+//
+//  U = [  csu  snu ], V = [  csv snv ], Q = [  csq   snq ]
+//      [ -snu  csu ]      [ -snv csv ]      [ -snq   csq ]
+//
+// Dlags2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlags2(upper bool, a1, a2, a3, b1, b2, b3 float64) (csu, snu, csv, snv, csq, snq float64) {
+	if upper {
+		// Input matrices A and B are upper triangular matrices.
+		//
+		// Form matrix C = A*adj(B) = [ a b ]
+		//                            [ 0 d ]
+		a := a1 * b3
+		d := a3 * b1
+		b := a2*b1 - a1*b2
+
+		// The SVD of real 2-by-2 triangular C.
+		//
+		//  [ csl -snl ]*[ a b ]*[  csr  snr ] = [ r 0 ]
+		//  [ snl  csl ] [ 0 d ] [ -snr  csr ]   [ 0 t ]
+		_, _, snr, csr, snl, csl := impl.Dlasv2(a, b, d) // the first two results are not needed here
+
+		if math.Abs(csl) >= math.Abs(snl) || math.Abs(csr) >= math.Abs(snr) {
+			// Compute the [0, 0] and [0, 1] elements of U^T*A and V^T*B,
+			// and [0, 1] element of |U|^T*|A| and |V|^T*|B|.
+
+			ua11r := csl * a1
+			ua12 := csl*a2 + snl*a3
+
+			vb11r := csr * b1
+			vb12 := csr*b2 + snr*b3
+
+			aua12 := math.Abs(csl)*math.Abs(a2) + math.Abs(snl)*math.Abs(a3)
+			avb12 := math.Abs(csr)*math.Abs(b2) + math.Abs(snr)*math.Abs(b3)
+
+			// Zero [0, 1] elements of U^T*A and V^T*B.
+			if math.Abs(ua11r)+math.Abs(ua12) != 0 {
+				if aua12/(math.Abs(ua11r)+math.Abs(ua12)) <= avb12/(math.Abs(vb11r)+math.Abs(vb12)) {
+					csq, snq, _ = impl.Dlartg(-ua11r, ua12)
+				} else {
+					csq, snq, _ = impl.Dlartg(-vb11r, vb12)
+				}
+			} else {
+				csq, snq, _ = impl.Dlartg(-vb11r, vb12)
+			}
+
+			csu = csl
+			snu = -snl
+			csv = csr
+			snv = -snr
+		} else {
+			// Compute the [1, 0] and [1, 1] elements of U^T*A and V^T*B,
+			// and [1, 1] element of |U|^T*|A| and |V|^T*|B|.
+
+			ua21 := -snl * a1
+			ua22 := -snl*a2 + csl*a3
+
+			vb21 := -snr * b1
+			vb22 := -snr*b2 + csr*b3
+
+			aua22 := math.Abs(snl)*math.Abs(a2) + math.Abs(csl)*math.Abs(a3)
+			avb22 := math.Abs(snr)*math.Abs(b2) + math.Abs(csr)*math.Abs(b3)
+
+			// Zero [1, 1] elements of U^T*A and V^T*B, and then swap.
+			if math.Abs(ua21)+math.Abs(ua22) != 0 {
+				if aua22/(math.Abs(ua21)+math.Abs(ua22)) <= avb22/(math.Abs(vb21)+math.Abs(vb22)) {
+					csq, snq, _ = impl.Dlartg(-ua21, ua22)
+				} else {
+					csq, snq, _ = impl.Dlartg(-vb21, vb22)
+				}
+			} else {
+				csq, snq, _ = impl.Dlartg(-vb21, vb22)
+			}
+
+			csu = snl
+			snu = csl
+			csv = snr
+			snv = csr
+		}
+	} else {
+		// Input matrices A and B are lower triangular matrices.
+		//
+		// Form matrix C = A*adj(B) = [ a 0 ]
+		//                            [ c d ]
+		a := a1 * b3
+		d := a3 * b1
+		c := a2*b3 - a3*b2
+
+		// The SVD of real 2-by-2 triangular C.
+		//
+		//  [ csl -snl ]*[ a 0 ]*[  csr  snr ] = [ r 0 ]
+		//  [ snl  csl ] [ c d ] [ -snr  csr ]   [ 0 t ]
+		_, _, snr, csr, snl, csl := impl.Dlasv2(a, c, d) // the first two results are not needed here
+
+		if math.Abs(csr) >= math.Abs(snr) || math.Abs(csl) >= math.Abs(snl) {
+			// Compute the [1, 0] and [1, 1] elements of U^T*A and V^T*B,
+			// and [1, 0] element of |U|^T*|A| and |V|^T*|B|.
+
+			ua21 := -snr*a1 + csr*a2
+			ua22r := csr * a3
+
+			vb21 := -snl*b1 + csl*b2
+			vb22r := csl * b3
+
+			aua21 := math.Abs(snr)*math.Abs(a1) + math.Abs(csr)*math.Abs(a2)
+			avb21 := math.Abs(snl)*math.Abs(b1) + math.Abs(csl)*math.Abs(b2)
+
+			// Zero [1, 0] elements of U^T*A and V^T*B.
+			if (math.Abs(ua21) + math.Abs(ua22r)) != 0 {
+				if aua21/(math.Abs(ua21)+math.Abs(ua22r)) <= avb21/(math.Abs(vb21)+math.Abs(vb22r)) {
+					csq, snq, _ = impl.Dlartg(ua22r, ua21)
+				} else {
+					csq, snq, _ = impl.Dlartg(vb22r, vb21)
+				}
+			} else {
+				csq, snq, _ = impl.Dlartg(vb22r, vb21)
+			}
+
+			csu = csr
+			snu = -snr
+			csv = csl
+			snv = -snl
+		} else {
+			// Compute the [0, 0] and [0, 1] elements of U^T*A and V^T*B,
+			// and [0, 0] element of |U|^T*|A| and |V|^T*|B|.
+
+			ua11 := csr*a1 + snr*a2
+			ua12 := snr * a3
+
+			vb11 := csl*b1 + snl*b2
+			vb12 := snl * b3
+
+			aua11 := math.Abs(csr)*math.Abs(a1) + math.Abs(snr)*math.Abs(a2)
+			avb11 := math.Abs(csl)*math.Abs(b1) + math.Abs(snl)*math.Abs(b2)
+
+			// Zero [0, 0] elements of U^T*A and V^T*B, and then swap.
+			if (math.Abs(ua11) + math.Abs(ua12)) != 0 {
+				if aua11/(math.Abs(ua11)+math.Abs(ua12)) <= avb11/(math.Abs(vb11)+math.Abs(vb12)) {
+					csq, snq, _ = impl.Dlartg(ua12, ua11)
+				} else {
+					csq, snq, _ = impl.Dlartg(vb12, vb11)
+				}
+			} else {
+				csq, snq, _ = impl.Dlartg(vb12, vb11)
+			}
+
+			csu = snr
+			snu = csr
+			csv = snl
+			snv = csl
+		}
+	}
+
+	return csu, snu, csv, snv, csq, snq
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlahqr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlahqr.go
@ -0,0 +1,431 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlahqr computes the eigenvalues and Schur factorization of a block of an n×n
+// upper Hessenberg matrix H, using the double-shift/single-shift QR algorithm.
+//
+// h and ldh represent the matrix H. Dlahqr works primarily with the Hessenberg
+// submatrix H[ilo:ihi+1,ilo:ihi+1], but applies transformations to all of H if
+// wantt is true. It is assumed that H[ihi+1:n,ihi+1:n] is already upper
+// quasi-triangular, although this is not checked.
+//
+// It must hold that
+//  0 <= ilo <= max(0,ihi), and ihi < n,
+// and that
+//  H[ilo,ilo-1] == 0,  if ilo > 0,
+// otherwise Dlahqr will panic.
+//
+// If unconverged is zero on return, wr[ilo:ihi+1] and wi[ilo:ihi+1] will contain
+// respectively the real and imaginary parts of the computed eigenvalues ilo
+// to ihi. If two eigenvalues are computed as a complex conjugate pair, they are
+// stored in consecutive elements of wr and wi, say the i-th and (i+1)th, with
+// wi[i] > 0 and wi[i+1] < 0. If wantt is true, the eigenvalues are stored in
+// the same order as on the diagonal of the Schur form returned in H, with
+// wr[i] = H[i,i], and, if H[i:i+2,i:i+2] is a 2×2 diagonal block,
+// wi[i] = sqrt(abs(H[i+1,i]*H[i,i+1])) and wi[i+1] = -wi[i].
+//
+// wr and wi must have length ihi+1, otherwise Dlahqr will panic.
+//
+// z and ldz represent an n×n matrix Z. If wantz is true, the transformations
+// will be applied to the submatrix Z[iloz:ihiz+1,ilo:ihi+1] and it must hold that
+//  0 <= iloz <= ilo, and ihi <= ihiz < n.
+// If wantz is false, z is not referenced.
+//
+// unconverged indicates whether Dlahqr computed all the eigenvalues ilo to ihi
+// in a total of 30 iterations per eigenvalue.
+//
+// If unconverged is zero, all the eigenvalues ilo to ihi have been computed and
+// will be stored on return in wr[ilo:ihi+1] and wi[ilo:ihi+1].
+//
+// If unconverged is zero and wantt is true, H[ilo:ihi+1,ilo:ihi+1] will be
+// overwritten on return by upper quasi-triangular full Schur form with any
+// 2×2 diagonal blocks in standard form.
+//
+// If unconverged is zero and if wantt is false, the contents of h on return is
+// unspecified.
+//
+// If unconverged is positive, some eigenvalues have not converged, and
+// wr[unconverged:ihi+1] and wi[unconverged:ihi+1] contain those eigenvalues
+// which have been successfully computed.
+//
+// If unconverged is positive and wantt is true, then on return
+//  (initial H)*U = U*(final H),   (*)
+// where U is an orthogonal matrix. The final H is upper Hessenberg and
+// H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular.
+//
+// If unconverged is positive and wantt is false, on return the remaining
+// unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix
+// H[ilo:unconverged,ilo:unconverged].
+//
+// If unconverged is positive and wantz is true, then on return
+//  (final Z) = (initial Z)*U,
+// where U is the orthogonal matrix in (*) regardless of the value of wantt.
+//
+// Dlahqr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlahqr(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int) (unconverged int) {
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case ilo < 0, max(0, ihi) < ilo:
+		panic(badIlo)
+	case ihi >= n:
+		panic(badIhi)
+	case ldh < max(1, n):
+		panic(badLdH)
+	case wantz && (iloz < 0 || ilo < iloz):
+		panic(badIloz)
+	case wantz && (ihiz < ihi || n <= ihiz):
+		panic(badIhiz)
+	case ldz < 1, wantz && ldz < n:
+		panic(badLdZ)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		return 0
+	}
+
+	switch {
+	case len(h) < (n-1)*ldh+n:
+		panic(shortH)
+	case len(wr) != ihi+1:
+		panic(shortWr)
+	case len(wi) != ihi+1:
+		panic(shortWi)
+	case wantz && len(z) < (n-1)*ldz+n:
+		panic(shortZ)
+	case ilo > 0 && h[ilo*ldh+ilo-1] != 0:
+		panic(notIsolated)
+	}
+
+	if ilo == ihi {
+		wr[ilo] = h[ilo*ldh+ilo]
+		wi[ilo] = 0
+		return 0
+	}
+
+	// Clear out the trash.
+	for j := ilo; j < ihi-2; j++ {
+		h[(j+2)*ldh+j] = 0
+		h[(j+3)*ldh+j] = 0
+	}
+	if ilo <= ihi-2 {
+		h[ihi*ldh+ihi-2] = 0
+	}
+
+	nh := ihi - ilo + 1
+	nz := ihiz - iloz + 1
+
+	// Set machine-dependent constants for the stopping criterion.
+	ulp := dlamchP
+	smlnum := float64(nh) / ulp * dlamchS
+
+	// i1 and i2 are the indices of the first row and last column of H to
+	// which transformations must be applied. If eigenvalues only are being
+	// computed, i1 and i2 are set inside the main loop.
+	var i1, i2 int
+	if wantt {
+		i1 = 0
+		i2 = n - 1
+	}
+
+	itmax := 30 * max(10, nh) // Total number of QR iterations allowed.
+
+	// The main loop begins here. i is the loop index and decreases from ihi
+	// to ilo in steps of 1 or 2. Each iteration of the loop works with the
+	// active submatrix in rows and columns l to i. Eigenvalues i+1 to ihi
+	// have already converged. Either l = ilo or H[l,l-1] is negligible so
+	// that the matrix splits.
+	bi := blas64.Implementation()
+	i := ihi
+	for i >= ilo {
+		l := ilo
+
+		// Perform QR iterations on rows and columns ilo to i until a
+		// submatrix of order 1 or 2 splits off at the bottom because a
+		// subdiagonal element has become negligible.
+		converged := false
+		for its := 0; its <= itmax; its++ {
+			// Look for a single small subdiagonal element.
+			var k int
+			for k = i; k > l; k-- {
+				if math.Abs(h[k*ldh+k-1]) <= smlnum {
+					break
+				}
+				tst := math.Abs(h[(k-1)*ldh+k-1]) + math.Abs(h[k*ldh+k])
+				if tst == 0 {
+					if k-2 >= ilo {
+						tst += math.Abs(h[(k-1)*ldh+k-2])
+					}
+					if k+1 <= ihi {
+						tst += math.Abs(h[(k+1)*ldh+k])
+					}
+				}
+				// The following is a conservative small
+				// subdiagonal deflation criterion due to Ahues
+				// & Tisseur (LAWN 122, 1997). It has better
+				// mathematical foundation and improves accuracy
+				// in some cases.
+				if math.Abs(h[k*ldh+k-1]) <= ulp*tst {
+					ab := math.Max(math.Abs(h[k*ldh+k-1]), math.Abs(h[(k-1)*ldh+k]))
+					ba := math.Min(math.Abs(h[k*ldh+k-1]), math.Abs(h[(k-1)*ldh+k]))
+					aa := math.Max(math.Abs(h[k*ldh+k]), math.Abs(h[(k-1)*ldh+k-1]-h[k*ldh+k]))
+					bb := math.Min(math.Abs(h[k*ldh+k]), math.Abs(h[(k-1)*ldh+k-1]-h[k*ldh+k]))
+					s := aa + ab
+					if ab/s*ba <= math.Max(smlnum, aa/s*bb*ulp) {
+						break
+					}
+				}
+			}
+			l = k
+			if l > ilo {
+				// H[l,l-1] is negligible.
+				h[l*ldh+l-1] = 0
+			}
+			if l >= i-1 {
+				// Break the loop because a submatrix of order 1
+				// or 2 has split off.
+				converged = true
+				break
+			}
+
+			// Now the active submatrix is in rows and columns l to
+			// i. If eigenvalues only are being computed, only the
+			// active submatrix need be transformed.
+			if !wantt {
+				i1 = l
+				i2 = i
+			}
+
+			const (
+				dat1 = 3.0
+				dat2 = -0.4375
+			)
+			var h11, h21, h12, h22 float64
+			switch its {
+			case 10: // Exceptional shift.
+				s := math.Abs(h[(l+1)*ldh+l]) + math.Abs(h[(l+2)*ldh+l+1])
+				h11 = dat1*s + h[l*ldh+l]
+				h12 = dat2 * s
+				h21 = s
+				h22 = h11
+			case 20: // Exceptional shift.
+				s := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2])
+				h11 = dat1*s + h[i*ldh+i]
+				h12 = dat2 * s
+				h21 = s
+				h22 = h11
+			default: // Prepare to use Francis' double shift (i.e.,
+				// 2nd degree generalized Rayleigh quotient).
+				h11 = h[(i-1)*ldh+i-1]
+				h21 = h[i*ldh+i-1]
+				h12 = h[(i-1)*ldh+i]
+				h22 = h[i*ldh+i]
+			}
+			s := math.Abs(h11) + math.Abs(h12) + math.Abs(h21) + math.Abs(h22)
+			var (
+				rt1r, rt1i float64
+				rt2r, rt2i float64
+			)
+			if s != 0 {
+				h11 /= s
+				h21 /= s
+				h12 /= s
+				h22 /= s
+				tr := (h11 + h22) / 2
+				det := (h11-tr)*(h22-tr) - h12*h21
+				rtdisc := math.Sqrt(math.Abs(det))
+				if det >= 0 {
+					// Complex conjugate shifts.
+					rt1r = tr * s
+					rt2r = rt1r
+					rt1i = rtdisc * s
+					rt2i = -rt1i
+				} else {
+					// Real shifts (use only one of them).
+					rt1r = tr + rtdisc
+					rt2r = tr - rtdisc
+					if math.Abs(rt1r-h22) <= math.Abs(rt2r-h22) {
+						rt1r *= s
+						rt2r = rt1r
+					} else {
+						rt2r *= s
+						rt1r = rt2r
+					}
+					rt1i = 0
+					rt2i = 0
+				}
+			}
+
+			// Look for two consecutive small subdiagonal elements.
+			var m int
+			var v [3]float64
+			for m = i - 2; m >= l; m-- {
+				// Determine the effect of starting the
+				// double-shift QR iteration at row m, and see
+				// if this would make H[m,m-1] negligible. The
+				// following uses scaling to avoid overflows and
+				// most underflows.
+				h21s := h[(m+1)*ldh+m]
+				s := math.Abs(h[m*ldh+m]-rt2r) + math.Abs(rt2i) + math.Abs(h21s)
+				h21s /= s
+				v[0] = h21s*h[m*ldh+m+1] + (h[m*ldh+m]-rt1r)*((h[m*ldh+m]-rt2r)/s) - rt2i/s*rt1i
+				v[1] = h21s * (h[m*ldh+m] + h[(m+1)*ldh+m+1] - rt1r - rt2r)
+				v[2] = h21s * h[(m+2)*ldh+m+1]
+				s = math.Abs(v[0]) + math.Abs(v[1]) + math.Abs(v[2])
+				v[0] /= s
+				v[1] /= s
+				v[2] /= s
+				if m == l {
+					break
+				}
+				dsum := math.Abs(h[(m-1)*ldh+m-1]) + math.Abs(h[m*ldh+m]) + math.Abs(h[(m+1)*ldh+m+1])
+				if math.Abs(h[m*ldh+m-1])*(math.Abs(v[1])+math.Abs(v[2])) <= ulp*math.Abs(v[0])*dsum {
+					break
+				}
+			}
+
+			// Double-shift QR step.
+			for k := m; k < i; k++ {
+				// The first iteration of this loop determines a
+				// reflection G from the vector V and applies it
+				// from left and right to H, thus creating a
+				// non-zero bulge below the subdiagonal.
+				//
+				// Each subsequent iteration determines a
+				// reflection G to restore the Hessenberg form
+				// in the (k-1)th column, and thus chases the
+				// bulge one step toward the bottom of the
+				// active submatrix. nr is the order of G.
+
+				nr := min(3, i-k+1)
+				if k > m {
+					bi.Dcopy(nr, h[k*ldh+k-1:], ldh, v[:], 1)
+				}
+				var t0 float64
+				v[0], t0 = impl.Dlarfg(nr, v[0], v[1:], 1)
+				if k > m {
+					h[k*ldh+k-1] = v[0]
+					h[(k+1)*ldh+k-1] = 0
+					if k < i-1 {
+						h[(k+2)*ldh+k-1] = 0
+					}
+				} else if m > l {
+					// Use the following instead of H[k,k-1] = -H[k,k-1]
+					// to avoid a bug when v[1] and v[2] underflow.
+					h[k*ldh+k-1] *= 1 - t0
+				}
+				t1 := t0 * v[1]
+				if nr == 3 {
+					t2 := t0 * v[2]
+
+					// Apply G from the left to transform
+					// the rows of the matrix in columns k
+					// to i2.
+					for j := k; j <= i2; j++ {
+						sum := h[k*ldh+j] + v[1]*h[(k+1)*ldh+j] + v[2]*h[(k+2)*ldh+j]
+						h[k*ldh+j] -= sum * t0
+						h[(k+1)*ldh+j] -= sum * t1
+						h[(k+2)*ldh+j] -= sum * t2
+					}
+
+					// Apply G from the right to transform
+					// the columns of the matrix in rows i1
+					// to min(k+3,i).
+					for j := i1; j <= min(k+3, i); j++ {
+						sum := h[j*ldh+k] + v[1]*h[j*ldh+k+1] + v[2]*h[j*ldh+k+2]
+						h[j*ldh+k] -= sum * t0
+						h[j*ldh+k+1] -= sum * t1
+						h[j*ldh+k+2] -= sum * t2
+					}
+
+					if wantz {
+						// Accumulate transformations in the matrix Z.
+						for j := iloz; j <= ihiz; j++ {
+							sum := z[j*ldz+k] + v[1]*z[j*ldz+k+1] + v[2]*z[j*ldz+k+2]
+							z[j*ldz+k] -= sum * t0
+							z[j*ldz+k+1] -= sum * t1
+							z[j*ldz+k+2] -= sum * t2
+						}
+					}
+				} else if nr == 2 {
+					// Apply G from the left to transform
+					// the rows of the matrix in columns k
+					// to i2.
+					for j := k; j <= i2; j++ {
+						sum := h[k*ldh+j] + v[1]*h[(k+1)*ldh+j]
+						h[k*ldh+j] -= sum * t0
+						h[(k+1)*ldh+j] -= sum * t1
+					}
+
+					// Apply G from the right to transform
+					// the columns of the matrix in rows i1
+					// to i.
+					for j := i1; j <= i; j++ {
+						sum := h[j*ldh+k] + v[1]*h[j*ldh+k+1]
+						h[j*ldh+k] -= sum * t0
+						h[j*ldh+k+1] -= sum * t1
+					}
+
+					if wantz {
+						// Accumulate transformations in the matrix Z.
+						for j := iloz; j <= ihiz; j++ {
+							sum := z[j*ldz+k] + v[1]*z[j*ldz+k+1]
+							z[j*ldz+k] -= sum * t0
+							z[j*ldz+k+1] -= sum * t1
+						}
+					}
+				}
+			}
+		}
+
+		if !converged {
+			// The QR iteration finished without splitting off a
+			// submatrix of order 1 or 2.
+			return i + 1
+		}
+
+		if l == i {
+			// H[i,i-1] is negligible: one eigenvalue has converged.
+			wr[i] = h[i*ldh+i]
+			wi[i] = 0
+		} else if l == i-1 {
+			// H[i-1,i-2] is negligible: a pair of eigenvalues have converged.
+
+			// Transform the 2×2 submatrix to standard Schur form,
+			// and compute and store the eigenvalues.
+			var cs, sn float64
+			a, b := h[(i-1)*ldh+i-1], h[(i-1)*ldh+i]
+			c, d := h[i*ldh+i-1], h[i*ldh+i]
+			a, b, c, d, wr[i-1], wi[i-1], wr[i], wi[i], cs, sn = impl.Dlanv2(a, b, c, d)
+			h[(i-1)*ldh+i-1], h[(i-1)*ldh+i] = a, b
+			h[i*ldh+i-1], h[i*ldh+i] = c, d
+
+			if wantt {
+				// Apply the transformation to the rest of H.
+				if i2 > i {
+					bi.Drot(i2-i, h[(i-1)*ldh+i+1:], 1, h[i*ldh+i+1:], 1, cs, sn)
+				}
+				bi.Drot(i-i1-1, h[i1*ldh+i-1:], ldh, h[i1*ldh+i:], ldh, cs, sn)
+			}
+
+			if wantz {
+				// Apply the transformation to Z.
+				bi.Drot(nz, z[iloz*ldz+i-1:], ldz, z[iloz*ldz+i:], ldz, cs, sn)
+			}
+		}
+
+		// Return to start of the main loop with new value of i.
+		i = l - 1
+	}
+	return 0
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlahr2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlahr2.go
@ -0,0 +1,195 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlahr2 reduces the first nb columns of a real general n×(n-k+1) matrix A so
+// that elements below the k-th subdiagonal are zero. The reduction is performed
+// by an orthogonal similarity transformation Q^T * A * Q. Dlahr2 returns the
+// matrices V and T which determine Q as a block reflector I - V*T*V^T, and
+// also the matrix Y = A * V * T.
+//
+// The matrix Q is represented as a product of nb elementary reflectors
+//  Q = H_0 * H_1 * ... * H_{nb-1}.
+// Each H_i has the form
+//  H_i = I - tau[i] * v * v^T,
+// where v is a real vector with v[0:i+k-1] = 0 and v[i+k-1] = 1. v[i+k:n] is
+// stored on exit in A[i+k+1:n,i].
+//
+// The elements of the vectors v together form the (n-k+1)×nb matrix
+// V which is needed, with T and Y, to apply the transformation to the
+// unreduced part of the matrix, using an update of the form
+//  A = (I - V*T*V^T) * (A - Y*V^T).
+//
+// On entry, a contains the n×(n-k+1) general matrix A. On return, the elements
+// on and above the k-th subdiagonal in the first nb columns are overwritten
+// with the corresponding elements of the reduced matrix; the elements below the
+// k-th subdiagonal, with the slice tau, represent the matrix Q as a product of
+// elementary reflectors. The other columns of A are unchanged.
+//
+// The contents of A on exit are illustrated by the following example
+// with n = 7, k = 3 and nb = 2:
+//  [ a   a   a   a   a ]
+//  [ a   a   a   a   a ]
+//  [ a   a   a   a   a ]
+//  [ h   h   a   a   a ]
+//  [ v0  h   a   a   a ]
+//  [ v0  v1  a   a   a ]
+//  [ v0  v1  a   a   a ]
+// where a denotes an element of the original matrix A, h denotes a
+// modified element of the upper Hessenberg matrix H, and vi denotes an
+// element of the vector defining H_i.
+//
+// k is the offset for the reduction. Elements below the k-th subdiagonal in the
+// first nb columns are reduced to zero.
+//
+// nb is the number of columns to be reduced.
+//
+// On entry, a represents the n×(n-k+1) matrix A. On return, the elements on and
+// above the k-th subdiagonal in the first nb columns are overwritten with the
+// corresponding elements of the reduced matrix. The elements below the k-th
+// subdiagonal, with the slice tau, represent the matrix Q as a product of
+// elementary reflectors. The other columns of A are unchanged.
+//
+// tau will contain the scalar factors of the elementary reflectors. It must
+// have length at least nb.
+//
+// t and ldt represent the nb×nb upper triangular matrix T, and y and ldy
+// represent the n×nb matrix Y.
+//
+// Dlahr2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlahr2(n, k, nb int, a []float64, lda int, tau, t []float64, ldt int, y []float64, ldy int) {
+	// Validate the scalar arguments before touching any slice.
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case nb < 0:
+		panic(nbLT0)
+	case nb > n:
+		panic(nbGTN)
+	case lda < max(1, n-k+1):
+		panic(badLdA)
+	case ldt < max(1, nb):
+		panic(badLdT)
+	case ldy < max(1, nb):
+		panic(badLdY)
+	}
+
+	// Quick return if possible. A negative n has already panicked above,
+	// so the degenerate size to handle here is n == 0 (which also forces
+	// nb == 0). Without this return the store of ei after the main loop
+	// would index a at (k-1)*lda-1.
+	if n == 0 {
+		return
+	}
+
+	// Validate the slice lengths against the dimensions.
+	switch {
+	case len(a) < (n-1)*lda+n-k+1:
+		panic(shortA)
+	case len(tau) < nb:
+		panic(shortTau)
+	case len(t) < (nb-1)*ldt+nb:
+		panic(shortT)
+	case len(y) < (n-1)*ldy+nb:
+		panic(shortY)
+	}
+
+	// Quick return if possible.
+	if n == 1 {
+		return
+	}
+
+	bi := blas64.Implementation()
+	// ei holds the value returned by Dlarfg for the current column; it is
+	// written back into a once the column has been used as a reflector.
+	var ei float64
+	for i := 0; i < nb; i++ {
+		if i > 0 {
+			// Update A[k:n,i].
+
+			// Update i-th column of A - Y * V^T.
+			bi.Dgemv(blas.NoTrans, n-k, i,
+				-1, y[k*ldy:], ldy,
+				a[(k+i-1)*lda:], 1,
+				1, a[k*lda+i:], lda)
+
+			// Apply I - V * T^T * V^T to this column (call it b)
+			// from the left, using the last column of T as
+			// workspace.
+			// Let V = [ V1 ]   and   b = [ b1 ]   (first i rows)
+			//         [ V2 ]             [ b2 ]
+			// where V1 is unit lower triangular.
+			//
+			// w := V1^T * b1.
+			bi.Dcopy(i, a[k*lda+i:], lda, t[nb-1:], ldt)
+			bi.Dtrmv(blas.Lower, blas.Trans, blas.Unit, i,
+				a[k*lda:], lda, t[nb-1:], ldt)
+
+			// w := w + V2^T * b2.
+			bi.Dgemv(blas.Trans, n-k-i, i,
+				1, a[(k+i)*lda:], lda,
+				a[(k+i)*lda+i:], lda,
+				1, t[nb-1:], ldt)
+
+			// w := T^T * w.
+			bi.Dtrmv(blas.Upper, blas.Trans, blas.NonUnit, i,
+				t, ldt, t[nb-1:], ldt)
+
+			// b2 := b2 - V2*w.
+			bi.Dgemv(blas.NoTrans, n-k-i, i,
+				-1, a[(k+i)*lda:], lda,
+				t[nb-1:], ldt,
+				1, a[(k+i)*lda+i:], lda)
+
+			// b1 := b1 - V1*w.
+			bi.Dtrmv(blas.Lower, blas.NoTrans, blas.Unit, i,
+				a[k*lda:], lda, t[nb-1:], ldt)
+			bi.Daxpy(i, -1, t[nb-1:], ldt, a[k*lda+i:], lda)
+
+			// Restore the element of the previous column that was
+			// temporarily overwritten with 1.
+			a[(k+i-1)*lda+i-1] = ei
+		}
+
+		// Generate the elementary reflector H_i to annihilate
+		// A[k+i+1:n,i]. The row index is clamped with min so that the
+		// slice expression stays in range for the last row.
+		ei, tau[i] = impl.Dlarfg(n-k-i, a[(k+i)*lda+i], a[min(k+i+1, n-1)*lda+i:], lda)
+		a[(k+i)*lda+i] = 1
+
+		// Compute Y[k:n,i].
+		bi.Dgemv(blas.NoTrans, n-k, n-k-i,
+			1, a[k*lda+i+1:], lda,
+			a[(k+i)*lda+i:], lda,
+			0, y[k*ldy+i:], ldy)
+		bi.Dgemv(blas.Trans, n-k-i, i,
+			1, a[(k+i)*lda:], lda,
+			a[(k+i)*lda+i:], lda,
+			0, t[i:], ldt)
+		bi.Dgemv(blas.NoTrans, n-k, i,
+			-1, y[k*ldy:], ldy,
+			t[i:], ldt,
+			1, y[k*ldy+i:], ldy)
+		bi.Dscal(n-k, tau[i], y[k*ldy+i:], ldy)
+
+		// Compute T[0:i,i].
+		bi.Dscal(i, -tau[i], t[i:], ldt)
+		bi.Dtrmv(blas.Upper, blas.NoTrans, blas.NonUnit, i,
+			t, ldt, t[i:], ldt)
+
+		t[i*ldt+i] = tau[i]
+	}
+	// Restore the element of the last processed column.
+	a[(k+nb-1)*lda+nb-1] = ei
+
+	// Compute Y[0:k,0:nb].
+	impl.Dlacpy(blas.All, k, nb, a[1:], lda, y, ldy)
+	bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, k, nb,
+		1, a[k*lda:], lda, y, ldy)
+	if n > k+nb {
+		bi.Dgemm(blas.NoTrans, blas.NoTrans, k, nb, n-k-nb,
+			1, a[1+nb:], lda,
+			a[(k+nb)*lda:], lda,
+			1, y, ldy)
+	}
+	bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.NonUnit, k, nb,
+		1, t, ldt, y, ldy)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaln2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaln2.go
@ -0,0 +1,405 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlaln2 solves a linear equation or a system of 2 linear equations of the form
+//  (ca A   - w D) X = scale B,  if trans == false,
+//  (ca A^T - w D) X = scale B,  if trans == true,
+// where A is a na×na real matrix, ca is a real scalar, D is a na×na diagonal
+// real matrix, w is a scalar, real if nw == 1, complex if nw == 2, and X and B
+// are na×1 matrices, real if w is real, complex if w is complex.
+//
+// If w is complex, X and B are represented as na×2 matrices, the first column
+// of each being the real part and the second being the imaginary part.
+//
+// na and nw must be 1 or 2, otherwise Dlaln2 will panic.
+//
+// d1 and d2 are the diagonal elements of D. d2 is not used if na == 1.
+//
+// wr and wi represent the real and imaginary part, respectively, of the scalar
+// w. wi is not used if nw == 1.
+//
+// smin is the desired lower bound on the singular values of A. This should be
+// a safe distance away from underflow or overflow, say, between
+// (underflow/machine precision) and (overflow*machine precision).
+//
+// If both singular values of (ca A - w D) are less than smin, smin*identity
+// will be used instead of (ca A - w D). If only one singular value is less than
+// smin, one element of (ca A - w D) will be perturbed enough to make the
+// smallest singular value roughly smin. If both singular values are at least
+// smin, (ca A - w D) will not be perturbed. In any case, the perturbation will
+// be at most some small multiple of max(smin, ulp*norm(ca A - w D)). The
+// singular values are computed by infinity-norm approximations, and thus will
+// only be correct to a factor of 2 or so.
+//
+// All input quantities are assumed to be smaller than overflow by a reasonable
+// factor.
+//
+// scale is a scaling factor less than or equal to 1 which is chosen so that X
+// can be computed without overflow. X is further scaled if necessary to assure
+// that norm(ca A - w D)*norm(X) is less than overflow.
+//
+// xnorm contains the infinity-norm of X when X is regarded as a na×nw real
+// matrix.
+//
+// ok will be false if (ca A - w D) had to be perturbed to make its smallest
+// singular value greater than smin, otherwise ok will be true.
+//
+// Dlaln2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlaln2(trans bool, na, nw int, smin, ca float64, a []float64, lda int, d1, d2 float64, b []float64, ldb int, wr, wi float64, x []float64, ldx int) (scale, xnorm float64, ok bool) {
+	// TODO(vladimir-ch): Consider splitting this function into two, one
+	// handling the real case (nw == 1) and the other handling the complex
+	// case (nw == 2). Given that Go has complex types, their signatures
+	// would be simpler and more natural, and the implementation not as
+	// convoluted.
+
+	// Validate the sizes, strides and slice lengths.
+	switch {
+	case na != 1 && na != 2:
+		panic(badNa)
+	case nw != 1 && nw != 2:
+		panic(badNw)
+	case lda < na:
+		panic(badLdA)
+	case len(a) < (na-1)*lda+na:
+		panic(shortA)
+	case ldb < nw:
+		panic(badLdB)
+	case len(b) < (na-1)*ldb+nw:
+		panic(shortB)
+	case ldx < nw:
+		panic(badLdX)
+	case len(x) < (na-1)*ldx+nw:
+		panic(shortX)
+	}
+
+	smlnum := 2 * dlamchS
+	bignum := 1 / smlnum
+	// smini is the effective lower bound used for perturbing C.
+	smini := math.Max(smin, smlnum)
+
+	ok = true
+	scale = 1
+
+	if na == 1 {
+		// 1×1 (i.e., scalar) system C X = B.
+
+		if nw == 1 {
+			// Real 1×1 system.
+
+			// C = ca A - w D.
+			csr := ca*a[0] - wr*d1
+			cnorm := math.Abs(csr)
+
+			// If |C| < smini, use C = smini.
+			if cnorm < smini {
+				csr = smini
+				cnorm = smini
+				ok = false
+			}
+
+			// Check scaling for X = B / C.
+			bnorm := math.Abs(b[0])
+			if cnorm < 1 && bnorm > math.Max(1, bignum*cnorm) {
+				scale = 1 / bnorm
+			}
+
+			// Compute X.
+			x[0] = b[0] * scale / csr
+			xnorm = math.Abs(x[0])
+
+			return scale, xnorm, ok
+		}
+
+		// Complex 1×1 system (w is complex).
+
+		// C = ca A - w D.
+		csr := ca*a[0] - wr*d1
+		csi := -wi * d1
+		cnorm := math.Abs(csr) + math.Abs(csi)
+
+		// If |C| < smini, use C = smini.
+		if cnorm < smini {
+			csr = smini
+			csi = 0
+			cnorm = smini
+			ok = false
+		}
+
+		// Check scaling for X = B / C.
+		bnorm := math.Abs(b[0]) + math.Abs(b[1])
+		if cnorm < 1 && bnorm > math.Max(1, bignum*cnorm) {
+			scale = 1 / bnorm
+		}
+
+		// Compute X.
+		cx := complex(scale*b[0], scale*b[1]) / complex(csr, csi)
+		x[0], x[1] = real(cx), imag(cx)
+		xnorm = math.Abs(x[0]) + math.Abs(x[1])
+
+		return scale, xnorm, ok
+	}
+
+	// 2×2 system.
+
+	// Compute the real part of
+	//  C = ca A   - w D
+	// or
+	//  C = ca A^T - w D.
+	// The four elements are stored row by row.
+	crv := [4]float64{
+		ca*a[0] - wr*d1,
+		ca * a[1],
+		ca * a[lda],
+		ca*a[lda+1] - wr*d2,
+	}
+	if trans {
+		crv[1] = ca * a[lda]
+		crv[2] = ca * a[1]
+	}
+
+	// pivot[p] lists the indices into crv/civ of the (1,1), (1,2), (2,1)
+	// and (2,2) elements after element p has been moved to the (1,1)
+	// position.
+	pivot := [4][4]int{
+		{0, 1, 2, 3},
+		{1, 0, 3, 2},
+		{2, 3, 0, 1},
+		{3, 2, 1, 0},
+	}
+
+	if nw == 1 {
+		// Real 2×2 system (w is real).
+
+		// Find the largest element in C.
+		var cmax float64
+		var icmax int
+		for j, v := range crv {
+			v = math.Abs(v)
+			if v > cmax {
+				cmax = v
+				icmax = j
+			}
+		}
+
+		// If norm(C) < smini, use smini*identity.
+		if cmax < smini {
+			bnorm := math.Max(math.Abs(b[0]), math.Abs(b[ldb]))
+			if smini < 1 && bnorm > math.Max(1, bignum*smini) {
+				scale = 1 / bnorm
+			}
+			temp := scale / smini
+			x[0] = temp * b[0]
+			x[ldx] = temp * b[ldb]
+			xnorm = temp * bnorm
+			ok = false
+
+			return scale, xnorm, ok
+		}
+
+		// Gaussian elimination with complete pivoting.
+		// Form upper triangular matrix
+		//  [ur11 ur12]
+		//  [   0 ur22]
+		ur11 := crv[icmax]
+		ur12 := crv[pivot[icmax][1]]
+		cr21 := crv[pivot[icmax][2]]
+		cr22 := crv[pivot[icmax][3]]
+		ur11r := 1 / ur11
+		lr21 := ur11r * cr21
+		ur22 := cr22 - ur12*lr21
+
+		// If smaller pivot < smini, use smini.
+		if math.Abs(ur22) < smini {
+			ur22 = smini
+			ok = false
+		}
+
+		var br1, br2 float64
+		if icmax > 1 {
+			// If the pivot lies in the second row, swap the rows.
+			br1 = b[ldb]
+			br2 = b[0]
+		} else {
+			br1 = b[0]
+			br2 = b[ldb]
+		}
+		br2 -= lr21 * br1 // Apply the Gaussian elimination step to the right-hand side.
+
+		bbnd := math.Max(math.Abs(ur22*ur11r*br1), math.Abs(br2))
+		if bbnd > 1 && math.Abs(ur22) < 1 && bbnd >= bignum*math.Abs(ur22) {
+			scale = 1 / bbnd
+		}
+
+		// Solve the linear system ur*xr=br.
+		xr2 := br2 * scale / ur22
+		xr1 := scale*br1*ur11r - ur11r*ur12*xr2
+		if icmax&0x1 != 0 {
+			// If the pivot lies in the second column, swap the components of the solution.
+			x[0] = xr2
+			x[ldx] = xr1
+		} else {
+			x[0] = xr1
+			x[ldx] = xr2
+		}
+		xnorm = math.Max(math.Abs(xr1), math.Abs(xr2))
+
+		// Further scaling if norm(A)*norm(X) > overflow.
+		if xnorm > 1 && cmax > 1 && xnorm > bignum/cmax {
+			temp := cmax / bignum
+			x[0] *= temp
+			x[ldx] *= temp
+			xnorm *= temp
+			scale *= temp
+		}
+
+		return scale, xnorm, ok
+	}
+
+	// Complex 2×2 system (w is complex).
+
+	// Imaginary part of C; only the diagonal is non-zero because D is
+	// diagonal and A is real.
+	civ := [4]float64{
+		-wi * d1,
+		0,
+		0,
+		-wi * d2,
+	}
+	// Find the largest element in C.
+	var cmax float64
+	var icmax int
+	for j, v := range crv {
+		v := math.Abs(v)
+		if v+math.Abs(civ[j]) > cmax {
+			cmax = v + math.Abs(civ[j])
+			icmax = j
+		}
+	}
+
+	// If norm(C) < smini, use smini*identity.
+	if cmax < smini {
+		br1 := math.Abs(b[0]) + math.Abs(b[1])
+		br2 := math.Abs(b[ldb]) + math.Abs(b[ldb+1])
+		bnorm := math.Max(br1, br2)
+		if smini < 1 && bnorm > 1 && bnorm > bignum*smini {
+			scale = 1 / bnorm
+		}
+		temp := scale / smini
+		// x is addressed with its own stride ldx; b with ldb. The two
+		// strides are independent arguments and need not be equal.
+		x[0] = temp * b[0]
+		x[1] = temp * b[1]
+		x[ldx] = temp * b[ldb]
+		x[ldx+1] = temp * b[ldb+1]
+		xnorm = temp * bnorm
+		ok = false
+
+		return scale, xnorm, ok
+	}
+
+	// Gaussian elimination with complete pivoting.
+	ur11 := crv[icmax]
+	ui11 := civ[icmax]
+	ur12 := crv[pivot[icmax][1]]
+	ui12 := civ[pivot[icmax][1]]
+	cr21 := crv[pivot[icmax][2]]
+	ci21 := civ[pivot[icmax][2]]
+	cr22 := crv[pivot[icmax][3]]
+	ci22 := civ[pivot[icmax][3]]
+	var (
+		ur11r, ui11r float64
+		lr21, li21   float64
+		ur12s, ui12s float64
+		ur22, ui22   float64
+	)
+	if icmax == 0 || icmax == 3 {
+		// Off-diagonals of pivoted C are real.
+		// Compute 1/(ur11 + i*ui11), dividing by the larger component.
+		if math.Abs(ur11) > math.Abs(ui11) {
+			temp := ui11 / ur11
+			ur11r = 1 / (ur11 * (1 + temp*temp))
+			ui11r = -temp * ur11r
+		} else {
+			temp := ur11 / ui11
+			ui11r = -1 / (ui11 * (1 + temp*temp))
+			ur11r = -temp * ui11r
+		}
+		lr21 = cr21 * ur11r
+		li21 = cr21 * ui11r
+		ur12s = ur12 * ur11r
+		ui12s = ur12 * ui11r
+		ur22 = cr22 - ur12*lr21
+		ui22 = ci22 - ur12*li21
+	} else {
+		// Diagonals of pivoted C are real.
+		ur11r = 1 / ur11
+		// ui11r is already 0.
+		lr21 = cr21 * ur11r
+		li21 = ci21 * ur11r
+		ur12s = ur12 * ur11r
+		ui12s = ui12 * ur11r
+		ur22 = cr22 - ur12*lr21 + ui12*li21
+		ui22 = -ur12*li21 - ui12*lr21
+	}
+	u22abs := math.Abs(ur22) + math.Abs(ui22)
+
+	// If smaller pivot < smini, use smini.
+	if u22abs < smini {
+		ur22 = smini
+		ui22 = 0
+		ok = false
+	}
+
+	var br1, bi1 float64
+	var br2, bi2 float64
+	if icmax > 1 {
+		// If the pivot lies in the second row, swap the rows.
+		br1 = b[ldb]
+		bi1 = b[ldb+1]
+		br2 = b[0]
+		bi2 = b[1]
+	} else {
+		br1 = b[0]
+		bi1 = b[1]
+		br2 = b[ldb]
+		bi2 = b[ldb+1]
+	}
+	// Apply the (complex) elimination step to the right-hand side.
+	br2 += -lr21*br1 + li21*bi1
+	bi2 += -li21*br1 - lr21*bi1
+
+	bbnd1 := u22abs * (math.Abs(ur11r) + math.Abs(ui11r)) * (math.Abs(br1) + math.Abs(bi1))
+	bbnd2 := math.Abs(br2) + math.Abs(bi2)
+	bbnd := math.Max(bbnd1, bbnd2)
+	if bbnd > 1 && u22abs < 1 && bbnd >= bignum*u22abs {
+		scale = 1 / bbnd
+		br1 *= scale
+		bi1 *= scale
+		br2 *= scale
+		bi2 *= scale
+	}
+
+	// Back substitution.
+	cx2 := complex(br2, bi2) / complex(ur22, ui22)
+	xr2, xi2 := real(cx2), imag(cx2)
+	xr1 := ur11r*br1 - ui11r*bi1 - ur12s*xr2 + ui12s*xi2
+	xi1 := ui11r*br1 + ur11r*bi1 - ui12s*xr2 - ur12s*xi2
+	if icmax&0x1 != 0 {
+		// If the pivot lies in the second column, swap the components of the solution.
+		x[0] = xr2
+		x[1] = xi2
+		x[ldx] = xr1
+		x[ldx+1] = xi1
+	} else {
+		x[0] = xr1
+		x[1] = xi1
+		x[ldx] = xr2
+		x[ldx+1] = xi2
+	}
+	xnorm = math.Max(math.Abs(xr1)+math.Abs(xi1), math.Abs(xr2)+math.Abs(xi2))
+
+	// Further scaling if norm(A)*norm(X) > overflow.
+	if xnorm > 1 && cmax > 1 && xnorm > bignum/cmax {
+		temp := cmax / bignum
+		x[0] *= temp
+		x[1] *= temp
+		x[ldx] *= temp
+		x[ldx+1] *= temp
+		xnorm *= temp
+		scale *= temp
+	}
+
+	return scale, xnorm, ok
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlange.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlange.go
@ -0,0 +1,89 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlange computes the matrix norm of the general m×n matrix a. The input norm
+// specifies the norm computed.
+//  lapack.MaxAbs: the maximum absolute value of an element.
+//  lapack.MaxColumnSum: the maximum column sum of the absolute values of the entries.
+//  lapack.MaxRowSum: the maximum row sum of the absolute values of the entries.
+//  lapack.Frobenius: the square root of the sum of the squares of the entries.
+// If norm == lapack.MaxColumnSum, work must have length at least n, and this
+// function will panic otherwise. The first n elements of work are then
+// overwritten. There are no restrictions on work for the other matrix norms.
+//
+// Dlange returns 0 if m == 0 or n == 0. It panics if norm is not one of the
+// values above, if lda < max(1, n), or if a is shorter than (m-1)*lda+n.
+func (impl Implementation) Dlange(norm lapack.MatrixNorm, m, n int, a []float64, lda int, work []float64) float64 {
+	// TODO(btracey): These should probably be refactored to use BLAS calls.
+	switch {
+	case norm != lapack.MaxRowSum && norm != lapack.MaxColumnSum && norm != lapack.Frobenius && norm != lapack.MaxAbs:
+		panic(badNorm)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if m == 0 || n == 0 {
+		return 0
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		// The stride was accepted above, so the slice itself is too
+		// short; report that rather than a bad leading dimension.
+		panic(shortA)
+	case norm == lapack.MaxColumnSum && len(work) < n:
+		panic(shortWork)
+	}
+
+	if norm == lapack.MaxAbs {
+		var value float64
+		for i := 0; i < m; i++ {
+			for j := 0; j < n; j++ {
+				value = math.Max(value, math.Abs(a[i*lda+j]))
+			}
+		}
+		return value
+	}
+	if norm == lapack.MaxColumnSum {
+		// work accumulates the absolute column sums; its length has
+		// already been checked above.
+		for i := 0; i < n; i++ {
+			work[i] = 0
+		}
+		for i := 0; i < m; i++ {
+			for j := 0; j < n; j++ {
+				work[j] += math.Abs(a[i*lda+j])
+			}
+		}
+		var value float64
+		for i := 0; i < n; i++ {
+			value = math.Max(value, work[i])
+		}
+		return value
+	}
+	if norm == lapack.MaxRowSum {
+		var value float64
+		for i := 0; i < m; i++ {
+			var sum float64
+			for j := 0; j < n; j++ {
+				sum += math.Abs(a[i*lda+j])
+			}
+			value = math.Max(value, sum)
+		}
+		return value
+	}
+	// norm == lapack.Frobenius
+	// Accumulate the scaled sum of squares one row at a time.
+	scale := 0.0
+	sum := 1.0
+	for i := 0; i < m; i++ {
+		scale, sum = impl.Dlassq(n, a[i*lda:], 1, scale, sum)
+	}
+	return scale * math.Sqrt(sum)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlanst.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlanst.go
@ -0,0 +1,75 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlanst returns the requested norm of an n×n symmetric tridiagonal matrix A
+// whose diagonal is held in d (at least n elements) and whose off-diagonal is
+// held in e (at least n-1 elements).
+//
+// Dlanst returns 0 if n == 0. It panics if norm is not a recognized norm, if
+// n < 0, or if d or e is too short.
+func (impl Implementation) Dlanst(norm lapack.MatrixNorm, n int, d, e []float64) float64 {
+	known := norm == lapack.MaxRowSum || norm == lapack.MaxColumnSum ||
+		norm == lapack.Frobenius || norm == lapack.MaxAbs
+	if !known {
+		panic(badNorm)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if n == 0 {
+		return 0
+	}
+	if len(d) < n {
+		panic(shortD)
+	}
+	if len(e) < n-1 {
+		panic(shortE)
+	}
+
+	// larger returns v when it exceeds cur or is NaN, and cur otherwise.
+	larger := func(cur, v float64) float64 {
+		if cur < v || math.IsNaN(v) {
+			return v
+		}
+		return cur
+	}
+
+	if norm == lapack.Frobenius {
+		var scl float64
+		ssq := 1.0
+		if n > 1 {
+			// Every off-diagonal element occurs twice in A.
+			scl, ssq = impl.Dlassq(n-1, e, 1, scl, ssq)
+			ssq = 2 * ssq
+		}
+		scl, ssq = impl.Dlassq(n, d, 1, scl, ssq)
+		return scl * math.Sqrt(ssq)
+	}
+
+	if norm == lapack.MaxAbs {
+		result := math.Abs(d[n-1])
+		for i := 0; i < n-1; i++ {
+			result = larger(result, math.Abs(d[i]))
+			result = larger(result, math.Abs(e[i]))
+		}
+		return result
+	}
+
+	// lapack.MaxColumnSum or lapack.MaxRowSum: both are computed the same
+	// way here.
+	if n == 1 {
+		return math.Abs(d[0])
+	}
+	// First and last rows have a single off-diagonal neighbour.
+	result := math.Abs(d[0]) + math.Abs(e[0])
+	result = larger(result, math.Abs(e[n-2])+math.Abs(d[n-1]))
+	// Interior rows have two.
+	for i := 1; i < n-1; i++ {
+		result = larger(result, math.Abs(d[i])+math.Abs(e[i])+math.Abs(e[i-1]))
+	}
+	return result
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlansy.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlansy.go
@ -0,0 +1,132 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlansy returns the requested norm of the n×n symmetric matrix stored in a
+// with row stride lda. Only the triangle selected by uplo is read.
+//
+// If norm == lapack.MaxColumnSum or norm == lapack.MaxRowSum, work is used as
+// scratch space and must have length at least n; otherwise work is unused.
+//
+// Dlansy returns 0 if n == 0. For the MaxAbs, MaxColumnSum and MaxRowSum norms
+// it returns NaN as soon as a NaN is found.
+func (impl Implementation) Dlansy(norm lapack.MatrixNorm, uplo blas.Uplo, n int, a []float64, lda int, work []float64) float64 {
+	known := norm == lapack.MaxRowSum || norm == lapack.MaxColumnSum ||
+		norm == lapack.Frobenius || norm == lapack.MaxAbs
+	if !known {
+		panic(badNorm)
+	}
+	if uplo != blas.Upper && uplo != blas.Lower {
+		panic(badUplo)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if lda < max(1, n) {
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		return 0
+	}
+
+	sumNorm := norm == lapack.MaxColumnSum || norm == lapack.MaxRowSum
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+	if sumNorm && len(work) < n {
+		panic(shortWork)
+	}
+
+	upper := uplo == blas.Upper
+
+	if norm == lapack.MaxAbs {
+		var largest float64
+		for i := 0; i < n; i++ {
+			// Column range of the stored triangle in row i.
+			lo, hi := 0, i+1
+			if upper {
+				lo, hi = i, n
+			}
+			for _, aij := range a[i*lda+lo : i*lda+hi] {
+				v := math.Abs(aij)
+				if math.IsNaN(v) {
+					return math.NaN()
+				}
+				if v > largest {
+					largest = v
+				}
+			}
+		}
+		return largest
+	}
+
+	if norm == lapack.Frobenius {
+		var ssq float64
+		for i := 0; i < n; i++ {
+			row := a[i*lda : i*lda+n]
+			if upper {
+				ssq += row[i] * row[i]
+				// Off-diagonal entries count twice by symmetry.
+				for _, v := range row[i+1:] {
+					ssq += 2 * v * v
+				}
+			} else {
+				for _, v := range row[:i] {
+					ssq += 2 * v * v
+				}
+				ssq += row[i] * row[i]
+			}
+		}
+		return math.Sqrt(ssq)
+	}
+
+	// lapack.MaxRowSum or lapack.MaxColumnSum.
+	// A symmetric matrix has the same 1-norm and ∞-norm.
+	sums := work[:n]
+	for i := range sums {
+		sums[i] = 0
+	}
+	for i := 0; i < n; i++ {
+		row := a[i*lda : i*lda+n]
+		if upper {
+			sums[i] += math.Abs(row[i])
+			for j := i + 1; j < n; j++ {
+				// Each stored off-diagonal entry contributes to
+				// both its row and its mirrored column.
+				v := math.Abs(row[j])
+				sums[i] += v
+				sums[j] += v
+			}
+		} else {
+			for j := 0; j < i; j++ {
+				v := math.Abs(row[j])
+				sums[i] += v
+				sums[j] += v
+			}
+			sums[i] += math.Abs(row[i])
+		}
+	}
+	var largest float64
+	for _, v := range sums {
+		if math.IsNaN(v) {
+			return math.NaN()
+		}
+		if v > largest {
+			largest = v
+		}
+	}
+	return largest
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlantr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlantr.go
@ -0,0 +1,260 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlantr computes the specified norm of an m×n trapezoidal matrix A. If
+// norm == lapack.MaxColumnSum work must have length at least n, otherwise work
+// is unused.
+//
+// uplo selects whether the upper or the lower trapezoid of a is read. If
+// diag == blas.Unit the diagonal elements of a are not read and are taken to
+// be 1.
+//
+// Dlantr returns 0 if min(m, n) == 0. For every norm except
+// lapack.Frobenius it returns NaN as soon as a NaN is found.
+func (impl Implementation) Dlantr(norm lapack.MatrixNorm, uplo blas.Uplo, diag blas.Diag, m, n int, a []float64, lda int, work []float64) float64 {
+	switch {
+	case norm != lapack.MaxRowSum && norm != lapack.MaxColumnSum && norm != lapack.Frobenius && norm != lapack.MaxAbs:
+		panic(badNorm)
+	case uplo != blas.Upper && uplo != blas.Lower:
+		panic(badUplo)
+	case diag != blas.Unit && diag != blas.NonUnit:
+		panic(badDiag)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	minmn := min(m, n)
+	if minmn == 0 {
+		return 0
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case norm == lapack.MaxColumnSum && len(work) < n:
+		panic(shortWork)
+	}
+
+	switch norm {
+	default:
+		panic(badNorm)
+	case lapack.MaxAbs:
+		if diag == blas.Unit {
+			// The implicit unit diagonal gives a lower bound of 1.
+			value := 1.0
+			if uplo == blas.Upper {
+				for i := 0; i < m; i++ {
+					for j := i + 1; j < n; j++ {
+						tmp := math.Abs(a[i*lda+j])
+						if math.IsNaN(tmp) {
+							return tmp
+						}
+						if tmp > value {
+							value = tmp
+						}
+					}
+				}
+				return value
+			}
+			for i := 1; i < m; i++ {
+				for j := 0; j < min(i, n); j++ {
+					tmp := math.Abs(a[i*lda+j])
+					if math.IsNaN(tmp) {
+						return tmp
+					}
+					if tmp > value {
+						value = tmp
+					}
+				}
+			}
+			return value
+		}
+		var value float64
+		if uplo == blas.Upper {
+			for i := 0; i < m; i++ {
+				for j := i; j < n; j++ {
+					tmp := math.Abs(a[i*lda+j])
+					if math.IsNaN(tmp) {
+						return tmp
+					}
+					if tmp > value {
+						value = tmp
+					}
+				}
+			}
+			return value
+		}
+		for i := 0; i < m; i++ {
+			for j := 0; j <= min(i, n-1); j++ {
+				tmp := math.Abs(a[i*lda+j])
+				if math.IsNaN(tmp) {
+					return tmp
+				}
+				if tmp > value {
+					value = tmp
+				}
+			}
+		}
+		return value
+	case lapack.MaxColumnSum:
+		if diag == blas.Unit {
+			// Columns that cross the diagonal start from the
+			// implicit 1; the remaining columns start from 0.
+			for i := 0; i < minmn; i++ {
+				work[i] = 1
+			}
+			for i := minmn; i < n; i++ {
+				work[i] = 0
+			}
+			if uplo == blas.Upper {
+				for i := 0; i < m; i++ {
+					for j := i + 1; j < n; j++ {
+						work[j] += math.Abs(a[i*lda+j])
+					}
+				}
+			} else {
+				for i := 1; i < m; i++ {
+					for j := 0; j < min(i, n); j++ {
+						work[j] += math.Abs(a[i*lda+j])
+					}
+				}
+			}
+		} else {
+			for i := 0; i < n; i++ {
+				work[i] = 0
+			}
+			if uplo == blas.Upper {
+				for i := 0; i < m; i++ {
+					for j := i; j < n; j++ {
+						work[j] += math.Abs(a[i*lda+j])
+					}
+				}
+			} else {
+				for i := 0; i < m; i++ {
+					for j := 0; j <= min(i, n-1); j++ {
+						work[j] += math.Abs(a[i*lda+j])
+					}
+				}
+			}
+		}
+		var max float64
+		for _, v := range work[:n] {
+			if math.IsNaN(v) {
+				return math.NaN()
+			}
+			if v > max {
+				max = v
+			}
+		}
+		return max
+	case lapack.MaxRowSum:
+		var maxsum float64
+		if diag == blas.Unit {
+			if uplo == blas.Upper {
+				for i := 0; i < m; i++ {
+					var sum float64
+					if i < minmn {
+						sum = 1
+					}
+					for j := i + 1; j < n; j++ {
+						sum += math.Abs(a[i*lda+j])
+					}
+					if math.IsNaN(sum) {
+						return math.NaN()
+					}
+					if sum > maxsum {
+						maxsum = sum
+					}
+				}
+				return maxsum
+			} else {
+				// Start at row 0: it has no stored elements
+				// below the diagonal, but its implicit unit
+				// diagonal still contributes a row sum of 1,
+				// which matters when min(m, n) == 1.
+				for i := 0; i < m; i++ {
+					var sum float64
+					if i < minmn {
+						sum = 1
+					}
+					for j := 0; j < min(i, n); j++ {
+						sum += math.Abs(a[i*lda+j])
+					}
+					if math.IsNaN(sum) {
+						return math.NaN()
+					}
+					if sum > maxsum {
+						maxsum = sum
+					}
+				}
+				return maxsum
+			}
+		} else {
+			if uplo == blas.Upper {
+				for i := 0; i < m; i++ {
+					var sum float64
+					for j := i; j < n; j++ {
+						sum += math.Abs(a[i*lda+j])
+					}
+					if math.IsNaN(sum) {
+						return sum
+					}
+					if sum > maxsum {
+						maxsum = sum
+					}
+				}
+				return maxsum
+			} else {
+				for i := 0; i < m; i++ {
+					var sum float64
+					for j := 0; j <= min(i, n-1); j++ {
+						sum += math.Abs(a[i*lda+j])
+					}
+					if math.IsNaN(sum) {
+						return sum
+					}
+					if sum > maxsum {
+						maxsum = sum
+					}
+				}
+				return maxsum
+			}
+		}
+	case lapack.Frobenius:
+		// nrm accumulates the plain sum of squares.
+		var nrm float64
+		if diag == blas.Unit {
+			if uplo == blas.Upper {
+				for i := 0; i < m; i++ {
+					for j := i + 1; j < n; j++ {
+						tmp := a[i*lda+j]
+						nrm += tmp * tmp
+					}
+				}
+			} else {
+				for i := 1; i < m; i++ {
+					for j := 0; j < min(i, n); j++ {
+						tmp := a[i*lda+j]
+						nrm += tmp * tmp
+					}
+				}
+			}
+			// Add the minmn implicit unit diagonal elements.
+			nrm += float64(minmn)
+		} else {
+			if uplo == blas.Upper {
+				for i := 0; i < m; i++ {
+					for j := i; j < n; j++ {
+						tmp := math.Abs(a[i*lda+j])
+						nrm += tmp * tmp
+					}
+				}
+			} else {
+				for i := 0; i < m; i++ {
+					for j := 0; j <= min(i, n-1); j++ {
+						tmp := math.Abs(a[i*lda+j])
+						nrm += tmp * tmp
+					}
+				}
+			}
+		}
+		return math.Sqrt(nrm)
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlanv2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlanv2.go
@ -0,0 +1,132 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlanv2 computes the Schur factorization of a real 2×2 matrix:
+//  [ a b ] = [ cs -sn ] * [ aa bb ] * [ cs sn ]
+//  [ c d ]   [ sn  cs ]   [ cc dd ] * [-sn cs ]
+// If cc is zero, aa and dd are real eigenvalues of the matrix. Otherwise it
+// holds that aa = dd and bb*cc < 0, and aa ± sqrt(bb*cc) are complex conjugate
+// eigenvalues. The real and imaginary parts of the eigenvalues are returned in
+// (rt1r,rt1i) and (rt2r,rt2i).
+//
+// cs and sn are the cosine and sine of the rotation that brings the input
+// matrix to the returned form.
+func (impl Implementation) Dlanv2(a, b, c, d float64) (aa, bb, cc, dd float64, rt1r, rt1i, rt2r, rt2i float64, cs, sn float64) {
+	switch {
+	case c == 0: // Matrix is already upper triangular.
+		aa = a
+		bb = b
+		cc = 0
+		dd = d
+		cs = 1
+		sn = 0
+	case b == 0: // Matrix is lower triangular, swap rows and columns.
+		aa = d
+		bb = -c
+		cc = 0
+		dd = a
+		cs = 0
+		sn = 1
+	case a == d && math.Signbit(b) != math.Signbit(c): // Matrix is already in the standard Schur form.
+		aa = a
+		bb = b
+		cc = c
+		dd = d
+		cs = 1
+		sn = 0
+	default:
+		temp := a - d
+		p := temp / 2
+		bcmax := math.Max(math.Abs(b), math.Abs(c))
+		// bcmis carries the smaller magnitude of b and c together
+		// with the sign of b*c.
+		bcmis := math.Min(math.Abs(b), math.Abs(c))
+		if b*c < 0 {
+			bcmis *= -1
+		}
+		scale := math.Max(math.Abs(p), bcmax)
+		// z is (p*p + b*c) divided by scale, formed from pre-scaled
+		// factors.
+		z := p/scale*p + bcmax/scale*bcmis
+		eps := dlamchP
+
+		if z >= 4*eps {
+			// Real eigenvalues. Compute aa and dd.
+			if p > 0 {
+				z = p + math.Sqrt(scale)*math.Sqrt(z)
+			} else {
+				z = p - math.Sqrt(scale)*math.Sqrt(z)
+			}
+			aa = d + z
+			dd = d - bcmax/z*bcmis
+			// Compute bb and the rotation matrix.
+			tau := impl.Dlapy2(c, z)
+			cs = z / tau
+			sn = c / tau
+			bb = b - c
+			cc = 0
+		} else {
+			// Complex eigenvalues, or real (almost) equal eigenvalues.
+			// Make diagonal elements equal.
+			sigma := b + c
+			tau := impl.Dlapy2(sigma, temp)
+			cs = math.Sqrt((1 + math.Abs(sigma)/tau) / 2)
+			sn = -p / (tau * cs)
+			if sigma < 0 {
+				sn *= -1
+			}
+			// Compute [ aa bb ] = [ a b ] [ cs -sn ]
+			//         [ cc dd ]   [ c d ] [ sn  cs ]
+			aa = a*cs + b*sn
+			bb = -a*sn + b*cs
+			cc = c*cs + d*sn
+			dd = -c*sn + d*cs
+			// Compute [ a b ] = [ cs sn ] [ aa bb ]
+			//         [ c d ]   [-sn cs ] [ cc dd ]
+			a = aa*cs + cc*sn
+			b = bb*cs + dd*sn
+			c = -aa*sn + cc*cs
+			d = -bb*sn + dd*cs
+
+			temp = (a + d) / 2
+			aa = temp
+			bb = b
+			cc = c
+			dd = temp
+
+			if cc != 0 {
+				if bb != 0 {
+					if math.Signbit(bb) == math.Signbit(cc) {
+						// Real eigenvalues, reduce to
+						// upper triangular form.
+						sab := math.Sqrt(math.Abs(bb))
+						sac := math.Sqrt(math.Abs(cc))
+						p = sab * sac
+						if cc < 0 {
+							p *= -1
+						}
+						tau = 1 / math.Sqrt(math.Abs(bb+cc))
+						aa = temp + p
+						bb = bb - cc
+						cc = 0
+						dd = temp - p
+						cs1 := sab * tau
+						sn1 := sac * tau
+						// Compose the rotation (cs,sn) with
+						// (cs1,sn1) using the angle-addition
+						// formulas:
+						//  cs' = cs*cs1 - sn*sn1
+						//  sn' = cs*sn1 + sn*cs1
+						cs, sn = cs*cs1-sn*sn1, cs*sn1+sn*cs1
+					}
+				} else {
+					bb = -cc
+					cc = 0
+					cs, sn = -sn, cs
+				}
+			}
+		}
+	}
+
+	// Store eigenvalues in (rt1r,rt1i) and (rt2r,rt2i).
+	rt1r = aa
+	rt2r = dd
+	if cc != 0 {
+		rt1i = math.Sqrt(math.Abs(bb)) * math.Sqrt(math.Abs(cc))
+		rt2i = -rt1i
+	}
+	return
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlapll.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlapll.go
@ -0,0 +1,55 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas/blas64"
+
+// Dlapll returns the smallest singular value of the n×2 matrix A = [ x y ].
+// It first reduces A to a 2×2 upper triangular matrix R by computing the QR
+// factorization A = Q*R, and then computes the singular values of R.
+//
+// Dlapll panics if n is negative, if incX or incY is not positive, or if x or
+// y is too short for n elements with the given increment. If n is 0 or 1,
+// Dlapll returns 0 without modifying x or y.
+//
+// The contents of x and y are overwritten during the call.
+//
+// Dlapll is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlapll(n int, x []float64, incX int, y []float64, incY int) float64 {
+	if n < 0 {
+		panic(nLT0)
+	}
+	if incX <= 0 {
+		panic(badIncX)
+	}
+	if incY <= 0 {
+		panic(badIncY)
+	}
+
+	// An empty matrix needs no work and no slice length checks.
+	if n == 0 {
+		return 0
+	}
+
+	if len(x) < 1+(n-1)*incX {
+		panic(shortX)
+	}
+	if len(y) < 1+(n-1)*incY {
+		panic(shortY)
+	}
+
+	// A single row is not factorized.
+	if n == 1 {
+		return 0
+	}
+
+	// First Householder reflector, generated from x. Setting x[0] to 1
+	// makes x usable directly as the reflector vector below.
+	r00, tauX := impl.Dlarfg(n, x[0], x[incX:], incX)
+	x[0] = 1
+
+	// Apply the reflector to y: y += (-tauX * x^T y) * x.
+	bi := blas64.Implementation()
+	alpha := -tauX * bi.Ddot(n, x, incX, y, incY)
+	bi.Daxpy(n, alpha, x, incX, y, incY)
+
+	// Second Householder reflector, generated from the trailing n-1
+	// elements of the updated y.
+	r11, _ := impl.Dlarfg(n-1, y[incY], y[2*incY:], incY)
+
+	// Singular values of the 2×2 upper triangular matrix
+	//  [ r00  y[0] ]
+	//  [  0   r11  ]
+	// of which only the smaller one is returned.
+	smallest, _ := impl.Dlas2(r00, y[0], r11)
+	return smallest
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlapmt.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlapmt.go
@ -0,0 +1,89 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas/blas64"
+
+// Dlapmt permutes the columns of the m×n matrix X in place according to the
+// permutation k_0, k_1, ..., k_n-1 of the integers 0, ..., n-1.
+//
+// If forward is true, a forward permutation is applied:
+//
+//  X[0:m, k[j]] is moved to X[0:m, j] for j = 0, 1, ..., n-1.
+//
+// If forward is false, a backward permutation is applied:
+//
+//  X[0:m, j] is moved to X[0:m, k[j]] for j = 0, 1, ..., n-1.
+//
+// k is zero-indexed and must have length n, otherwise Dlapmt will panic. The
+// entries of k are modified during the call to mark the columns that have
+// already been processed and are shifted back before returning.
+func (impl Implementation) Dlapmt(forward bool, m, n int, x []float64, ldx int, k []int) {
+	if m < 0 {
+		panic(mLT0)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if ldx < max(1, n) {
+		panic(badLdX)
+	}
+
+	// Nothing to permute in an empty matrix.
+	if m == 0 || n == 0 {
+		return
+	}
+
+	if len(x) < (m-1)*ldx+n {
+		panic(shortX)
+	}
+	if len(k) != n {
+		panic(badLenK)
+	}
+
+	// A single column is already in place.
+	if n == 1 {
+		return
+	}
+
+	// Switch to one-based indices and negate them. A negative entry marks
+	// a column that has not been processed yet.
+	for idx := range k {
+		k[idx] = -(k[idx] + 1)
+	}
+
+	bi := blas64.Implementation()
+
+	if forward {
+		for start := range k {
+			if k[start] >= 0 {
+				// Already handled as part of an earlier cycle.
+				continue
+			}
+			k[start] = -k[start]
+
+			// Follow the cycle beginning at start, swapping
+			// columns until an already processed entry is reached.
+			dst := start
+			src := k[start] - 1
+			for k[src] < 0 {
+				bi.Dswap(m, x[dst:], ldx, x[src:], ldx)
+
+				k[src] = -k[src]
+				dst = src
+				src = k[src] - 1
+			}
+		}
+	} else {
+		for start := range k {
+			if k[start] >= 0 {
+				// Already handled as part of an earlier cycle.
+				continue
+			}
+			k[start] = -k[start]
+
+			// Follow the cycle beginning at start, swapping each
+			// visited column with column start until the cycle
+			// closes.
+			for next := k[start] - 1; next != start; next = k[next] - 1 {
+				bi.Dswap(m, x[next:], ldx, x[start:], ldx)
+
+				k[next] = -k[next]
+			}
+		}
+	}
+
+	// Undo the shift to one-based indices.
+	for idx := range k {
+		k[idx] -= 1
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlapy2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlapy2.go
@ -0,0 +1,14 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlapy2 returns the value of math.Hypot(x, y). It is the LAPACK counterpart
+// of math.Hypot.
+//
+// Dlapy2 is an internal routine. It is exported for testing purposes.
+func (Implementation) Dlapy2(x, y float64) float64 {
+	hypot := math.Hypot(x, y)
+	return hypot
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqp2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqp2.go
@ -0,0 +1,127 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlaqp2 computes a QR factorization with column pivoting of the block A[offset:m, 0:n]
+// of the m×n matrix A. The block A[0:offset, 0:n] is accordingly pivoted, but not factorized.
+//
+// On exit, the upper triangle of block A[offset:m, 0:n] is the triangular factor obtained.
+// The elements in block A[offset:m, 0:n] below the diagonal, together with tau, represent
+// the orthogonal matrix Q as a product of elementary reflectors.
+//
+// offset is number of rows of the matrix A that must be pivoted but not factorized.
+// offset must be non-negative and not greater than m, otherwise Dlaqp2 will panic.
+//
+// On exit, jpvt holds the permutation that was applied; the jth column of A*P was the
+// jpvt[j] column of A. jpvt must have length n, otherwise Dlaqp2 will panic.
+//
+// On exit tau holds the scalar factors of the elementary reflectors. It must have length
+// at least min(m-offset, n) otherwise Dlaqp2 will panic.
+//
+// vn1 and vn2 hold the partial and complete column norms respectively. They must have
+// length at least n, otherwise Dlaqp2 will panic.
+//
+// work must have length at least n, otherwise Dlaqp2 will panic.
+//
+// If m or n is zero, Dlaqp2 returns immediately without checking the lengths of the
+// slice arguments.
+//
+// Dlaqp2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlaqp2(m, n, offset int, a []float64, lda int, jpvt []int, tau, vn1, vn2, work []float64) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case offset < 0:
+		panic(offsetLT0)
+	case offset > m:
+		panic(offsetGTM)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if m == 0 || n == 0 {
+		return
+	}
+
+	// mn is the number of reflectors that will be generated.
+	mn := min(m-offset, n)
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(jpvt) != n:
+		panic(badLenJpvt)
+	case len(tau) < mn:
+		panic(shortTau)
+	case len(vn1) < n:
+		panic(shortVn1)
+	case len(vn2) < n:
+		panic(shortVn2)
+	case len(work) < n:
+		panic(shortWork)
+	}
+
+	// Threshold below which a downdated column norm is discarded and
+	// recomputed from scratch.
+	tol3z := math.Sqrt(dlamchE)
+
+	bi := blas64.Implementation()
+
+	// Compute factorization.
+	for i := 0; i < mn; i++ {
+		// Row of A that holds the diagonal element of step i.
+		offpi := offset + i
+
+		// Determine ith pivot column and swap if necessary.
+		p := i + bi.Idamax(n-i, vn1[i:], 1)
+		if p != i {
+			// All m rows are swapped, including the rows above
+			// offset that are pivoted but not factorized.
+			bi.Dswap(m, a[p:], lda, a[i:], lda)
+			jpvt[p], jpvt[i] = jpvt[i], jpvt[p]
+			// Only slot p receives the norms of the column that
+			// moved there; slot i is not updated.
+			vn1[p] = vn1[i]
+			vn2[p] = vn2[i]
+		}
+
+		// Generate elementary reflector H_i.
+		if offpi < m-1 {
+			a[offpi*lda+i], tau[i] = impl.Dlarfg(m-offpi, a[offpi*lda+i], a[(offpi+1)*lda+i:], lda)
+		} else {
+			// The column has a single element left, no reflector
+			// is generated.
+			tau[i] = 0
+		}
+
+		if i < n-1 {
+			// Apply H_i^T to A[offset+i:m, i:n] from the left.
+			// The column starting at a[offpi*lda+i] is passed to
+			// Dlarf as the reflector vector, so its leading
+			// element is temporarily set to 1 and restored
+			// afterwards.
+			aii := a[offpi*lda+i]
+			a[offpi*lda+i] = 1
+			impl.Dlarf(blas.Left, m-offpi, n-i-1, a[offpi*lda+i:], lda, tau[i], a[offpi*lda+i+1:], lda, work)
+			a[offpi*lda+i] = aii
+		}
+
+		// Update partial column norms.
+		for j := i + 1; j < n; j++ {
+			if vn1[j] == 0 {
+				continue
+			}
+
+			// The following marked lines follow from the
+			// analysis in Lapack Working Note 176.
+			r := math.Abs(a[offpi*lda+j]) / vn1[j] // *
+			temp := math.Max(0, 1-r*r)             // *
+			r = vn1[j] / vn2[j]                    // *
+			temp2 := temp * r * r                  // *
+			if temp2 < tol3z {
+				// Recompute the norm of the remaining part of
+				// column j from scratch. It is zero when no
+				// rows remain below row offpi.
+				var v float64
+				if offpi < m-1 {
+					v = bi.Dnrm2(m-offpi-1, a[(offpi+1)*lda+j:], lda)
+				}
+				vn1[j] = v
+				vn2[j] = v
+			} else {
+				// Downdate the partial norm in place.
+				vn1[j] *= math.Sqrt(temp) // *
+			}
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqps.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqps.go
@ -0,0 +1,244 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlaqps computes a step of QR factorization with column pivoting
+// of an m×n matrix A by using Blas-3. It tries to factorize nb
+// columns from A starting from the row offset, and updates all
+// of the matrix with Dgemm.
+//
+// In some cases, due to catastrophic cancellations, it cannot
+// factorize nb columns. Hence, the actual number of factorized
+// columns is returned in kb.
+//
+// Dlaqps computes a QR factorization with column pivoting of the
+// block A[offset:m, 0:nb] of the m×n matrix A. The block
+// A[0:offset, 0:n] is accordingly pivoted, but not factorized.
+//
+// On exit, the upper triangle of block A[offset:m, 0:kb] is the
+// triangular factor obtained. The elements in block A[offset:m, 0:n]
+// below the diagonal, together with tau, represent the orthogonal
+// matrix Q as a product of elementary reflectors.
+//
+// offset is number of rows of the matrix A that must be pivoted but
+// not factorized. offset must be non-negative and not greater than m,
+// otherwise Dlaqps will panic.
+//
+// nb must satisfy 0 <= nb <= n, otherwise Dlaqps will panic.
+//
+// On exit, jpvt holds the permutation that was applied; the jth column
+// of A*P was the jpvt[j] column of A. jpvt must have length n,
+// otherwise Dlaqps will panic.
+//
+// On exit tau holds the scalar factors of the elementary reflectors.
+// It must have length at least nb, otherwise Dlaqps will panic.
+//
+// vn1 and vn2 hold the partial and complete column norms respectively.
+// They must have length at least n, otherwise Dlaqps will panic.
+//
+// auxv must have length at least nb, otherwise Dlaqps will panic.
+//
+// f and ldf represent an n×nb matrix F that is overwritten during the
+// call.
+//
+// Dlaqps returns 0 without doing any work if m, n or nb is zero or if
+// offset is equal to m.
+//
+// Dlaqps is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlaqps(m, n, offset, nb int, a []float64, lda int, jpvt []int, tau, vn1, vn2, auxv, f []float64, ldf int) (kb int) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case offset < 0:
+		panic(offsetLT0)
+	case offset > m:
+		panic(offsetGTM)
+	case nb < 0:
+		panic(nbLT0)
+	case nb > n:
+		panic(nbGTN)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldf < max(1, nb):
+		panic(badLdF)
+	}
+
+	// Quick return for an empty matrix.
+	if m == 0 || n == 0 {
+		return 0
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(jpvt) != n:
+		panic(badLenJpvt)
+	case len(vn1) < n:
+		panic(shortVn1)
+	case len(vn2) < n:
+		panic(shortVn2)
+	}
+
+	// Quick return if no columns are to be factorized. The lengths of
+	// tau, auxv and f are only checked when nb is positive.
+	if nb == 0 {
+		return 0
+	}
+
+	switch {
+	case len(tau) < nb:
+		panic(shortTau)
+	case len(auxv) < nb:
+		panic(shortAuxv)
+	case len(f) < (n-1)*ldf+nb:
+		panic(shortF)
+	}
+
+	// Quick return if there are no rows to factorize.
+	if offset == m {
+		return 0
+	}
+
+	lastrk := min(m, n+offset)
+	// lsticc is the head of a linked list of columns whose norms must be
+	// recomputed, or -1 if the list is empty. The links are stored in vn2.
+	lsticc := -1
+	tol3z := math.Sqrt(dlamchE)
+
+	bi := blas64.Implementation()
+
+	// The loop stops early as soon as a column has been queued for norm
+	// recomputation.
+	var k, rk int
+	for ; k < nb && lsticc == -1; k++ {
+		rk = offset + k
+
+		// Determine kth pivot column and swap if necessary.
+		p := k + bi.Idamax(n-k, vn1[k:], 1)
+		if p != k {
+			bi.Dswap(m, a[p:], lda, a[k:], lda)
+			bi.Dswap(k, f[p*ldf:], 1, f[k*ldf:], 1)
+			jpvt[p], jpvt[k] = jpvt[k], jpvt[p]
+			vn1[p] = vn1[k]
+			vn2[p] = vn2[k]
+		}
+
+		// Apply previous Householder reflectors to column K:
+		//
+		// A[rk:m, k] = A[rk:m, k] - A[rk:m, 0:k-1]*F[k, 0:k-1]^T.
+		if k > 0 {
+			bi.Dgemv(blas.NoTrans, m-rk, k, -1,
+				a[rk*lda:], lda,
+				f[k*ldf:], 1,
+				1,
+				a[rk*lda+k:], lda)
+		}
+
+		// Generate elementary reflector H_k.
+		if rk < m-1 {
+			a[rk*lda+k], tau[k] = impl.Dlarfg(m-rk, a[rk*lda+k], a[(rk+1)*lda+k:], lda)
+		} else {
+			tau[k] = 0
+		}
+
+		// Temporarily replace the diagonal element by 1 so that column
+		// k of A can be used as a vector in the products below. The
+		// saved value akk is written back at the end of the iteration.
+		akk := a[rk*lda+k]
+		a[rk*lda+k] = 1
+
+		// Compute kth column of F:
+		//
+		// Compute F[k+1:n, k] = tau[k]*A[rk:m, k+1:n]^T*A[rk:m, k].
+		if k < n-1 {
+			bi.Dgemv(blas.Trans, m-rk, n-k-1, tau[k],
+				a[rk*lda+k+1:], lda,
+				a[rk*lda+k:], lda,
+				0,
+				f[(k+1)*ldf+k:], ldf)
+		}
+
+		// Padding F[0:k, k] with zeros.
+		for j := 0; j < k; j++ {
+			f[j*ldf+k] = 0
+		}
+
+		// Incremental updating of F:
+		//
+		// F[0:n, k] := F[0:n, k] - tau[k]*F[0:n, 0:k-1]*A[rk:m, 0:k-1]^T*A[rk:m,k].
+		if k > 0 {
+			// auxv receives the intermediate product
+			// -tau[k]*A[rk:m, 0:k-1]^T*A[rk:m,k].
+			bi.Dgemv(blas.Trans, m-rk, k, -tau[k],
+				a[rk*lda:], lda,
+				a[rk*lda+k:], lda,
+				0,
+				auxv, 1)
+			bi.Dgemv(blas.NoTrans, n, k, 1,
+				f, ldf,
+				auxv, 1,
+				1,
+				f[k:], ldf)
+		}
+
+		// Update the current row of A:
+		//
+		// A[rk, k+1:n] = A[rk, k+1:n] - A[rk, 0:k]*F[k+1:n, 0:k]^T.
+		if k < n-1 {
+			bi.Dgemv(blas.NoTrans, n-k-1, k+1, -1,
+				f[(k+1)*ldf:], ldf,
+				a[rk*lda:], 1,
+				1,
+				a[rk*lda+k+1:], 1)
+		}
+
+		// Update partial column norms.
+		if rk < lastrk-1 {
+			for j := k + 1; j < n; j++ {
+				if vn1[j] == 0 {
+					continue
+				}
+
+				// The following marked lines follow from the
+				// analysis in Lapack Working Note 176.
+				r := math.Abs(a[rk*lda+j]) / vn1[j] // *
+				temp := math.Max(0, 1-r*r)          // *
+				r = vn1[j] / vn2[j]                 // *
+				temp2 := temp * r * r               // *
+				if temp2 < tol3z {
+					// vn2 is used here as a collection of
+					// indices into vn2 and also a collection
+					// of column norms.
+					// Push column j onto the list headed by
+					// lsticc.
+					vn2[j] = float64(lsticc)
+					lsticc = j
+				} else {
+					vn1[j] *= math.Sqrt(temp) // *
+				}
+			}
+		}
+
+		a[rk*lda+k] = akk
+	}
+	// k is the number of columns that were actually factorized.
+	kb = k
+	rk = offset + kb
+
+	// Apply the block reflector to the rest of the matrix:
+	//
+	// A[offset+kb+1:m, kb+1:n] := A[offset+kb+1:m, kb+1:n] - A[offset+kb+1:m, 1:kb]*F[kb+1:n, 1:kb]^T.
+	if kb < min(n, m-offset) {
+		bi.Dgemm(blas.NoTrans, blas.Trans,
+			m-rk, n-kb, kb, -1,
+			a[rk*lda:], lda,
+			f[kb*ldf:], ldf,
+			1,
+			a[rk*lda+kb:], lda)
+	}
+
+	// Recomputation of difficult columns.
+	// Walk the list of queued columns; each vn2 entry holds the index of
+	// the next queued column until it is overwritten by the new norm.
+	for lsticc >= 0 {
+		itemp := int(vn2[lsticc])
+
+		// NOTE: The computation of vn1[lsticc] relies on the fact that
+		// Dnrm2 does not fail on vectors with norm below the value of
+		// sqrt(dlamchS)
+		v := bi.Dnrm2(m-rk, a[rk*lda+lsticc:], lda)
+		vn1[lsticc] = v
+		vn2[lsticc] = v
+
+		lsticc = itemp
+	}
+
+	return kb
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr04.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr04.go
@ -0,0 +1,478 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+)
+
+// Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix
+// H, and optionally the matrices T and Z from the Schur decomposition
+//  H = Z T Z^T
+// where T is an upper quasi-triangular matrix (the Schur form), and Z is the
+// orthogonal matrix of Schur vectors.
+//
+// wantt indicates whether the full Schur form T is required. If wantt is false,
+// then only enough of H will be updated to preserve the eigenvalues.
+//
+// wantz indicates whether the n×n matrix of Schur vectors Z is required. If it
+// is true, the orthogonal similarity transformation will be accumulated into
+// Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced.
+//
+// ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that
+//  0 <= ilo <= ihi < n,     if n > 0,
+//  ilo == 0 and ihi == -1,  if n == 0,
+// and the block must be isolated, that is,
+//  ilo == 0   or H[ilo,ilo-1] == 0,
+//  ihi == n-1 or H[ihi+1,ihi] == 0,
+// otherwise Dlaqr04 will panic.
+//
+// wr and wi must have length ihi+1.
+//
+// iloz and ihiz specify the rows of Z to which transformations will be applied
+// if wantz is true. It must hold that
+//  0 <= iloz <= ilo,  and  ihi <= ihiz < n,
+// otherwise Dlaqr04 will panic.
+//
+// work must have length at least lwork and lwork must be
+//  lwork >= 1,  if n <= 11,
+//  lwork >= n,  if n > 11,
+// otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for
+// optimal performance. On return, work[0] will contain the optimal value of
+// lwork.
+//
+// If lwork is -1, instead of performing Dlaqr04, the function only estimates the
+// optimal workspace size and stores it into work[0]. Neither h nor z are
+// accessed.
+//
+// recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves
+// as DLAQR0, for recur == 0 it behaves as DLAQR4.
+//
+// unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1].
+//
+// If unconverged is zero and wantt is true, H will contain on return the upper
+// quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks
+// (corresponding to complex conjugate pairs of eigenvalues) will be returned in
+// standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0.
+//
+// If unconverged is zero and if wantt is false, the contents of h on return is
+// unspecified.
+//
+// If unconverged is zero, all the eigenvalues have been computed and their real
+// and imaginary parts will be stored on return in wr[ilo:ihi+1] and
+// wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex
+// conjugate pair, they are stored in consecutive elements of wr and wi, say the
+// i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the
+// eigenvalues are stored in the same order as on the diagonal of the Schur form
+// returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal
+// block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i].
+//
+// If unconverged is positive, some eigenvalues have not converged, and
+// wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those
+// eigenvalues which have been successfully computed. Failures are rare.
+//
+// If unconverged is positive and wantt is true, then on return
+//  (initial H)*U = U*(final H),   (*)
+// where U is an orthogonal matrix. The final H is upper Hessenberg and
+// H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular.
+//
+// If unconverged is positive and wantt is false, on return the remaining
+// unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix
+// H[ilo:unconverged,ilo:unconverged].
+//
+// If unconverged is positive and wantz is true, then on return
+//  (final Z) = (initial Z)*U,
+// where U is the orthogonal matrix in (*) regardless of the value of wantt.
+//
+// References:
+//  [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I:
+//      Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix
+//      Anal. Appl. 23(4) (2002), pp. 929—947
+//      URL: http://dx.doi.org/10.1137/S0895479801384573
+//  [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
+//      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973
+//      URL: http://dx.doi.org/10.1137/S0895479801384585
+//
+// Dlaqr04 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) {
+	const (
+		// Matrices of order ntiny or smaller must be processed by
+		// Dlahqr because of insufficient subdiagonal scratch space.
+		// This is a hard limit.
+		ntiny = 11
+		// Exceptional deflation windows: try to cure rare slow
+		// convergence by varying the size of the deflation window after
+		// kexnw iterations.
+		kexnw = 5
+		// Exceptional shifts: try to cure rare slow convergence with
+		// ad-hoc exceptional shifts every kexsh iterations.
+		kexsh = 6
+
+		// See https://github.com/gonum/lapack/pull/151#discussion_r68162802
+		// and the surrounding discussion for an explanation where these
+		// constants come from.
+		// TODO(vladimir-ch): Similar constants for exceptional shifts
+		// are used also in dlahqr.go. The first constant is different
+		// there, it is equal to 3. Why? And does it matter?
+		wilk1 = 0.75
+		wilk2 = -0.4375
+	)
+
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case ilo < 0 || max(0, n-1) < ilo:
+		panic(badIlo)
+	case ihi < min(ilo, n-1) || n <= ihi:
+		panic(badIhi)
+	case ldh < max(1, n):
+		panic(badLdH)
+	case wantz && (iloz < 0 || ilo < iloz):
+		panic(badIloz)
+	case wantz && (ihiz < ihi || n <= ihiz):
+		panic(badIhiz)
+	case ldz < 1, wantz && ldz < n:
+		panic(badLdZ)
+	case lwork < 1 && lwork != -1:
+		panic(badLWork)
+	// TODO(vladimir-ch): Enable if and when we figure out what the minimum
+	// necessary lwork value is. Dlaqr04 says that the minimum is n which
+	// clashes with Dlaqr23's opinion about optimal work when nw <= 2
+	// (independent of n).
+	// case lwork < n && n > ntiny && lwork != -1:
+	// 	panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	case recur < 0:
+		panic(recurLT0)
+	}
+
+	// Quick return.
+	if n == 0 {
+		work[0] = 1
+		return 0
+	}
+
+	// The slice arguments are validated only for a real computation; a
+	// workspace query (lwork == -1) accesses neither h nor z.
+	if lwork != -1 {
+		switch {
+		case len(h) < (n-1)*ldh+n:
+			panic(shortH)
+		case len(wr) != ihi+1:
+			panic(badLenWr)
+		case len(wi) != ihi+1:
+			panic(badLenWi)
+		case wantz && len(z) < (n-1)*ldz+n:
+			panic(shortZ)
+		case ilo > 0 && h[ilo*ldh+ilo-1] != 0:
+			panic(notIsolated)
+		case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0:
+			panic(notIsolated)
+		}
+	}
+
+	if n <= ntiny {
+		// Tiny matrices must use Dlahqr.
+		if lwork == -1 {
+			work[0] = 1
+			return 0
+		}
+		return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz)
+	}
+
+	// Use small bulge multi-shift QR with aggressive early deflation on
+	// larger-than-tiny matrices.
+	var jbcmpz string
+	if wantt {
+		jbcmpz = "S"
+	} else {
+		jbcmpz = "E"
+	}
+	if wantz {
+		jbcmpz += "V"
+	} else {
+		jbcmpz += "N"
+	}
+
+	var fname string
+	if recur > 0 {
+		fname = "DLAQR0"
+	} else {
+		fname = "DLAQR4"
+	}
+	// nwr is the recommended deflation window size. n is greater than 11,
+	// so there is enough subdiagonal workspace for nwr >= 2 as required.
+	// (In fact, there is enough subdiagonal space for nwr >= 3.)
+	// TODO(vladimir-ch): If there is enough space for nwr >= 3, should we
+	// use it?
+	nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork)
+	nwr = max(2, nwr)
+	nwr = min(ihi-ilo+1, min((n-1)/3, nwr))
+
+	// nsr is the recommended number of simultaneous shifts. n is greater
+	// than 11, so there is enough subdiagonal workspace for nsr to be even
+	// and greater than or equal to two as required.
+	nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork)
+	nsr = min(nsr, min((n+6)/9, ihi-ilo))
+	nsr = max(2, nsr&^1)
+
+	// Workspace query call to Dlaqr23.
+	impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz, ihiz, z, ldz,
+		wr, wi, h, ldh, n, h, ldh, n, h, ldh, work, -1, recur)
+	// Optimal workspace is max(Dlaqr5, Dlaqr23).
+	lwkopt := max(3*nsr/2, int(work[0]))
+	// Quick return in case of workspace query.
+	if lwork == -1 {
+		work[0] = float64(lwkopt)
+		return 0
+	}
+
+	// Dlahqr/Dlaqr04 crossover point.
+	nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork)
+	nmin = max(ntiny, nmin)
+
+	// Nibble determines when to skip a multi-shift QR sweep (Dlaqr5).
+	nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork)
+	nibble = max(0, nibble)
+
+	// Computation mode of far-from-diagonal orthogonal updates in Dlaqr5.
+	kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork)
+	kacc22 = max(0, min(kacc22, 2))
+
+	// nwmax is the largest possible deflation window for which there is
+	// sufficient workspace.
+	nwmax := min((n-1)/3, lwork/2)
+	nw := nwmax // Start with maximum deflation window size.
+
+	// nsmax is the largest number of simultaneous shifts for which there is
+	// sufficient workspace.
+	nsmax := min((n+6)/9, 2*lwork/3) &^ 1
+
+	ndfl := 1 // Number of iterations since last deflation.
+	ndec := 0 // Deflation window size decrement.
+
+	// Main loop.
+	var (
+		itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1))
+		it    = 0
+	)
+	for kbot := ihi; kbot >= ilo; {
+		if it == itmax {
+			// The iteration limit was reached, report the
+			// eigenvalues that have not converged.
+			unconverged = kbot + 1
+			break
+		}
+		it++
+
+		// Locate active block.
+		ktop := ilo
+		for k := kbot; k >= ilo+1; k-- {
+			if h[k*ldh+k-1] == 0 {
+				ktop = k
+				break
+			}
+		}
+
+		// Select deflation window size nw.
+		//
+		// Typical Case:
+		//  If possible and advisable, nibble the entire active block.
+		//  If not, use size min(nwr,nwmax) or min(nwr+1,nwmax)
+		//  depending upon which has the smaller corresponding
+		//  subdiagonal entry (a heuristic).
+		//
+		// Exceptional Case:
+		//  If there have been no deflations in kexnw or more
+		//  iterations, then vary the deflation window size. At first,
+		//  because larger windows are, in general, more powerful than
+		//  smaller ones, rapidly increase the window to the maximum
+		//  possible. Then, gradually reduce the window size.
+		nh := kbot - ktop + 1
+		nwupbd := min(nh, nwmax)
+		if ndfl < kexnw {
+			nw = min(nwupbd, nwr)
+		} else {
+			nw = min(nwupbd, 2*nw)
+		}
+		if nw < nwmax {
+			if nw >= nh-1 {
+				nw = nh
+			} else {
+				kwtop := kbot - nw + 1
+				if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) {
+					nw++
+				}
+			}
+		}
+		if ndfl < kexnw {
+			ndec = -1
+		} else if ndec >= 0 || nw >= nwupbd {
+			ndec++
+			if nw-ndec < 2 {
+				ndec = 0
+			}
+			nw -= ndec
+		}
+
+		// Split workspace under the subdiagonal of H into:
+		//  - an nw×nw work array V in the lower left-hand corner,
+		//  - an nw×nhv horizontal work array along the bottom edge (nhv
+		//    must be at least nw but more is better),
+		//  - an nve×nw vertical work array along the left-hand-edge
+		//    (nhv can be any positive integer but more is better).
+		kv := n - nw
+		kt := nw
+		kwv := nw + 1
+		nhv := n - kwv - kt
+		// Aggressive early deflation.
+		ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw,
+			h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1],
+			h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur)
+
+		// Adjust kbot accounting for new deflations.
+		kbot -= ld
+		// ks points to the shifts.
+		ks := kbot - ls + 1
+
+		// Skip an expensive QR sweep if there is a (partly heuristic)
+		// reason to expect that many eigenvalues will deflate without
+		// it. Here, the QR sweep is skipped if many eigenvalues have
+		// just been deflated or if the remaining active block is small.
+		if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) {
+			// ld is positive, note progress.
+			ndfl = 1
+			continue
+		}
+
+		// ns is the nominal number of simultaneous shifts. This may be
+		// lowered (slightly) if Dlaqr23 did not provide that many
+		// shifts.
+		ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1
+
+		// If there have been no deflations in a multiple of kexsh
+		// iterations, then try exceptional shifts. Otherwise use shifts
+		// provided by Dlaqr23 above or from the eigenvalues of a
+		// trailing principal submatrix.
+		if ndfl%kexsh == 0 {
+			ks = kbot - ns + 1
+			for i := kbot; i > max(ks, ktop+1); i -= 2 {
+				ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2])
+				aa := wilk1*ss + h[i*ldh+i]
+				_, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ =
+					impl.Dlanv2(aa, ss, wilk2*ss, aa)
+			}
+			if ks == ktop {
+				wr[ks+1] = h[(ks+1)*ldh+ks+1]
+				wi[ks+1] = 0
+				wr[ks] = wr[ks+1]
+				wi[ks] = wi[ks+1]
+			}
+		} else {
+			// If we got ns/2 or fewer shifts, use Dlahqr or recur
+			// into Dlaqr04 on a trailing principal submatrix to get
+			// more. Since ns <= nsmax <=(n+6)/9, there is enough
+			// space below the subdiagonal to fit an ns×ns scratch
+			// array.
+			if kbot-ks+1 <= ns/2 {
+				ks = kbot - ns + 1
+				kt = n - ns
+				impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh)
+				if ns > nmin && recur > 0 {
+					// The shifts are the eigenvalues of the
+					// whole ns×ns scratch block, so the
+					// active block is [0, ns-1], exactly as
+					// in the Dlahqr call below. Z is not
+					// referenced, but ldz must still be at
+					// least 1 to pass the argument checks of
+					// Dlaqr04.
+					ks += impl.Dlaqr04(false, false, ns, 0, ns-1, h[kt*ldh:], ldh,
+						wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 1, work, lwork, recur-1)
+				} else {
+					ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh,
+						wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 1)
+				}
+				// In case of a rare QR failure use eigenvalues
+				// of the trailing 2×2 principal submatrix.
+				if ks >= kbot {
+					aa := h[(kbot-1)*ldh+kbot-1]
+					bb := h[(kbot-1)*ldh+kbot]
+					cc := h[kbot*ldh+kbot-1]
+					dd := h[kbot*ldh+kbot]
+					_, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ =
+						impl.Dlanv2(aa, bb, cc, dd)
+					ks = kbot - 1
+				}
+			}
+
+			if kbot-ks+1 > ns {
+				// Sorting the shifts helps a little. Bubble
+				// sort keeps complex conjugate pairs together.
+				sorted := false
+				for k := kbot; k > ks; k-- {
+					if sorted {
+						break
+					}
+					sorted = true
+					for i := ks; i < k; i++ {
+						if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) {
+							continue
+						}
+						sorted = false
+						wr[i], wr[i+1] = wr[i+1], wr[i]
+						wi[i], wi[i+1] = wi[i+1], wi[i]
+					}
+				}
+			}
+
+			// Shuffle shifts into pairs of real shifts and pairs of
+			// complex conjugate shifts using the fact that complex
+			// conjugate shifts are already adjacent to one another.
+			// TODO(vladimir-ch): The shuffling here could probably
+			// be removed but I'm not sure right now and it's safer
+			// to leave it.
+			for i := kbot; i > ks+1; i -= 2 {
+				if wi[i] == -wi[i-1] {
+					continue
+				}
+				wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i]
+				wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i]
+			}
+		}
+
+		// If there are only two shifts and both are real, then use only one.
+		if kbot-ks+1 == 2 && wi[kbot] == 0 {
+			if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) {
+				wr[kbot-1] = wr[kbot]
+			} else {
+				wr[kbot] = wr[kbot-1]
+			}
+		}
+
+		// Use up to ns of the smallest magnitude shifts. If there
+		// aren't ns shifts available, then use them all, possibly
+		// dropping one to make the number of shifts even.
+		ns = min(ns, kbot-ks+1) &^ 1
+		ks = kbot - ns + 1
+
+		// Split workspace under the subdiagonal into:
+		// - a kdu×kdu work array U in the lower left-hand-corner,
+		// - a kdu×nhv horizontal work array WH along the bottom edge
+		//   (nhv must be at least kdu but more is better),
+		// - an nhv×kdu vertical work array WV along the left-hand-edge
+		//   (nhv must be at least kdu but more is better).
+		kdu := 3*ns - 3
+		ku := n - kdu
+		kwh := kdu
+		kwv = kdu + 3
+		nhv = n - kwv - kdu
+		// Small-bulge multi-shift QR sweep.
+		impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns,
+			wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz,
+			work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh)
+
+		// Note progress (or the lack of it).
+		if ld > 0 {
+			ndfl = 1
+		} else {
+			ndfl++
+		}
+	}
+
+	work[0] = float64(lwkopt)
+	return unconverged
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr1.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr1.go
@ -0,0 +1,59 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlaqr1 stores in v a scalar multiple of the first column of the product
+//  (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I)
+// where H is a 2×2 or 3×3 matrix, I is the identity matrix of the same size,
+// and i is the imaginary unit. The result is scaled to avoid overflows and
+// most underflows.
+//
+// n is the order of H and must be either 2 or 3. The shifts must satisfy
+// either sr1 = sr2 and si1 = -si2, or si1 = si2 = 0. The length of v must be
+// equal to n. If any of these conditions is not met, Dlaqr1 will panic.
+//
+// Dlaqr1 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlaqr1(n int, h []float64, ldh int, sr1, si1, sr2, si2 float64, v []float64) {
+	if n != 2 && n != 3 {
+		panic("lapack: n must be 2 or 3")
+	}
+	if ldh < n {
+		panic(badLdH)
+	}
+	if len(h) < (n-1)*ldh+n {
+		panic(shortH)
+	}
+	conjugatePair := sr1 == sr2 && si1 == -si2
+	bothReal := si1 == 0 && si2 == 0
+	if !conjugatePair && !bothReal {
+		panic(badShifts)
+	}
+	if len(v) != n {
+		panic(shortV)
+	}
+
+	if n == 2 {
+		// 2×2 case.
+		s := math.Abs(h[0]-sr2) + math.Abs(si2) + math.Abs(h[ldh])
+		if s == 0 {
+			v[0], v[1] = 0, 0
+			return
+		}
+		h21s := h[ldh] / s
+		v[0] = h21s*h[1] + (h[0]-sr1)*((h[0]-sr2)/s) - si1*(si2/s)
+		v[1] = h21s * (h[0] + h[ldh+1] - sr1 - sr2)
+		return
+	}
+
+	// 3×3 case.
+	s := math.Abs(h[0]-sr2) + math.Abs(si2) + math.Abs(h[ldh]) + math.Abs(h[2*ldh])
+	if s == 0 {
+		v[0], v[1], v[2] = 0, 0, 0
+		return
+	}
+	h21s := h[ldh] / s
+	h31s := h[2*ldh] / s
+	v[0] = (h[0]-sr1)*((h[0]-sr2)/s) - si1*(si2/s) + h[1]*h21s + h[2]*h31s
+	v[1] = h21s*(h[0]+h[ldh+1]-sr1-sr2) + h[ldh+2]*h31s
+	v[2] = h31s*(h[0]+h[2*ldh+2]-sr1-sr2) + h21s*h[2*ldh+1]
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr23.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr23.go
@ -0,0 +1,415 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
+// Hessenberg matrix to detect and deflate fully converged eigenvalues from a
+// trailing principal submatrix using aggressive early deflation [1].
+//
+// On return, H will be overwritten by a new Hessenberg matrix that is a
+// perturbation of an orthogonal similarity transformation of H. It is hoped
+// that on output H will have many zero subdiagonal entries.
+//
+// If wantt is true, the matrix H will be fully updated so that the
+// quasi-triangular Schur factor can be computed. If wantt is false, then only
+// enough of H will be updated to preserve the eigenvalues.
+//
+// If wantz is true, the orthogonal similarity transformation will be
+// accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
+//
+// ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
+// of H. It must hold that
+//  0 <= ktop <= kbot < n,     if n > 0,
+//  ktop == 0 and kbot == -1,  if n == 0,
+// and the block must be isolated, that is, it must hold that
+//  ktop == 0   or H[ktop,ktop-1] == 0,
+//  kbot == n-1 or H[kbot+1,kbot] == 0,
+// otherwise Dlaqr23 will panic.
+//
+// nw is the deflation window size. It should hold that
+//  0 <= nw <= kbot-ktop+1,
+// and Dlaqr23 will panic if nw < 0 or kbot-ktop+2 < nw.
+//
+// iloz and ihiz specify the rows of the n×n matrix Z to which transformations
+// will be applied if wantz is true. In that case it must hold that
+//  0 <= iloz <= ktop,  and  kbot <= ihiz < n,
+// otherwise Dlaqr23 will panic.
+//
+// sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
+//
+// v and ldv represent an nw×nw work matrix.
+// t and ldt represent an nw×nh work matrix, and nh must be at least nw.
+// wv and ldwv represent an nv×nw work matrix.
+//
+// work must have length at least lwork and lwork must be at least max(1,2*nw),
+// otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
+// efficiency. On return, work[0] will contain the optimal value of lwork.
+//
+// If lwork is -1, instead of performing Dlaqr23, the function only estimates the
+// optimal workspace size and stores it into work[0]. Neither h nor z is
+// accessed, and the lengths of h, v, t, wv, z, sr and si are not checked.
+//
+// recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves
+// as DLAQR3, for recur == 0 it behaves as DLAQR2.
+//
+// On return, ns and nd will contain respectively the number of unconverged
+// (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
+// sr and si.
+//
+// On return, the real and imaginary parts of approximate eigenvalues that may
+// be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
+// and si[kbot-nd-ns+1:kbot-nd+1].
+//
+// On return, the real and imaginary parts of converged eigenvalues will be
+// stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
+//
+// References:
+//  [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
+//      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948–973
+//      URL: http://dx.doi.org/10.1137/S0895479801384585
+//
+func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) {
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case ktop < 0 || max(0, n-1) < ktop:
+		panic(badKtop)
+	case kbot < min(ktop, n-1) || n <= kbot:
+		panic(badKbot)
+	case nw < 0 || kbot-ktop+1+1 < nw:
+		panic(badNw)
+	case ldh < max(1, n):
+		panic(badLdH)
+	case wantz && (iloz < 0 || ktop < iloz):
+		panic(badIloz)
+	case wantz && (ihiz < kbot || n <= ihiz):
+		panic(badIhiz)
+	case ldz < 1, wantz && ldz < n:
+		panic(badLdZ)
+	case ldv < max(1, nw):
+		panic(badLdV)
+	case nh < nw:
+		panic(badNh)
+	case ldt < max(1, nh):
+		panic(badLdT)
+	case nv < 0:
+		panic(nvLT0)
+	case ldwv < max(1, nw):
+		panic(badLdWV)
+	case lwork < max(1, 2*nw) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	case recur < 0:
+		panic(recurLT0)
+	}
+
+	// Quick return for zero window size.
+	if nw == 0 {
+		work[0] = 1
+		return 0, 0
+	}
+
+	// LAPACK code does not enforce the documented behavior
+	//  nw <= kbot-ktop+1
+	// The check above is looser by one: it panics only if kbot-ktop+2 < nw.
+	jw := nw // Order of the deflation window.
+	lwkopt := max(1, 2*nw)
+	if jw > 2 {
+		// Workspace query call to Dgehrd.
+		impl.Dgehrd(jw, 0, jw-2, t, ldt, work, work, -1)
+		lwk1 := int(work[0])
+		// Workspace query call to Dormhr.
+		impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1)
+		lwk2 := int(work[0])
+		if recur > 0 {
+			// Workspace query call to Dlaqr04.
+			impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr, si, 0, jw-1, v, ldv, work, -1, recur-1)
+			lwk3 := int(work[0])
+			// Optimal workspace.
+			lwkopt = max(jw+max(lwk1, lwk2), lwk3)
+		} else {
+			// Optimal workspace.
+			lwkopt = jw + max(lwk1, lwk2)
+		}
+	}
+	// Quick return in case of workspace query.
+	if lwork == -1 {
+		work[0] = float64(lwkopt)
+		return 0, 0
+	}
+
+	// Check input slices only if not doing workspace query.
+	switch {
+	case len(h) < (n-1)*ldh+n:
+		panic(shortH)
+	case len(v) < (nw-1)*ldv+nw:
+		panic(shortV)
+	case len(t) < (nw-1)*ldt+nh:
+		panic(shortT)
+	case len(wv) < (nv-1)*ldwv+nw:
+		panic(shortWV)
+	case wantz && len(z) < (n-1)*ldz+n:
+		panic(shortZ)
+	case len(sr) != kbot+1:
+		panic(badLenSr)
+	case len(si) != kbot+1:
+		panic(badLenSi)
+	case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
+		panic(notIsolated)
+	case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
+		panic(notIsolated)
+	}
+
+	// Machine constants.
+	ulp := dlamchP
+	smlnum := float64(n) / ulp * dlamchS
+
+	// Setup deflation window.
+	var s float64 // H[kwtop,kwtop-1], the entry left of the window; stays 0 if kwtop == ktop.
+	kwtop := kbot - jw + 1 // Index in H of the first row and column of the window.
+	if kwtop != ktop {
+		s = h[kwtop*ldh+kwtop-1]
+	}
+	if kwtop == kbot {
+		// 1×1 deflation window.
+		sr[kwtop] = h[kwtop*ldh+kwtop]
+		si[kwtop] = 0
+		ns = 1
+		nd = 0
+		if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) {
+			ns = 0
+			nd = 1
+			if kwtop > ktop {
+				h[kwtop*ldh+kwtop-1] = 0
+			}
+		}
+		work[0] = 1
+		return ns, nd
+	}
+
+	// Convert to spike-triangular form. In case of a rare QR failure, this
+	// routine continues to do aggressive early deflation using that part of
+	// the deflation window that converged using infqr here and there to
+	// keep track.
+	impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
+	bi := blas64.Implementation()
+	bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
+	impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
+	nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
+	var infqr int
+	if recur > 0 && jw > nmin {
+		infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1)
+	} else {
+		infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
+	}
+	// Note that ilo == 0 which conveniently coincides with the success
+	// value of infqr, that is, infqr as an index always points to the first
+	// converged eigenvalue.
+
+	// Dtrexc needs a clean margin near the diagonal.
+	for j := 0; j < jw-3; j++ {
+		t[(j+2)*ldt+j] = 0
+		t[(j+3)*ldt+j] = 0
+	}
+	if jw >= 3 {
+		t[(jw-1)*ldt+jw-3] = 0
+	}
+
+	ns = jw // Number of window eigenvalues not deflated so far.
+	ilst := infqr
+	// Deflation detection loop.
+	for ilst < ns {
+		bulge := false
+		if ns >= 2 {
+			bulge = t[(ns-1)*ldt+ns-2] != 0
+		}
+		if !bulge {
+			// Real eigenvalue.
+			abst := math.Abs(t[(ns-1)*ldt+ns-1])
+			if abst == 0 {
+				abst = math.Abs(s)
+			}
+			if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) {
+				// Deflatable.
+				ns--
+			} else {
+				// Undeflatable, move it up out of the way.
+				// Dtrexc can not fail in this case.
+				_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
+				ilst++
+			}
+			continue
+		}
+		// Complex conjugate pair.
+		abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1]))
+		if abst == 0 {
+			abst = math.Abs(s)
+		}
+		if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) {
+			// Deflatable.
+			ns -= 2
+		} else {
+			// Undeflatable, move them up out of the way.
+			// Dtrexc does the right thing with ilst in case of a
+			// rare exchange failure.
+			_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
+			ilst += 2
+		}
+	}
+
+	// Return to Hessenberg form.
+	if ns == 0 {
+		s = 0
+	}
+	if ns < jw {
+		// Sorting diagonal blocks of T improves accuracy for graded
+		// matrices. Bubble sort deals well with exchange failures.
+		sorted := false
+		i := ns
+		for !sorted {
+			sorted = true
+			kend := i - 1
+			i = infqr
+			var k int
+			if i == ns-1 || t[(i+1)*ldt+i] == 0 {
+				k = i + 1
+			} else {
+				k = i + 2
+			}
+			for k <= kend {
+				var evi float64
+				if k == i+1 {
+					evi = math.Abs(t[i*ldt+i])
+				} else {
+					evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1]))
+				}
+
+				var evk float64
+				if k == kend || t[(k+1)*ldt+k] == 0 {
+					evk = math.Abs(t[k*ldt+k])
+				} else {
+					evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1]))
+				}
+
+				if evi >= evk {
+					i = k
+				} else {
+					sorted = false
+					_, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work)
+					if ok {
+						i = ilst
+					} else {
+						i = k
+					}
+				}
+				if i == kend || t[(i+1)*ldt+i] == 0 {
+					k = i + 1
+				} else {
+					k = i + 2
+				}
+			}
+		}
+	}
+
+	// Restore shift/eigenvalue array from T.
+	for i := jw - 1; i >= infqr; {
+		if i == infqr || t[i*ldt+i-1] == 0 {
+			sr[kwtop+i] = t[i*ldt+i]
+			si[kwtop+i] = 0
+			i--
+			continue
+		}
+		aa := t[(i-1)*ldt+i-1]
+		bb := t[(i-1)*ldt+i]
+		cc := t[i*ldt+i-1]
+		dd := t[i*ldt+i]
+		_, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
+		i -= 2
+	}
+
+	if ns < jw || s == 0 {
+		if ns > 1 && s != 0 {
+			// Reflect spike back into lower triangle.
+			bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
+			_, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
+			work[0] = 1
+			impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
+			impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
+			impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
+			impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
+			impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
+		}
+
+		// Copy updated reduced window into place.
+		if kwtop > 0 {
+			h[kwtop*ldh+kwtop-1] = s * v[0]
+		}
+		impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
+		bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)
+
+		// Accumulate orthogonal matrix in order to update H and Z, if
+		// requested.
+		if ns > 1 && s != 0 {
+			// work[:ns-1] contains the elementary reflectors stored
+			// by a call to Dgehrd above.
+			impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
+				t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
+		}
+
+		// Update vertical slab in H.
+		var ltop int // First row of H to update: 0 if wantt, ktop otherwise.
+		if !wantt {
+			ltop = ktop
+		}
+		for krow := ltop; krow < kwtop; krow += nv {
+			kln := min(nv, kwtop-krow)
+			bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
+				1, h[krow*ldh+kwtop:], ldh, v, ldv,
+				0, wv, ldwv)
+			impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
+		}
+
+		// Update horizontal slab in H.
+		if wantt {
+			for kcol := kbot + 1; kcol < n; kcol += nh {
+				kln := min(nh, n-kcol)
+				bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
+					1, v, ldv, h[kwtop*ldh+kcol:], ldh,
+					0, t, ldt)
+				impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
+			}
+		}
+
+		// Update vertical slab in Z.
+		if wantz {
+			for krow := iloz; krow <= ihiz; krow += nv {
+				kln := min(nv, ihiz-krow+1)
+				bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
+					1, z[krow*ldz+kwtop:], ldz, v, ldv,
+					0, wv, ldwv)
+				impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
+			}
+		}
+	}
+
+	// The number of deflations.
+	nd = jw - ns
+	// Shifts are converged eigenvalues that could not be deflated.
+	// Subtracting infqr from the spike length takes care of the case of a
+	// rare QR failure while calculating eigenvalues of the deflation
+	// window.
+	ns -= infqr
+	work[0] = float64(lwkopt)
+	return ns, nd
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr5.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaqr5.go
@ -0,0 +1,644 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlaqr5 performs a single small-bulge multi-shift QR sweep on an isolated
+// block of a Hessenberg matrix.
+//
+// wantt and wantz determine whether the quasi-triangular Schur factor and the
+// orthogonal Schur factor, respectively, will be computed.
+//
+// kacc22 specifies the computation mode of far-from-diagonal orthogonal
+// updates. Permitted values are:
+//  0: Dlaqr5 will not accumulate reflections and will not use matrix-matrix
+//     multiply to update far-from-diagonal matrix entries.
+//  1: Dlaqr5 will accumulate reflections and use matrix-matrix multiply to
+//     update far-from-diagonal matrix entries.
+//  2: Dlaqr5 will accumulate reflections, use matrix-matrix multiply to update
+//     far-from-diagonal matrix entries, and take advantage of 2×2 block
+//     structure during matrix multiplies.
+// For other values of kacc22 Dlaqr5 will panic.
+//
+// n is the order of the Hessenberg matrix H.
+//
+// ktop and kbot are indices of the first and last row and column of an isolated
+// diagonal block upon which the QR sweep will be applied. It must hold that
+//  ktop == 0,   or 0 < ktop <= n-1 and H[ktop, ktop-1] == 0, and
+//  kbot == n-1, or 0 <= kbot < n-1 and H[kbot+1, kbot] == 0,
+// otherwise Dlaqr5 will panic.
+//
+// nshfts is the number of simultaneous shifts. It must be non-negative and
+// even, otherwise Dlaqr5 will panic. If nshfts is 0, Dlaqr5 returns after the
+// argument checks without modifying any of its arguments.
+//
+// sr and si contain the real and imaginary parts, respectively, of the shifts
+// of origin that define the multi-shift QR sweep. On return both slices may be
+// reordered by Dlaqr5. Their length must be equal to nshfts, otherwise Dlaqr5
+// will panic.
+//
+// h and ldh represent the Hessenberg matrix H of size n×n. On return
+// multi-shift QR sweep with shifts sr+i*si has been applied to the isolated
+// diagonal block in rows and columns ktop through kbot, inclusive.
+//
+// iloz and ihiz specify the rows of Z to which transformations will be applied
+// if wantz is true. In that case it must hold that 0 <= iloz <= ihiz < n,
+// otherwise Dlaqr5 will panic. If wantz is false, iloz and ihiz are not
+// checked.
+//
+// z and ldz represent the matrix Z of size n×n. If wantz is true, the QR sweep
+// orthogonal similarity transformation is accumulated into
+// z[iloz:ihiz,iloz:ihiz] from the right, otherwise z is not referenced.
+//
+// v and ldv represent an auxiliary matrix V of size (nshfts/2)×3. Note that V
+// is transposed with respect to the reference netlib implementation.
+//
+// u and ldu represent an auxiliary matrix of size (3*nshfts-3)×(3*nshfts-3).
+//
+// wh and ldwh represent an auxiliary matrix of size (3*nshfts-3)×nh.
+//
+// wv and ldwv represent an auxiliary matrix of size nv×(3*nshfts-3).
+//
+// Dlaqr5 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlaqr5(wantt, wantz bool, kacc22 int, n, ktop, kbot, nshfts int, sr, si []float64, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, v []float64, ldv int, u []float64, ldu int, nv int, wv []float64, ldwv int, nh int, wh []float64, ldwh int) {
+	switch {
+	case kacc22 != 0 && kacc22 != 1 && kacc22 != 2:
+		panic(badKacc22)
+	case n < 0:
+		panic(nLT0)
+	case ktop < 0 || n <= ktop:
+		panic(badKtop)
+	case kbot < 0 || n <= kbot:
+		panic(badKbot)
+
+	case nshfts < 0:
+		panic(nshftsLT0)
+	case nshfts&0x1 != 0:
+		panic(nshftsOdd)
+	case len(sr) != nshfts:
+		panic(badLenSr)
+	case len(si) != nshfts:
+		panic(badLenSi)
+
+	case ldh < max(1, n):
+		panic(badLdH)
+	case len(h) < (n-1)*ldh+n:
+		panic(shortH)
+
+	case wantz && ihiz >= n:
+		panic(badIhiz)
+	case wantz && (iloz < 0 || ihiz < iloz):
+		// Both conditions are parenthesized so that the row range of Z
+		// is validated only if Z is going to be referenced.
+		panic(badIloz)
+	case ldz < 1, wantz && ldz < n:
+		panic(badLdZ)
+	case wantz && len(z) < (n-1)*ldz+n:
+		panic(shortZ)
+
+	case ldv < 3:
+		// V is transposed w.r.t. reference lapack.
+		panic(badLdV)
+	case len(v) < (nshfts/2-1)*ldv+3:
+		panic(shortV)
+
+	case ldu < max(1, 3*nshfts-3):
+		panic(badLdU)
+	case len(u) < (3*nshfts-3-1)*ldu+3*nshfts-3:
+		panic(shortU)
+
+	case nv < 0:
+		panic(nvLT0)
+	case ldwv < max(1, 3*nshfts-3):
+		panic(badLdWV)
+	case len(wv) < (nv-1)*ldwv+3*nshfts-3:
+		panic(shortWV)
+
+	case nh < 0:
+		panic(nhLT0)
+	case ldwh < max(1, nh):
+		panic(badLdWH)
+	case len(wh) < (3*nshfts-3-1)*ldwh+nh:
+		panic(shortWH)
+
+	case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
+		panic(notIsolated)
+	case kbot < n-1 && h[(kbot+1)*ldh+kbot] != 0:
+		panic(notIsolated)
+	}
+
+	// If there are no shifts, then there is nothing to do.
+	if nshfts < 2 {
+		return
+	}
+	// If the active block is empty or 1×1, then there is nothing to do.
+	if ktop >= kbot {
+		return
+	}
+
+	// Shuffle shifts into pairs of real shifts and pairs of complex
+	// conjugate shifts assuming complex conjugate shifts are already
+	// adjacent to one another.
+	for i := 0; i < nshfts-2; i += 2 {
+		if si[i] == -si[i+1] {
+			continue
+		}
+		sr[i], sr[i+1], sr[i+2] = sr[i+1], sr[i+2], sr[i]
+		si[i], si[i+1], si[i+2] = si[i+1], si[i+2], si[i]
+	}
+
+	// Note: lapack says that nshfts must be even but allows it to be odd
+	// anyway. We panic above if nshfts is not even, so reducing it by one
+	// is unnecessary. The only caller Dlaqr04 uses only even nshfts.
+	//
+	// The original comment and code from lapack-3.6.0/SRC/dlaqr5.f:341:
+	// *     ==== NSHFTS is supposed to be even, but if it is odd,
+	// *     .    then simply reduce it by one.  The shuffle above
+	// *     .    ensures that the dropped shift is real and that
+	// *     .    the remaining shifts are paired. ====
+	// *
+	//      NS = NSHFTS - MOD( NSHFTS, 2 )
+	ns := nshfts
+
+	safmin := dlamchS
+	ulp := dlamchP
+	smlnum := safmin * float64(n) / ulp
+
+	// Use accumulated reflections to update far-from-diagonal entries?
+	accum := kacc22 == 1 || kacc22 == 2
+	// If so, exploit the 2×2 block structure?
+	blk22 := ns > 2 && kacc22 == 2
+
+	// Clear trash.
+	if ktop+2 <= kbot {
+		h[(ktop+2)*ldh+ktop] = 0
+	}
+
+	// nbmps = number of 2-shift bulges in the chain.
+	nbmps := ns / 2
+
+	// kdu = width of slab.
+	kdu := 6*nbmps - 3
+
+	// Create and chase chains of nbmps bulges.
+	for incol := 3*(1-nbmps) + ktop - 1; incol <= kbot-2; incol += 3*nbmps - 2 {
+		ndcol := incol + kdu
+		if accum {
+			impl.Dlaset(blas.All, kdu, kdu, 0, 1, u, ldu)
+		}
+
+		// Near-the-diagonal bulge chase. The following loop performs
+		// the near-the-diagonal part of a small bulge multi-shift QR
+		// sweep. Each 6*nbmps-2 column diagonal chunk extends from
+		// column incol to column ndcol (including both column incol and
+		// column ndcol). The following loop chases a 3*nbmps column
+		// long chain of nbmps bulges 3*nbmps-2 columns to the right.
+		// (incol may be less than ktop and ndcol may be greater than
+		// kbot indicating phantom columns from which to chase bulges
+		// before they are actually introduced or to which to chase
+		// bulges beyond column kbot.)
+		for krcol := incol; krcol <= min(incol+3*nbmps-3, kbot-2); krcol++ {
+			// Bulges number mtop to mbot are active double implicit
+			// shift bulges. There may or may not also be small 2×2
+			// bulge, if there is room. The inactive bulges (if any)
+			// must wait until the active bulges have moved down the
+			// diagonal to make room. The phantom matrix paradigm
+			// described above helps keep track.
+
+			mtop := max(0, ((ktop-1)-krcol+2)/3)
+			mbot := min(nbmps, (kbot-krcol)/3) - 1
+			m22 := mbot + 1
+			bmp22 := (mbot < nbmps-1) && (krcol+3*m22 == kbot-2)
+
+			// Generate reflections to chase the chain right one
+			// column. (The minimum value of k is ktop-1.)
+			for m := mtop; m <= mbot; m++ {
+				k := krcol + 3*m
+				if k == ktop-1 {
+					impl.Dlaqr1(3, h[ktop*ldh+ktop:], ldh,
+						sr[2*m], si[2*m], sr[2*m+1], si[2*m+1],
+						v[m*ldv:m*ldv+3])
+					alpha := v[m*ldv]
+					_, v[m*ldv] = impl.Dlarfg(3, alpha, v[m*ldv+1:m*ldv+3], 1)
+					continue
+				}
+				beta := h[(k+1)*ldh+k]
+				v[m*ldv+1] = h[(k+2)*ldh+k]
+				v[m*ldv+2] = h[(k+3)*ldh+k]
+				beta, v[m*ldv] = impl.Dlarfg(3, beta, v[m*ldv+1:m*ldv+3], 1)
+
+				// A bulge may collapse because of vigilant deflation or
+				// destructive underflow. In the underflow case, try the
+				// two-small-subdiagonals trick to try to reinflate the
+				// bulge.
+				if h[(k+3)*ldh+k] != 0 || h[(k+3)*ldh+k+1] != 0 || h[(k+3)*ldh+k+2] == 0 {
+					// Typical case: not collapsed (yet).
+					h[(k+1)*ldh+k] = beta
+					h[(k+2)*ldh+k] = 0
+					h[(k+3)*ldh+k] = 0
+					continue
+				}
+
+				// Atypical case: collapsed. Attempt to reintroduce
+				// ignoring H[k+1,k] and H[k+2,k]. If the fill
+				// resulting from the new reflector is too large,
+				// then abandon it. Otherwise, use the new one.
+				var vt [3]float64
+				impl.Dlaqr1(3, h[(k+1)*ldh+k+1:], ldh, sr[2*m],
+					si[2*m], sr[2*m+1], si[2*m+1], vt[:])
+				alpha := vt[0]
+				_, vt[0] = impl.Dlarfg(3, alpha, vt[1:3], 1)
+				refsum := vt[0] * (h[(k+1)*ldh+k] + vt[1]*h[(k+2)*ldh+k])
+
+				dsum := math.Abs(h[k*ldh+k]) + math.Abs(h[(k+1)*ldh+k+1]) + math.Abs(h[(k+2)*ldh+k+2])
+				if math.Abs(h[(k+2)*ldh+k]-refsum*vt[1])+math.Abs(refsum*vt[2]) > ulp*dsum {
+					// Starting a new bulge here would create
+					// non-negligible fill. Use the old one with
+					// trepidation.
+					h[(k+1)*ldh+k] = beta
+					h[(k+2)*ldh+k] = 0
+					h[(k+3)*ldh+k] = 0
+					continue
+				} else {
+					// Starting a new bulge here would create
+					// only negligible fill. Replace the old
+					// reflector with the new one.
+					h[(k+1)*ldh+k] -= refsum
+					h[(k+2)*ldh+k] = 0
+					h[(k+3)*ldh+k] = 0
+					v[m*ldv] = vt[0]
+					v[m*ldv+1] = vt[1]
+					v[m*ldv+2] = vt[2]
+				}
+			}
+
+			// Generate a 2×2 reflection, if needed.
+			if bmp22 {
+				k := krcol + 3*m22
+				if k == ktop-1 {
+					impl.Dlaqr1(2, h[(k+1)*ldh+k+1:], ldh,
+						sr[2*m22], si[2*m22], sr[2*m22+1], si[2*m22+1],
+						v[m22*ldv:m22*ldv+2])
+					beta := v[m22*ldv]
+					_, v[m22*ldv] = impl.Dlarfg(2, beta, v[m22*ldv+1:m22*ldv+2], 1)
+				} else {
+					beta := h[(k+1)*ldh+k]
+					v[m22*ldv+1] = h[(k+2)*ldh+k]
+					beta, v[m22*ldv] = impl.Dlarfg(2, beta, v[m22*ldv+1:m22*ldv+2], 1)
+					h[(k+1)*ldh+k] = beta
+					h[(k+2)*ldh+k] = 0
+				}
+			}
+
+			// Multiply H by reflections from the left.
+			var jbot int
+			switch {
+			case accum:
+				jbot = min(ndcol, kbot)
+			case wantt:
+				jbot = n - 1
+			default:
+				jbot = kbot
+			}
+			for j := max(ktop, krcol); j <= jbot; j++ {
+				mend := min(mbot+1, (j-krcol+2)/3) - 1
+				for m := mtop; m <= mend; m++ {
+					k := krcol + 3*m
+					refsum := v[m*ldv] * (h[(k+1)*ldh+j] +
+						v[m*ldv+1]*h[(k+2)*ldh+j] + v[m*ldv+2]*h[(k+3)*ldh+j])
+					h[(k+1)*ldh+j] -= refsum
+					h[(k+2)*ldh+j] -= refsum * v[m*ldv+1]
+					h[(k+3)*ldh+j] -= refsum * v[m*ldv+2]
+				}
+			}
+			if bmp22 {
+				k := krcol + 3*m22
+				for j := max(k+1, ktop); j <= jbot; j++ {
+					refsum := v[m22*ldv] * (h[(k+1)*ldh+j] + v[m22*ldv+1]*h[(k+2)*ldh+j])
+					h[(k+1)*ldh+j] -= refsum
+					h[(k+2)*ldh+j] -= refsum * v[m22*ldv+1]
+				}
+			}
+
+			// Multiply H by reflections from the right. Delay filling in the last row
+			// until the vigilant deflation check is complete.
+			var jtop int
+			switch {
+			case accum:
+				jtop = max(ktop, incol)
+			case wantt:
+				jtop = 0
+			default:
+				jtop = ktop
+			}
+			for m := mtop; m <= mbot; m++ {
+				if v[m*ldv] == 0 {
+					continue
+				}
+				k := krcol + 3*m
+				for j := jtop; j <= min(kbot, k+3); j++ {
+					refsum := v[m*ldv] * (h[j*ldh+k+1] +
+						v[m*ldv+1]*h[j*ldh+k+2] + v[m*ldv+2]*h[j*ldh+k+3])
+					h[j*ldh+k+1] -= refsum
+					h[j*ldh+k+2] -= refsum * v[m*ldv+1]
+					h[j*ldh+k+3] -= refsum * v[m*ldv+2]
+				}
+				if accum {
+					// Accumulate U. (If necessary, update Z later with an
+					// efficient matrix-matrix multiply.)
+					kms := k - incol
+					for j := max(0, ktop-incol-1); j < kdu; j++ {
+						refsum := v[m*ldv] * (u[j*ldu+kms] +
+							v[m*ldv+1]*u[j*ldu+kms+1] + v[m*ldv+2]*u[j*ldu+kms+2])
+						u[j*ldu+kms] -= refsum
+						u[j*ldu+kms+1] -= refsum * v[m*ldv+1]
+						u[j*ldu+kms+2] -= refsum * v[m*ldv+2]
+					}
+				} else if wantz {
+					// U is not accumulated, so update Z now by multiplying by
+					// reflections from the right.
+					for j := iloz; j <= ihiz; j++ {
+						refsum := v[m*ldv] * (z[j*ldz+k+1] +
+							v[m*ldv+1]*z[j*ldz+k+2] + v[m*ldv+2]*z[j*ldz+k+3])
+						z[j*ldz+k+1] -= refsum
+						z[j*ldz+k+2] -= refsum * v[m*ldv+1]
+						z[j*ldz+k+3] -= refsum * v[m*ldv+2]
+					}
+				}
+			}
+
+			// Special case: 2×2 reflection (if needed).
+			if bmp22 && v[m22*ldv] != 0 {
+				k := krcol + 3*m22
+				for j := jtop; j <= min(kbot, k+3); j++ {
+					refsum := v[m22*ldv] * (h[j*ldh+k+1] + v[m22*ldv+1]*h[j*ldh+k+2])
+					h[j*ldh+k+1] -= refsum
+					h[j*ldh+k+2] -= refsum * v[m22*ldv+1]
+				}
+				if accum {
+					kms := k - incol
+					for j := max(0, ktop-incol-1); j < kdu; j++ {
+						refsum := v[m22*ldv] * (u[j*ldu+kms] + v[m22*ldv+1]*u[j*ldu+kms+1])
+						u[j*ldu+kms] -= refsum
+						u[j*ldu+kms+1] -= refsum * v[m22*ldv+1]
+					}
+				} else if wantz {
+					for j := iloz; j <= ihiz; j++ {
+						refsum := v[m22*ldv] * (z[j*ldz+k+1] + v[m22*ldv+1]*z[j*ldz+k+2])
+						z[j*ldz+k+1] -= refsum
+						z[j*ldz+k+2] -= refsum * v[m22*ldv+1]
+					}
+				}
+			}
+
+			// Vigilant deflation check.
+			mstart := mtop
+			if krcol+3*mstart < ktop {
+				mstart++
+			}
+			mend := mbot
+			if bmp22 {
+				mend++
+			}
+			if krcol == kbot-2 {
+				mend++
+			}
+			for m := mstart; m <= mend; m++ {
+				k := min(kbot-1, krcol+3*m)
+
+				// The following convergence test requires that the traditional
+				// small-compared-to-nearby-diagonals criterion and the Ahues &
+				// Tisseur (LAWN 122, 1997) criteria both be satisfied. The latter
+				// improves accuracy in some examples. Falling back on an alternate
+				// convergence criterion when tst1 or tst2 is zero (as done here) is
+				// traditional but probably unnecessary.
+
+				if h[(k+1)*ldh+k] == 0 {
+					continue
+				}
+				tst1 := math.Abs(h[k*ldh+k]) + math.Abs(h[(k+1)*ldh+k+1])
+				if tst1 == 0 {
+					if k >= ktop+1 {
+						tst1 += math.Abs(h[k*ldh+k-1])
+					}
+					if k >= ktop+2 {
+						tst1 += math.Abs(h[k*ldh+k-2])
+					}
+					if k >= ktop+3 {
+						tst1 += math.Abs(h[k*ldh+k-3])
+					}
+					if k <= kbot-2 {
+						tst1 += math.Abs(h[(k+2)*ldh+k+1])
+					}
+					if k <= kbot-3 {
+						tst1 += math.Abs(h[(k+3)*ldh+k+1])
+					}
+					if k <= kbot-4 {
+						tst1 += math.Abs(h[(k+4)*ldh+k+1])
+					}
+				}
+				if math.Abs(h[(k+1)*ldh+k]) <= math.Max(smlnum, ulp*tst1) {
+					h12 := math.Max(math.Abs(h[(k+1)*ldh+k]), math.Abs(h[k*ldh+k+1]))
+					h21 := math.Min(math.Abs(h[(k+1)*ldh+k]), math.Abs(h[k*ldh+k+1]))
+					h11 := math.Max(math.Abs(h[(k+1)*ldh+k+1]), math.Abs(h[k*ldh+k]-h[(k+1)*ldh+k+1]))
+					h22 := math.Min(math.Abs(h[(k+1)*ldh+k+1]), math.Abs(h[k*ldh+k]-h[(k+1)*ldh+k+1]))
+					scl := h11 + h12
+					tst2 := h22 * (h11 / scl)
+					if tst2 == 0 || h21*(h12/scl) <= math.Max(smlnum, ulp*tst2) {
+						h[(k+1)*ldh+k] = 0
+					}
+				}
+			}
+
+			// Fill in the last row of each bulge.
+			mend = min(nbmps, (kbot-krcol-1)/3) - 1
+			for m := mtop; m <= mend; m++ {
+				k := krcol + 3*m
+				refsum := v[m*ldv] * v[m*ldv+2] * h[(k+4)*ldh+k+3]
+				h[(k+4)*ldh+k+1] = -refsum
+				h[(k+4)*ldh+k+2] = -refsum * v[m*ldv+1]
+				h[(k+4)*ldh+k+3] -= refsum * v[m*ldv+2]
+			}
+		}
+
+		// Use U (if accumulated) to update far-from-diagonal entries in H.
+		// If required, use U to update Z as well.
+		if !accum {
+			continue
+		}
+		var jtop, jbot int
+		if wantt {
+			jtop = 0
+			jbot = n - 1
+		} else {
+			jtop = ktop
+			jbot = kbot
+		}
+		bi := blas64.Implementation()
+		if !blk22 || incol < ktop || kbot < ndcol || ns <= 2 {
+			// Updates not exploiting the 2×2 block structure of U. k0 and nu keep track
+			// of the location and size of U in the special cases of introducing bulges
+			// and chasing bulges off the bottom. In these special cases and in case the
+			// number of shifts is ns = 2, there is no 2×2 block structure to exploit.
+
+			k0 := max(0, ktop-incol-1)
+			nu := kdu - max(0, ndcol-kbot) - k0
+
+			// Horizontal multiply.
+			for jcol := min(ndcol, kbot) + 1; jcol <= jbot; jcol += nh {
+				jlen := min(nh, jbot-jcol+1)
+				bi.Dgemm(blas.Trans, blas.NoTrans, nu, jlen, nu,
+					1, u[k0*ldu+k0:], ldu,
+					h[(incol+k0+1)*ldh+jcol:], ldh,
+					0, wh, ldwh)
+				impl.Dlacpy(blas.All, nu, jlen, wh, ldwh, h[(incol+k0+1)*ldh+jcol:], ldh)
+			}
+
+			// Vertical multiply.
+			for jrow := jtop; jrow <= max(ktop, incol)-1; jrow += nv {
+				jlen := min(nv, max(ktop, incol)-jrow)
+				bi.Dgemm(blas.NoTrans, blas.NoTrans, jlen, nu, nu,
+					1, h[jrow*ldh+incol+k0+1:], ldh,
+					u[k0*ldu+k0:], ldu,
+					0, wv, ldwv)
+				impl.Dlacpy(blas.All, jlen, nu, wv, ldwv, h[jrow*ldh+incol+k0+1:], ldh)
+			}
+
+			// Z multiply (also vertical).
+			if wantz {
+				for jrow := iloz; jrow <= ihiz; jrow += nv {
+					jlen := min(nv, ihiz-jrow+1)
+					bi.Dgemm(blas.NoTrans, blas.NoTrans, jlen, nu, nu,
+						1, z[jrow*ldz+incol+k0+1:], ldz,
+						u[k0*ldu+k0:], ldu,
+						0, wv, ldwv)
+					impl.Dlacpy(blas.All, jlen, nu, wv, ldwv, z[jrow*ldz+incol+k0+1:], ldz)
+				}
+			}
+
+			continue
+		}
+
+		// Updates exploiting U's 2×2 block structure. In the comments below
+		// the blocks are named by their position in u: U11 starts at u[0],
+		// U12 at u[i2], U21 in row j2 and U22 at u[j2*ldu+i2].
+
+		// i2, i4, j2, j4 are the last rows and columns of the blocks.
+		i2 := (kdu + 1) / 2
+		i4 := kdu
+		j2 := i4 - i2
+		j4 := kdu
+
+		// kzs and knz deal with the band of zeros along the diagonal of one of the
+		// triangular blocks.
+		kzs := (j4 - j2) - (ns + 1)
+		knz := ns + 1
+
+		// Horizontal multiply.
+		for jcol := min(ndcol, kbot) + 1; jcol <= jbot; jcol += nh {
+			jlen := min(nh, jbot-jcol+1)
+
+			// Copy bottom of H to top+kzs of scratch (the first kzs
+			// rows get multiplied by zero).
+			impl.Dlacpy(blas.All, knz, jlen, h[(incol+1+j2)*ldh+jcol:], ldh, wh[kzs*ldwh:], ldwh)
+
+			// Multiply by U21^T.
+			impl.Dlaset(blas.All, kzs, jlen, 0, 0, wh, ldwh)
+			bi.Dtrmm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, knz, jlen,
+				1, u[j2*ldu+kzs:], ldu, wh[kzs*ldwh:], ldwh)
+
+			// Multiply top of H by U11^T.
+			bi.Dgemm(blas.Trans, blas.NoTrans, i2, jlen, j2,
+				1, u, ldu, h[(incol+1)*ldh+jcol:], ldh,
+				1, wh, ldwh)
+
+			// Copy top of H to bottom of WH.
+			impl.Dlacpy(blas.All, j2, jlen, h[(incol+1)*ldh+jcol:], ldh, wh[i2*ldwh:], ldwh)
+
+			// Multiply by U12^T.
+			bi.Dtrmm(blas.Left, blas.Lower, blas.Trans, blas.NonUnit, j2, jlen,
+				1, u[i2:], ldu, wh[i2*ldwh:], ldwh)
+
+			// Multiply by U22^T.
+			bi.Dgemm(blas.Trans, blas.NoTrans, i4-i2, jlen, j4-j2,
+				1, u[j2*ldu+i2:], ldu, h[(incol+1+j2)*ldh+jcol:], ldh,
+				1, wh[i2*ldwh:], ldwh)
+
+			// Copy it back.
+			impl.Dlacpy(blas.All, kdu, jlen, wh, ldwh, h[(incol+1)*ldh+jcol:], ldh)
+		}
+
+		// Vertical multiply.
+		for jrow := jtop; jrow <= max(incol, ktop)-1; jrow += nv {
+			jlen := min(nv, max(incol, ktop)-jrow)
+
+			// Copy right of H to scratch (the first kzs columns get multiplied
+			// by zero).
+			impl.Dlacpy(blas.All, jlen, knz, h[jrow*ldh+incol+1+j2:], ldh, wv[kzs:], ldwv)
+
+			// Multiply by U21.
+			impl.Dlaset(blas.All, jlen, kzs, 0, 0, wv, ldwv)
+			bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.NonUnit, jlen, knz,
+				1, u[j2*ldu+kzs:], ldu, wv[kzs:], ldwv)
+
+			// Multiply by U11.
+			bi.Dgemm(blas.NoTrans, blas.NoTrans, jlen, i2, j2,
+				1, h[jrow*ldh+incol+1:], ldh, u, ldu,
+				1, wv, ldwv)
+
+			// Copy left of H to right of scratch.
+			impl.Dlacpy(blas.All, jlen, j2, h[jrow*ldh+incol+1:], ldh, wv[i2:], ldwv)
+
+			// Multiply by U12.
+			bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.NonUnit, jlen, i4-i2,
+				1, u[i2:], ldu, wv[i2:], ldwv)
+
+			// Multiply by U22.
+			bi.Dgemm(blas.NoTrans, blas.NoTrans, jlen, i4-i2, j4-j2,
+				1, h[jrow*ldh+incol+1+j2:], ldh, u[j2*ldu+i2:], ldu,
+				1, wv[i2:], ldwv)
+
+			// Copy it back.
+			impl.Dlacpy(blas.All, jlen, kdu, wv, ldwv, h[jrow*ldh+incol+1:], ldh)
+		}
+
+		if !wantz {
+			continue
+		}
+		// Multiply Z (also vertical).
+		for jrow := iloz; jrow <= ihiz; jrow += nv {
+			jlen := min(nv, ihiz-jrow+1)
+
+			// Copy right of Z to scratch (the first kzs columns get
+			// multiplied by zero).
+			impl.Dlacpy(blas.All, jlen, knz, z[jrow*ldz+incol+1+j2:], ldz, wv[kzs:], ldwv)
+
+			// Multiply by U21.
+			impl.Dlaset(blas.All, jlen, kzs, 0, 0, wv, ldwv)
+			bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.NonUnit, jlen, knz,
+				1, u[j2*ldu+kzs:], ldu, wv[kzs:], ldwv)
+
+			// Multiply by U11.
+			bi.Dgemm(blas.NoTrans, blas.NoTrans, jlen, i2, j2,
+				1, z[jrow*ldz+incol+1:], ldz, u, ldu,
+				1, wv, ldwv)
+
+			// Copy left of Z to right of scratch.
+			impl.Dlacpy(blas.All, jlen, j2, z[jrow*ldz+incol+1:], ldz, wv[i2:], ldwv)
+
+			// Multiply by U12.
+			bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.NonUnit, jlen, i4-i2,
+				1, u[i2:], ldu, wv[i2:], ldwv)
+
+			// Multiply by U22.
+			bi.Dgemm(blas.NoTrans, blas.NoTrans, jlen, i4-i2, j4-j2,
+				1, z[jrow*ldz+incol+1+j2:], ldz, u[j2*ldu+i2:], ldu,
+				1, wv[i2:], ldwv)
+
+			// Copy the result back to Z.
+			impl.Dlacpy(blas.All, jlen, kdu, wv, ldwv, z[jrow*ldz+incol+1:], ldz)
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlarf.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarf.go
@ -0,0 +1,101 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlarf applies an elementary reflector to a general rectangular matrix c.
+// This computes
+//  c = h * c if side == Left
+//  c = c * h if side == Right
+// where
+//  h = 1 - tau * v * v^T
+// and c is an m * n matrix.
+//
+// v stores the reflector vector with increment incv, which must be non-zero.
+// v has m elements if side == Left and n elements if side == Right.
+// If tau == 0, h is the identity and Dlarf returns without modifying c or work.
+//
+// work is temporary storage of length at least n if side == Left and at least
+// m if side == Right. This function will panic if this length requirement is not met.
+//
+// Dlarf is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlarf(side blas.Side, m, n int, v []float64, incv int, tau float64, c []float64, ldc int, work []float64) {
+	switch {
+	case side != blas.Left && side != blas.Right:
+		panic(badSide)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case incv == 0:
+		panic(zeroIncV)
+	case ldc < max(1, n):
+		panic(badLdC)
+	}
+
+	// Quick return for an empty matrix.
+	if m == 0 || n == 0 {
+		return
+	}
+
+	applyleft := side == blas.Left
+	// lenV is the number of elements of the reflector vector.
+	lenV := n
+	if applyleft {
+		lenV = m
+	}
+
+	switch {
+	case len(v) < 1+(lenV-1)*abs(incv):
+		panic(shortV)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	case (applyleft && len(work) < n) || (!applyleft && len(work) < m):
+		panic(shortWork)
+	}
+
+	// h is the identity when tau is zero, so c already holds the result.
+	if tau == 0 {
+		return
+	}
+
+	// Find lastv, the index of the last non-zero element of v, or -1 if v is
+	// entirely zero. With a positive increment the scan starts at storage
+	// index lastv*incv and moves down; with a negative increment it starts
+	// at storage index 0 and moves up, because i -= incv then increases i.
+	lastv := lenV - 1
+	var i int
+	if incv > 0 {
+		i = lastv * incv
+	}
+	for lastv >= 0 && v[i] == 0 {
+		lastv--
+		i -= incv
+	}
+
+	// Find lastc, the index of the last non-zero column (side == Left) or
+	// row (side == Right) of the part of c that the reflector touches.
+	var lastc int
+	if applyleft {
+		// Scan for the last non-zero column in c[0:lastv+1, :].
+		lastc = impl.Iladlc(lastv+1, n, c, ldc)
+	} else {
+		// Scan for the last non-zero row in c[:, 0:lastv+1].
+		lastc = impl.Iladlr(m, lastv+1, c, ldc)
+	}
+	if lastv == -1 || lastc == -1 {
+		// Either v or the relevant part of c is zero; nothing changes.
+		return
+	}
+
+	bi := blas64.Implementation()
+	if applyleft {
+		// Form h * c.
+		// w[0:lastc+1] = c[0:lastv+1, 0:lastc+1]^T * v[0:lastv+1]
+		bi.Dgemv(blas.Trans, lastv+1, lastc+1, 1, c, ldc, v, incv, 0, work, 1)
+		// c[0:lastv+1, 0:lastc+1] -= tau * v[0:lastv+1] * w[0:lastc+1]^T
+		bi.Dger(lastv+1, lastc+1, -tau, v, incv, work, 1, c, ldc)
+		return
+	}
+	// Form c * h.
+	// w[0:lastc+1] = c[0:lastc+1, 0:lastv+1] * v[0:lastv+1]
+	bi.Dgemv(blas.NoTrans, lastc+1, lastv+1, 1, c, ldc, v, incv, 0, work, 1)
+	// c[0:lastc+1, 0:lastv+1] -= tau * w[0:lastc+1] * v[0:lastv+1]^T
+	bi.Dger(lastc+1, lastv+1, -tau, work, 1, v, incv, c, ldc)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfb.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfb.go
@ -0,0 +1,449 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlarfb applies a block reflector to a matrix.
+//
+// In the call to Dlarfb, the mxn c is multiplied by the implicitly defined matrix h as follows:
+//  c = h * c if side == Left and trans == NoTrans
+//  c = c * h if side == Right and trans == NoTrans
+//  c = h^T * c if side == Left and trans == Trans
+//  c = c * h^T if side == Right and trans == Trans
+// h is a product of elementary reflectors. direct sets the direction of multiplication
+//  h = h_1 * h_2 * ... * h_k if direct == Forward
+//  h = h_k * h_k-1 * ... * h_1 if direct == Backward
+// The combination of direct and store defines the orientation of the elementary
+// reflectors. In all cases the ones on the diagonal are implicitly represented.
+//
+// If direct == lapack.Forward and store == lapack.ColumnWise
+//  V = [ 1        ]
+//      [v1   1    ]
+//      [v1  v2   1]
+//      [v1  v2  v3]
+//      [v1  v2  v3]
+// If direct == lapack.Forward and store == lapack.RowWise
+//  V = [ 1  v1  v1  v1  v1]
+//      [     1  v2  v2  v2]
+//      [         1  v3  v3]
+// If direct == lapack.Backward and store == lapack.ColumnWise
+//  V = [v1  v2  v3]
+//      [v1  v2  v3]
+//      [ 1  v2  v3]
+//      [     1  v3]
+//      [         1]
+// If direct == lapack.Backward and store == lapack.RowWise
+//  V = [v1  v1   1        ]
+//      [v2  v2  v2   1    ]
+//      [v3  v3  v3  v3   1]
+// An elementary reflector can be explicitly constructed by extracting the
+// corresponding elements of v, placing a 1 where the diagonal would be, and
+// placing zeros in the remaining elements.
+//
+// t is a k×k matrix containing the block reflector, and this function will panic
+// if t is not of sufficient size. See Dlarft for more information.
+//
+// work is a temporary storage matrix with stride ldwork.
+// work must be of size at least n×k if side == Left and m×k if side == Right, and
+// this function will panic if this size is not met.
+//
+// Dlarfb returns without modifying c if m == 0 or n == 0.
+//
+// Dlarfb is an internal routine. It is exported for testing purposes.
+func (Implementation) Dlarfb(side blas.Side, trans blas.Transpose, direct lapack.Direct, store lapack.StoreV, m, n, k int, v []float64, ldv int, t []float64, ldt int, c []float64, ldc int, work []float64, ldwork int) {
+	// nv is the order of the block reflector: m when applied from the left,
+	// n when applied from the right.
+	nv := m
+	if side == blas.Right {
+		nv = n
+	}
+	switch {
+	case side != blas.Left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.Trans && trans != blas.NoTrans:
+		panic(badTrans)
+	case direct != lapack.Forward && direct != lapack.Backward:
+		panic(badDirect)
+	case store != lapack.ColumnWise && store != lapack.RowWise:
+		panic(badStoreV)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case store == lapack.ColumnWise && ldv < max(1, k):
+		panic(badLdV)
+	case store == lapack.RowWise && ldv < max(1, nv):
+		panic(badLdV)
+	case ldt < max(1, k):
+		panic(badLdT)
+	case ldc < max(1, n):
+		panic(badLdC)
+	case ldwork < max(1, k):
+		panic(badLdWork)
+	}
+
+	// Quick return for an empty matrix.
+	if m == 0 || n == 0 {
+		return
+	}
+
+	// nw is the number of rows of the work matrix: n when applied from the
+	// left, m when applied from the right.
+	nw := n
+	if side == blas.Right {
+		nw = m
+	}
+	switch {
+	case store == lapack.ColumnWise && len(v) < (nv-1)*ldv+k:
+		panic(shortV)
+	case store == lapack.RowWise && len(v) < (k-1)*ldv+nv:
+		panic(shortV)
+	case len(t) < (k-1)*ldt+k:
+		panic(shortT)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	case len(work) < (nw-1)*ldwork+k:
+		panic(shortWork)
+	}
+
+	bi := blas64.Implementation()
+
+	// transt is the opposite of trans. The side == Left branches below use it
+	// when multiplying W by T, because W holds a transposed product there.
+	transt := blas.Trans
+	if trans == blas.Trans {
+		transt = blas.NoTrans
+	}
+	// TODO(btracey): This follows the original Lapack code where the
+	// elements are copied into the columns of the working array. The
+	// loops should go in the other direction so the data is written
+	// into the rows of work so the copy is not strided. A bigger change
+	// would be to replace work with work^T, but benchmarks would be
+	// needed to see if the change is merited.
+	if store == lapack.ColumnWise {
+		if direct == lapack.Forward {
+			// V1 is the first k rows of V. V2 is the remaining rows.
+			if side == blas.Left {
+				// W = C^T V = C1^T V1 + C2^T V2 (stored in work).
+
+				// W = C1^T.
+				for j := 0; j < k; j++ {
+					bi.Dcopy(n, c[j*ldc:], 1, work[j:], ldwork)
+				}
+				// W = W * V1.
+				bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit,
+					n, k, 1,
+					v, ldv,
+					work, ldwork)
+				if m > k {
+					// W = W + C2^T V2.
+					bi.Dgemm(blas.Trans, blas.NoTrans, n, k, m-k,
+						1, c[k*ldc:], ldc, v[k*ldv:], ldv,
+						1, work, ldwork)
+				}
+				// W = W * T^T or W * T.
+				bi.Dtrmm(blas.Right, blas.Upper, transt, blas.NonUnit, n, k,
+					1, t, ldt,
+					work, ldwork)
+				// C -= V * W^T.
+				if m > k {
+					// C2 -= V2 * W^T.
+					bi.Dgemm(blas.NoTrans, blas.Trans, m-k, n, k,
+						-1, v[k*ldv:], ldv, work, ldwork,
+						1, c[k*ldc:], ldc)
+				}
+				// W *= V1^T.
+				bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, n, k,
+					1, v, ldv,
+					work, ldwork)
+				// C1 -= W^T.
+				// TODO(btracey): This should use blas.Axpy.
+				for i := 0; i < n; i++ {
+					for j := 0; j < k; j++ {
+						c[j*ldc+i] -= work[i*ldwork+j]
+					}
+				}
+				return
+			}
+			// Form C = C * H or C * H^T, where C = (C1 C2).
+
+			// W = C1.
+			for i := 0; i < k; i++ {
+				bi.Dcopy(m, c[i:], ldc, work[i:], ldwork)
+			}
+			// W *= V1.
+			bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, m, k,
+				1, v, ldv,
+				work, ldwork)
+			if n > k {
+				// W += C2 * V2.
+				bi.Dgemm(blas.NoTrans, blas.NoTrans, m, k, n-k,
+					1, c[k:], ldc, v[k*ldv:], ldv,
+					1, work, ldwork)
+			}
+			// W *= T or T^T.
+			bi.Dtrmm(blas.Right, blas.Upper, trans, blas.NonUnit, m, k,
+				1, t, ldt,
+				work, ldwork)
+			// C -= W * V^T.
+			if n > k {
+				// C2 -= W * V2^T.
+				bi.Dgemm(blas.NoTrans, blas.Trans, m, n-k, k,
+					-1, work, ldwork, v[k*ldv:], ldv,
+					1, c[k:], ldc)
+			}
+			// W *= V1^T.
+			bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, m, k,
+				1, v, ldv,
+				work, ldwork)
+			// C1 -= W.
+			// TODO(btracey): This should use blas.Axpy.
+			for i := 0; i < m; i++ {
+				for j := 0; j < k; j++ {
+					c[i*ldc+j] -= work[i*ldwork+j]
+				}
+			}
+			return
+		}
+		// V = (V1)
+		//   = (V2) (last k rows)
+		// Where V2 is unit upper triangular.
+		if side == blas.Left {
+			// Form H * C or H^T * C where C = (C1; C2).
+			// W = C^T V.
+
+			// W = C2^T.
+			for j := 0; j < k; j++ {
+				bi.Dcopy(n, c[(m-k+j)*ldc:], 1, work[j:], ldwork)
+			}
+			// W *= V2.
+			bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, n, k,
+				1, v[(m-k)*ldv:], ldv,
+				work, ldwork)
+			if m > k {
+				// W += C1^T * V1.
+				bi.Dgemm(blas.Trans, blas.NoTrans, n, k, m-k,
+					1, c, ldc, v, ldv,
+					1, work, ldwork)
+			}
+			// W *= T or T^T.
+			bi.Dtrmm(blas.Right, blas.Lower, transt, blas.NonUnit, n, k,
+				1, t, ldt,
+				work, ldwork)
+			// C -= V * W^T.
+			if m > k {
+				// C1 -= V1 * W^T.
+				bi.Dgemm(blas.NoTrans, blas.Trans, m-k, n, k,
+					-1, v, ldv, work, ldwork,
+					1, c, ldc)
+			}
+			// W *= V2^T.
+			bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, n, k,
+				1, v[(m-k)*ldv:], ldv,
+				work, ldwork)
+			// C2 -= W^T.
+			// TODO(btracey): This should use blas.Axpy.
+			for i := 0; i < n; i++ {
+				for j := 0; j < k; j++ {
+					c[(m-k+j)*ldc+i] -= work[i*ldwork+j]
+				}
+			}
+			return
+		}
+		// Form C * H or C * H^T where C = (C1 C2).
+		// W = C * V.
+
+		// W = C2.
+		for j := 0; j < k; j++ {
+			bi.Dcopy(m, c[n-k+j:], ldc, work[j:], ldwork)
+		}
+
+		// W = W * V2.
+		bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, m, k,
+			1, v[(n-k)*ldv:], ldv,
+			work, ldwork)
+		if n > k {
+			// W += C1 * V1.
+			bi.Dgemm(blas.NoTrans, blas.NoTrans, m, k, n-k,
+				1, c, ldc, v, ldv,
+				1, work, ldwork)
+		}
+		// W *= T or T^T.
+		bi.Dtrmm(blas.Right, blas.Lower, trans, blas.NonUnit, m, k,
+			1, t, ldt,
+			work, ldwork)
+		// C -= W * V^T.
+		if n > k {
+			// C1 -= W * V1^T.
+			bi.Dgemm(blas.NoTrans, blas.Trans, m, n-k, k,
+				-1, work, ldwork, v, ldv,
+				1, c, ldc)
+		}
+		// W *= V2^T.
+		bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, m, k,
+			1, v[(n-k)*ldv:], ldv,
+			work, ldwork)
+		// C2 -= W.
+		// TODO(btracey): This should use blas.Axpy.
+		for i := 0; i < m; i++ {
+			for j := 0; j < k; j++ {
+				c[i*ldc+n-k+j] -= work[i*ldwork+j]
+			}
+		}
+		return
+	}
+	// Store = Rowwise.
+	if direct == lapack.Forward {
+		// V = (V1 V2) where v1 is unit upper triangular.
+		if side == blas.Left {
+			// Form H * C or H^T * C where C = (C1; C2).
+			// W = C^T * V^T.
+
+			// W = C1^T.
+			for j := 0; j < k; j++ {
+				bi.Dcopy(n, c[j*ldc:], 1, work[j:], ldwork)
+			}
+			// W *= V1^T.
+			bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, n, k,
+				1, v, ldv,
+				work, ldwork)
+			if m > k {
+				// W += C2^T * V2^T.
+				bi.Dgemm(blas.Trans, blas.Trans, n, k, m-k,
+					1, c[k*ldc:], ldc, v[k:], ldv,
+					1, work, ldwork)
+			}
+			// W *= T or T^T.
+			bi.Dtrmm(blas.Right, blas.Upper, transt, blas.NonUnit, n, k,
+				1, t, ldt,
+				work, ldwork)
+			// C -= V^T * W^T.
+			if m > k {
+				// C2 -= V2^T * W^T.
+				bi.Dgemm(blas.Trans, blas.Trans, m-k, n, k,
+					-1, v[k:], ldv, work, ldwork,
+					1, c[k*ldc:], ldc)
+			}
+			// W *= V1.
+			bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, n, k,
+				1, v, ldv,
+				work, ldwork)
+			// C1 -= W^T.
+			// TODO(btracey): This should use blas.Axpy.
+			for i := 0; i < n; i++ {
+				for j := 0; j < k; j++ {
+					c[j*ldc+i] -= work[i*ldwork+j]
+				}
+			}
+			return
+		}
+		// Form C * H or C * H^T where C = (C1 C2).
+		// W = C * V^T.
+
+		// W = C1.
+		for j := 0; j < k; j++ {
+			bi.Dcopy(m, c[j:], ldc, work[j:], ldwork)
+		}
+		// W *= V1^T.
+		bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.Unit, m, k,
+			1, v, ldv,
+			work, ldwork)
+		if n > k {
+			// W += C2 * V2^T.
+			bi.Dgemm(blas.NoTrans, blas.Trans, m, k, n-k,
+				1, c[k:], ldc, v[k:], ldv,
+				1, work, ldwork)
+		}
+		// W *= T or T^T.
+		bi.Dtrmm(blas.Right, blas.Upper, trans, blas.NonUnit, m, k,
+			1, t, ldt,
+			work, ldwork)
+		// C -= W * V.
+		if n > k {
+			// C2 -= W * V2.
+			bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n-k, k,
+				-1, work, ldwork, v[k:], ldv,
+				1, c[k:], ldc)
+		}
+		// W *= V1.
+		bi.Dtrmm(blas.Right, blas.Upper, blas.NoTrans, blas.Unit, m, k,
+			1, v, ldv,
+			work, ldwork)
+		// C1 -= W.
+		// TODO(btracey): This should use blas.Axpy.
+		for i := 0; i < m; i++ {
+			for j := 0; j < k; j++ {
+				c[i*ldc+j] -= work[i*ldwork+j]
+			}
+		}
+		return
+	}
+	// V = (V1 V2) where V2 is the last k columns and is lower unit triangular.
+	if side == blas.Left {
+		// Form H * C or H^T C where C = (C1 ; C2).
+		// W = C^T * V^T.
+
+		// W = C2^T.
+		for j := 0; j < k; j++ {
+			bi.Dcopy(n, c[(m-k+j)*ldc:], 1, work[j:], ldwork)
+		}
+		// W *= V2^T.
+		bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, n, k,
+			1, v[m-k:], ldv,
+			work, ldwork)
+		if m > k {
+			// W += C1^T * V1^T.
+			bi.Dgemm(blas.Trans, blas.Trans, n, k, m-k,
+				1, c, ldc, v, ldv,
+				1, work, ldwork)
+		}
+		// W *= T or T^T.
+		bi.Dtrmm(blas.Right, blas.Lower, transt, blas.NonUnit, n, k,
+			1, t, ldt,
+			work, ldwork)
+		// C -= V^T * W^T.
+		if m > k {
+			// C1 -= V1^T * W^T.
+			bi.Dgemm(blas.Trans, blas.Trans, m-k, n, k,
+				-1, v, ldv, work, ldwork,
+				1, c, ldc)
+		}
+		// W *= V2.
+		bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, n, k,
+			1, v[m-k:], ldv,
+			work, ldwork)
+		// C2 -= W^T.
+		// TODO(btracey): This should use blas.Axpy.
+		for i := 0; i < n; i++ {
+			for j := 0; j < k; j++ {
+				c[(m-k+j)*ldc+i] -= work[i*ldwork+j]
+			}
+		}
+		return
+	}
+	// Form C * H or C * H^T where C = (C1 C2).
+	// W = C * V^T.
+	// W = C2.
+	for j := 0; j < k; j++ {
+		bi.Dcopy(m, c[n-k+j:], ldc, work[j:], ldwork)
+	}
+	// W *= V2^T.
+	bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, m, k,
+		1, v[n-k:], ldv,
+		work, ldwork)
+	if n > k {
+		// W += C1 * V1^T.
+		bi.Dgemm(blas.NoTrans, blas.Trans, m, k, n-k,
+			1, c, ldc, v, ldv,
+			1, work, ldwork)
+	}
+	// W *= T or T^T.
+	bi.Dtrmm(blas.Right, blas.Lower, trans, blas.NonUnit, m, k,
+		1, t, ldt,
+		work, ldwork)
+	// C -= W * V.
+	if n > k {
+		// C1 -= W * V1.
+		bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n-k, k,
+			-1, work, ldwork, v, ldv,
+			1, c, ldc)
+	}
+	// W *= V2.
+	bi.Dtrmm(blas.Right, blas.Lower, blas.NoTrans, blas.Unit, m, k,
+		1, v[n-k:], ldv,
+		work, ldwork)
+	// C2 -= W.
+	// TODO(btracey): This should use blas.Axpy.
+	for i := 0; i < m; i++ {
+		for j := 0; j < k; j++ {
+			c[i*ldc+n-k+j] -= work[i*ldwork+j]
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfg.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfg.go
@ -0,0 +1,71 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlarfg computes an elementary Householder reflector H of order n such that
+//  H * (alpha) = (beta)
+//      (    x)   (   0)
+//  H^T * H = I
+// The reflector is returned in the factored form
+//  H = 1 - tau * (1; v) * (1 v^T)
+// with tau a real scalar.
+//
+// x holds the vector x on entry and is overwritten with v on return.
+// If n <= 1, or if x has zero norm, Dlarfg returns beta = alpha and tau = 0
+// and leaves x unchanged.
+//
+// Dlarfg is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlarfg(n int, alpha float64, x []float64, incX int) (beta, tau float64) {
+	if n < 0 {
+		panic(nLT0)
+	}
+	if incX <= 0 {
+		panic(badIncX)
+	}
+	if n <= 1 {
+		return alpha, 0
+	}
+	if len(x) < 1+(n-2)*abs(incX) {
+		panic(shortX)
+	}
+
+	blasImpl := blas64.Implementation()
+	// x carries the trailing n-1 elements of the vector being reflected.
+	tail := n - 1
+
+	normX := blasImpl.Dnrm2(tail, x, incX)
+	if normX == 0 {
+		return alpha, 0
+	}
+
+	safmin := dlamchS / dlamchE
+	beta = -math.Copysign(impl.Dlapy2(alpha, normX), alpha)
+
+	// rescales counts how many times x, alpha and beta were scaled up.
+	rescales := 0
+	if math.Abs(beta) < safmin {
+		// normX and beta may be inaccurate: scale up until beta is large
+		// enough, then recompute both from the scaled data.
+		invSafmin := 1 / safmin
+		for scaled := false; !scaled; scaled = math.Abs(beta) >= safmin {
+			rescales++
+			blasImpl.Dscal(tail, invSafmin, x, incX)
+			beta *= invSafmin
+			alpha *= invSafmin
+		}
+		normX = blasImpl.Dnrm2(tail, x, incX)
+		beta = -math.Copysign(impl.Dlapy2(alpha, normX), alpha)
+	}
+
+	tau = (beta - alpha) / beta
+	blasImpl.Dscal(tail, 1/(alpha-beta), x, incX)
+
+	// Undo the scaling on beta, one factor of safmin per rescale.
+	for ; rescales > 0; rescales-- {
+		beta *= safmin
+	}
+	return beta, tau
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlarft.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarft.go
@ -0,0 +1,166 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlarft forms the triangular factor T of a block reflector H, storing the answer
+// in t.
+//  H = I - V * T * V^T  if store == lapack.ColumnWise
+//  H = I - V^T * T * V  if store == lapack.RowWise
+// H is defined by a product of the elementary reflectors where
+//  H = H_0 * H_1 * ... * H_{k-1}  if direct == lapack.Forward
+//  H = H_{k-1} * ... * H_1 * H_0  if direct == lapack.Backward
+//
+// n is the order of H and k is the number of elementary reflectors; k must be
+// at least 1.
+//
+// t is a k×k triangular matrix. t is upper triangular if direct = lapack.Forward
+// and lower triangular otherwise. This function will panic if t is not of
+// sufficient size or if ldt is smaller than max(1, k).
+//
+// store describes the storage of the elementary reflectors in v. See
+// Dlarfb for a description of layout.
+//
+// tau contains the scalar factors of the elementary reflectors H_i and must
+// have length at least k. If tau[i] == 0, the corresponding column of t is
+// set to zero.
+//
+// Dlarft is an internal routine. It is exported for testing purposes.
+func (Implementation) Dlarft(direct lapack.Direct, store lapack.StoreV, n, k int, v []float64, ldv int, tau []float64, t []float64, ldt int) {
+	// mv×nv are the dimensions of v as stored.
+	mv, nv := n, k
+	if store == lapack.RowWise {
+		mv, nv = k, n
+	}
+	switch {
+	case direct != lapack.Forward && direct != lapack.Backward:
+		panic(badDirect)
+	case store != lapack.RowWise && store != lapack.ColumnWise:
+		panic(badStoreV)
+	case n < 0:
+		panic(nLT0)
+	case k < 1:
+		panic(kLT1)
+	case ldv < max(1, nv):
+		panic(badLdV)
+	case len(tau) < k:
+		panic(shortTau)
+	case ldt < max(1, k):
+		// A too-small stride is a bad leading dimension, not a short slice.
+		panic(badLdT)
+	}
+
+	// Quick return for an empty reflector.
+	if n == 0 {
+		return
+	}
+
+	switch {
+	case len(v) < (mv-1)*ldv+nv:
+		panic(shortV)
+	case len(t) < (k-1)*ldt+k:
+		panic(shortT)
+	}
+
+	bi := blas64.Implementation()
+
+	// TODO(btracey): There are a number of minor obvious loop optimizations here.
+	// TODO(btracey): It may be possible to rearrange some of the code so that
+	// index of 1 is more common in the Dgemv.
+	if direct == lapack.Forward {
+		// prevlastv bounds the last non-zero index over the reflectors
+		// already processed, so the products below can skip trailing zeros.
+		prevlastv := n - 1
+		for i := 0; i < k; i++ {
+			prevlastv = max(i, prevlastv)
+			if tau[i] == 0 {
+				// H_i is the identity: column i of T is zero.
+				for j := 0; j <= i; j++ {
+					t[j*ldt+i] = 0
+				}
+				continue
+			}
+			var lastv int
+			if store == lapack.ColumnWise {
+				// skip trailing zeros
+				for lastv = n - 1; lastv >= i+1; lastv-- {
+					if v[lastv*ldv+i] != 0 {
+						break
+					}
+				}
+				// Contribution of the implicit unit diagonal element of v_i.
+				for j := 0; j < i; j++ {
+					t[j*ldt+i] = -tau[i] * v[i*ldv+j]
+				}
+				j := min(lastv, prevlastv)
+				// t[0:i, i] += -tau[i] * v[i+1:j+1, 0:i]^T * v[i+1:j+1, i]
+				bi.Dgemv(blas.Trans, j-i, i,
+					-tau[i], v[(i+1)*ldv:], ldv, v[(i+1)*ldv+i:], ldv,
+					1, t[i:], ldt)
+			} else {
+				// skip trailing zeros
+				for lastv = n - 1; lastv >= i+1; lastv-- {
+					if v[i*ldv+lastv] != 0 {
+						break
+					}
+				}
+				// Contribution of the implicit unit diagonal element of v_i.
+				for j := 0; j < i; j++ {
+					t[j*ldt+i] = -tau[i] * v[j*ldv+i]
+				}
+				j := min(lastv, prevlastv)
+				// t[0:i, i] += -tau[i] * v[0:i, i+1:j+1] * v[i, i+1:j+1]^T
+				bi.Dgemv(blas.NoTrans, i, j-i,
+					-tau[i], v[i+1:], ldv, v[i*ldv+i+1:], 1,
+					1, t[i:], ldt)
+			}
+			// t[0:i, i] = t[0:i, 0:i] * t[0:i, i]
+			bi.Dtrmv(blas.Upper, blas.NoTrans, blas.NonUnit, i, t, ldt, t[i:], ldt)
+			t[i*ldt+i] = tau[i]
+			// Only the first reflector (index 0) resets prevlastv; every
+			// later one must widen it. Resetting at i == 1 as well would
+			// forget the extent of reflector 0 and truncate later products.
+			if i > 0 {
+				prevlastv = max(prevlastv, lastv)
+			} else {
+				prevlastv = lastv
+			}
+		}
+		return
+	}
+	// direct == lapack.Backward: build the lower triangular T from the last
+	// reflector towards the first.
+	prevlastv := 0
+	for i := k - 1; i >= 0; i-- {
+		if tau[i] == 0 {
+			// H_i is the identity: column i of T is zero.
+			for j := i; j < k; j++ {
+				t[j*ldt+i] = 0
+			}
+			continue
+		}
+		var lastv int
+		if i < k-1 {
+			if store == lapack.ColumnWise {
+				// skip leading zeros
+				for lastv = 0; lastv < i; lastv++ {
+					if v[lastv*ldv+i] != 0 {
+						break
+					}
+				}
+				// Contribution of the implicit unit diagonal element of v_i.
+				for j := i + 1; j < k; j++ {
+					t[j*ldt+i] = -tau[i] * v[(n-k+i)*ldv+j]
+				}
+				j := max(lastv, prevlastv)
+				// t[i+1:k, i] += -tau[i] * v[j:n-k+i, i+1:k]^T * v[j:n-k+i, i]
+				bi.Dgemv(blas.Trans, n-k+i-j, k-i-1,
+					-tau[i], v[j*ldv+i+1:], ldv, v[j*ldv+i:], ldv,
+					1, t[(i+1)*ldt+i:], ldt)
+			} else {
+				// skip leading zeros
+				for lastv = 0; lastv < i; lastv++ {
+					if v[i*ldv+lastv] != 0 {
+						break
+					}
+				}
+				// Contribution of the implicit unit diagonal element of v_i.
+				for j := i + 1; j < k; j++ {
+					t[j*ldt+i] = -tau[i] * v[j*ldv+n-k+i]
+				}
+				j := max(lastv, prevlastv)
+				// t[i+1:k, i] += -tau[i] * v[i+1:k, j:n-k+i] * v[i, j:n-k+i]^T
+				bi.Dgemv(blas.NoTrans, k-i-1, n-k+i-j,
+					-tau[i], v[(i+1)*ldv+j:], ldv, v[i*ldv+j:], 1,
+					1, t[(i+1)*ldt+i:], ldt)
+			}
+			// t[i+1:k, i] = t[i+1:k, i+1:k] * t[i+1:k, i]
+			bi.Dtrmv(blas.Lower, blas.NoTrans, blas.NonUnit, k-i-1,
+				t[(i+1)*ldt+i+1:], ldt,
+				t[(i+1)*ldt+i:], ldt)
+			if i > 0 {
+				prevlastv = min(prevlastv, lastv)
+			} else {
+				prevlastv = lastv
+			}
+		}
+		t[i*ldt+i] = tau[i]
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfx.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlarfx.go
@ -0,0 +1,550 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dlarfx applies an elementary reflector H to a real m×n matrix C, from either
+// the left or the right, with loop unrolling when the reflector has order less
+// than 11.
+//
+// H is represented in the form
+//  H = I - tau * v * v^T,
+// where tau is a real scalar and v is a real vector. If tau = 0, then H is
+// taken to be the identity matrix.
+//
+// v must have length at least m if side == blas.Left, and at least n if side ==
+// blas.Right, otherwise Dlarfx will panic.
+//
+// c and ldc represent the m×n matrix C. On return, C is overwritten by the
+// matrix H * C if side == blas.Left, or C * H if side == blas.Right.
+//
+// work must have length at least n if side == blas.Left, and at least m if side
+// == blas.Right, otherwise Dlarfx will panic. work is not referenced if H has
+// order < 11.
+//
+// Dlarfx is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlarfx(side blas.Side, m, n int, v []float64, tau float64, c []float64, ldc int, work []float64) {
+	switch {
+	case side != blas.Left && side != blas.Right:
+		panic(badSide)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case ldc < max(1, n):
+		panic(badLdC)
+	}
+
+	// Quick return if possible.
+	if m == 0 || n == 0 {
+		return
+	}
+
+	// nh is the order of H and lwork is the length of work needed by the
+	// general (non-unrolled) code path.
+	nh := m
+	lwork := n
+	if side == blas.Right {
+		nh = n
+		lwork = m
+	}
+	switch {
+	case len(v) < nh:
+		panic(shortV)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	case nh > 10 && len(work) < lwork:
+		// work is only required when the general code path is taken.
+		panic(shortWork)
+	}
+
+	// H is the identity when tau is zero, so C is left unchanged.
+	if tau == 0 {
+		return
+	}
+
+	// Each unrolled case below handles one column (side == Left) or one row
+	// (side == Right) of C at a time: it forms sum = v^T * x for that column
+	// or row x and then subtracts sum * tau * v from it.
+	if side == blas.Left {
+		// Form H * C, where H has order m.
+		switch m {
+		default: // Code for general m.
+			impl.Dlarf(side, m, n, v, 1, tau, c, ldc, work)
+			return
+
+		case 0: // No-op for zero size matrix.
+			// Not reached in practice: m == 0 already returned above.
+			return
+
+		case 1: // Special code for 1×1 Householder matrix.
+			t0 := 1 - tau*v[0]*v[0]
+			for j := 0; j < n; j++ {
+				c[j] *= t0
+			}
+			return
+
+		case 2: // Special code for 2×2 Householder matrix.
+			v0 := v[0]
+			t0 := tau * v0
+			v1 := v[1]
+			t1 := tau * v1
+			for j := 0; j < n; j++ {
+				sum := v0*c[j] + v1*c[ldc+j]
+				c[j] -= sum * t0
+				c[ldc+j] -= sum * t1
+			}
+			return
+
+		case 3: // Special code for 3×3 Householder matrix.
+			v0 := v[0]
+			t0 := tau * v0
+			v1 := v[1]
+			t1 := tau * v1
+			v2 := v[2]
+			t2 := tau * v2
+			for j := 0; j < n; j++ {
+				sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j]
+				c[j] -= sum * t0
+				c[ldc+j] -= sum * t1
+				c[2*ldc+j] -= sum * t2
+			}
+			return
+
+		case 4: // Special code for 4×4 Householder matrix.
+			v0 := v[0]
+			t0 := tau * v0
+			v1 := v[1]
+			t1 := tau * v1
+			v2 := v[2]
+			t2 := tau * v2
+			v3 := v[3]
+			t3 := tau * v3
+			for j := 0; j < n; j++ {
+				sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j]
+				c[j] -= sum * t0
+				c[ldc+j] -= sum * t1
+				c[2*ldc+j] -= sum * t2
+				c[3*ldc+j] -= sum * t3
+			}
+			return
+
+		case 5: // Special code for 5×5 Householder matrix.
+			v0 := v[0]
+			t0 := tau * v0
+			v1 := v[1]
+			t1 := tau * v1
+			v2 := v[2]
+			t2 := tau * v2
+			v3 := v[3]
+			t3 := tau * v3
+			v4 := v[4]
+			t4 := tau * v4
+			for j := 0; j < n; j++ {
+				sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j]
+				c[j] -= sum * t0
+				c[ldc+j] -= sum * t1
+				c[2*ldc+j] -= sum * t2
+				c[3*ldc+j] -= sum * t3
+				c[4*ldc+j] -= sum * t4
+			}
+			return
+
+		case 6: // Special code for 6×6 Householder matrix.
+			v0 := v[0]
+			t0 := tau * v0
+			v1 := v[1]
+			t1 := tau * v1
+			v2 := v[2]
+			t2 := tau * v2
+			v3 := v[3]
+			t3 := tau * v3
+			v4 := v[4]
+			t4 := tau * v4
+			v5 := v[5]
+			t5 := tau * v5
+			for j := 0; j < n; j++ {
+				sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j] +
+					v5*c[5*ldc+j]
+				c[j] -= sum * t0
+				c[ldc+j] -= sum * t1
+				c[2*ldc+j] -= sum * t2
+				c[3*ldc+j] -= sum * t3
+				c[4*ldc+j] -= sum * t4
+				c[5*ldc+j] -= sum * t5
+			}
+			return
+
+		case 7: // Special code for 7×7 Householder matrix.
+			v0 := v[0]
+			t0 := tau * v0
+			v1 := v[1]
+			t1 := tau * v1
+			v2 := v[2]
+			t2 := tau * v2
+			v3 := v[3]
+			t3 := tau * v3
+			v4 := v[4]
+			t4 := tau * v4
+			v5 := v[5]
+			t5 := tau * v5
+			v6 := v[6]
+			t6 := tau * v6
+			for j := 0; j < n; j++ {
+				sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j] +
+					v5*c[5*ldc+j] + v6*c[6*ldc+j]
+				c[j] -= sum * t0
+				c[ldc+j] -= sum * t1
+				c[2*ldc+j] -= sum * t2
+				c[3*ldc+j] -= sum * t3
+				c[4*ldc+j] -= sum * t4
+				c[5*ldc+j] -= sum * t5
+				c[6*ldc+j] -= sum * t6
+			}
+			return
+
+		case 8: // Special code for 8×8 Householder matrix.
+			v0 := v[0]
+			t0 := tau * v0
+			v1 := v[1]
+			t1 := tau * v1
+			v2 := v[2]
+			t2 := tau * v2
+			v3 := v[3]
+			t3 := tau * v3
+			v4 := v[4]
+			t4 := tau * v4
+			v5 := v[5]
+			t5 := tau * v5
+			v6 := v[6]
+			t6 := tau * v6
+			v7 := v[7]
+			t7 := tau * v7
+			for j := 0; j < n; j++ {
+				sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j] +
+					v5*c[5*ldc+j] + v6*c[6*ldc+j] + v7*c[7*ldc+j]
+				c[j] -= sum * t0
+				c[ldc+j] -= sum * t1
+				c[2*ldc+j] -= sum * t2
+				c[3*ldc+j] -= sum * t3
+				c[4*ldc+j] -= sum * t4
+				c[5*ldc+j] -= sum * t5
+				c[6*ldc+j] -= sum * t6
+				c[7*ldc+j] -= sum * t7
+			}
+			return
+
+		case 9: // Special code for 9×9 Householder matrix.
+			v0 := v[0]
+			t0 := tau * v0
+			v1 := v[1]
+			t1 := tau * v1
+			v2 := v[2]
+			t2 := tau * v2
+			v3 := v[3]
+			t3 := tau * v3
+			v4 := v[4]
+			t4 := tau * v4
+			v5 := v[5]
+			t5 := tau * v5
+			v6 := v[6]
+			t6 := tau * v6
+			v7 := v[7]
+			t7 := tau * v7
+			v8 := v[8]
+			t8 := tau * v8
+			for j := 0; j < n; j++ {
+				sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j] +
+					v5*c[5*ldc+j] + v6*c[6*ldc+j] + v7*c[7*ldc+j] + v8*c[8*ldc+j]
+				c[j] -= sum * t0
+				c[ldc+j] -= sum * t1
+				c[2*ldc+j] -= sum * t2
+				c[3*ldc+j] -= sum * t3
+				c[4*ldc+j] -= sum * t4
+				c[5*ldc+j] -= sum * t5
+				c[6*ldc+j] -= sum * t6
+				c[7*ldc+j] -= sum * t7
+				c[8*ldc+j] -= sum * t8
+			}
+			return
+
+		case 10: // Special code for 10×10 Householder matrix.
+			v0 := v[0]
+			t0 := tau * v0
+			v1 := v[1]
+			t1 := tau * v1
+			v2 := v[2]
+			t2 := tau * v2
+			v3 := v[3]
+			t3 := tau * v3
+			v4 := v[4]
+			t4 := tau * v4
+			v5 := v[5]
+			t5 := tau * v5
+			v6 := v[6]
+			t6 := tau * v6
+			v7 := v[7]
+			t7 := tau * v7
+			v8 := v[8]
+			t8 := tau * v8
+			v9 := v[9]
+			t9 := tau * v9
+			for j := 0; j < n; j++ {
+				sum := v0*c[j] + v1*c[ldc+j] + v2*c[2*ldc+j] + v3*c[3*ldc+j] + v4*c[4*ldc+j] +
+					v5*c[5*ldc+j] + v6*c[6*ldc+j] + v7*c[7*ldc+j] + v8*c[8*ldc+j] + v9*c[9*ldc+j]
+				c[j] -= sum * t0
+				c[ldc+j] -= sum * t1
+				c[2*ldc+j] -= sum * t2
+				c[3*ldc+j] -= sum * t3
+				c[4*ldc+j] -= sum * t4
+				c[5*ldc+j] -= sum * t5
+				c[6*ldc+j] -= sum * t6
+				c[7*ldc+j] -= sum * t7
+				c[8*ldc+j] -= sum * t8
+				c[9*ldc+j] -= sum * t9
+			}
+			return
+		}
+	}
+
+	// Form C * H, where H has order n.
+	switch n {
+	default: // Code for general n.
+		impl.Dlarf(side, m, n, v, 1, tau, c, ldc, work)
+		return
+
+	case 0: // No-op for zero size matrix.
+		// Not reached in practice: n == 0 already returned above.
+		return
+
+	case 1: // Special code for 1×1 Householder matrix.
+		t0 := 1 - tau*v[0]*v[0]
+		for j := 0; j < m; j++ {
+			c[j*ldc] *= t0
+		}
+		return
+
+	case 2: // Special code for 2×2 Householder matrix.
+		v0 := v[0]
+		t0 := tau * v0
+		v1 := v[1]
+		t1 := tau * v1
+		for j := 0; j < m; j++ {
+			// cs is row j of C.
+			cs := c[j*ldc:]
+			sum := v0*cs[0] + v1*cs[1]
+			cs[0] -= sum * t0
+			cs[1] -= sum * t1
+		}
+		return
+
+	case 3: // Special code for 3×3 Householder matrix.
+		v0 := v[0]
+		t0 := tau * v0
+		v1 := v[1]
+		t1 := tau * v1
+		v2 := v[2]
+		t2 := tau * v2
+		for j := 0; j < m; j++ {
+			cs := c[j*ldc:]
+			sum := v0*cs[0] + v1*cs[1] + v2*cs[2]
+			cs[0] -= sum * t0
+			cs[1] -= sum * t1
+			cs[2] -= sum * t2
+		}
+		return
+
+	case 4: // Special code for 4×4 Householder matrix.
+		v0 := v[0]
+		t0 := tau * v0
+		v1 := v[1]
+		t1 := tau * v1
+		v2 := v[2]
+		t2 := tau * v2
+		v3 := v[3]
+		t3 := tau * v3
+		for j := 0; j < m; j++ {
+			cs := c[j*ldc:]
+			sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3]
+			cs[0] -= sum * t0
+			cs[1] -= sum * t1
+			cs[2] -= sum * t2
+			cs[3] -= sum * t3
+		}
+		return
+
+	case 5: // Special code for 5×5 Householder matrix.
+		v0 := v[0]
+		t0 := tau * v0
+		v1 := v[1]
+		t1 := tau * v1
+		v2 := v[2]
+		t2 := tau * v2
+		v3 := v[3]
+		t3 := tau * v3
+		v4 := v[4]
+		t4 := tau * v4
+		for j := 0; j < m; j++ {
+			cs := c[j*ldc:]
+			sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4]
+			cs[0] -= sum * t0
+			cs[1] -= sum * t1
+			cs[2] -= sum * t2
+			cs[3] -= sum * t3
+			cs[4] -= sum * t4
+		}
+		return
+
+	case 6: // Special code for 6×6 Householder matrix.
+		v0 := v[0]
+		t0 := tau * v0
+		v1 := v[1]
+		t1 := tau * v1
+		v2 := v[2]
+		t2 := tau * v2
+		v3 := v[3]
+		t3 := tau * v3
+		v4 := v[4]
+		t4 := tau * v4
+		v5 := v[5]
+		t5 := tau * v5
+		for j := 0; j < m; j++ {
+			cs := c[j*ldc:]
+			sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4] + v5*cs[5]
+			cs[0] -= sum * t0
+			cs[1] -= sum * t1
+			cs[2] -= sum * t2
+			cs[3] -= sum * t3
+			cs[4] -= sum * t4
+			cs[5] -= sum * t5
+		}
+		return
+
+	case 7: // Special code for 7×7 Householder matrix.
+		v0 := v[0]
+		t0 := tau * v0
+		v1 := v[1]
+		t1 := tau * v1
+		v2 := v[2]
+		t2 := tau * v2
+		v3 := v[3]
+		t3 := tau * v3
+		v4 := v[4]
+		t4 := tau * v4
+		v5 := v[5]
+		t5 := tau * v5
+		v6 := v[6]
+		t6 := tau * v6
+		for j := 0; j < m; j++ {
+			cs := c[j*ldc:]
+			sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4] +
+				v5*cs[5] + v6*cs[6]
+			cs[0] -= sum * t0
+			cs[1] -= sum * t1
+			cs[2] -= sum * t2
+			cs[3] -= sum * t3
+			cs[4] -= sum * t4
+			cs[5] -= sum * t5
+			cs[6] -= sum * t6
+		}
+		return
+
+	case 8: // Special code for 8×8 Householder matrix.
+		v0 := v[0]
+		t0 := tau * v0
+		v1 := v[1]
+		t1 := tau * v1
+		v2 := v[2]
+		t2 := tau * v2
+		v3 := v[3]
+		t3 := tau * v3
+		v4 := v[4]
+		t4 := tau * v4
+		v5 := v[5]
+		t5 := tau * v5
+		v6 := v[6]
+		t6 := tau * v6
+		v7 := v[7]
+		t7 := tau * v7
+		for j := 0; j < m; j++ {
+			cs := c[j*ldc:]
+			sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4] +
+				v5*cs[5] + v6*cs[6] + v7*cs[7]
+			cs[0] -= sum * t0
+			cs[1] -= sum * t1
+			cs[2] -= sum * t2
+			cs[3] -= sum * t3
+			cs[4] -= sum * t4
+			cs[5] -= sum * t5
+			cs[6] -= sum * t6
+			cs[7] -= sum * t7
+		}
+		return
+
+	case 9: // Special code for 9×9 Householder matrix.
+		v0 := v[0]
+		t0 := tau * v0
+		v1 := v[1]
+		t1 := tau * v1
+		v2 := v[2]
+		t2 := tau * v2
+		v3 := v[3]
+		t3 := tau * v3
+		v4 := v[4]
+		t4 := tau * v4
+		v5 := v[5]
+		t5 := tau * v5
+		v6 := v[6]
+		t6 := tau * v6
+		v7 := v[7]
+		t7 := tau * v7
+		v8 := v[8]
+		t8 := tau * v8
+		for j := 0; j < m; j++ {
+			cs := c[j*ldc:]
+			sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4] +
+				v5*cs[5] + v6*cs[6] + v7*cs[7] + v8*cs[8]
+			cs[0] -= sum * t0
+			cs[1] -= sum * t1
+			cs[2] -= sum * t2
+			cs[3] -= sum * t3
+			cs[4] -= sum * t4
+			cs[5] -= sum * t5
+			cs[6] -= sum * t6
+			cs[7] -= sum * t7
+			cs[8] -= sum * t8
+		}
+		return
+
+	case 10: // Special code for 10×10 Householder matrix.
+		v0 := v[0]
+		t0 := tau * v0
+		v1 := v[1]
+		t1 := tau * v1
+		v2 := v[2]
+		t2 := tau * v2
+		v3 := v[3]
+		t3 := tau * v3
+		v4 := v[4]
+		t4 := tau * v4
+		v5 := v[5]
+		t5 := tau * v5
+		v6 := v[6]
+		t6 := tau * v6
+		v7 := v[7]
+		t7 := tau * v7
+		v8 := v[8]
+		t8 := tau * v8
+		v9 := v[9]
+		t9 := tau * v9
+		for j := 0; j < m; j++ {
+			cs := c[j*ldc:]
+			sum := v0*cs[0] + v1*cs[1] + v2*cs[2] + v3*cs[3] + v4*cs[4] +
+				v5*cs[5] + v6*cs[6] + v7*cs[7] + v8*cs[8] + v9*cs[9]
+			cs[0] -= sum * t0
+			cs[1] -= sum * t1
+			cs[2] -= sum * t2
+			cs[3] -= sum * t3
+			cs[4] -= sum * t4
+			cs[5] -= sum * t5
+			cs[6] -= sum * t6
+			cs[7] -= sum * t7
+			cs[8] -= sum * t8
+			cs[9] -= sum * t9
+		}
+		return
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlartg.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlartg.go
@ -0,0 +1,80 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlartg generates a plane rotation so that
+//  [ cs sn] * [f] = [r]
+//  [-sn cs]   [g] = [0]
+// This is a more accurate version of BLAS drotg, with the other differences that
+// if g = 0, then cs = 1 and sn = 0, and if f = 0 and g != 0, then cs = 0 and sn = 1.
+// If abs(f) > abs(g), cs will be positive.
+//
+// Before r is formed, f and g are repeatedly multiplied by safmn2 (or by its
+// reciprocal) until the larger magnitude is back in range, and r is scaled
+// back afterwards. An infinite f or g can never be scaled into range, so the
+// down-scaling is abandoned after a fixed number of steps; r is then infinite
+// and cs or sn is NaN.
+//
+// Dlartg is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlartg(f, g float64) (cs, sn, r float64) {
+	// maxDownscale caps the number of down-scaling steps. Finite inputs need
+	// only a handful of steps, so the cap is only reached for infinite
+	// inputs, for which the loop would otherwise never terminate.
+	const maxDownscale = 20
+
+	safmn2 := math.Pow(dlamchB, math.Trunc(math.Log(dlamchS/dlamchE)/math.Log(dlamchB)/2))
+	safmx2 := 1 / safmn2
+	if g == 0 {
+		return 1, 0, f
+	}
+	if f == 0 {
+		return 0, 1, g
+	}
+	f1 := f
+	g1 := g
+	scale := math.Max(math.Abs(f1), math.Abs(g1))
+	if scale >= safmx2 {
+		// Inputs are large: scale down so that f1*f1 + g1*g1 cannot overflow.
+		var count int
+		for {
+			count++
+			f1 *= safmn2
+			g1 *= safmn2
+			scale = math.Max(math.Abs(f1), math.Abs(g1))
+			if scale < safmx2 || count >= maxDownscale {
+				break
+			}
+		}
+		r = math.Sqrt(f1*f1 + g1*g1)
+		cs = f1 / r
+		sn = g1 / r
+		// Undo the scaling on r; cs and sn are scale-invariant.
+		for i := 0; i < count; i++ {
+			r *= safmx2
+		}
+	} else if scale <= safmn2 {
+		// Inputs are tiny: scale up so that f1*f1 + g1*g1 cannot underflow.
+		// f and g are both non-zero here, so this loop always terminates.
+		var count int
+		for {
+			count++
+			f1 *= safmx2
+			g1 *= safmx2
+			scale = math.Max(math.Abs(f1), math.Abs(g1))
+			if scale >= safmn2 {
+				break
+			}
+		}
+		r = math.Sqrt(f1*f1 + g1*g1)
+		cs = f1 / r
+		sn = g1 / r
+		// Undo the scaling on r; cs and sn are scale-invariant.
+		for i := 0; i < count; i++ {
+			r *= safmn2
+		}
+	} else {
+		r = math.Sqrt(f1*f1 + g1*g1)
+		cs = f1 / r
+		sn = g1 / r
+	}
+	// Normalize the sign so that cs is positive whenever |f| > |g|.
+	if math.Abs(f) > math.Abs(g) && cs < 0 {
+		cs *= -1
+		sn *= -1
+		r *= -1
+	}
+	return cs, sn, r
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlas2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlas2.go
@ -0,0 +1,43 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlas2 returns the singular values of the 2×2 upper triangular matrix
+//  [F G]
+//  [0 H]
+// as (ssmin, ssmax), that is, the smaller one first and the larger one second.
+//
+// Dlas2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlas2(f, g, h float64) (ssmin, ssmax float64) {
+	absF, absG, absH := math.Abs(f), math.Abs(g), math.Abs(h)
+	lo := math.Min(absF, absH)
+	hi := math.Max(absF, absH)
+
+	switch {
+	case lo == 0 && hi == 0:
+		// Both diagonal entries vanish.
+		return 0, absG
+
+	case lo == 0:
+		// One diagonal entry vanishes: the matrix is singular and the
+		// larger singular value is sqrt(hi^2 + g^2), formed from the
+		// ratio of the smaller to the larger of the two magnitudes.
+		big := math.Max(hi, absG)
+		ratio := math.Min(hi, absG) / big
+		return 0, big * math.Sqrt(1+ratio*ratio)
+
+	case absG < hi:
+		// The off-diagonal entry is smaller than the largest diagonal entry.
+		sumRatio := 1 + lo/hi
+		diffRatio := (hi - lo) / hi
+		gRatio := absG / hi
+		gSq := gRatio * gRatio
+		c := 2 / (math.Sqrt(sumRatio*sumRatio+gSq) + math.Sqrt(diffRatio*diffRatio+gSq))
+		return lo * c, hi / c
+	}
+
+	// The off-diagonal entry is not smaller than the largest diagonal entry.
+	hiOverG := hi / absG
+	if hiOverG == 0 {
+		// hi/|g| evaluates to zero, so the general formula below would
+		// lose all information; use the limiting values instead.
+		return lo * hi / absG, absG
+	}
+	sumRatio := 1 + lo/hi
+	diffRatio := (hi - lo) / hi
+	p := sumRatio * hiOverG
+	q := diffRatio * hiOverG
+	c := 1 / (math.Sqrt(1+p*p) + math.Sqrt(1+q*q))
+	return 2 * (lo * c) * hiOverG, absG / (c + c)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlascl.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlascl.go
@ -0,0 +1,111 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlascl multiplies an m×n matrix by the scalar cto/cfrom.
+//
+// The multiplication is carried out in one or more passes so that the final
+// result cto*a[i,j]/cfrom is formed without intermediate overflow or
+// underflow where that can be avoided.
+//
+// kind selects which elements are scaled: all of them (lapack.General), the
+// upper triangle (lapack.UpperTri) or the lower triangle (lapack.LowerTri).
+// The kinds 'H', 'B', 'Q' and 'Z' are not implemented and cause a panic, as
+// does any other value. kl and ku are not used by the implemented kinds.
+//
+// cfrom must not be zero, and cto and cfrom must not be NaN, otherwise Dlascl
+// will panic. Dlascl also panics if m < 0, n < 0, lda < max(1,n), or if a is
+// shorter than (m-1)*lda+n when both m and n are positive.
+//
+// Dlascl is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlascl(kind lapack.MatrixType, kl, ku int, cfrom, cto float64, m, n int, a []float64, lda int) {
+	switch kind {
+	default:
+		panic(badMatrixType)
+	case 'H', 'B', 'Q', 'Z': // See dlascl.f.
+		panic("not implemented")
+	case lapack.General, lapack.UpperTri, lapack.LowerTri:
+		if lda < max(1, n) {
+			panic(badLdA)
+		}
+	}
+	switch {
+	case cfrom == 0:
+		panic(zeroCFrom)
+	case math.IsNaN(cfrom):
+		panic(nanCFrom)
+	case math.IsNaN(cto):
+		panic(nanCTo)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	}
+
+	if n == 0 || m == 0 {
+		return
+	}
+
+	// Only General, UpperTri and LowerTri reach this point.
+	if len(a) < (m-1)*lda+n {
+		panic(shortA)
+	}
+
+	smlnum := dlamchS
+	bignum := 1 / smlnum
+	cfromc := cfrom
+	ctoc := cto
+	for {
+		// cfrom1 must be recomputed on every pass because cfromc is
+		// updated below. With a stale value, the pass following
+		// cfromc = cfrom1 would see cfrom1 == cfromc and mistake a finite
+		// cfromc for an infinite one.
+		cfrom1 := cfromc * smlnum
+
+		var done bool
+		var mul float64
+		if cfrom1 == cfromc {
+			// cfromc is inf.
+			mul = ctoc / cfromc
+			done = true
+		} else {
+			ctol := ctoc / bignum
+			switch {
+			case ctol == ctoc:
+				// ctoc is either 0 or inf.
+				mul = ctoc
+				done = true
+				cfromc = 1
+			case math.Abs(cfrom1) > math.Abs(ctoc) && ctoc != 0:
+				// The ratio is too small for one step: scale by
+				// smlnum now and continue with the remainder.
+				mul = smlnum
+				cfromc = cfrom1
+			case math.Abs(ctol) > math.Abs(cfromc):
+				// The ratio is too large for one step: scale by
+				// bignum now and continue with the remainder.
+				mul = bignum
+				ctoc = ctol
+			default:
+				mul = ctoc / cfromc
+				done = true
+			}
+		}
+		switch kind {
+		case lapack.General:
+			for i := 0; i < m; i++ {
+				for j := 0; j < n; j++ {
+					a[i*lda+j] *= mul
+				}
+			}
+		case lapack.UpperTri:
+			for i := 0; i < m; i++ {
+				for j := i; j < n; j++ {
+					a[i*lda+j] *= mul
+				}
+			}
+		case lapack.LowerTri:
+			for i := 0; i < m; i++ {
+				for j := 0; j <= min(i, n-1); j++ {
+					a[i*lda+j] *= mul
+				}
+			}
+		}
+		if done {
+			break
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaset.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaset.go
@ -0,0 +1,57 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dlaset fills the m×n matrix A, stored row-major in a with stride lda, so
+// that its off-diagonal elements equal alpha and its diagonal elements equal
+// beta. With uplo == blas.Upper only the strictly upper triangular part
+// receives alpha, with uplo == blas.Lower only the strictly lower triangular
+// part does, and for any other uplo every off-diagonal element does. The
+// first min(m,n) diagonal elements are set to beta in all cases.
+//
+// Dlaset panics if m < 0, n < 0, lda < max(1,n), or if a is shorter than
+// (m-1)*lda+n when both m and n are positive.
+//
+// Dlaset is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlaset(uplo blas.Uplo, m, n int, alpha, beta float64, a []float64, lda int) {
+	if m < 0 {
+		panic(mLT0)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if lda < max(1, n) {
+		panic(badLdA)
+	}
+
+	nDiag := min(m, n)
+	if nDiag == 0 {
+		// Empty matrix, nothing to set.
+		return
+	}
+
+	if len(a) < (m-1)*lda+n {
+		panic(shortA)
+	}
+
+	for i := 0; i < m; i++ {
+		// Columns [first, last) of row i receive alpha.
+		first, last := 0, n
+		switch uplo {
+		case blas.Upper:
+			first = i + 1
+		case blas.Lower:
+			last = min(i+1, n)
+		}
+		rowStart := i * lda
+		for j := first; j < last; j++ {
+			a[rowStart+j] = alpha
+		}
+		// The diagonal entry, if row i has one, always ends up as beta.
+		if i < nDiag {
+			a[rowStart+i] = beta
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq1.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq1.go
@ -0,0 +1,100 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas/blas64"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlasq1 computes the singular values of an n×n bidiagonal matrix with diagonal
+// d and off-diagonal e. On exit, d contains the singular values in decreasing
+// order, and e is overwritten. d must have length at least n, e must have
+// length at least n-1, and the input work must have length at least 4*n. Dlasq1
+// will panic if these conditions are not met.
+//
+// The returned info is 0 on success. For n > 2 any non-zero value is the
+// status returned by Dlasq2. When it is 2 (iteration limit exceeded), the
+// partially processed data is moved from work back into d and e so that the
+// caller can try to finish; min(n, len(e)) elements of e are written.
+//
+// Dlasq1 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasq1(n int, d, e, work []float64) (info int) {
+	if n < 0 {
+		panic(nLT0)
+	}
+
+	if n == 0 {
+		return info
+	}
+
+	switch {
+	case len(d) < n:
+		panic(shortD)
+	case len(e) < n-1:
+		panic(shortE)
+	case len(work) < 4*n:
+		panic(shortWork)
+	}
+
+	if n == 1 {
+		d[0] = math.Abs(d[0])
+		return info
+	}
+
+	if n == 2 {
+		// Dlas2 returns (smaller, larger); d must be in decreasing order.
+		d[1], d[0] = impl.Dlas2(d[0], e[0], d[1])
+		return info
+	}
+
+	// Estimate the largest singular value.
+	var sigmx float64
+	for i := 0; i < n-1; i++ {
+		d[i] = math.Abs(d[i])
+		sigmx = math.Max(sigmx, math.Abs(e[i]))
+	}
+	d[n-1] = math.Abs(d[n-1])
+	// Early return if sigmx is zero (matrix is already diagonal).
+	if sigmx == 0 {
+		impl.Dlasrt(lapack.SortDecreasing, n, d)
+		return info
+	}
+
+	for i := 0; i < n; i++ {
+		sigmx = math.Max(sigmx, d[i])
+	}
+
+	// Copy D and E into WORK (in the Z format) and scale (squaring the
+	// input data makes scaling by a power of the radix pointless).
+
+	eps := dlamchP
+	safmin := dlamchS
+	scale := math.Sqrt(eps / safmin)
+	bi := blas64.Implementation()
+	bi.Dcopy(n, d, 1, work, 2)
+	bi.Dcopy(n-1, e, 1, work[1:], 2)
+	impl.Dlascl(lapack.General, 0, 0, sigmx, scale, 2*n-1, 1, work, 1)
+
+	// Compute the q's and e's.
+	for i := 0; i < 2*n-1; i++ {
+		work[i] *= work[i]
+	}
+	work[2*n-1] = 0
+
+	info = impl.Dlasq2(n, work)
+	if info == 0 {
+		for i := 0; i < n; i++ {
+			d[i] = math.Sqrt(work[i])
+		}
+		impl.Dlascl(lapack.General, 0, 0, scale, sigmx, n, 1, d, 1)
+	} else if info == 2 {
+		// Maximum number of iterations exceeded. Move data from work
+		// into D and E so the calling subroutine can try to finish.
+		for i := 0; i < n; i++ {
+			d[i] = math.Sqrt(work[2*i])
+		}
+		// e is only required to hold n-1 elements, so never touch more
+		// than it actually has. len(e) >= n-1 was checked above, hence
+		// ne is either n-1 or n.
+		ne := n
+		if len(e) < ne {
+			ne = len(e)
+		}
+		for i := 0; i < ne; i++ {
+			e[i] = math.Sqrt(work[2*i+1])
+		}
+		impl.Dlascl(lapack.General, 0, 0, scale, sigmx, n, 1, d, 1)
+		impl.Dlascl(lapack.General, 0, 0, scale, sigmx, ne, 1, e, 1)
+	}
+	return info
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq2.go
@ -0,0 +1,369 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlasq2 computes all the eigenvalues of the symmetric positive
+// definite tridiagonal matrix associated with the qd array Z. Eigenvalues
+// are computed to high relative accuracy avoiding denormalization, underflow
+// and overflow.
+//
+// To see the relation of Z to the tridiagonal matrix, let L be a
+// unit lower bidiagonal matrix with sub-diagonals Z(2,4,6,...) and
+// let U be an upper bidiagonal matrix with 1's above and diagonal
+// Z(1,3,5,...). The tridiagonal is L*U or, if you prefer, the
+// symmetric tridiagonal to which it is similar.
+//
+// info returns a status error. The return codes mean as follows:
+//  0: The algorithm completed successfully.
+//  1: A split was marked by a positive value in e.
+//  2: Current block of Z not diagonalized after 100*n iterations (in inner
+//     while loop). On exit Z holds a qd array with the same eigenvalues as
+//     the given Z.
+//  3: Termination criterion of outer while loop not met (program created more
+//     than N unreduced blocks).
+//
+// z must have length at least 4*n, and must not contain any negative elements.
+// Dlasq2 will panic otherwise.
+//
+// Dlasq2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasq2(n int, z []float64) (info int) {
+	if n < 0 {
+		panic(nLT0)
+	}
+
+	if n == 0 {
+		return info
+	}
+
+	if len(z) < 4*n {
+		panic(shortZ)
+	}
+
+	if n == 1 {
+		if z[0] < 0 {
+			panic(negZ)
+		}
+		return info
+	}
+
+	const cbias = 1.5
+
+	eps := dlamchP
+	safmin := dlamchS
+	tol := eps * 100
+	tol2 := tol * tol
+	if n == 2 {
+		// 2×2 case, handled directly.
+		if z[1] < 0 || z[2] < 0 {
+			panic(negZ)
+		} else if z[2] > z[0] {
+			z[0], z[2] = z[2], z[0]
+		}
+		z[4] = z[0] + z[1] + z[2]
+		if z[1] > z[2]*tol2 {
+			t := 0.5 * (z[0] - z[2] + z[1])
+			s := z[2] * (z[1] / t)
+			if s <= t {
+				s = z[2] * (z[1] / (t * (1 + math.Sqrt(1+s/t))))
+			} else {
+				s = z[2] * (z[1] / (t + math.Sqrt(t)*math.Sqrt(t+s)))
+			}
+			t = z[0] + s + z[1]
+			z[2] *= z[0] / t
+			z[0] = t
+		}
+		z[1] = z[2]
+		z[5] = z[1] + z[0]
+		return info
+	}
+	// Check for negative data and compute sums of q's and e's.
+	z[2*n-1] = 0
+	emin := z[1]
+	var d, e, qmax float64
+	var i1, n1 int
+	for k := 0; k < 2*(n-1); k += 2 {
+		if z[k] < 0 || z[k+1] < 0 {
+			panic(negZ)
+		}
+		d += z[k]
+		e += z[k+1]
+		qmax = math.Max(qmax, z[k])
+		emin = math.Min(emin, z[k+1])
+	}
+	if z[2*(n-1)] < 0 {
+		panic(negZ)
+	}
+	d += z[2*(n-1)]
+	// Check for diagonality.
+	if e == 0 {
+		for k := 1; k < n; k++ {
+			z[k] = z[2*k]
+		}
+		impl.Dlasrt(lapack.SortDecreasing, n, z)
+		z[2*(n-1)] = d
+		return info
+	}
+	trace := d + e
+	// Check for zero data.
+	if trace == 0 {
+		z[2*(n-1)] = 0
+		return info
+	}
+	// Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...).
+	// With zero-based indexing, q_k is then stored at z[4*k], qq_k at
+	// z[4*k+1], e_k at z[4*k+2] and ee_k at z[4*k+3].
+	for k := 2 * n; k >= 2; k -= 2 {
+		z[2*k-1] = 0
+		z[2*k-2] = z[k-1]
+		z[2*k-3] = 0
+		z[2*k-4] = z[k-2]
+	}
+	i0 := 0
+	n0 := n - 1
+
+	// Reverse the qd-array, if warranted.
+	// z[4*i0-3] --> z[4*(i0+1)-3-1] --> z[4*i0]
+	if cbias*z[4*i0] < z[4*n0] {
+		ipn4Out := 4 * (i0 + n0 + 2)
+		for i4loop := 4 * (i0 + 1); i4loop <= 2*(i0+n0+1); i4loop += 4 {
+			i4 := i4loop - 1
+			ipn4 := ipn4Out - 1
+			z[i4-3], z[ipn4-i4-4] = z[ipn4-i4-4], z[i4-3]
+			z[i4-1], z[ipn4-i4-6] = z[ipn4-i4-6], z[i4-1]
+		}
+	}
+
+	// Initial split checking via dqd and Li's test.
+	pp := 0
+	for k := 0; k < 2; k++ {
+		d = z[4*n0+pp]
+		for i4loop := 4*n0 + pp; i4loop >= 4*(i0+1)+pp; i4loop -= 4 {
+			i4 := i4loop - 1
+			if z[i4-1] <= tol2*d {
+				z[i4-1] = math.Copysign(0, -1)
+				d = z[i4-3]
+			} else {
+				d = z[i4-3] * (d / (d + z[i4-1]))
+			}
+		}
+		// dqd maps Z to ZZ plus Li's test.
+		emin = z[4*(i0+1)+pp]
+		d = z[4*i0+pp]
+		for i4loop := 4*(i0+1) + pp; i4loop <= 4*n0+pp; i4loop += 4 {
+			i4 := i4loop - 1
+			z[i4-2*pp-2] = d + z[i4-1]
+			if z[i4-1] <= tol2*d {
+				z[i4-1] = math.Copysign(0, -1)
+				z[i4-2*pp-2] = d
+				z[i4-2*pp] = 0
+				d = z[i4+1]
+			} else if safmin*z[i4+1] < z[i4-2*pp-2] && safmin*z[i4-2*pp-2] < z[i4+1] {
+				tmp := z[i4+1] / z[i4-2*pp-2]
+				z[i4-2*pp] = z[i4-1] * tmp
+				d *= tmp
+			} else {
+				z[i4-2*pp] = z[i4+1] * (z[i4-1] / z[i4-2*pp-2])
+				d = z[i4+1] * (d / z[i4-2*pp-2])
+			}
+			emin = math.Min(emin, z[i4-2*pp])
+		}
+		z[4*(n0+1)-pp-3] = d
+
+		// Now find qmax.
+		qmax = z[4*(i0+1)-pp-3]
+		for i4loop := 4*(i0+1) - pp + 2; i4loop <= 4*(n0+1)+pp-2; i4loop += 4 {
+			i4 := i4loop - 1
+			qmax = math.Max(qmax, z[i4])
+		}
+		// Prepare for the next iteration on K.
+		pp = 1 - pp
+	}
+
+	// Initialise variables to pass to DLASQ3.
+	var ttype int
+	var dmin1, dmin2, dn, dn1, dn2, g, tau float64
+	var tempq float64
+	iter := 2
+	var nFail int
+	nDiv := 2 * (n0 - i0)
+	var i4 int
+outer:
+	for iwhila := 1; iwhila <= n+1; iwhila++ {
+		// Test for completion.
+		if n0 < 0 {
+			// Move q's to the front.
+			for k := 1; k < n; k++ {
+				z[k] = z[4*k]
+			}
+			// Sort and compute sum of eigenvalues.
+			impl.Dlasrt(lapack.SortDecreasing, n, z)
+			e = 0
+			for k := n - 1; k >= 0; k-- {
+				e += z[k]
+			}
+			// Store trace, sum(eigenvalues) and information on performance.
+			z[2*n] = trace
+			z[2*n+1] = e
+			z[2*n+2] = float64(iter)
+			z[2*n+3] = float64(nDiv) / float64(n*n)
+			z[2*n+4] = 100 * float64(nFail) / float64(iter)
+			return info
+		}
+
+		// While array unfinished do
+		// e[n0] holds the value of sigma when submatrix in i0:n0
+		// splits from the rest of the array, but is negated.
+		var desig float64
+		var sigma float64
+		if n0 != n-1 {
+			sigma = -z[4*(n0+1)-2]
+		}
+		if sigma < 0 {
+			info = 1
+			return info
+		}
+		// Find last unreduced submatrix's top index i0, find qmax and
+		// emin. Find Gershgorin-type bound if Q's much greater than E's.
+		var emax float64
+		if n0 > i0 {
+			emin = math.Abs(z[4*(n0+1)-6])
+		} else {
+			emin = 0
+		}
+		qmin := z[4*(n0+1)-4]
+		qmax = qmin
+		zSmall := false
+		for i4loop := 4 * (n0 + 1); i4loop >= 8; i4loop -= 4 {
+			i4 = i4loop - 1
+			if z[i4-5] <= 0 {
+				zSmall = true
+				break
+			}
+			if qmin >= 4*emax {
+				qmin = math.Min(qmin, z[i4-3])
+				emax = math.Max(emax, z[i4-5])
+			}
+			qmax = math.Max(qmax, z[i4-7]+z[i4-5])
+			emin = math.Min(emin, z[i4-5])
+		}
+		if !zSmall {
+			// No split found: the block starts at the top of the array.
+			i4 = 3
+		}
+		i0 = (i4+1)/4 - 1
+		pp = 0
+		if n0-i0 > 1 {
+			dee := z[4*i0]
+			deemin := dee
+			kmin := i0
+			for i4loop := 4*(i0+1) + 1; i4loop <= 4*(n0+1)-3; i4loop += 4 {
+				i4 := i4loop - 1
+				dee = z[i4] * (dee / (dee + z[i4-2]))
+				if dee <= deemin {
+					deemin = dee
+					kmin = (i4+4)/4 - 1
+				}
+			}
+			if (kmin-i0)*2 < n0-kmin && deemin <= 0.5*z[4*n0] {
+				ipn4Out := 4 * (i0 + n0 + 2)
+				pp = 2
+				for i4loop := 4 * (i0 + 1); i4loop <= 2*(i0+n0+1); i4loop += 4 {
+					i4 := i4loop - 1
+					ipn4 := ipn4Out - 1
+					z[i4-3], z[ipn4-i4-4] = z[ipn4-i4-4], z[i4-3]
+					z[i4-2], z[ipn4-i4-3] = z[ipn4-i4-3], z[i4-2]
+					z[i4-1], z[ipn4-i4-6] = z[ipn4-i4-6], z[i4-1]
+					z[i4], z[ipn4-i4-5] = z[ipn4-i4-5], z[i4]
+				}
+			}
+		}
+		// Put -(initial shift) into DMIN.
+		dmin := -math.Max(0, qmin-2*math.Sqrt(qmin)*math.Sqrt(emax))
+
+		// Now i0:n0 is unreduced.
+		// PP = 0 for ping, PP = 1 for pong.
+		// PP = 2 indicates that flipping was applied to the Z array
+		// 		and that the tests for deflation upon entry in Dlasq3
+		// 		should not be performed.
+		nbig := 100 * (n0 - i0 + 1)
+		for iwhilb := 0; iwhilb < nbig; iwhilb++ {
+			if i0 > n0 {
+				continue outer
+			}
+
+			// While submatrix unfinished take a good dqds step.
+			i0, n0, pp, dmin, sigma, desig, qmax, nFail, iter, nDiv, ttype, dmin1, dmin2, dn, dn1, dn2, g, tau =
+				impl.Dlasq3(i0, n0, z, pp, dmin, sigma, desig, qmax, nFail, iter, nDiv, ttype, dmin1, dmin2, dn, dn1, dn2, g, tau)
+
+			pp = 1 - pp
+			// When emin is very small check for splits.
+			if pp == 0 && n0-i0 >= 3 {
+				if z[4*(n0+1)-1] <= tol2*qmax || z[4*(n0+1)-2] <= tol2*sigma {
+					splt := i0 - 1
+					qmax = z[4*i0]
+					emin = z[4*(i0+1)-2]
+					oldemn := z[4*(i0+1)-1]
+					for i4loop := 4 * (i0 + 1); i4loop <= 4*(n0-2); i4loop += 4 {
+						i4 := i4loop - 1
+						if z[i4] <= tol2*z[i4-3] || z[i4-1] <= tol2*sigma {
+							z[i4-1] = -sigma
+							splt = i4 / 4
+							qmax = 0
+							emin = z[i4+3]
+							oldemn = z[i4+4]
+						} else {
+							qmax = math.Max(qmax, z[i4+1])
+							emin = math.Min(emin, z[i4-1])
+							oldemn = math.Min(oldemn, z[i4])
+						}
+					}
+					z[4*(n0+1)-2] = emin
+					z[4*(n0+1)-1] = oldemn
+					i0 = splt + 1
+				}
+			}
+		}
+		// Maximum number of iterations exceeded, restore the shift
+		// sigma and place the new d's and e's in a qd array.
+		// This might need to be done for several blocks.
+		info = 2
+		i1 = i0
+		for {
+			tempq = z[4*i0]
+			z[4*i0] += sigma
+			for k := i0 + 1; k <= n0; k++ {
+				tempe := z[4*(k+1)-6]
+				z[4*(k+1)-6] *= tempq / z[4*(k+1)-8]
+				tempq = z[4*k]
+				z[4*k] += sigma + tempe - z[4*(k+1)-6]
+			}
+			// Prepare to do this on the previous block if there is one.
+			if i1 <= 0 {
+				break
+			}
+			n1 = i1 - 1
+			for i1 >= 1 && z[4*(i1+1)-6] >= 0 {
+				i1 -= 1
+			}
+			sigma = -z[4*(n1+1)-2]
+		}
+		// Compact the data back into the interleaved layout
+		// (q_0, e_0, q_1, e_1, ...), so that q_k ends up in z[2*k] and
+		// e_k in z[2*k+1]. In the rearranged array q_k lives at z[4*k]
+		// and e_k at z[4*k+2] (the slot updated by the restore loop
+		// above). The reads never trail the writes, so this is safe to
+		// do in place.
+		for k := 0; k < n; k++ {
+			z[2*k] = z[4*k]
+			// Only the block 1..N0 is unfinished.  The rest of the e's
+			// must be essentially zero, although sometimes other data
+			// has been stored in them.
+			if k < n0 {
+				z[2*k+1] = z[4*k+2]
+			} else {
+				z[2*k+1] = 0
+			}
+		}
+		return info
+	}
+	info = 3
+	return info
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq3.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq3.go
@ -0,0 +1,172 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlasq3 checks for deflation, computes a shift (tau) and calls dqds.
+// In case of failure it changes shifts, and tries again until output
+// is positive. It panics if i0 or n0 is negative, len(z) < 4*n0, or pp is not 0, 1 or 2.
+//
+// Dlasq3 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasq3(i0, n0 int, z []float64, pp int, dmin, sigma, desig, qmax float64, nFail, iter, nDiv int, ttype int, dmin1, dmin2, dn, dn1, dn2, g, tau float64) (
+	i0Out, n0Out, ppOut int, dminOut, sigmaOut, desigOut, qmaxOut float64, nFailOut, iterOut, nDivOut, ttypeOut int, dmin1Out, dmin2Out, dnOut, dn1Out, dn2Out, gOut, tauOut float64) {
+	switch {
+	case i0 < 0:
+		panic(i0LT0)
+	case n0 < 0:
+		panic(n0LT0)
+	case len(z) < 4*n0:
+		panic(shortZ)
+	case pp != 0 && pp != 1 && pp != 2:
+		panic(badPp)
+	}
+
+	const cbias = 1.5
+
+	n0in := n0 // n0 on entry; compared with the current n0 below and passed to Dlasq4
+	eps := dlamchP
+	tol := eps * 100
+	tol2 := tol * tol
+	var nn int
+	var t float64
+	for {
+		if n0 < i0 {
+			return i0, n0, pp, dmin, sigma, desig, qmax, nFail, iter, nDiv, ttype, dmin1, dmin2, dn, dn1, dn2, g, tau
+		}
+		if n0 == i0 {
+			z[4*(n0+1)-4] = z[4*(n0+1)+pp-4] + sigma
+			n0--
+			continue
+		}
+		nn = 4*(n0+1) + pp - 1
+		if n0 != i0+1 {
+			// Check whether e[n0-1] is negligible, 1 eigenvalue.
+			if z[nn-5] > tol2*(sigma+z[nn-3]) && z[nn-2*pp-4] > tol2*z[nn-7] {
+				// Check whether e[n0-2] is negligible, 2 eigenvalues.
+				if z[nn-9] > tol2*sigma && z[nn-2*pp-8] > tol2*z[nn-11] {
+					break
+				}
+			} else {
+				z[4*(n0+1)-4] = z[4*(n0+1)+pp-4] + sigma
+				n0--
+				continue
+			}
+		}
+		if z[nn-3] > z[nn-7] {
+			z[nn-3], z[nn-7] = z[nn-7], z[nn-3]
+		}
+		t = 0.5 * (z[nn-7] - z[nn-3] + z[nn-5])
+		if z[nn-5] > z[nn-3]*tol2 && t != 0 {
+			s := z[nn-3] * (z[nn-5] / t)
+			if s <= t {
+				s = z[nn-3] * (z[nn-5] / (t * (1 + math.Sqrt(1+s/t))))
+			} else {
+				s = z[nn-3] * (z[nn-5] / (t + math.Sqrt(t)*math.Sqrt(t+s)))
+			}
+			t = z[nn-7] + (s + z[nn-5])
+			z[nn-3] *= z[nn-7] / t
+			z[nn-7] = t
+		}
+		z[4*(n0+1)-8] = z[nn-7] + sigma
+		z[4*(n0+1)-4] = z[nn-3] + sigma
+		n0 -= 2 // two eigenvalues were stored above
+	}
+	if pp == 2 { // pp == 2 is only an entry flag; from here on pp is 0 or 1
+		pp = 0
+	}
+
+	// Reverse the qd-array, if warranted.
+	if dmin <= 0 || n0 < n0in {
+		if cbias*z[4*(i0+1)+pp-4] < z[4*(n0+1)+pp-4] {
+			ipn4Out := 4 * (i0 + n0 + 2)
+			for j4loop := 4 * (i0 + 1); j4loop <= 2*((i0+1)+(n0+1)-1); j4loop += 4 {
+				ipn4 := ipn4Out - 1
+				j4 := j4loop - 1
+
+				z[j4-3], z[ipn4-j4-4] = z[ipn4-j4-4], z[j4-3]
+				z[j4-2], z[ipn4-j4-3] = z[ipn4-j4-3], z[j4-2]
+				z[j4-1], z[ipn4-j4-6] = z[ipn4-j4-6], z[j4-1]
+				z[j4], z[ipn4-j4-5] = z[ipn4-j4-5], z[j4]
+			}
+			if n0-i0 <= 4 {
+				z[4*(n0+1)+pp-2] = z[4*(i0+1)+pp-2]
+				z[4*(n0+1)-pp-1] = z[4*(i0+1)-pp-1]
+			}
+			dmin2 = math.Min(dmin2, z[4*(i0+1)-pp-2])
+			z[4*(n0+1)+pp-2] = math.Min(math.Min(z[4*(n0+1)+pp-2], z[4*(i0+1)+pp-2]), z[4*(i0+1)+pp+2])
+			z[4*(n0+1)-pp-1] = math.Min(math.Min(z[4*(n0+1)-pp-1], z[4*(i0+1)-pp-1]), z[4*(i0+1)-pp+3])
+			qmax = math.Max(math.Max(qmax, z[4*(i0+1)+pp-4]), z[4*(i0+1)+pp])
+			dmin = math.Copysign(0, -1) // Fortran code has -zero, but the literal -0 in Go is +0, hence Copysign.
+		}
+	}
+
+	// Choose a shift.
+	tau, ttype, g = impl.Dlasq4(i0, n0, z, pp, n0in, dmin, dmin1, dmin2, dn, dn1, dn2, tau, ttype, g)
+
+	// Call dqds until dmin > 0.
+loop:
+	for {
+		i0, n0, pp, tau, sigma, dmin, dmin1, dmin2, dn, dn1, dn2 = impl.Dlasq5(i0, n0, z, pp, tau, sigma)
+
+		nDiv += n0 - i0 + 2
+		iter++
+		switch {
+		case dmin >= 0 && dmin1 >= 0:
+			// Success.
+			goto done
+
+		case dmin < 0 && dmin1 > 0 && z[4*n0-pp-1] < tol*(sigma+dn1) && math.Abs(dn) < tol*sigma:
+			// Convergence hidden by negative dn.
+			z[4*n0-pp+1] = 0
+			dmin = 0
+			goto done
+
+		case dmin < 0:
+			// Tau too big. Select new Tau and try again.
+			nFail++
+			if ttype < -22 {
+				// Failed twice. Play it safe.
+				tau = 0
+			} else if dmin1 > 0 {
+				// Late failure. Gives excellent shift.
+				tau = (tau + dmin) * (1 - 2*eps)
+				ttype -= 11
+			} else {
+				// Early failure. Divide by 4.
+				tau = tau / 4
+				ttype -= 12
+			}
+
+		case math.IsNaN(dmin):
+			// NaN result: retry once with a zero shift, then fall back to Dlasq6.
+			if tau == 0 {
+				break loop
+			}
+			tau = 0
+
+		default:
+			// Possible underflow. Play it safe.
+			break loop
+		}
+	}
+
+	// Risk of underflow.
+	dmin, dmin1, dmin2, dn, dn1, dn2 = impl.Dlasq6(i0, n0, z, pp)
+	nDiv += n0 - i0 + 2
+	iter++
+	tau = 0 // no shift is passed to Dlasq6 above
+
+done:
+	if tau < sigma {
+		desig += tau
+		t = sigma + desig
+		desig -= t - sigma
+	} else {
+		t = sigma + tau
+		desig += sigma - (t - tau)
+	}
+	sigma = t
+	return i0, n0, pp, dmin, sigma, desig, qmax, nFail, iter, nDiv, ttype, dmin1, dmin2, dn, dn1, dn2, g, tau
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq4.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq4.go
@ -0,0 +1,249 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlasq4 computes an approximation to the smallest eigenvalue using values of d
+// from the previous transform. It returns the shift tau, the shift type ttype, and the updated g.
+// i0, n0, and n0in are zero-indexed. It panics if i0 or n0 is negative, len(z) < 4*n0, or pp is not 0 or 1.
+//
+// Dlasq4 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasq4(i0, n0 int, z []float64, pp int, n0in int, dmin, dmin1, dmin2, dn, dn1, dn2, tau float64, ttype int, g float64) (tauOut float64, ttypeOut int, gOut float64) {
+	switch {
+	case i0 < 0:
+		panic(i0LT0)
+	case n0 < 0:
+		panic(n0LT0)
+	case len(z) < 4*n0:
+		panic(shortZ)
+	case pp != 0 && pp != 1:
+		panic(badPp)
+	}
+
+	const (
+		cnst1 = 0.563
+		cnst2 = 1.01
+		cnst3 = 1.05
+
+		cnstthird = 0.333 // TODO(btracey): Fix?
+	)
+	// A negative dmin forces the shift to take that absolute value
+	// ttype records the type of shift.
+	if dmin <= 0 {
+		tau = -dmin
+		ttype = -1
+		return tau, ttype, g
+	}
+	nn := 4*(n0+1) + pp - 1 // -1 for zero indexing
+	s := math.NaN()         // Poison s so that failure to take a path below is obvious
+	if n0in == n0 {
+		// No eigenvalues deflated.
+		if dmin == dn || dmin == dn1 {
+			b1 := math.Sqrt(z[nn-3]) * math.Sqrt(z[nn-5])
+			b2 := math.Sqrt(z[nn-7]) * math.Sqrt(z[nn-9])
+			a2 := z[nn-7] + z[nn-5]
+			if dmin == dn && dmin1 == dn1 {
+				// Cases 2 and 3.
+				gap2 := dmin2 - a2 - dmin2/4
+				var gap1 float64
+				if gap2 > 0 && gap2 > b2 {
+					gap1 = a2 - dn - (b2/gap2)*b2
+				} else {
+					gap1 = a2 - dn - (b1 + b2)
+				}
+				if gap1 > 0 && gap1 > b1 {
+					s = math.Max(dn-(b1/gap1)*b1, 0.5*dmin)
+					ttype = -2
+				} else {
+					s = 0
+					if dn > b1 {
+						s = dn - b1
+					}
+					if a2 > b1+b2 {
+						s = math.Min(s, a2-(b1+b2))
+					}
+					s = math.Max(s, cnstthird*dmin)
+					ttype = -3
+				}
+			} else {
+				// Case 4.
+				ttype = -4
+				s = dmin / 4
+				var gam float64
+				var np int
+				if dmin == dn {
+					gam = dn
+					a2 = 0
+					if z[nn-5] > z[nn-7] {
+						return tau, ttype, g // early exit: the input tau is returned unchanged and s is discarded
+					}
+					b2 = z[nn-5] / z[nn-7]
+					np = nn - 9
+				} else {
+					np = nn - 2*pp
+					gam = dn1
+					if z[np-4] > z[np-2] {
+						return tau, ttype, g
+					}
+					a2 = z[np-4] / z[np-2]
+					if z[nn-9] > z[nn-11] {
+						return tau, ttype, g
+					}
+					b2 = z[nn-9] / z[nn-11]
+					np = nn - 13
+				}
+				// Approximate contribution to norm squared from i < nn-1.
+				a2 += b2
+				for i4loop := np + 1; i4loop >= 4*(i0+1)-1+pp; i4loop -= 4 {
+					i4 := i4loop - 1
+					if b2 == 0 {
+						break
+					}
+					b1 = b2
+					if z[i4] > z[i4-2] {
+						return tau, ttype, g
+					}
+					b2 *= z[i4] / z[i4-2]
+					a2 += b2
+					if 100*math.Max(b2, b1) < a2 || cnst1 < a2 {
+						break
+					}
+				}
+				a2 *= cnst3
+				// Rayleigh quotient residual bound.
+				if a2 < cnst1 {
+					s = gam * (1 - math.Sqrt(a2)) / (1 + a2)
+				}
+			}
+		} else if dmin == dn2 {
+			// Case 5.
+			ttype = -5
+			s = dmin / 4
+			// Compute contribution to norm squared from i > nn-2.
+			np := nn - 2*pp
+			b1 := z[np-2]
+			b2 := z[np-6]
+			gam := dn2
+			if z[np-8] > b2 || z[np-4] > b1 {
+				return tau, ttype, g
+			}
+			a2 := (z[np-8] / b2) * (1 + z[np-4]/b1)
+			// Approximate contribution to norm squared from i < nn-2.
+			if n0-i0 > 2 {
+				b2 = z[nn-13] / z[nn-15]
+				a2 += b2
+				for i4loop := (nn + 1) - 17; i4loop >= 4*(i0+1)-1+pp; i4loop -= 4 {
+					i4 := i4loop - 1
+					if b2 == 0 {
+						break
+					}
+					b1 = b2
+					if z[i4] > z[i4-2] {
+						return tau, ttype, g
+					}
+					b2 *= z[i4] / z[i4-2]
+					a2 += b2
+					if 100*math.Max(b2, b1) < a2 || cnst1 < a2 {
+						break
+					}
+				}
+				a2 *= cnst3
+			}
+			if a2 < cnst1 {
+				s = gam * (1 - math.Sqrt(a2)) / (1 + a2)
+			}
+		} else {
+			// Case 6, no information to guide us.
+			if ttype == -6 {
+				g += cnstthird * (1 - g)
+			} else if ttype == -18 {
+				g = cnstthird / 4
+			} else {
+				g = 1.0 / 4
+			}
+			s = g * dmin
+			ttype = -6
+		}
+	} else if n0in == (n0 + 1) {
+		// One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN.
+		if dmin1 == dn1 && dmin2 == dn2 {
+			// Cases 7 and 8.
+			ttype = -7
+			s = cnstthird * dmin1
+			if z[nn-5] > z[nn-7] {
+				return tau, ttype, g
+			}
+			b1 := z[nn-5] / z[nn-7]
+			b2 := b1
+			if b2 != 0 {
+				for i4loop := 4*(n0+1) - 9 + pp; i4loop >= 4*(i0+1)-1+pp; i4loop -= 4 {
+					i4 := i4loop - 1
+					a2 := b1
+					if z[i4] > z[i4-2] {
+						return tau, ttype, g
+					}
+					b1 *= z[i4] / z[i4-2]
+					b2 += b1
+					if 100*math.Max(b1, a2) < b2 {
+						break
+					}
+				}
+			}
+			b2 = math.Sqrt(cnst3 * b2)
+			a2 := dmin1 / (1 + b2*b2)
+			gap2 := 0.5*dmin2 - a2
+			if gap2 > 0 && gap2 > b2*a2 {
+				s = math.Max(s, a2*(1-cnst2*a2*(b2/gap2)*b2))
+			} else {
+				s = math.Max(s, a2*(1-cnst2*b2))
+				ttype = -8
+			}
+		} else {
+			// Case 9.
+			s = dmin1 / 4
+			if dmin1 == dn1 {
+				s = 0.5 * dmin1
+			}
+			ttype = -9
+		}
+	} else if n0in == (n0 + 2) {
+		// Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN.
+		if dmin2 == dn2 && 2*z[nn-5] < z[nn-7] {
+			// Case 10.
+			ttype = -10
+			s = cnstthird * dmin2
+			if z[nn-5] > z[nn-7] {
+				return tau, ttype, g
+			}
+			b1 := z[nn-5] / z[nn-7]
+			b2 := b1
+			if b2 != 0 {
+				for i4loop := 4*(n0+1) - 9 + pp; i4loop >= 4*(i0+1)-1+pp; i4loop -= 4 {
+					i4 := i4loop - 1
+					if z[i4] > z[i4-2] {
+						return tau, ttype, g
+					}
+					b1 *= z[i4] / z[i4-2]
+					b2 += b1
+					if 100*b1 < b2 {
+						break
+					}
+				}
+			}
+			b2 = math.Sqrt(cnst3 * b2)
+			a2 := dmin2 / (1 + b2*b2)
+			gap2 := z[nn-7] + z[nn-9] - math.Sqrt(z[nn-11])*math.Sqrt(z[nn-9]) - a2
+			if gap2 > 0 && gap2 > b2*a2 {
+				s = math.Max(s, a2*(1-cnst2*a2*(b2/gap2)*b2))
+			} else {
+				s = math.Max(s, a2*(1-cnst2*b2))
+			}
+		} else {
+			// Case 11.
+			s = dmin2 / 4
+			ttype = -11
+		}
+	} else if n0in > n0+2 {
+		// Case 12, more than two eigenvalues deflated. No information.
+		s = 0
+		ttype = -12
+	}
+	tau = s // s is still the NaN poison if none of the branches above assigned it
+	return tau, ttype, g
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq5.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq5.go
@ -0,0 +1,140 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlasq5 computes one dqds transform in ping-pong form.
+// i0 and n0 are zero-indexed.
+//
+// Dlasq5 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasq5(i0, n0 int, z []float64, pp int, tau, sigma float64) (i0Out, n0Out, ppOut int, tauOut, sigmaOut, dmin, dmin1, dmin2, dn, dnm1, dnm2 float64) {
+	// The lapack function has inputs for ieee and eps, but Go requires ieee so
+	// these are unnecessary.
+
+	switch {
+	case i0 < 0:
+		panic(i0LT0)
+	case n0 < 0:
+		panic(n0LT0)
+	case len(z) < 4*n0:
+		panic(shortZ)
+	case pp != 0 && pp != 1:
+		panic(badPp)
+	}
+
+	if n0-i0-1 <= 0 {
+		return i0, n0, pp, tau, sigma, dmin, dmin1, dmin2, dn, dnm1, dnm2
+	}
+
+	eps := dlamchP
+	dthresh := eps * (sigma + tau)
+	if tau < dthresh*0.5 {
+		tau = 0
+	}
+	var j4 int
+	var emin float64
+	if tau != 0 {
+		j4 = 4*i0 + pp
+		emin = z[j4+4]
+		d := z[j4] - tau
+		dmin = d
+		// In the reference there are code paths that actually return this value.
+		// dmin1 = -z[j4]
+		if pp == 0 {
+			for j4loop := 4 * (i0 + 1); j4loop <= 4*((n0+1)-3); j4loop += 4 {
+				j4 := j4loop - 1
+				z[j4-2] = d + z[j4-1]
+				tmp := z[j4+1] / z[j4-2]
+				d = d*tmp - tau
+				dmin = math.Min(dmin, d)
+				z[j4] = z[j4-1] * tmp
+				emin = math.Min(z[j4], emin)
+			}
+		} else {
+			for j4loop := 4 * (i0 + 1); j4loop <= 4*((n0+1)-3); j4loop += 4 {
+				j4 := j4loop - 1
+				z[j4-3] = d + z[j4]
+				tmp := z[j4+2] / z[j4-3]
+				d = d*tmp - tau
+				dmin = math.Min(dmin, d)
+				z[j4-1] = z[j4] * tmp
+				emin = math.Min(z[j4-1], emin)
+			}
+		}
+		// Unroll the last two steps.
+		dnm2 = d
+		dmin2 = dmin
+		j4 = 4*((n0+1)-2) - pp - 1
+		j4p2 := j4 + 2*pp - 1
+		z[j4-2] = dnm2 + z[j4p2]
+		z[j4] = z[j4p2+2] * (z[j4p2] / z[j4-2])
+		dnm1 = z[j4p2+2]*(dnm2/z[j4-2]) - tau
+		dmin = math.Min(dmin, dnm1)
+
+		dmin1 = dmin
+		j4 += 4
+		j4p2 = j4 + 2*pp - 1
+		z[j4-2] = dnm1 + z[j4p2]
+		z[j4] = z[j4p2+2] * (z[j4p2] / z[j4-2])
+		dn = z[j4p2+2]*(dnm1/z[j4-2]) - tau
+		dmin = math.Min(dmin, dn)
+	} else {
+		// This is the version that sets d's to zero if they are small enough.
+		j4 = 4*(i0+1) + pp - 4
+		emin = z[j4+4]
+		d := z[j4] - tau
+		dmin = d
+		// In the reference there are code paths that actually return this value.
+		// dmin1 = -z[j4]
+		if pp == 0 {
+			for j4loop := 4 * (i0 + 1); j4loop <= 4*((n0+1)-3); j4loop += 4 {
+				j4 := j4loop - 1
+				z[j4-2] = d + z[j4-1]
+				tmp := z[j4+1] / z[j4-2]
+				d = d*tmp - tau
+				if d < dthresh {
+					d = 0
+				}
+				dmin = math.Min(dmin, d)
+				z[j4] = z[j4-1] * tmp
+				emin = math.Min(z[j4], emin)
+			}
+		} else {
+			for j4loop := 4 * (i0 + 1); j4loop <= 4*((n0+1)-3); j4loop += 4 {
+				j4 := j4loop - 1
+				z[j4-3] = d + z[j4]
+				tmp := z[j4+2] / z[j4-3]
+				d = d*tmp - tau
+				if d < dthresh {
+					d = 0
+				}
+				dmin = math.Min(dmin, d)
+				z[j4-1] = z[j4] * tmp
+				emin = math.Min(z[j4-1], emin)
+			}
+		}
+		// Unroll the last two steps.
+		dnm2 = d
+		dmin2 = dmin
+		j4 = 4*((n0+1)-2) - pp - 1
+		j4p2 := j4 + 2*pp - 1
+		z[j4-2] = dnm2 + z[j4p2]
+		z[j4] = z[j4p2+2] * (z[j4p2] / z[j4-2])
+		dnm1 = z[j4p2+2]*(dnm2/z[j4-2]) - tau
+		dmin = math.Min(dmin, dnm1)
+
+		dmin1 = dmin
+		j4 += 4
+		j4p2 = j4 + 2*pp - 1
+		z[j4-2] = dnm1 + z[j4p2]
+		z[j4] = z[j4p2+2] * (z[j4p2] / z[j4-2])
+		dn = z[j4p2+2]*(dnm1/z[j4-2]) - tau
+		dmin = math.Min(dmin, dn)
+	}
+	z[j4+2] = dn
+	z[4*(n0+1)-pp-1] = emin
+	return i0, n0, pp, tau, sigma, dmin, dmin1, dmin2, dn, dnm1, dnm2
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq6.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasq6.go
@ -0,0 +1,118 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlasq6 performs a single dqd (zero shift) transform on the qd array z in
+// ping-pong form, guarding the divisions against overflow and underflow.
+// i0 is the zero-based first index and n0 is the zero-based last index.
+// pp must be 0 or 1 and selects which half of the ping-pong storage is read.
+//
+// The routine writes to z at indices up to 4*(n0+1)-pp-1. If n0-i0-1 <= 0 it
+// returns immediately with all results equal to zero and z untouched.
+//
+// Dlasq6 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasq6(i0, n0 int, z []float64, pp int) (dmin, dmin1, dmin2, dn, dnm1, dnm2 float64) {
+	if i0 < 0 {
+		panic(i0LT0)
+	}
+	if n0 < 0 {
+		panic(n0LT0)
+	}
+	if len(z) < 4*n0 {
+		panic(shortZ)
+	}
+	if pp != 0 && pp != 1 {
+		panic(badPp)
+	}
+
+	if n0-i0-1 <= 0 {
+		return dmin, dmin1, dmin2, dn, dnm1, dnm2
+	}
+
+	safmin := dlamchS
+	k := 4*(i0+1) + pp - 4 // -4 rather than -3 for zero indexing
+	emin := z[k+4]
+	d := z[k]
+	dmin = d
+
+	// k runs over the zero-based positions visited by the main loop.
+	last := 4 * ((n0 + 1) - 3)
+	if pp == 0 {
+		for k := 4*(i0+1) - 1; k < last; k += 4 {
+			z[k-2] = d + z[k-1]
+			switch {
+			case z[k-2] == 0:
+				z[k] = 0
+				d = z[k+1]
+				dmin = d
+				emin = 0
+			case safmin*z[k+1] < z[k-2] && safmin*z[k-2] < z[k+1]:
+				// The quotient can be formed directly.
+				ratio := z[k+1] / z[k-2]
+				z[k] = z[k-1] * ratio
+				d *= ratio
+			default:
+				z[k] = z[k+1] * (z[k-1] / z[k-2])
+				d = z[k+1] * (d / z[k-2])
+			}
+			dmin = math.Min(dmin, d)
+			emin = math.Min(emin, z[k])
+		}
+	} else {
+		for k := 4*(i0+1) - 1; k < last; k += 4 {
+			z[k-3] = d + z[k]
+			switch {
+			case z[k-3] == 0:
+				z[k-1] = 0
+				d = z[k+2]
+				dmin = d
+				emin = 0
+			case safmin*z[k+2] < z[k-3] && safmin*z[k-3] < z[k+2]:
+				// The quotient can be formed directly.
+				ratio := z[k+2] / z[k-3]
+				z[k-1] = z[k] * ratio
+				d *= ratio
+			default:
+				z[k-1] = z[k+2] * (z[k] / z[k-3])
+				d = z[k+2] * (d / z[k-3])
+			}
+			dmin = math.Min(dmin, d)
+			emin = math.Min(emin, z[k-1])
+		}
+	}
+
+	// The last two steps are unrolled.
+	dnm2 = d
+	dmin2 = dmin
+	k = 4*(n0-1) - pp - 1
+	kp := k + 2*pp - 1
+	z[k-2] = dnm2 + z[kp]
+	switch {
+	case z[k-2] == 0:
+		z[k] = 0
+		dnm1 = z[kp+2]
+		dmin = dnm1
+		emin = 0
+	case safmin*z[kp+2] < z[k-2] && safmin*z[k-2] < z[kp+2]:
+		ratio := z[kp+2] / z[k-2]
+		z[k] = z[kp] * ratio
+		dnm1 = dnm2 * ratio
+	default:
+		z[k] = z[kp+2] * (z[kp] / z[k-2])
+		dnm1 = z[kp+2] * (dnm2 / z[k-2])
+	}
+	dmin = math.Min(dmin, dnm1)
+
+	dmin1 = dmin
+	k += 4
+	kp = k + 2*pp - 1
+	z[k-2] = dnm1 + z[kp]
+	switch {
+	case z[k-2] == 0:
+		z[k] = 0
+		dn = z[kp+2]
+		dmin = dn
+		emin = 0
+	case safmin*z[kp+2] < z[k-2] && safmin*z[k-2] < z[kp+2]:
+		ratio := z[kp+2] / z[k-2]
+		z[k] = z[kp] * ratio
+		dn = dnm1 * ratio
+	default:
+		z[k] = z[kp+2] * (z[kp] / z[k-2])
+		dn = z[kp+2] * (dnm1 / z[k-2])
+	}
+	dmin = math.Min(dmin, dn)
+
+	z[k+2] = dn
+	z[4*(n0+1)-pp-1] = emin
+	return dmin, dmin1, dmin2, dn, dnm1, dnm2
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasr.go
@ -0,0 +1,279 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlasr applies a sequence of plane rotations to the m×n matrix A. The
+// sequence is implicitly represented by a matrix P, and A is overwritten by
+//  A = P * A    if side == blas.Left,
+//  A = A * P^T  if side == blas.Right.
+//
+// The exact value of P depends on the value of pivot, but in all cases P is
+// implicitly represented by a series of 2×2 rotation matrices. In the
+// following z = m if side == blas.Left and z = n if side == blas.Right. The
+// entries of rotation matrix k are defined by s[k] and c[k]
+//  R(k) = [ c[k] s[k]]
+//         [-s[k] c[k]]
+// If direct == lapack.Forward, the rotation matrices are applied as
+// P = P(z-1) * ... * P(2) * P(1), while if direct == lapack.Backward they are
+// applied as P = P(1) * P(2) * ... * P(z-1).
+//
+// pivot defines the mapping of the elements in R(k) to P(k).
+// If pivot == lapack.Variable, the rotation is performed for the (k, k+1) plane.
+//  P(k) = [1                    ]
+//         [    ...              ]
+//         [     1               ]
+//         [       c[k] s[k]     ]
+//         [      -s[k] c[k]     ]
+//         [                 1   ]
+//         [                ...  ]
+//         [                    1]
+// if pivot == lapack.Top, the rotation is performed for the (1, k+1) plane,
+//  P(k) = [c[k]        s[k]     ]
+//         [    1                ]
+//         [     ...             ]
+//         [         1           ]
+//         [-s[k]       c[k]     ]
+//         [                 1   ]
+//         [                ...  ]
+//         [                    1]
+// and if pivot == lapack.Bottom, the rotation is performed for the (k, z) plane.
+//  P(k) = [1                    ]
+//         [  ...                ]
+//         [      1              ]
+//         [        c[k]     s[k]]
+//         [           1         ]
+//         [            ...      ]
+//         [              1      ]
+//         [       -s[k]     c[k]]
+// s and c have length m - 1 if side == blas.Left, and n - 1 if side == blas.Right.
+//
+// Dlasr panics if side, pivot or direct is invalid, if m or n is negative, if
+// lda < max(1, n), or if c, s or a is too short. A rotation with c[k] == 1 and
+// s[k] == 0 is the identity and is skipped.
+//
+// Dlasr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasr(side blas.Side, pivot lapack.Pivot, direct lapack.Direct, m, n int, c, s, a []float64, lda int) {
+	switch {
+	case side != blas.Left && side != blas.Right:
+		panic(badSide)
+	case pivot != lapack.Variable && pivot != lapack.Top && pivot != lapack.Bottom:
+		panic(badPivot)
+	case direct != lapack.Forward && direct != lapack.Backward:
+		panic(badDirect)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if m == 0 || n == 0 {
+		return
+	}
+
+	if side == blas.Left {
+		if len(c) < m-1 {
+			panic(shortC)
+		}
+		if len(s) < m-1 {
+			panic(shortS)
+		}
+	} else {
+		if len(c) < n-1 {
+			panic(shortC)
+		}
+		if len(s) < n-1 {
+			panic(shortS)
+		}
+	}
+	if len(a) < (m-1)*lda+n {
+		panic(shortA)
+	}
+
+	if side == blas.Left {
+		// Form P * A: every rotation mixes two rows of A.
+		if pivot == lapack.Variable {
+			// Rotation j acts on rows j and j+1.
+			if direct == lapack.Forward {
+				for j := 0; j < m-1; j++ {
+					ctmp := c[j]
+					stmp := s[j]
+					if ctmp != 1 || stmp != 0 {
+						for i := 0; i < n; i++ {
+							tmp2 := a[j*lda+i]
+							tmp := a[(j+1)*lda+i]
+							a[(j+1)*lda+i] = ctmp*tmp - stmp*tmp2
+							a[j*lda+i] = stmp*tmp + ctmp*tmp2
+						}
+					}
+				}
+				return
+			}
+			for j := m - 2; j >= 0; j-- {
+				ctmp := c[j]
+				stmp := s[j]
+				if ctmp != 1 || stmp != 0 {
+					for i := 0; i < n; i++ {
+						tmp2 := a[j*lda+i]
+						tmp := a[(j+1)*lda+i]
+						a[(j+1)*lda+i] = ctmp*tmp - stmp*tmp2
+						a[j*lda+i] = stmp*tmp + ctmp*tmp2
+					}
+				}
+			}
+			return
+		} else if pivot == lapack.Top {
+			// Rotation j-1 acts on the first row and row j.
+			if direct == lapack.Forward {
+				for j := 1; j < m; j++ {
+					ctmp := c[j-1]
+					stmp := s[j-1]
+					if ctmp != 1 || stmp != 0 {
+						for i := 0; i < n; i++ {
+							tmp := a[j*lda+i]
+							tmp2 := a[i]
+							a[j*lda+i] = ctmp*tmp - stmp*tmp2
+							a[i] = stmp*tmp + ctmp*tmp2
+						}
+					}
+				}
+				return
+			}
+			// Each rotation must be applied exactly once per column, as in
+			// the forward case above.
+			for j := m - 1; j >= 1; j-- {
+				ctmp := c[j-1]
+				stmp := s[j-1]
+				if ctmp != 1 || stmp != 0 {
+					for i := 0; i < n; i++ {
+						tmp := a[j*lda+i]
+						tmp2 := a[i]
+						a[j*lda+i] = ctmp*tmp - stmp*tmp2
+						a[i] = stmp*tmp + ctmp*tmp2
+					}
+				}
+			}
+			return
+		}
+		// pivot == lapack.Bottom: rotation j acts on row j and the last row.
+		if direct == lapack.Forward {
+			for j := 0; j < m-1; j++ {
+				ctmp := c[j]
+				stmp := s[j]
+				if ctmp != 1 || stmp != 0 {
+					for i := 0; i < n; i++ {
+						tmp := a[j*lda+i]
+						tmp2 := a[(m-1)*lda+i]
+						a[j*lda+i] = stmp*tmp2 + ctmp*tmp
+						a[(m-1)*lda+i] = ctmp*tmp2 - stmp*tmp
+					}
+				}
+			}
+			return
+		}
+		for j := m - 2; j >= 0; j-- {
+			ctmp := c[j]
+			stmp := s[j]
+			if ctmp != 1 || stmp != 0 {
+				for i := 0; i < n; i++ {
+					tmp := a[j*lda+i]
+					tmp2 := a[(m-1)*lda+i]
+					a[j*lda+i] = stmp*tmp2 + ctmp*tmp
+					a[(m-1)*lda+i] = ctmp*tmp2 - stmp*tmp
+				}
+			}
+		}
+		return
+	}
+
+	// side == blas.Right. Form A * P^T: every rotation mixes two columns of A.
+	if pivot == lapack.Variable {
+		// Rotation j acts on columns j and j+1.
+		if direct == lapack.Forward {
+			for j := 0; j < n-1; j++ {
+				ctmp := c[j]
+				stmp := s[j]
+				if ctmp != 1 || stmp != 0 {
+					for i := 0; i < m; i++ {
+						tmp := a[i*lda+j+1]
+						tmp2 := a[i*lda+j]
+						a[i*lda+j+1] = ctmp*tmp - stmp*tmp2
+						a[i*lda+j] = stmp*tmp + ctmp*tmp2
+					}
+				}
+			}
+			return
+		}
+		for j := n - 2; j >= 0; j-- {
+			ctmp := c[j]
+			stmp := s[j]
+			if ctmp != 1 || stmp != 0 {
+				for i := 0; i < m; i++ {
+					tmp := a[i*lda+j+1]
+					tmp2 := a[i*lda+j]
+					a[i*lda+j+1] = ctmp*tmp - stmp*tmp2
+					a[i*lda+j] = stmp*tmp + ctmp*tmp2
+				}
+			}
+		}
+		return
+	} else if pivot == lapack.Top {
+		// Rotation j-1 acts on the first column and column j.
+		if direct == lapack.Forward {
+			for j := 1; j < n; j++ {
+				ctmp := c[j-1]
+				stmp := s[j-1]
+				if ctmp != 1 || stmp != 0 {
+					for i := 0; i < m; i++ {
+						tmp := a[i*lda+j]
+						tmp2 := a[i*lda]
+						a[i*lda+j] = ctmp*tmp - stmp*tmp2
+						a[i*lda] = stmp*tmp + ctmp*tmp2
+					}
+				}
+			}
+			return
+		}
+		for j := n - 1; j >= 1; j-- {
+			ctmp := c[j-1]
+			stmp := s[j-1]
+			if ctmp != 1 || stmp != 0 {
+				for i := 0; i < m; i++ {
+					tmp := a[i*lda+j]
+					tmp2 := a[i*lda]
+					a[i*lda+j] = ctmp*tmp - stmp*tmp2
+					a[i*lda] = stmp*tmp + ctmp*tmp2
+				}
+			}
+		}
+		return
+	}
+	// pivot == lapack.Bottom: rotation j acts on column j and the last column.
+	if direct == lapack.Forward {
+		for j := 0; j < n-1; j++ {
+			ctmp := c[j]
+			stmp := s[j]
+			if ctmp != 1 || stmp != 0 {
+				for i := 0; i < m; i++ {
+					tmp := a[i*lda+j]
+					tmp2 := a[i*lda+n-1]
+					a[i*lda+j] = stmp*tmp2 + ctmp*tmp
+					a[i*lda+n-1] = ctmp*tmp2 - stmp*tmp
+				}
+			}
+		}
+		return
+	}
+	for j := n - 2; j >= 0; j-- {
+		ctmp := c[j]
+		stmp := s[j]
+		if ctmp != 1 || stmp != 0 {
+			for i := 0; i < m; i++ {
+				tmp := a[i*lda+j]
+				tmp2 := a[i*lda+n-1]
+				a[i*lda+j] = stmp*tmp2 + ctmp*tmp
+				a[i*lda+n-1] = ctmp*tmp2 - stmp*tmp
+			}
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasrt.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasrt.go
@ -0,0 +1,36 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"sort"
+
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dlasrt sorts the first n elements of d in place. The order is increasing
+// when s == lapack.SortIncreasing and decreasing when
+// s == lapack.SortDecreasing. Any other value of s causes a panic.
+//
+// Dlasrt also panics if n is negative or if d holds fewer than n elements.
+//
+// Dlasrt is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasrt(s lapack.Sort, n int, d []float64) {
+	if n < 0 {
+		panic(nLT0)
+	}
+	if len(d) < n {
+		panic(shortD)
+	}
+
+	// Only the leading n elements take part in the sort.
+	head := d[:n]
+	if s == lapack.SortIncreasing {
+		sort.Float64s(head)
+		return
+	}
+	if s == lapack.SortDecreasing {
+		sort.Sort(sort.Reverse(sort.Float64Slice(head)))
+		return
+	}
+	panic(badSort)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlassq.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlassq.go
@ -0,0 +1,41 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlassq updates a sum of squares kept in scaled form. On entry scale and
+// sumsq describe the current value scale^2 * sumsq. The first n elements of
+// the vector given by x and incx are folded in, and the updated pair is
+// returned. Zero elements are skipped, while NaN elements are processed.
+//
+// Dlassq panics if n is negative, if incx is not positive, or if x is too
+// short to hold n elements with stride incx.
+//
+// Dlassq is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlassq(n int, x []float64, incx int, scale float64, sumsq float64) (scl, smsq float64) {
+	if n < 0 {
+		panic(nLT0)
+	}
+	if incx <= 0 {
+		panic(badIncX)
+	}
+	if len(x) < 1+(n-1)*incx {
+		panic(shortX)
+	}
+
+	for i, ix := 0, 0; i < n; i, ix = i+1, ix+incx {
+		absxi := math.Abs(x[ix])
+		if absxi <= 0 && !math.IsNaN(absxi) {
+			// A zero element leaves the sum unchanged.
+			continue
+		}
+		if scale < absxi {
+			// A new largest magnitude was found: rescale the running sum.
+			ratio := scale / absxi
+			sumsq = 1 + sumsq*ratio*ratio
+			scale = absxi
+			continue
+		}
+		ratio := absxi / scale
+		sumsq += ratio * ratio
+	}
+	return scale, sumsq
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasv2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasv2.go
@ -0,0 +1,115 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "math"
+
+// Dlasv2 computes the singular value decomposition of the 2×2 upper
+// triangular matrix
+//  [f g]
+//  [0 h]
+// such that
+//  [ csl snl] [f g] [csr -snr] = [ssmax     0]
+//  [-snl csl] [0 h] [snr  csr] = [    0 ssmin]
+// ssmax is the singular value of larger absolute value, and ssmin is the one
+// of smaller absolute value. [csl, snl] and [csr, snr] are the left and right
+// singular vectors.
+//
+// Dlasv2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasv2(f, g, h float64) (ssmin, ssmax, snr, csr, snl, csl float64) {
+	ft, ht := f, h
+	fa, ha := math.Abs(ft), math.Abs(h)
+	// largest records which matrix entry has the largest absolute value:
+	// 1 for f, 2 for g and 3 for h.
+	largest := 1
+	swapped := ha > fa
+	if swapped {
+		// Work with |ft| >= |ht|; the swap is undone on output.
+		largest = 3
+		ft, ht = ht, ft
+		fa, ha = ha, fa
+	}
+	gt := g
+	ga := math.Abs(gt)
+
+	var clt, crt, slt, srt float64
+	switch {
+	case ga == 0:
+		// The matrix is already diagonal.
+		ssmin, ssmax = ha, fa
+		clt, crt = 1, 1
+		slt, srt = 0, 0
+	case ga > fa && (fa/ga) < dlamchE:
+		// g is so much larger than f that f/g is below dlamchE.
+		largest = 2
+		ssmax = ga
+		if ha > 1 {
+			ssmin = fa / (ga / ha)
+		} else {
+			ssmin = (fa / ga) * ha
+		}
+		clt = 1
+		slt = ht / gt
+		srt = 1
+		crt = ft / gt
+	default:
+		if ga > fa {
+			largest = 2
+		}
+		d := fa - ha
+		l := 1.0 // Used when d == fa, which also deals with inf.
+		if d != fa {
+			l = d / fa
+		}
+		m := gt / ft
+		t := 2 - l
+		s := math.Hypot(t, m)
+		r := math.Abs(m)
+		if l != 0 {
+			r = math.Hypot(l, m)
+		}
+		a := 0.5 * (s + r)
+		ssmin = ha / a
+		ssmax = fa * a
+		switch {
+		case m != 0:
+			t = (m/(s+t) + m/(r+l)) * (1 + a)
+		case l == 0:
+			t = math.Copysign(2, ft) * math.Copysign(1, gt)
+		default:
+			t = gt/math.Copysign(d, ft) + m/t
+		}
+		l = math.Hypot(t, 2)
+		crt = 2 / l
+		srt = t / l
+		clt = (crt + srt*m) / a
+		slt = (ht / ft) * srt / a
+	}
+
+	// Undo the initial swap of f and h, if any.
+	if swapped {
+		csl, snl = srt, crt
+		csr, snr = slt, clt
+	} else {
+		csl, snl = clt, slt
+		csr, snr = crt, srt
+	}
+
+	// Fix the signs of the singular values.
+	var tsign float64
+	switch largest {
+	case 1:
+		tsign = math.Copysign(1, csr) * math.Copysign(1, csl) * math.Copysign(1, f)
+	case 2:
+		tsign = math.Copysign(1, snr) * math.Copysign(1, csl) * math.Copysign(1, g)
+	case 3:
+		tsign = math.Copysign(1, snr) * math.Copysign(1, snl) * math.Copysign(1, h)
+	}
+	ssmax = math.Copysign(ssmax, tsign)
+	ssmin = math.Copysign(ssmin, tsign*math.Copysign(1, f)*math.Copysign(1, h))
+	return ssmin, ssmax, snr, csr, snl, csl
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlaswp.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlaswp.go
@ -0,0 +1,52 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas/blas64"
+
+// Dlaswp swaps the rows k1 to k2 of a rectangular matrix A according to the
+// indices in ipiv so that row k is swapped with ipiv[k].
+//
+// n is the number of columns of A and incX is the increment for ipiv. If incX
+// is 1, the swaps are applied from k1 to k2. If incX is -1, the swaps are
+// applied in reverse order from k2 to k1. For other values of incX Dlaswp will
+// panic. ipiv must have length k2+1, otherwise Dlaswp will panic.
+//
+// The indices k1, k2, and the elements of ipiv are zero-based.
+//
+// Dlaswp panics if n is negative, if k1 and k2 do not satisfy 0 <= k1 <= k2,
+// if lda < max(1, n), or if a is too short to hold rows 0 through k2.
+//
+// Dlaswp is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlaswp(n int, a []float64, lda int, k1, k2 int, ipiv []int, incX int) {
+	switch {
+	case n < 0:
+		panic(nLT0)
+	case k2 < 0:
+		panic(badK2)
+	case k1 < 0 || k2 < k1:
+		panic(badK1)
+	case lda < max(1, n):
+		panic(badLdA)
+	case len(a) < k2*lda+n: // A must have at least k2+1 rows.
+		panic(shortA)
+	case len(ipiv) != k2+1:
+		panic(badLenIpiv)
+	case incX != 1 && incX != -1:
+		panic(absIncNotOne)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		return
+	}
+
+	bi := blas64.Implementation()
+	if incX == 1 {
+		for k := k1; k <= k2; k++ {
+			bi.Dswap(n, a[k*lda:], 1, a[ipiv[k]*lda:], 1)
+		}
+		return
+	}
+	for k := k2; k >= k1; k-- {
+		bi.Dswap(n, a[k*lda:], 1, a[ipiv[k]*lda:], 1)
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlasy2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlasy2.go
@ -0,0 +1,290 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlasy2 solves the Sylvester matrix equation where the matrices are of order 1
+// or 2. It computes the unknown n1×n2 matrix X so that
+//  TL*X   + sgn*X*TR   = scale*B,  if tranl == false and tranr == false,
+//  TL^T*X + sgn*X*TR   = scale*B,  if tranl == true  and tranr == false,
+//  TL*X   + sgn*X*TR^T = scale*B,  if tranl == false and tranr == true,
+//  TL^T*X + sgn*X*TR^T = scale*B,  if tranl == true  and tranr == true,
+// where TL is n1×n1, TR is n2×n2, B is n1×n2, and 1 <= n1,n2 <= 2.
+//
+// isgn must be 1 or -1, and n1 and n2 must be 0, 1, or 2, but these conditions
+// are not checked. If n1 or n2 is 0, Dlasy2 returns immediately with scale and
+// xnorm equal to 0 and ok equal to true.
+//
+// tl, tr, b and x are stored in row-major order with strides ldtl, ldtr, ldb
+// and ldx, respectively. The solution is written into x.
+//
+// Dlasy2 returns three values, a scale factor that is chosen less than or equal
+// to 1 to prevent the solution overflowing, the infinity norm of the solution,
+// and an indicator of success. If ok is false, TL and TR have eigenvalues that
+// are too close, so TL or TR is perturbed to get a non-singular equation.
+//
+// Dlasy2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlasy2(tranl, tranr bool, isgn, n1, n2 int, tl []float64, ldtl int, tr []float64, ldtr int, b []float64, ldb int, x []float64, ldx int) (scale, xnorm float64, ok bool) {
+	// TODO(vladimir-ch): Add input validation checks conditionally skipped
+	// using the build tag mechanism.
+
+	ok = true
+	// Quick return if possible.
+	if n1 == 0 || n2 == 0 {
+		return scale, xnorm, ok
+	}
+
+	// Set constants to control overflow.
+	eps := dlamchP
+	smlnum := dlamchS / eps
+	sgn := float64(isgn)
+
+	if n1 == 1 && n2 == 1 {
+		// 1×1 case: TL11*X + sgn*X*TR11 = B11.
+		tau1 := tl[0] + sgn*tr[0]
+		bet := math.Abs(tau1)
+		if bet <= smlnum {
+			// The equation is (nearly) singular, perturb it.
+			tau1 = smlnum
+			bet = smlnum
+			ok = false
+		}
+		scale = 1
+		gam := math.Abs(b[0])
+		if smlnum*gam > bet {
+			scale = 1 / gam
+		}
+		x[0] = b[0] * scale / tau1
+		xnorm = math.Abs(x[0])
+		return scale, xnorm, ok
+	}
+
+	if n1+n2 == 3 {
+		// 1×2 or 2×1 case.
+		var (
+			smin float64
+			tmp  [4]float64 // tmp is used as a 2×2 row-major matrix.
+			btmp [2]float64
+		)
+		if n1 == 1 && n2 == 2 {
+			// 1×2 case: TL11*[X11 X12] + sgn*[X11 X12]*op[TR11 TR12] = [B11 B12].
+			//                                            [TR21 TR22]
+			smin = math.Abs(tl[0])
+			smin = math.Max(smin, math.Max(math.Abs(tr[0]), math.Abs(tr[1])))
+			smin = math.Max(smin, math.Max(math.Abs(tr[ldtr]), math.Abs(tr[ldtr+1])))
+			smin = math.Max(eps*smin, smlnum)
+			tmp[0] = tl[0] + sgn*tr[0]
+			tmp[3] = tl[0] + sgn*tr[ldtr+1]
+			if tranr {
+				tmp[1] = sgn * tr[1]
+				tmp[2] = sgn * tr[ldtr]
+			} else {
+				tmp[1] = sgn * tr[ldtr]
+				tmp[2] = sgn * tr[1]
+			}
+			btmp[0] = b[0]
+			btmp[1] = b[1]
+		} else {
+			// 2×1 case: op[TL11 TL12]*[X11] + sgn*[X11]*TR11 = [B11].
+			//             [TL21 TL22]*[X21]       [X21]        [B21]
+			smin = math.Abs(tr[0])
+			smin = math.Max(smin, math.Max(math.Abs(tl[0]), math.Abs(tl[1])))
+			smin = math.Max(smin, math.Max(math.Abs(tl[ldtl]), math.Abs(tl[ldtl+1])))
+			smin = math.Max(eps*smin, smlnum)
+			tmp[0] = tl[0] + sgn*tr[0]
+			tmp[3] = tl[ldtl+1] + sgn*tr[0]
+			if tranl {
+				tmp[1] = tl[ldtl]
+				tmp[2] = tl[1]
+			} else {
+				tmp[1] = tl[1]
+				tmp[2] = tl[ldtl]
+			}
+			btmp[0] = b[0]
+			btmp[1] = b[ldb]
+		}
+
+		// Solve 2×2 system using complete pivoting.
+		// Set pivots less than smin to smin.
+
+		bi := blas64.Implementation()
+		ipiv := bi.Idamax(len(tmp), tmp[:], 1)
+		// Compute the upper triangular matrix [u11 u12].
+		//                                     [  0 u22]
+		u11 := tmp[ipiv]
+		if math.Abs(u11) <= smin {
+			ok = false
+			u11 = smin
+		}
+		locu12 := [4]int{1, 0, 3, 2} // Index in tmp of the element on the same row as the pivot.
+		u12 := tmp[locu12[ipiv]]
+		locl21 := [4]int{2, 3, 0, 1} // Index in tmp of the element on the same column as the pivot.
+		l21 := tmp[locl21[ipiv]] / u11
+		locu22 := [4]int{3, 2, 1, 0} // Index in tmp of the remaining element.
+		u22 := tmp[locu22[ipiv]] - l21*u12
+		if math.Abs(u22) <= smin {
+			ok = false
+			u22 = smin
+		}
+		if ipiv&0x2 != 0 { // true for ipiv equal to 2 and 3.
+			// The pivot was in the second row, swap the elements of
+			// the right-hand side.
+			btmp[0], btmp[1] = btmp[1], btmp[0]-l21*btmp[1]
+		} else {
+			btmp[1] -= l21 * btmp[0]
+		}
+		scale = 1
+		if 2*smlnum*math.Abs(btmp[1]) > math.Abs(u22) || 2*smlnum*math.Abs(btmp[0]) > math.Abs(u11) {
+			// Scale the right-hand side to avoid overflow in the solution.
+			scale = 0.5 / math.Max(math.Abs(btmp[0]), math.Abs(btmp[1]))
+			btmp[0] *= scale
+			btmp[1] *= scale
+		}
+		// Solve the system [u11 u12] [x21] = [ btmp[0] ].
+		//                  [  0 u22] [x22]   [ btmp[1] ]
+		x22 := btmp[1] / u22
+		x21 := btmp[0]/u11 - (u12/u11)*x22
+		if ipiv&0x1 != 0 { // true for ipiv equal to 1 and 3.
+			// The pivot was in the second column, swap the elements
+			// of the solution.
+			x21, x22 = x22, x21
+		}
+		x[0] = x21
+		if n1 == 1 {
+			x[1] = x22
+			xnorm = math.Abs(x[0]) + math.Abs(x[1])
+		} else {
+			x[ldx] = x22
+			xnorm = math.Max(math.Abs(x[0]), math.Abs(x[ldx]))
+		}
+		return scale, xnorm, ok
+	}
+
+	// 2×2 case: op[TL11 TL12]*[X11 X12] + SGN*[X11 X12]*op[TR11 TR12] = [B11 B12].
+	//             [TL21 TL22] [X21 X22]       [X21 X22]   [TR21 TR22]   [B21 B22]
+	//
+	// Solve equivalent 4×4 system using complete pivoting.
+	// Set pivots less than smin to smin.
+
+	smin := math.Max(math.Abs(tr[0]), math.Abs(tr[1]))
+	smin = math.Max(smin, math.Max(math.Abs(tr[ldtr]), math.Abs(tr[ldtr+1])))
+	smin = math.Max(smin, math.Max(math.Abs(tl[0]), math.Abs(tl[1])))
+	smin = math.Max(smin, math.Max(math.Abs(tl[ldtl]), math.Abs(tl[ldtl+1])))
+	smin = math.Max(eps*smin, smlnum)
+
+	// t is the 4×4 coefficient matrix acting on the unknowns ordered as
+	// [X11 X12 X21 X22].
+	var t [4][4]float64
+	t[0][0] = tl[0] + sgn*tr[0]
+	t[1][1] = tl[0] + sgn*tr[ldtr+1]
+	t[2][2] = tl[ldtl+1] + sgn*tr[0]
+	t[3][3] = tl[ldtl+1] + sgn*tr[ldtr+1]
+	if tranl {
+		t[0][2] = tl[ldtl]
+		t[1][3] = tl[ldtl]
+		t[2][0] = tl[1]
+		t[3][1] = tl[1]
+	} else {
+		t[0][2] = tl[1]
+		t[1][3] = tl[1]
+		t[2][0] = tl[ldtl]
+		t[3][1] = tl[ldtl]
+	}
+	if tranr {
+		t[0][1] = sgn * tr[1]
+		t[1][0] = sgn * tr[ldtr]
+		t[2][3] = sgn * tr[1]
+		t[3][2] = sgn * tr[ldtr]
+	} else {
+		t[0][1] = sgn * tr[ldtr]
+		t[1][0] = sgn * tr[1]
+		t[2][3] = sgn * tr[ldtr]
+		t[3][2] = sgn * tr[1]
+	}
+
+	var btmp [4]float64
+	btmp[0] = b[0]
+	btmp[1] = b[1]
+	btmp[2] = b[ldb]
+	btmp[3] = b[ldb+1]
+
+	// Perform elimination.
+	var jpiv [4]int // jpiv records any column swaps for pivoting.
+	for i := 0; i < 3; i++ {
+		// Find the entry of largest magnitude in the unprocessed block.
+		var (
+			xmax       float64
+			ipsv, jpsv int
+		)
+		for ip := i; ip < 4; ip++ {
+			for jp := i; jp < 4; jp++ {
+				if math.Abs(t[ip][jp]) >= xmax {
+					xmax = math.Abs(t[ip][jp])
+					ipsv = ip
+					jpsv = jp
+				}
+			}
+		}
+		if ipsv != i {
+			// The pivot is not in the top row of the unprocessed
+			// block, swap rows ipsv and i of t and btmp.
+			t[ipsv], t[i] = t[i], t[ipsv]
+			btmp[ipsv], btmp[i] = btmp[i], btmp[ipsv]
+		}
+		if jpsv != i {
+			// The pivot is not in the left column of the
+			// unprocessed block, swap columns jpsv and i of t.
+			for k := 0; k < 4; k++ {
+				t[k][jpsv], t[k][i] = t[k][i], t[k][jpsv]
+			}
+		}
+		jpiv[i] = jpsv
+		if math.Abs(t[i][i]) < smin {
+			ok = false
+			t[i][i] = smin
+		}
+		for k := i + 1; k < 4; k++ {
+			t[k][i] /= t[i][i]
+			btmp[k] -= t[k][i] * btmp[i]
+			for j := i + 1; j < 4; j++ {
+				t[k][j] -= t[k][i] * t[i][j]
+			}
+		}
+	}
+	if math.Abs(t[3][3]) < smin {
+		ok = false
+		t[3][3] = smin
+	}
+	scale = 1
+	if 8*smlnum*math.Abs(btmp[0]) > math.Abs(t[0][0]) ||
+		8*smlnum*math.Abs(btmp[1]) > math.Abs(t[1][1]) ||
+		8*smlnum*math.Abs(btmp[2]) > math.Abs(t[2][2]) ||
+		8*smlnum*math.Abs(btmp[3]) > math.Abs(t[3][3]) {
+
+		maxbtmp := math.Max(math.Abs(btmp[0]), math.Abs(btmp[1]))
+		maxbtmp = math.Max(maxbtmp, math.Max(math.Abs(btmp[2]), math.Abs(btmp[3])))
+		// The constant must be a floating-point value: the untyped
+		// constant expression 1 / 8 is an integer division and equals 0.
+		scale = (1.0 / 8.0) / maxbtmp
+		btmp[0] *= scale
+		btmp[1] *= scale
+		btmp[2] *= scale
+		btmp[3] *= scale
+	}
+	// Compute the solution of the upper triangular system t * tmp = btmp.
+	var tmp [4]float64
+	for i := 3; i >= 0; i-- {
+		temp := 1 / t[i][i]
+		tmp[i] = btmp[i] * temp
+		for j := i + 1; j < 4; j++ {
+			tmp[i] -= temp * t[i][j] * tmp[j]
+		}
+	}
+	// Undo the column swaps in reverse order to recover the solution in the
+	// original ordering of the unknowns.
+	for i := 2; i >= 0; i-- {
+		if jpiv[i] != i {
+			tmp[i], tmp[jpiv[i]] = tmp[jpiv[i]], tmp[i]
+		}
+	}
+	x[0] = tmp[0]
+	x[1] = tmp[1]
+	x[ldx] = tmp[2]
+	x[ldx+1] = tmp[3]
+	xnorm = math.Max(math.Abs(tmp[0])+math.Abs(tmp[1]), math.Abs(tmp[2])+math.Abs(tmp[3]))
+	return scale, xnorm, ok
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlatrd.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlatrd.go
@ -0,0 +1,165 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlatrd reduces nb rows and columns of a real n×n symmetric matrix A to symmetric
+// tridiagonal form. It computes the orthonormal similarity transformation
+//  Q^T * A * Q
+// and returns the matrices V and W to apply to the unreduced part of A. If
+// uplo == blas.Upper, the upper triangle is supplied and the last nb rows are
+// reduced. If uplo == blas.Lower, the lower triangle is supplied and the first
+// nb rows are reduced.
+//
+// a contains the symmetric matrix on entry with active triangular half specified
+// by uplo. On exit, the nb columns have been reduced to tridiagonal form. The
+// diagonal contains the diagonal of the reduced matrix, the off-diagonal is
+// set to 1, and the remaining elements contain the data to construct Q.
+//
+// If uplo == blas.Upper, with n = 5 and nb = 2 on exit a is
+//  [ a   a   a  v4  v5]
+//  [     a   a  v4  v5]
+//  [         a   1  v5]
+//  [             d   1]
+//  [                 d]
+//
+// If uplo == blas.Lower, with n = 5 and nb = 2, on exit a is
+//  [ d                ]
+//  [ 1   d            ]
+//  [v1   1   a        ]
+//  [v1  v2   a   a    ]
+//  [v1  v2   a   a   a]
+//
+// e contains the superdiagonal elements of the reduced matrix. If uplo == blas.Upper,
+// e[n-nb-1:n-1] contains the last nb columns of the reduced matrix, while if
+// uplo == blas.Lower, e[:nb] contains the first nb columns of the reduced matrix.
+// e must have length at least n-1, and Dlatrd will panic otherwise.
+//
+// tau contains the scalar factors of the elementary reflectors needed to construct Q.
+// The factors are stored in tau[n-nb-1:n-1] if uplo == blas.Upper, and in
+// tau[:nb] if uplo == blas.Lower. tau must have length at least n-1, and Dlatrd
+// will panic otherwise.
+//
+// w is an n×nb matrix. On exit it contains the data to update the unreduced part
+// of A.
+//
+// The matrix Q is represented as a product of elementary reflectors. Each reflector
+// H has the form
+//  I - tau * v * v^T
+// If uplo == blas.Upper,
+//  Q = H_{n-1} * H_{n-2} * ... * H_{n-nb}
+// where v[:i-1] is stored in A[:i-1,i], v[i-1] = 1, and v[i:n] = 0.
+//
+// If uplo == blas.Lower,
+//  Q = H_0 * H_1 * ... * H_{nb-1}
+// where v[:i+1] = 0, v[i+1] = 1, and v[i+2:n] is stored in A[i+2:n,i].
+//
+// The vectors v form the n×nb matrix V which is used with W to apply a
+// symmetric rank-2 update to the unreduced part of A
+//  A = A - V * W^T - W * V^T
+//
+// Dlatrd is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlatrd(uplo blas.Uplo, n, nb int, a []float64, lda int, e, tau, w []float64, ldw int) {
+	switch {
+	case uplo != blas.Upper && uplo != blas.Lower:
+		panic(badUplo)
+	case n < 0:
+		panic(nLT0)
+	case nb < 0:
+		panic(nbLT0)
+	case nb > n:
+		panic(nbGTN)
+	case lda < max(1, n):
+		panic(badLdA)
+	case ldw < max(1, nb):
+		panic(badLdW)
+	}
+
+	if n == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (n-1)*lda+n:
+		panic(shortA)
+	case len(w) < (n-1)*ldw+nb:
+		panic(shortW)
+	case len(e) < n-1:
+		panic(shortE)
+	case len(tau) < n-1:
+		panic(shortTau)
+	}
+
+	bi := blas64.Implementation()
+
+	if uplo == blas.Upper {
+		for i := n - 1; i >= n-nb; i-- {
+			iw := i - n + nb
+			if i < n-1 {
+				// Update A(0:i, i).
+				bi.Dgemv(blas.NoTrans, i+1, n-i-1, -1, a[i+1:], lda,
+					w[i*ldw+iw+1:], 1, 1, a[i:], lda)
+				bi.Dgemv(blas.NoTrans, i+1, n-i-1, -1, w[iw+1:], ldw,
+					a[i*lda+i+1:], 1, 1, a[i:], lda)
+			}
+			if i > 0 {
+				// Generate elementary reflector H_i to annihilate A(0:i-2,i).
+				e[i-1], tau[i-1] = impl.Dlarfg(i, a[(i-1)*lda+i], a[i:], lda)
+				a[(i-1)*lda+i] = 1
+
+				// Compute W(0:i-1, i).
+				bi.Dsymv(blas.Upper, i, 1, a, lda, a[i:], lda, 0, w[iw:], ldw)
+				if i < n-1 {
+					bi.Dgemv(blas.Trans, i, n-i-1, 1, w[iw+1:], ldw,
+						a[i:], lda, 0, w[(i+1)*ldw+iw:], ldw)
+					bi.Dgemv(blas.NoTrans, i, n-i-1, -1, a[i+1:], lda,
+						w[(i+1)*ldw+iw:], ldw, 1, w[iw:], ldw)
+					bi.Dgemv(blas.Trans, i, n-i-1, 1, a[i+1:], lda,
+						a[i:], lda, 0, w[(i+1)*ldw+iw:], ldw)
+					bi.Dgemv(blas.NoTrans, i, n-i-1, -1, w[iw+1:], ldw,
+						w[(i+1)*ldw+iw:], ldw, 1, w[iw:], ldw)
+				}
+				bi.Dscal(i, tau[i-1], w[iw:], ldw)
+				alpha := -0.5 * tau[i-1] * bi.Ddot(i, w[iw:], ldw, a[i:], lda)
+				bi.Daxpy(i, alpha, a[i:], lda, w[iw:], ldw)
+			}
+		}
+	} else {
+		// Reduce first nb columns of lower triangle.
+		for i := 0; i < nb; i++ {
+			// Update A(i:n, i)
+			bi.Dgemv(blas.NoTrans, n-i, i, -1, a[i*lda:], lda,
+				w[i*ldw:], 1, 1, a[i*lda+i:], lda)
+			bi.Dgemv(blas.NoTrans, n-i, i, -1, w[i*ldw:], ldw,
+				a[i*lda:], 1, 1, a[i*lda+i:], lda)
+			if i < n-1 {
+				// Generate elementary reflector H_i to annihilate A(i+2:n,i).
+				e[i], tau[i] = impl.Dlarfg(n-i-1, a[(i+1)*lda+i], a[min(i+2, n-1)*lda+i:], lda)
+				a[(i+1)*lda+i] = 1
+
+				// Compute W(i+1:n,i).
+				bi.Dsymv(blas.Lower, n-i-1, 1, a[(i+1)*lda+i+1:], lda,
+					a[(i+1)*lda+i:], lda, 0, w[(i+1)*ldw+i:], ldw)
+				bi.Dgemv(blas.Trans, n-i-1, i, 1, w[(i+1)*ldw:], ldw,
+					a[(i+1)*lda+i:], lda, 0, w[i:], ldw)
+				bi.Dgemv(blas.NoTrans, n-i-1, i, -1, a[(i+1)*lda:], lda,
+					w[i:], ldw, 1, w[(i+1)*ldw+i:], ldw)
+				bi.Dgemv(blas.Trans, n-i-1, i, 1, a[(i+1)*lda:], lda,
+					a[(i+1)*lda+i:], lda, 0, w[i:], ldw)
+				bi.Dgemv(blas.NoTrans, n-i-1, i, -1, w[(i+1)*ldw:], ldw,
+					w[i:], ldw, 1, w[(i+1)*ldw+i:], ldw)
+				bi.Dscal(n-i-1, tau[i], w[(i+1)*ldw+i:], ldw)
+				alpha := -0.5 * tau[i] * bi.Ddot(n-i-1, w[(i+1)*ldw+i:], ldw,
+					a[(i+1)*lda+i:], lda)
+				bi.Daxpy(n-i-1, alpha, a[(i+1)*lda+i:], lda,
+					w[(i+1)*ldw+i:], ldw)
+			}
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlatrs.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlatrs.go
@ -0,0 +1,359 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlatrs solves a triangular system of equations scaled to prevent overflow. It
+// solves
+//  A * x = scale * b if trans == blas.NoTrans
+//  A^T * x = scale * b if trans == blas.Trans or blas.ConjTrans
+// where scale is returned and chosen so that x can be computed without overflow.
+//
+// A is an n×n triangular matrix. On entry, the slice x contains the values of
+// b, and on exit it contains the solution vector x.
+//
+// If normin == true, cnorm is an input and cnorm[j] contains the norm of the off-diagonal
+// part of the j^th column of A. If trans == blas.NoTrans, cnorm[j] must be greater
+// than or equal to the infinity norm, and greater than or equal to the one-norm
+// otherwise. If normin == false, then cnorm is treated as an output, and is set
+// to contain the 1-norm of the off-diagonal part of the j^th column of A.
+//
+// Dlatrs is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dlatrs(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, normin bool, n int, a []float64, lda int, x []float64, cnorm []float64) (scale float64) {
+	switch {
+	case uplo != blas.Upper && uplo != blas.Lower:
+		panic(badUplo)
+	case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans:
+		panic(badTrans)
+	case diag != blas.Unit && diag != blas.NonUnit:
+		panic(badDiag)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		return 0
+	}
+
+	switch {
+	case len(a) < (n-1)*lda+n:
+		panic(shortA)
+	case len(x) < n:
+		panic(shortX)
+	case len(cnorm) < n:
+		panic(shortCNorm)
+	}
+
+	upper := uplo == blas.Upper
+	nonUnit := diag == blas.NonUnit
+
+	smlnum := dlamchS / dlamchP
+	bignum := 1 / smlnum
+	scale = 1
+
+	bi := blas64.Implementation()
+
+	if !normin {
+		if upper {
+			cnorm[0] = 0
+			for j := 1; j < n; j++ {
+				cnorm[j] = bi.Dasum(j, a[j:], lda)
+			}
+		} else {
+			for j := 0; j < n-1; j++ {
+				cnorm[j] = bi.Dasum(n-j-1, a[(j+1)*lda+j:], lda)
+			}
+			cnorm[n-1] = 0
+		}
+	}
+	// Scale the column norms by tscal if the maximum element in cnorm is greater than bignum.
+	imax := bi.Idamax(n, cnorm, 1)
+	tmax := cnorm[imax]
+	var tscal float64
+	if tmax <= bignum {
+		tscal = 1
+	} else {
+		tscal = 1 / (smlnum * tmax)
+		bi.Dscal(n, tscal, cnorm, 1)
+	}
+
+	// Compute a bound on the computed solution vector to see if bi.Dtrsv can be used.
+	j := bi.Idamax(n, x, 1)
+	xmax := math.Abs(x[j])
+	xbnd := xmax
+	var grow float64
+	var jfirst, jlast, jinc int
+	if trans == blas.NoTrans {
+		if upper {
+			jfirst = n - 1
+			jlast = -1
+			jinc = -1
+		} else {
+			jfirst = 0
+			jlast = n
+			jinc = 1
+		}
+		// Compute the growth in A * x = b.
+		if tscal != 1 {
+			grow = 0
+			goto Solve
+		}
+		if nonUnit {
+			grow = 1 / math.Max(xbnd, smlnum)
+			xbnd = grow
+			for j := jfirst; j != jlast; j += jinc {
+				if grow <= smlnum {
+					goto Solve
+				}
+				tjj := math.Abs(a[j*lda+j])
+				xbnd = math.Min(xbnd, math.Min(1, tjj)*grow)
+				if tjj+cnorm[j] >= smlnum {
+					grow *= tjj / (tjj + cnorm[j])
+				} else {
+					grow = 0
+				}
+			}
+			grow = xbnd
+		} else {
+			grow = math.Min(1, 1/math.Max(xbnd, smlnum))
+			for j := jfirst; j != jlast; j += jinc {
+				if grow <= smlnum {
+					goto Solve
+				}
+				grow *= 1 / (1 + cnorm[j])
+			}
+		}
+	} else {
+		if upper {
+			jfirst = 0
+			jlast = n
+			jinc = 1
+		} else {
+			jfirst = n - 1
+			jlast = -1
+			jinc = -1
+		}
+		if tscal != 1 {
+			grow = 0
+			goto Solve
+		}
+		if nonUnit {
+			grow = 1 / (math.Max(xbnd, smlnum))
+			xbnd = grow
+			for j := jfirst; j != jlast; j += jinc {
+				if grow <= smlnum {
+					goto Solve
+				}
+				xj := 1 + cnorm[j]
+				grow = math.Min(grow, xbnd/xj)
+				tjj := math.Abs(a[j*lda+j])
+				if xj > tjj {
+					xbnd *= tjj / xj
+				}
+			}
+			grow = math.Min(grow, xbnd)
+		} else {
+			grow = math.Min(1, 1/math.Max(xbnd, smlnum))
+			for j := jfirst; j != jlast; j += jinc {
+				if grow <= smlnum {
+					goto Solve
+				}
+				xj := 1 + cnorm[j]
+				grow /= xj
+			}
+		}
+	}
+
+Solve:
+	if grow*tscal > smlnum {
+		// Use the Level 2 BLAS solve if the reciprocal of the bound on
+		// elements of X is not too small.
+		bi.Dtrsv(uplo, trans, diag, n, a, lda, x, 1)
+		if tscal != 1 {
+			bi.Dscal(n, 1/tscal, cnorm, 1)
+		}
+		return scale
+	}
+
+	// Use a Level 1 BLAS solve, scaling intermediate results.
+	if xmax > bignum {
+		scale = bignum / xmax
+		bi.Dscal(n, scale, x, 1)
+		xmax = bignum
+	}
+	if trans == blas.NoTrans {
+		for j := jfirst; j != jlast; j += jinc {
+			xj := math.Abs(x[j])
+			var tjj, tjjs float64
+			if nonUnit {
+				tjjs = a[j*lda+j] * tscal
+			} else {
+				tjjs = tscal
+				if tscal == 1 {
+					goto Skip1
+				}
+			}
+			tjj = math.Abs(tjjs)
+			if tjj > smlnum {
+				if tjj < 1 {
+					if xj > tjj*bignum {
+						rec := 1 / xj
+						bi.Dscal(n, rec, x, 1)
+						scale *= rec
+						xmax *= rec
+					}
+				}
+				x[j] /= tjjs
+				xj = math.Abs(x[j])
+			} else if tjj > 0 {
+				if xj > tjj*bignum {
+					rec := (tjj * bignum) / xj
+					if cnorm[j] > 1 {
+						rec /= cnorm[j]
+					}
+					bi.Dscal(n, rec, x, 1)
+					scale *= rec
+					xmax *= rec
+				}
+				x[j] /= tjjs
+				xj = math.Abs(x[j])
+			} else {
+				for i := 0; i < n; i++ {
+					x[i] = 0
+				}
+				x[j] = 1
+				xj = 1
+				scale = 0
+				xmax = 0
+			}
+		Skip1:
+			if xj > 1 {
+				rec := 1 / xj
+				if cnorm[j] > (bignum-xmax)*rec {
+					rec *= 0.5
+					bi.Dscal(n, rec, x, 1)
+					scale *= rec
+				}
+			} else if xj*cnorm[j] > bignum-xmax {
+				bi.Dscal(n, 0.5, x, 1)
+				scale *= 0.5
+			}
+			if upper {
+				if j > 0 {
+					bi.Daxpy(j, -x[j]*tscal, a[j:], lda, x, 1)
+					i := bi.Idamax(j, x, 1)
+					xmax = math.Abs(x[i])
+				}
+			} else {
+				if j < n-1 {
+					bi.Daxpy(n-j-1, -x[j]*tscal, a[(j+1)*lda+j:], lda, x[j+1:], 1)
+					i := j + 1 + bi.Idamax(n-j-1, x[j+1:], 1) // Idamax indexes into x[j+1:].
+					xmax = math.Abs(x[i])
+				}
+			}
+		}
+	} else {
+		for j := jfirst; j != jlast; j += jinc {
+			xj := math.Abs(x[j])
+			uscal := tscal
+			rec := 1 / math.Max(xmax, 1)
+			var tjjs float64
+			if cnorm[j] > (bignum-xj)*rec {
+				rec *= 0.5
+				if nonUnit {
+					tjjs = a[j*lda+j] * tscal
+				} else {
+					tjjs = tscal
+				}
+				tjj := math.Abs(tjjs)
+				if tjj > 1 {
+					rec = math.Min(1, rec*tjj)
+					uscal /= tjjs
+				}
+				if rec < 1 {
+					bi.Dscal(n, rec, x, 1)
+					scale *= rec
+					xmax *= rec
+				}
+			}
+			var sumj float64
+			if uscal == 1 {
+				if upper {
+					sumj = bi.Ddot(j, a[j:], lda, x, 1)
+				} else if j < n-1 {
+					sumj = bi.Ddot(n-j-1, a[(j+1)*lda+j:], lda, x[j+1:], 1)
+				}
+			} else {
+				if upper {
+					for i := 0; i < j; i++ {
+						sumj += (a[i*lda+j] * uscal) * x[i]
+					}
+				} else if j < n {
+					for i := j + 1; i < n; i++ {
+						sumj += (a[i*lda+j] * uscal) * x[i]
+					}
+				}
+			}
+			if uscal == tscal {
+				x[j] -= sumj
+				xj := math.Abs(x[j])
+				var tjjs float64
+				if nonUnit {
+					tjjs = a[j*lda+j] * tscal
+				} else {
+					tjjs = tscal
+					if tscal == 1 {
+						goto Skip2
+					}
+				}
+				tjj := math.Abs(tjjs)
+				if tjj > smlnum {
+					if tjj < 1 {
+						if xj > tjj*bignum {
+							rec = 1 / xj
+							bi.Dscal(n, rec, x, 1)
+							scale *= rec
+							xmax *= rec
+						}
+					}
+					x[j] /= tjjs
+				} else if tjj > 0 {
+					if xj > tjj*bignum {
+						rec = (tjj * bignum) / xj
+						bi.Dscal(n, rec, x, 1)
+						scale *= rec
+						xmax *= rec
+					}
+					x[j] /= tjjs
+				} else {
+					for i := 0; i < n; i++ {
+						x[i] = 0
+					}
+					x[j] = 1
+					scale = 0
+					xmax = 0
+				}
+			} else {
+				x[j] = x[j]/tjjs - sumj
+			}
+		Skip2:
+			xmax = math.Max(xmax, math.Abs(x[j]))
+		}
+	}
+	scale /= tscal
+	if tscal != 1 {
+		bi.Dscal(n, 1/tscal, cnorm, 1)
+	}
+	return scale
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlauu2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlauu2.go
@ -0,0 +1,64 @@
+// Copyright ©2018 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlauu2 computes the product
+//  U * U^T  if uplo is blas.Upper
+//  L^T * L  if uplo is blas.Lower
+// where U or L is stored in the upper or lower triangular part of the n×n matrix A.
+// Only the upper or lower triangle of the result is stored, overwriting
+// the corresponding factor in A. Dlauu2 is the unblocked version used by Dlauum.
+func (impl Implementation) Dlauu2(uplo blas.Uplo, n int, a []float64, lda int) {
+	switch {
+	case uplo != blas.Upper && uplo != blas.Lower:
+		panic(badUplo)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		return
+	}
+
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+
+	bi := blas64.Implementation()
+
+	if uplo == blas.Upper {
+		// Compute the product U*U^T.
+		for i := 0; i < n; i++ {
+			aii := a[i*lda+i]
+			if i < n-1 {
+				a[i*lda+i] = bi.Ddot(n-i, a[i*lda+i:], 1, a[i*lda+i:], 1)
+				bi.Dgemv(blas.NoTrans, i, n-i-1, 1, a[i+1:], lda, a[i*lda+i+1:], 1,
+					aii, a[i:], lda)
+			} else {
+				bi.Dscal(i+1, aii, a[i:], lda)
+			}
+		}
+	} else {
+		// Compute the product L^T*L.
+		for i := 0; i < n; i++ {
+			aii := a[i*lda+i]
+			if i < n-1 {
+				a[i*lda+i] = bi.Ddot(n-i, a[i*lda+i:], lda, a[i*lda+i:], lda)
+				bi.Dgemv(blas.Trans, n-i-1, i, 1, a[(i+1)*lda:], lda, a[(i+1)*lda+i:], lda,
+					aii, a[i*lda:], 1)
+			} else {
+				bi.Dscal(i+1, aii, a[i*lda:], 1)
+			}
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dlauum.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dlauum.go
@ -0,0 +1,81 @@
+// Copyright ©2018 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dlauum computes the product
+//  U * U^T  if uplo is blas.Upper
+//  L^T * L  if uplo is blas.Lower
+// where U or L is stored in the upper or lower triangular part of the n×n matrix A.
+// Only the upper or lower triangle of the result is stored, overwriting
+// the corresponding factor in A. Dlauum is the blocked version of Dlauu2.
+func (impl Implementation) Dlauum(uplo blas.Uplo, n int, a []float64, lda int) {
+	switch {
+	case uplo != blas.Upper && uplo != blas.Lower:
+		panic(badUplo)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		return
+	}
+
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+
+	// Determine the block size nb; the blocked code is used only when 1 < nb < n.
+	opts := "U"
+	if uplo == blas.Lower {
+		opts = "L"
+	}
+	nb := impl.Ilaenv(1, "DLAUUM", opts, n, -1, -1, -1)
+
+	if nb <= 1 || n <= nb {
+		// Use unblocked code.
+		impl.Dlauu2(uplo, n, a, lda)
+		return
+	}
+
+	// Use blocked code.
+	bi := blas64.Implementation()
+	if uplo == blas.Upper {
+		// Compute the product U*U^T.
+		for i := 0; i < n; i += nb {
+			ib := min(nb, n-i)
+			bi.Dtrmm(blas.Right, blas.Upper, blas.Trans, blas.NonUnit,
+				i, ib, 1, a[i*lda+i:], lda, a[i:], lda)
+			impl.Dlauu2(blas.Upper, ib, a[i*lda+i:], lda)
+			if n-i-ib > 0 {
+				bi.Dgemm(blas.NoTrans, blas.Trans, i, ib, n-i-ib,
+					1, a[i+ib:], lda, a[i*lda+i+ib:], lda, 1, a[i:], lda)
+				bi.Dsyrk(blas.Upper, blas.NoTrans, ib, n-i-ib,
+					1, a[i*lda+i+ib:], lda, 1, a[i*lda+i:], lda)
+			}
+		}
+	} else {
+		// Compute the product L^T*L.
+		for i := 0; i < n; i += nb {
+			ib := min(nb, n-i)
+			bi.Dtrmm(blas.Left, blas.Lower, blas.Trans, blas.NonUnit,
+				ib, i, 1, a[i*lda+i:], lda, a[i*lda:], lda)
+			impl.Dlauu2(blas.Lower, ib, a[i*lda+i:], lda)
+			if n-i-ib > 0 {
+				bi.Dgemm(blas.Trans, blas.NoTrans, ib, i, n-i-ib,
+					1, a[(i+ib)*lda+i:], lda, a[(i+ib)*lda:], lda, 1, a[i*lda:], lda)
+				bi.Dsyrk(blas.Lower, blas.Trans, ib, n-i-ib,
+					1, a[(i+ib)*lda+i:], lda, 1, a[i*lda+i:], lda)
+			}
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/doc.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/doc.go
@ -0,0 +1,28 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package gonum is a pure-go implementation of the LAPACK API. The LAPACK API defines
+// a set of algorithms for advanced matrix operations.
+//
+// The function definitions and implementations follow that of the netlib reference
+// implementation. See http://www.netlib.org/lapack/explore-html/ for more
+// information, and http://www.netlib.org/lapack/explore-html/d4/de1/_l_i_c_e_n_s_e_source.html
+// for more license information.
+//
+// Slice function arguments frequently represent vectors and matrices. The data
+// layout is identical to that found in https://godoc.org/gonum.org/v1/gonum/blas/gonum.
+//
+// Most LAPACK functions are built on top of the routines defined in the BLAS API,
+// and as such the computation time for many LAPACK functions is
+// dominated by BLAS calls. Here, BLAS is accessed through the
+// blas64 package (https://godoc.org/gonum.org/v1/gonum/blas/blas64). In particular,
+// this implies that an external BLAS library will be used if it is
+// registered in blas64.
+//
+// The full LAPACK capability has not been implemented at present. The full
+// API is very large, containing approximately 200 functions for double precision
+// alone. Future additions will be focused on supporting the gonum matrix
+// package (https://godoc.org/gonum.org/v1/gonum/mat), though pull requests
+// with implementations and tests for LAPACK functions are encouraged.
+package gonum // import "gonum.org/v1/gonum/lapack/gonum"
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dorg2l.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorg2l.go
@ -0,0 +1,76 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dorg2l generates an m×n matrix Q with orthonormal columns which is defined
+// as the last n columns of a product of k elementary reflectors of order m.
+//  Q = H_{k-1} * ... * H_1 * H_0
+// See Dgeqlf for more information. It must be that m >= n >= k.
+//
+// tau contains the scalar reflectors computed by Dgeqlf. tau must have length
+// at least k, and Dorg2l will panic otherwise.
+//
+// work contains temporary memory, and must have length at least n. Dorg2l will
+// panic otherwise.
+//
+// Dorg2l is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorg2l(m, n, k int, a []float64, lda int, tau, work []float64) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case n > m:
+		panic(nGTM)
+	case k < 0:
+		panic(kLT0)
+	case k > n:
+		panic(kGTN)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	if n == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	case len(work) < n:
+		panic(shortWork)
+	}
+
+	// Initialize columns 0:n-k to columns of the unit matrix.
+	for j := 0; j < n-k; j++ {
+		for l := 0; l < m; l++ {
+			a[l*lda+j] = 0
+		}
+		a[(m-n+j)*lda+j] = 1
+	}
+
+	bi := blas64.Implementation()
+	for i := 0; i < k; i++ {
+		ii := n - k + i
+
+		// Apply H_i to A[0:m-k+i+1, 0:n-k+i] from the left.
+		a[(m-n+ii)*lda+ii] = 1
+		impl.Dlarf(blas.Left, m-n+ii+1, ii, a[ii:], lda, tau[i], a, lda, work)
+		bi.Dscal(m-n+ii, -tau[i], a[ii:], lda)
+		a[(m-n+ii)*lda+ii] = 1 - tau[i]
+
+		// Set A[m-k+i+1:m, n-k+i] to zero.
+		for l := m - n + ii + 1; l < m; l++ {
+			a[l*lda+ii] = 0
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dorg2r.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorg2r.go
@ -0,0 +1,75 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dorg2r generates an m×n matrix Q with orthonormal columns defined by the
+// product of elementary reflectors as computed by Dgeqrf.
+//  Q = H_0 * H_1 * ... * H_{k-1}
+// len(tau) >= k, 0 <= k <= n, 0 <= n <= m, len(work) >= n.
+// Dorg2r will panic if these conditions are not met.
+//
+// Dorg2r is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorg2r(m, n, k int, a []float64, lda int, tau []float64, work []float64) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case n > m:
+		panic(nGTM)
+	case k < 0:
+		panic(kLT0)
+	case k > n:
+		panic(kGTN)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	if n == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	case len(work) < n:
+		panic(shortWork)
+	}
+
+	bi := blas64.Implementation()
+
+	// Initialize columns k:n to columns of the unit matrix.
+	for l := 0; l < m; l++ {
+		for j := k; j < n; j++ {
+			a[l*lda+j] = 0
+		}
+	}
+	for j := k; j < n; j++ {
+		a[j*lda+j] = 1
+	}
+	for i := k - 1; i >= 0; i-- {
+		for i := range work {
+			work[i] = 0
+		}
+		if i < n-1 {
+			a[i*lda+i] = 1
+			impl.Dlarf(blas.Left, m-i, n-i-1, a[i*lda+i:], lda, tau[i], a[i*lda+i+1:], lda, work)
+		}
+		if i < m-1 {
+			bi.Dscal(m-i-1, -tau[i], a[(i+1)*lda+i:], lda)
+		}
+		a[i*lda+i] = 1 - tau[i]
+		for l := 0; l < i; l++ {
+			a[l*lda+i] = 0
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dorgbr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgbr.go
@ -0,0 +1,138 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/lapack"
+
+// Dorgbr generates one of the matrices Q or P^T determined by the decomposition
+// computed by Dgebrd. See Dgebd2 for the description of Q and P^T.
+// If lwork == -1, only the optimal work size is computed and stored in work[0].
+//
+// If vect == lapack.GenerateQ, then a is assumed to have been an m×k matrix and
+// Q is of order m. If m >= k, then Dorgbr returns the first n columns of Q
+// where m >= n >= k. If m < k, then Dorgbr returns Q as an m×m matrix.
+//
+// If vect == lapack.GeneratePT, then A is assumed to have been a k×n matrix, and
+// P^T is of order n. If k < n, then Dorgbr returns the first m rows of P^T,
+// where n >= m >= k. If k >= n, then Dorgbr returns P^T as an n×n matrix.
+//
+// Dorgbr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorgbr(vect lapack.GenOrtho, m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
+	wantq := vect == lapack.GenerateQ
+	mn := min(m, n)
+	switch {
+	case vect != lapack.GenerateQ && vect != lapack.GeneratePT:
+		panic(badGenOrtho)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case wantq && n > m:
+		panic(nGTM)
+	case wantq && n < min(m, k):
+		panic("lapack: n < min(m,k)")
+	case !wantq && m > n:
+		panic(mGTN)
+	case !wantq && m < min(n, k):
+		panic("lapack: m < min(n,k)")
+	case lda < max(1, n) && lwork != -1:
+		// Normally, we follow the reference and require the leading
+		// dimension to be always valid, even in case of workspace
+		// queries. However, if a caller provided a placeholder value
+		// for lda (and a) when doing a workspace query that didn't
+		// fulfill the condition here, it would cause a panic. This is
+		// exactly what Dgesvd does.
+		panic(badLdA)
+	case lwork < max(1, mn) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	work[0] = 1
+	if m == 0 || n == 0 {
+		return
+	}
+
+	if wantq {
+		if m >= k {
+			impl.Dorgqr(m, n, k, a, lda, tau, work, -1)
+		} else if m > 1 {
+			impl.Dorgqr(m-1, m-1, m-1, a[lda+1:], lda, tau, work, -1)
+		}
+	} else {
+		if k < n {
+			impl.Dorglq(m, n, k, a, lda, tau, work, -1)
+		} else if n > 1 {
+			impl.Dorglq(n-1, n-1, n-1, a[lda+1:], lda, tau, work, -1)
+		}
+	}
+	lworkopt := int(work[0])
+	lworkopt = max(lworkopt, mn)
+	if lwork == -1 {
+		work[0] = float64(lworkopt)
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case wantq && len(tau) < min(m, k):
+		panic(shortTau)
+	case !wantq && len(tau) < min(n, k):
+		panic(shortTau)
+	}
+
+	if wantq {
+		// Form Q, determined by a call to Dgebrd to reduce an m×k matrix.
+		if m >= k {
+			impl.Dorgqr(m, n, k, a, lda, tau, work, lwork)
+		} else {
+			// Shift the vectors which define the elementary reflectors one
+			// column to the right, and set the first row and column of Q to
+			// those of the unit matrix.
+			for j := m - 1; j >= 1; j-- {
+				a[j] = 0
+				for i := j + 1; i < m; i++ {
+					a[i*lda+j] = a[i*lda+j-1]
+				}
+			}
+			a[0] = 1
+			for i := 1; i < m; i++ {
+				a[i*lda] = 0
+			}
+			if m > 1 {
+				// Form Q[1:m, 1:m].
+				impl.Dorgqr(m-1, m-1, m-1, a[lda+1:], lda, tau, work, lwork)
+			}
+		}
+	} else {
+		// Form P^T, determined by a call to Dgebrd to reduce a k×n matrix.
+		if k < n {
+			impl.Dorglq(m, n, k, a, lda, tau, work, lwork)
+		} else {
+			// Shift the vectors which define the elementary reflectors one
+			// row downward, and set the first row and column of P^T to
+			// those of the unit matrix.
+			a[0] = 1
+			for i := 1; i < n; i++ {
+				a[i*lda] = 0
+			}
+			for j := 1; j < n; j++ {
+				for i := j - 1; i >= 1; i-- {
+					a[i*lda+j] = a[(i-1)*lda+j]
+				}
+				a[j] = 0
+			}
+			if n > 1 {
+				impl.Dorglq(n-1, n-1, n-1, a[lda+1:], lda, tau, work, lwork)
+			}
+		}
+	}
+	work[0] = float64(lworkopt)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dorghr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorghr.go
@ -0,0 +1,101 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+// Dorghr generates an n×n orthogonal matrix Q which is defined as the product
+// of ihi-ilo elementary reflectors:
+//  Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
+//
+// a and lda represent an n×n matrix that contains the elementary reflectors, as
+// returned by Dgehrd. On return, a is overwritten by the n×n orthogonal matrix
+// Q. Q will be equal to the identity matrix except in the submatrix
+// Q[ilo+1:ihi+1,ilo+1:ihi+1].
+//
+// ilo and ihi must have the same values as in the previous call of Dgehrd. It
+// must hold that
+//  0 <= ilo <= ihi < n,  if n > 0,
+//  ilo = 0, ihi = -1,    if n == 0.
+//
+// tau contains the scalar factors of the elementary reflectors, as returned by
+// Dgehrd. tau must have length at least n-1.
+//
+// work must have length at least max(1,lwork) and lwork must be at least
+// max(1,ihi-ilo). For optimum performance lwork must be at least (ihi-ilo)*nb
+// where nb is the optimal blocksize. On return, work[0] will contain the optimal
+// value of lwork.
+//
+// If lwork == -1, instead of performing Dorghr, only the optimal value of lwork
+// will be stored into work[0].
+//
+// If any requirement on input sizes is not met, Dorghr will panic.
+//
+// Dorghr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorghr(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) {
+	nh := ihi - ilo
+	switch {
+	case ilo < 0 || max(1, n) <= ilo:
+		panic(badIlo)
+	case ihi < min(ilo, n-1) || n <= ihi:
+		panic(badIhi)
+	case lda < max(1, n):
+		panic(badLdA)
+	case lwork < max(1, nh) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		work[0] = 1
+		return
+	}
+
+	lwkopt := max(1, nh) * impl.Ilaenv(1, "DORGQR", " ", nh, nh, nh, -1)
+	if lwork == -1 {
+		work[0] = float64(lwkopt)
+		return
+	}
+
+	switch {
+	case len(a) < (n-1)*lda+n:
+		panic(shortA)
+	case len(tau) < n-1:
+		panic(shortTau)
+	}
+
+	// Shift the vectors which define the elementary reflectors one column
+	// to the right.
+	for i := ilo + 2; i < ihi+1; i++ {
+		copy(a[i*lda+ilo+1:i*lda+i], a[i*lda+ilo:i*lda+i-1])
+	}
+	// Set the first ilo+1 and the last n-ihi-1 rows and columns to those of
+	// the identity matrix.
+	for i := 0; i < ilo+1; i++ {
+		for j := 0; j < n; j++ {
+			a[i*lda+j] = 0
+		}
+		a[i*lda+i] = 1
+	}
+	for i := ilo + 1; i < ihi+1; i++ {
+		for j := 0; j <= ilo; j++ {
+			a[i*lda+j] = 0
+		}
+		for j := i; j < n; j++ {
+			a[i*lda+j] = 0
+		}
+	}
+	for i := ihi + 1; i < n; i++ {
+		for j := 0; j < n; j++ {
+			a[i*lda+j] = 0
+		}
+		a[i*lda+i] = 1
+	}
+	if nh > 0 {
+		// Generate Q[ilo+1:ihi+1,ilo+1:ihi+1].
+		impl.Dorgqr(nh, nh, nh, a[(ilo+1)*lda+ilo+1:], lda, tau[ilo:ihi], work, lwork)
+	}
+	work[0] = float64(lwkopt)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dorgl2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgl2.go
@ -0,0 +1,71 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dorgl2 generates an m×n matrix Q with orthonormal rows, defined as the first
+// m rows of a product of k elementary reflectors as computed by Dgelqf.
+//  Q = H_0 * H_1 * ... * H_{k-1}
+// It must hold that 0 <= k <= m <= n, lda >= max(1,n) (a is stored row-major),
+// len(tau) >= k and len(work) >= m, and Dorgl2 will panic otherwise.
+//
+// Dorgl2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorgl2(m, n, k int, a []float64, lda int, tau, work []float64) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < m:
+		panic(nLTM)
+	case k < 0:
+		panic(kLT0)
+	case k > m:
+		panic(kGTM)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	if m == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	case len(work) < m:
+		panic(shortWork)
+	}
+
+	bi := blas64.Implementation()
+
+	if k < m {
+		for i := k; i < m; i++ {
+			for j := 0; j < n; j++ {
+				a[i*lda+j] = 0
+			}
+		}
+		for j := k; j < m; j++ {
+			a[j*lda+j] = 1
+		}
+	}
+	for i := k - 1; i >= 0; i-- {
+		if i < n-1 {
+			if i < m-1 {
+				a[i*lda+i] = 1
+				impl.Dlarf(blas.Right, m-i-1, n-i, a[i*lda+i:], 1, tau[i], a[(i+1)*lda+i:], lda, work)
+			}
+			bi.Dscal(n-i-1, -tau[i], a[i*lda+i+1:], 1)
+		}
+		a[i*lda+i] = 1 - tau[i]
+		for l := 0; l < i; l++ {
+			a[i*lda+l] = 0
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dorglq.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorglq.go
@ -0,0 +1,123 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dorglq generates an m×n matrix Q with orthonormal rows defined by the
+// product of elementary reflectors as computed by Dgelqf.
+//  Q = H_0 * H_1 * ... * H_{k-1}
+// Dorglq is the blocked version of Dorgl2 that makes greater use of level-3 BLAS
+// routines.
+//
+// len(tau) >= k, 0 <= k <= m, and 0 <= m <= n.
+//
+// work is temporary storage, and lwork specifies the usable memory length. At minimum,
+// lwork >= m, and the amount of blocking is limited by the usable length.
+// If lwork == -1, instead of computing Dorglq the optimal work length is stored
+// into work[0].
+//
+// Dorglq will panic if the conditions on input values are not met.
+//
+// Dorglq is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < m:
+		panic(nLTM)
+	case k < 0:
+		panic(kLT0)
+	case k > m:
+		panic(kGTM)
+	case lda < max(1, n):
+		panic(badLdA)
+	case lwork < max(1, m) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	if m == 0 {
+		work[0] = 1
+		return
+	}
+
+	nb := impl.Ilaenv(1, "DORGLQ", " ", m, n, k, -1)
+	if lwork == -1 {
+		work[0] = float64(m * nb)
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	}
+
+	nbmin := 2 // Minimum block size
+	var nx int // Crossover size from blocked to unblocked code
+	iws := m   // Length of work needed
+	var ldwork int
+	if 1 < nb && nb < k {
+		nx = max(0, impl.Ilaenv(3, "DORGLQ", " ", m, n, k, -1))
+		if nx < k {
+			ldwork = nb
+			iws = m * ldwork
+			if lwork < iws {
+				nb = lwork / m
+				ldwork = nb
+				nbmin = max(2, impl.Ilaenv(2, "DORGLQ", " ", m, n, k, -1))
+			}
+		}
+	}
+
+	var ki, kk int
+	if nbmin <= nb && nb < k && nx < k {
+		// The first kk rows are handled by the blocked method.
+		ki = ((k - nx - 1) / nb) * nb
+		kk = min(k, ki+nb)
+		for i := kk; i < m; i++ {
+			for j := 0; j < kk; j++ {
+				a[i*lda+j] = 0
+			}
+		}
+	}
+	if kk < m {
+		// Use unblocked code for rows and columns kk to the end.
+		impl.Dorgl2(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work)
+	}
+	if kk > 0 {
+		// Perform the operation on row-blocks, from the last block to the first.
+		for i := ki; i >= 0; i -= nb {
+			ib := min(nb, k-i)
+			if i+ib < m {
+				impl.Dlarft(lapack.Forward, lapack.RowWise,
+					n-i, ib,
+					a[i*lda+i:], lda,
+					tau[i:],
+					work, ldwork)
+
+				impl.Dlarfb(blas.Right, blas.Trans, lapack.Forward, lapack.RowWise,
+					m-i-ib, n-i, ib,
+					a[i*lda+i:], lda,
+					work, ldwork,
+					a[(i+ib)*lda+i:], lda,
+					work[ib*ldwork:], ldwork)
+			}
+			impl.Dorgl2(ib, n-i, ib, a[i*lda+i:], lda, tau[i:], work)
+			for l := i; l < i+ib; l++ {
+				for j := 0; j < i; j++ {
+					a[l*lda+j] = 0
+				}
+			}
+		}
+	}
+	work[0] = float64(iws)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dorgql.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgql.go
@ -0,0 +1,136 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dorgql generates the m×n matrix Q with orthonormal columns defined as the
+// last n columns of a product of k elementary reflectors of order m
+//  Q = H_{k-1} * ... * H_1 * H_0.
+//
+// It must hold that
+//  0 <= k <= n <= m,
+// and Dorgql will panic otherwise.
+//
+// On entry, the (n-k+i)-th column of A must contain the vector which defines
+// the elementary reflector H_i, for i=0,...,k-1, and tau[i] must contain its
+// scalar factor. On return, a contains the m×n matrix Q.
+//
+// tau must have length at least k, and Dorgql will panic otherwise.
+//
+// work must have length at least max(1,lwork), and lwork must be at least
+// max(1,n), otherwise Dorgql will panic. For optimum performance lwork must
+// be a sufficiently large multiple of n.
+//
+// If lwork == -1, instead of computing Dorgql the optimal work length is stored
+// into work[0].
+//
+// Dorgql is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorgql(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case n > m:
+		panic(nGTM)
+	case k < 0:
+		panic(kLT0)
+	case k > n:
+		panic(kGTN)
+	case lda < max(1, n):
+		panic(badLdA)
+	case lwork < max(1, n) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if Q has no columns.
+	if n == 0 {
+		work[0] = 1
+		return
+	}
+
+	nb := impl.Ilaenv(1, "DORGQL", " ", m, n, k, -1)
+	if lwork == -1 {
+		work[0] = float64(n * nb)
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	}
+
+	nbmin := 2
+	var nx, ldwork int
+	iws := n
+	if 1 < nb && nb < k {
+		// Determine when to cross over from blocked to unblocked code.
+		nx = max(0, impl.Ilaenv(3, "DORGQL", " ", m, n, k, -1))
+		if nx < k {
+			// Determine if workspace is large enough for blocked code.
+			iws = n * nb
+			if lwork < iws {
+				// Not enough workspace to use optimal nb: reduce nb and determine
+				// the minimum value of nb.
+				nb = lwork / n
+				nbmin = max(2, impl.Ilaenv(2, "DORGQL", " ", m, n, k, -1))
+			}
+			ldwork = nb
+		}
+	}
+
+	var kk int
+	if nbmin <= nb && nb < k && nx < k {
+		// Use blocked code after the first block. The last kk columns are handled
+		// by the blocked method.
+		kk = min(k, ((k-nx+nb-1)/nb)*nb)
+
+		// Set A[m-kk:m, 0:n-kk] to zero.
+		for i := m - kk; i < m; i++ {
+			for j := 0; j < n-kk; j++ {
+				a[i*lda+j] = 0
+			}
+		}
+	}
+
+	// Use unblocked code for the first or only block.
+	impl.Dorg2l(m-kk, n-kk, k-kk, a, lda, tau, work)
+	if kk > 0 {
+		// Use blocked code.
+		for i := k - kk; i < k; i += nb {
+			ib := min(nb, k-i)
+			if n-k+i > 0 {
+				// Form the triangular factor of the block reflector
+				// H = H_{i+ib-1} * ... * H_{i+1} * H_i.
+				impl.Dlarft(lapack.Backward, lapack.ColumnWise, m-k+i+ib, ib,
+					a[n-k+i:], lda, tau[i:], work, ldwork)
+
+				// Apply H to A[0:m-k+i+ib, 0:n-k+i] from the left.
+				impl.Dlarfb(blas.Left, blas.NoTrans, lapack.Backward, lapack.ColumnWise,
+					m-k+i+ib, n-k+i, ib, a[n-k+i:], lda, work, ldwork,
+					a, lda, work[ib*ldwork:], ldwork)
+			}
+
+			// Apply H to rows 0:m-k+i+ib of current block.
+			impl.Dorg2l(m-k+i+ib, ib, ib, a[n-k+i:], lda, tau[i:], work)
+
+			// Set rows m-k+i+ib:m of current block to zero.
+			for j := n - k + i; j < n-k+i+ib; j++ {
+				for l := m - k + i + ib; l < m; l++ {
+					a[l*lda+j] = 0
+				}
+			}
+		}
+	}
+	work[0] = float64(iws)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dorgqr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgqr.go
@ -0,0 +1,134 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dorgqr generates an m×n matrix Q with orthonormal columns defined by the
+// product of elementary reflectors
+//  Q = H_0 * H_1 * ... * H_{k-1}
+// as computed by Dgeqrf.
+// Dorgqr is the blocked version of Dorg2r that makes greater use of level-3 BLAS
+// routines.
+//
+// The length of tau must be at least k, and the length of work must be at least n.
+// It also must be that 0 <= k <= n and 0 <= n <= m.
+//
+// work is temporary storage, and lwork specifies the usable memory length. At
+// minimum, lwork >= n, and the amount of blocking is limited by the usable
+// length. If lwork == -1, instead of computing Dorgqr the optimal work length
+// is stored into work[0].
+//
+// Dorgqr will panic if the conditions on input values are not met.
+//
+// Dorgqr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
+	switch {
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case n > m:
+		panic(nGTM)
+	case k < 0:
+		panic(kLT0)
+	case k > n:
+		panic(kGTN)
+	case lda < max(1, n) && lwork != -1:
+		// Normally, we follow the reference and require the leading
+		// dimension to be always valid, even in case of workspace
+		// queries. However, if a caller provided a placeholder value
+		// for lda (and a) when doing a workspace query that didn't
+		// fulfill the condition here, it would cause a panic. This is
+		// exactly what Dgesvd does.
+		panic(badLdA)
+	case lwork < max(1, n) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	if n == 0 {
+		work[0] = 1
+		return
+	}
+
+	nb := impl.Ilaenv(1, "DORGQR", " ", m, n, k, -1)
+	// work is treated as an n×nb matrix
+	if lwork == -1 {
+		work[0] = float64(n * nb)
+		return
+	}
+
+	switch {
+	case len(a) < (m-1)*lda+n:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	}
+
+	nbmin := 2 // Minimum block size
+	var nx int // Crossover size from blocked to unblocked code
+	iws := n   // Length of work needed
+	var ldwork int
+	if 1 < nb && nb < k {
+		nx = max(0, impl.Ilaenv(3, "DORGQR", " ", m, n, k, -1))
+		if nx < k {
+			ldwork = nb
+			iws = n * ldwork
+			if lwork < iws {
+				nb = lwork / n
+				ldwork = nb
+				nbmin = max(2, impl.Ilaenv(2, "DORGQR", " ", m, n, k, -1))
+			}
+		}
+	}
+	var ki, kk int
+	if nbmin <= nb && nb < k && nx < k {
+		// The first kk columns are handled by the blocked method.
+		ki = ((k - nx - 1) / nb) * nb
+		kk = min(k, ki+nb)
+		for i := 0; i < kk; i++ {
+			for j := kk; j < n; j++ {
+				a[i*lda+j] = 0
+			}
+		}
+	}
+	if kk < n {
+		// Perform the operation on columns kk to the end.
+		impl.Dorg2r(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work)
+	}
+	if kk > 0 {
+		// Perform the operation on column-blocks.
+		for i := ki; i >= 0; i -= nb {
+			ib := min(nb, k-i)
+			if i+ib < n {
+				impl.Dlarft(lapack.Forward, lapack.ColumnWise,
+					m-i, ib,
+					a[i*lda+i:], lda,
+					tau[i:],
+					work, ldwork)
+
+				impl.Dlarfb(blas.Left, blas.NoTrans, lapack.Forward, lapack.ColumnWise,
+					m-i, n-i-ib, ib,
+					a[i*lda+i:], lda,
+					work, ldwork,
+					a[i*lda+i+ib:], lda,
+					work[ib*ldwork:], ldwork)
+			}
+			impl.Dorg2r(m-i, ib, ib, a[i*lda+i:], lda, tau[i:], work)
+			// Set rows 0:i of current block to zero.
+			for j := i; j < i+ib; j++ {
+				for l := 0; l < i; l++ {
+					a[l*lda+j] = 0
+				}
+			}
+		}
+	}
+	work[0] = float64(iws)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dorgtr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorgtr.go
@ -0,0 +1,104 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dorgtr generates a real orthogonal matrix Q which is defined as the product
+// of n-1 elementary reflectors of order n as returned by Dsytrd.
+//
+// The construction of Q depends on the value of uplo:
+//  Q = H_{n-2} * ... * H_1 * H_0  if uplo == blas.Upper
+//  Q = H_0 * H_1 * ... * H_{n-2}  if uplo == blas.Lower
+// where H_i is constructed from the elementary reflectors as computed by Dsytrd.
+// See the documentation for Dsytrd for more information.
+//
+// tau must have length at least n-1, and Dorgtr will panic otherwise.
+//
+// work is temporary storage, and lwork specifies the usable memory length. At
+// minimum, lwork >= max(1,n-1), and Dorgtr will panic otherwise. The amount of blocking
+// is limited by the usable length.
+// If lwork == -1, instead of computing Dorgtr the optimal work length is stored
+// into work[0].
+//
+// Dorgtr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorgtr(uplo blas.Uplo, n int, a []float64, lda int, tau, work []float64, lwork int) {
+	switch {
+	case uplo != blas.Upper && uplo != blas.Lower:
+		panic(badUplo)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case lwork < max(1, n-1) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	if n == 0 {
+		work[0] = 1
+		return
+	}
+
+	var nb int
+	if uplo == blas.Upper {
+		nb = impl.Ilaenv(1, "DORGQL", " ", n-1, n-1, n-1, -1)
+	} else {
+		nb = impl.Ilaenv(1, "DORGQR", " ", n-1, n-1, n-1, -1)
+	}
+	lworkopt := max(1, n-1) * nb
+	if lwork == -1 {
+		work[0] = float64(lworkopt)
+		return
+	}
+
+	switch {
+	case len(a) < (n-1)*lda+n:
+		panic(shortA)
+	case len(tau) < n-1:
+		panic(shortTau)
+	}
+
+	if uplo == blas.Upper {
+		// Q was determined by a call to Dsytrd with uplo == blas.Upper.
+		// Shift the vectors which define the elementary reflectors one column
+		// to the left, and set the last row and column of Q to those of the unit
+		// matrix.
+		for j := 0; j < n-1; j++ {
+			for i := 0; i < j; i++ {
+				a[i*lda+j] = a[i*lda+j+1]
+			}
+			a[(n-1)*lda+j] = 0
+		}
+		for i := 0; i < n-1; i++ {
+			a[i*lda+n-1] = 0
+		}
+		a[(n-1)*lda+n-1] = 1
+
+		// Generate Q[0:n-1, 0:n-1].
+		impl.Dorgql(n-1, n-1, n-1, a, lda, tau, work, lwork)
+	} else {
+		// Q was determined by a call to Dsytrd with uplo == blas.Lower.
+		// Shift the vectors which define the elementary reflectors one column
+		// to the right, and set the first row and column of Q to those of the unit
+		// matrix.
+		for j := n - 1; j > 0; j-- {
+			a[j] = 0
+			for i := j + 1; i < n; i++ {
+				a[i*lda+j] = a[i*lda+j-1]
+			}
+		}
+		a[0] = 1
+		for i := 1; i < n; i++ {
+			a[i*lda] = 0
+		}
+		if n > 1 {
+			// Generate Q[1:n, 1:n].
+			impl.Dorgqr(n-1, n-1, n-1, a[lda+1:], lda, tau, work, lwork)
+		}
+	}
+	work[0] = float64(lworkopt)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dorm2r.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorm2r.go
@ -0,0 +1,101 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dorm2r multiplies a general matrix C by an orthogonal matrix from a QR factorization
+// determined by Dgeqrf.
+//  C = Q * C    if side == blas.Left and trans == blas.NoTrans
+//  C = Q^T * C  if side == blas.Left and trans == blas.Trans
+//  C = C * Q    if side == blas.Right and trans == blas.NoTrans
+//  C = C * Q^T  if side == blas.Right and trans == blas.Trans
+// If side == blas.Left, a is a matrix of size m×k with k <= m, and if
+// side == blas.Right a is of size n×k with k <= n.
+//
+// tau contains the Householder factors and is of length at least k and this function
+// will panic otherwise.
+//
+// work is temporary storage of length at least n if side == blas.Left
+// and at least m if side == blas.Right and this function will panic otherwise.
+//
+// Dorm2r is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorm2r(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) {
+	left := side == blas.Left
+	switch {
+	case !left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.Trans && trans != blas.NoTrans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case left && k > m:
+		panic(kGTM)
+	case !left && k > n:
+		panic(kGTN)
+	case lda < max(1, k):
+		panic(badLdA)
+	case ldc < max(1, n):
+		panic(badLdC)
+	}
+
+	// Quick return if C is empty or there are no reflectors to apply.
+	if m == 0 || n == 0 || k == 0 {
+		return
+	}
+
+	switch {
+	case left && len(a) < (m-1)*lda+k:
+		panic(shortA)
+	case !left && len(a) < (n-1)*lda+k:
+		panic(shortA)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	case len(tau) < k:
+		panic(shortTau)
+	case left && len(work) < n:
+		panic(shortWork)
+	case !left && len(work) < m:
+		panic(shortWork)
+	}
+
+	if left {
+		if trans == blas.NoTrans {
+			for i := k - 1; i >= 0; i-- {
+				aii := a[i*lda+i]
+				a[i*lda+i] = 1
+				impl.Dlarf(side, m-i, n, a[i*lda+i:], lda, tau[i], c[i*ldc:], ldc, work)
+				a[i*lda+i] = aii
+			}
+			return
+		}
+		for i := 0; i < k; i++ {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m-i, n, a[i*lda+i:], lda, tau[i], c[i*ldc:], ldc, work)
+			a[i*lda+i] = aii
+		}
+		return
+	}
+	if trans == blas.NoTrans {
+		for i := 0; i < k; i++ {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m, n-i, a[i*lda+i:], lda, tau[i], c[i:], ldc, work)
+			a[i*lda+i] = aii
+		}
+		return
+	}
+	for i := k - 1; i >= 0; i-- {
+		aii := a[i*lda+i]
+		a[i*lda+i] = 1
+		impl.Dlarf(side, m, n-i, a[i*lda+i:], lda, tau[i], c[i:], ldc, work)
+		a[i*lda+i] = aii
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dormbr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dormbr.go
@ -0,0 +1,178 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dormbr applies a multiplicative update to the matrix C based on a
+// decomposition computed by Dgebrd.
+//
+// Dormbr overwrites the m×n matrix C with
+//  Q * C   if vect == lapack.ApplyQ, side == blas.Left, and trans == blas.NoTrans
+//  C * Q   if vect == lapack.ApplyQ, side == blas.Right, and trans == blas.NoTrans
+//  Q^T * C if vect == lapack.ApplyQ, side == blas.Left, and trans == blas.Trans
+//  C * Q^T if vect == lapack.ApplyQ, side == blas.Right, and trans == blas.Trans
+//
+//  P * C   if vect == lapack.ApplyP, side == blas.Left, and trans == blas.NoTrans
+//  C * P   if vect == lapack.ApplyP, side == blas.Right, and trans == blas.NoTrans
+//  P^T * C if vect == lapack.ApplyP, side == blas.Left, and trans == blas.Trans
+//  C * P^T if vect == lapack.ApplyP, side == blas.Right, and trans == blas.Trans
+// where P and Q are the orthogonal matrices determined by Dgebrd when reducing
+// a matrix A to bidiagonal form: A = Q * B * P^T. See Dgebrd for the
+// definitions of Q and P.
+//
+// If vect == lapack.ApplyQ, A is assumed to have been an nq×k matrix, while if
+// vect == lapack.ApplyP, A is assumed to have been a k×nq matrix. nq = m if
+// side == blas.Left, while nq = n if side == blas.Right.
+//
+// tau must have length at least min(nq,k), and Dormbr will panic otherwise.
+// tau contains the scalar factors of the elementary reflectors that define Q
+// or P depending on the value of vect.
+//
+// work must have length at least max(1,lwork), and lwork must be either -1 or
+// at least max(1,n) if side == blas.Left, and at least max(1,m) if side ==
+// blas.Right. For optimum performance lwork should be at least n*nb if side ==
+// blas.Left, and at least m*nb if side == blas.Right, where nb is the optimal
+// block size. On return, work[0] will contain the optimal value of lwork.
+//
+// If lwork == -1, the function only calculates the optimal value of lwork and
+// returns it in work[0].
+//
+// Dormbr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dormbr(vect lapack.ApplyOrtho, side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
+	nq := n
+	nw := m
+	if side == blas.Left {
+		nq = m
+		nw = n
+	}
+	applyQ := vect == lapack.ApplyQ
+	switch {
+	case !applyQ && vect != lapack.ApplyP:
+		panic(badApplyOrtho)
+	case side != blas.Left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.NoTrans && trans != blas.Trans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case applyQ && lda < max(1, min(nq, k)):
+		panic(badLdA)
+	case !applyQ && lda < max(1, nq):
+		panic(badLdA)
+	case ldc < max(1, n):
+		panic(badLdC)
+	case lwork < max(1, nw) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if C is empty.
+	if m == 0 || n == 0 {
+		work[0] = 1
+		return
+	}
+
+	// The current implementation does not use opts, but a future change may
+	// use these options so construct them.
+	var opts string
+	if side == blas.Left {
+		opts = "L"
+	} else {
+		opts = "R"
+	}
+	if trans == blas.Trans {
+		opts += "T"
+	} else {
+		opts += "N"
+	}
+	var nb int
+	if applyQ {
+		if side == blas.Left {
+			nb = impl.Ilaenv(1, "DORMQR", opts, m-1, n, m-1, -1)
+		} else {
+			nb = impl.Ilaenv(1, "DORMQR", opts, m, n-1, n-1, -1)
+		}
+	} else {
+		if side == blas.Left {
+			nb = impl.Ilaenv(1, "DORMLQ", opts, m-1, n, m-1, -1)
+		} else {
+			nb = impl.Ilaenv(1, "DORMLQ", opts, m, n-1, n-1, -1)
+		}
+	}
+	lworkopt := max(1, nw) * nb
+	if lwork == -1 {
+		work[0] = float64(lworkopt)
+		return
+	}
+
+	minnqk := min(nq, k)
+	switch {
+	case applyQ && len(a) < (nq-1)*lda+minnqk:
+		panic(shortA)
+	case !applyQ && len(a) < (minnqk-1)*lda+nq:
+		panic(shortA)
+	case len(tau) < minnqk:
+		panic(shortTau)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	}
+
+	if applyQ {
+		// Change the operation to get Q depending on the size of the initial
+		// matrix to Dgebrd. The size matters due to the storage location of
+		// the off-diagonal elements.
+		if nq >= k {
+			impl.Dormqr(side, trans, m, n, k, a, lda, tau[:k], c, ldc, work, lwork)
+		} else if nq > 1 {
+			mi := m
+			ni := n - 1
+			i1 := 0
+			i2 := 1
+			if side == blas.Left {
+				mi = m - 1
+				ni = n
+				i1 = 1
+				i2 = 0
+			}
+			impl.Dormqr(side, trans, mi, ni, nq-1, a[1*lda:], lda, tau[:nq-1], c[i1*ldc+i2:], ldc, work, lwork)
+		}
+		work[0] = float64(lworkopt)
+		return
+	}
+
+	transt := blas.Trans
+	if trans == blas.Trans {
+		transt = blas.NoTrans
+	}
+
+	// Change the operation to get P depending on the size of the initial
+	// matrix to Dgebrd. The size matters due to the storage location of
+	// the off-diagonal elements.
+	if nq > k {
+		impl.Dormlq(side, transt, m, n, k, a, lda, tau, c, ldc, work, lwork)
+	} else if nq > 1 {
+		mi := m
+		ni := n - 1
+		i1 := 0
+		i2 := 1
+		if side == blas.Left {
+			mi = m - 1
+			ni = n
+			i1 = 1
+			i2 = 0
+		}
+		impl.Dormlq(side, transt, mi, ni, nq-1, a[1:], lda, tau, c[i1*ldc+i2:], ldc, work, lwork)
+	}
+	work[0] = float64(lworkopt)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dormhr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dormhr.go
@ -0,0 +1,129 @@
+// Copyright ©2016 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dormhr multiplies an m×n general matrix C with an nq×nq orthogonal matrix Q
+//  Q * C,    if side == blas.Left and trans == blas.NoTrans,
+//  Q^T * C,  if side == blas.Left and trans == blas.Trans,
+//  C * Q,    if side == blas.Right and trans == blas.NoTrans,
+//  C * Q^T,  if side == blas.Right and trans == blas.Trans,
+// where nq == m if side == blas.Left and nq == n if side == blas.Right.
+//
+// Q is defined implicitly as the product of ihi-ilo elementary reflectors, as
+// returned by Dgehrd:
+//  Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
+// Q is equal to the identity matrix except in the submatrix
+// Q[ilo+1:ihi+1,ilo+1:ihi+1].
+//
+// ilo and ihi must have the same values as in the previous call of Dgehrd. It
+// must hold that
+//  0 <= ilo <= ihi < m,   if m > 0 and side == blas.Left,
+//  ilo = 0 and ihi = -1,  if m = 0 and side == blas.Left,
+//  0 <= ilo <= ihi < n,   if n > 0 and side == blas.Right,
+//  ilo = 0 and ihi = -1,  if n = 0 and side == blas.Right.
+//
+// a and lda represent an m×m matrix if side == blas.Left and an n×n matrix if
+// side == blas.Right. The matrix contains vectors which define the elementary
+// reflectors, as returned by Dgehrd.
+//
+// tau contains the scalar factors of the elementary reflectors, as returned by
+// Dgehrd. tau must have length exactly m-1 if side == blas.Left and n-1 if
+// side == blas.Right.
+//
+// c and ldc represent the m×n matrix C. On return, c is overwritten by the
+// product with Q.
+//
+// work must have length at least max(1,lwork), and lwork must be at least
+// max(1,n), if side == blas.Left, and max(1,m), if side == blas.Right. For
+// optimum performance lwork should be at least n*nb if side == blas.Left and
+// m*nb if side == blas.Right, where nb is the optimal block size. On return,
+// work[0] will contain the optimal value of lwork.
+//
+// If lwork == -1, instead of performing Dormhr, only the optimal value of lwork
+// will be stored in work[0].
+//
+// If any requirement on input sizes is not met, Dormhr will panic.
+//
+// Dormhr is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dormhr(side blas.Side, trans blas.Transpose, m, n, ilo, ihi int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
+	nq := n // The order of Q.
+	nw := m // The minimum length of work.
+	if side == blas.Left {
+		nq = m
+		nw = n
+	}
+	switch {
+	case side != blas.Left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.NoTrans && trans != blas.Trans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case ilo < 0 || max(1, nq) <= ilo:
+		panic(badIlo)
+	case ihi < min(ilo, nq-1) || nq <= ihi:
+		panic(badIhi)
+	case lda < max(1, nq):
+		panic(badLdA)
+	case lwork < max(1, nw) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if C is empty.
+	if m == 0 || n == 0 {
+		work[0] = 1
+		return
+	}
+
+	nh := ihi - ilo
+	var nb int
+	if side == blas.Left {
+		opts := "LN"
+		if trans == blas.Trans {
+			opts = "LT"
+		}
+		nb = impl.Ilaenv(1, "DORMQR", opts, nh, n, nh, -1)
+	} else {
+		opts := "RN"
+		if trans == blas.Trans {
+			opts = "RT"
+		}
+		nb = impl.Ilaenv(1, "DORMQR", opts, m, nh, nh, -1)
+	}
+	lwkopt := max(1, nw) * nb
+	if lwork == -1 {
+		work[0] = float64(lwkopt)
+		return
+	}
+
+	if nh == 0 {
+		work[0] = 1
+		return
+	}
+
+	switch {
+	case len(a) < (nq-1)*lda+nq:
+		panic(shortA)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	case len(tau) != nq-1:
+		panic(badLenTau)
+	}
+
+	if side == blas.Left {
+		impl.Dormqr(side, trans, nh, n, nh, a[(ilo+1)*lda+ilo:], lda,
+			tau[ilo:ihi], c[(ilo+1)*ldc:], ldc, work, lwork)
+	} else {
+		impl.Dormqr(side, trans, m, nh, nh, a[(ilo+1)*lda+ilo:], lda,
+			tau[ilo:ihi], c[ilo+1:], ldc, work, lwork)
+	}
+	work[0] = float64(lwkopt)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dorml2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dorml2.go
@ -0,0 +1,102 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dorml2 multiplies a general matrix C by an orthogonal matrix from an LQ factorization
+// determined by Dgelqf.
+//  C = Q * C    if side == blas.Left and trans == blas.NoTrans
+//  C = Q^T * C  if side == blas.Left and trans == blas.Trans
+//  C = C * Q    if side == blas.Right and trans == blas.NoTrans
+//  C = C * Q^T  if side == blas.Right and trans == blas.Trans
+// If side == blas.Left, a is a matrix of size k×m, and if side == blas.Right
+// a is of size k×n.
+//
+// tau contains the Householder factors and is of length at least k and this function will
+// panic otherwise.
+//
+// work is temporary storage of length at least n if side == blas.Left
+// and at least m if side == blas.Right and this function will panic otherwise.
+//
+// Dorml2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dorml2(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) {
+	left := side == blas.Left
+	switch {
+	case !left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.Trans && trans != blas.NoTrans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case left && k > m:
+		panic(kGTM)
+	case !left && k > n:
+		panic(kGTN)
+	case left && lda < max(1, m):
+		panic(badLdA)
+	case !left && lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if C is empty or there are no reflectors to apply.
+	if m == 0 || n == 0 || k == 0 {
+		return
+	}
+
+	switch {
+	case left && len(a) < (k-1)*lda+m:
+		panic(shortA)
+	case !left && len(a) < (k-1)*lda+n:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	case left && len(work) < n:
+		panic(shortWork)
+	case !left && len(work) < m:
+		panic(shortWork)
+	}
+
+	notrans := trans == blas.NoTrans
+	switch {
+	case left && notrans:
+		for i := 0; i < k; i++ {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m-i, n, a[i*lda+i:], 1, tau[i], c[i*ldc:], ldc, work)
+			a[i*lda+i] = aii
+		}
+
+	case left && !notrans:
+		for i := k - 1; i >= 0; i-- {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m-i, n, a[i*lda+i:], 1, tau[i], c[i*ldc:], ldc, work)
+			a[i*lda+i] = aii
+		}
+
+	case !left && notrans:
+		for i := k - 1; i >= 0; i-- {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m, n-i, a[i*lda+i:], 1, tau[i], c[i:], ldc, work)
+			a[i*lda+i] = aii
+		}
+
+	case !left && !notrans:
+		for i := 0; i < k; i++ {
+			aii := a[i*lda+i]
+			a[i*lda+i] = 1
+			impl.Dlarf(side, m, n-i, a[i*lda+i:], 1, tau[i], c[i:], ldc, work)
+			a[i*lda+i] = aii
+		}
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dormlq.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dormlq.go
@ -0,0 +1,174 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dormlq multiplies the matrix C by the orthogonal matrix Q defined by the
+// slices a and tau. A and tau are as returned from Dgelqf.
+//  C = Q * C    if side == blas.Left and trans == blas.NoTrans
+//  C = Q^T * C  if side == blas.Left and trans == blas.Trans
+//  C = C * Q    if side == blas.Right and trans == blas.NoTrans
+//  C = C * Q^T  if side == blas.Right and trans == blas.Trans
+// If side == blas.Left, A is a matrix of size k×m, and if side == blas.Right
+// A is of size k×n. This uses a blocked algorithm.
+//
+// work is temporary storage, and lwork specifies the usable memory length.
+// work must have length at least max(1,lwork). At minimum, lwork >= max(1,n)
+// if side == blas.Left and lwork >= max(1,m) if side == blas.Right, and this
+// function will panic otherwise.
+// Dormlq uses a block algorithm, but the block size is limited
+// by the temporary space available. If lwork == -1, instead of performing Dormlq,
+// the optimal work length will be stored into work[0].
+// Otherwise, on return work[0] holds the optimal work length, or 1 if any of
+// m, n or k is zero.
+//
+// tau contains the Householder scales and must have length at least k, and
+// this function will panic otherwise.
+func (impl Implementation) Dormlq(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
+	left := side == blas.Left
+	// nq is the number of columns of a that are referenced and nw is the
+	// minimum workspace length.
+	nq, nw := n, m
+	if left {
+		nq, nw = m, n
+	}
+	switch {
+	case !left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.Trans && trans != blas.NoTrans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case left && k > m:
+		panic(kGTM)
+	case !left && k > n:
+		panic(kGTN)
+	case lda < max(1, nq):
+		panic(badLdA)
+	case lwork < max(1, nw) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	if m == 0 || n == 0 || k == 0 {
+		work[0] = 1
+		return
+	}
+
+	const (
+		nbmax = 64
+		ldt   = nbmax
+		tsize = nbmax * ldt
+	)
+	opts := string(side) + string(trans)
+	nb := min(nbmax, impl.Ilaenv(1, "DORMLQ", opts, m, n, k, -1))
+	lworkopt := max(1, nw)*nb + tsize
+	if lwork == -1 {
+		// Workspace query only.
+		work[0] = float64(lworkopt)
+		return
+	}
+
+	switch {
+	case len(a) < (k-1)*lda+nq:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	}
+
+	nbmin := 2
+	if 1 < nb && nb < k {
+		iws := nw*nb + tsize
+		if lwork < iws {
+			// Not enough workspace for the preferred block size: shrink it to
+			// what fits after reserving tsize elements for the T factor.
+			nb = (lwork - tsize) / nw
+			nbmin = max(2, impl.Ilaenv(2, "DORMLQ", opts, m, n, k, -1))
+		}
+	}
+	if nb < nbmin || k <= nb {
+		// Call unblocked code.
+		impl.Dorml2(side, trans, m, n, k, a, lda, tau, c, ldc, work)
+		work[0] = float64(lworkopt)
+		return
+	}
+
+	// The first tsize elements of work hold the triangular factor T, the
+	// remainder is scratch space for Dlarfb.
+	t := work[:tsize]
+	wrk := work[tsize:]
+	ldwrk := nb
+
+	notrans := trans == blas.NoTrans
+	// Dlarfb is called with the opposite of trans.
+	transt := blas.NoTrans
+	if notrans {
+		transt = blas.Trans
+	}
+
+	// Blocks start at 0, nb, 2*nb, ..., last. They are visited in increasing
+	// order for Left/NoTrans and Right/Trans and in decreasing order otherwise.
+	last := ((k - 1) / nb) * nb
+	i, step := 0, nb
+	if left != notrans {
+		i, step = last, -nb
+	}
+	for ; 0 <= i && i <= last; i += step {
+		ib := min(nb, k-i)
+
+		// The block acts on rows i..m-1 of C from the left, or on columns
+		// i..n-1 of C from the right.
+		var (
+			mi, ni int
+			ci     []float64
+		)
+		if left {
+			mi, ni, ci = m-i, n, c[i*ldc:]
+		} else {
+			mi, ni, ci = m, n-i, c[i:]
+		}
+
+		impl.Dlarft(lapack.Forward, lapack.RowWise, nq-i, ib,
+			a[i*lda+i:], lda,
+			tau[i:],
+			t, ldt)
+		impl.Dlarfb(side, transt, lapack.Forward, lapack.RowWise, mi, ni, ib,
+			a[i*lda+i:], lda,
+			t, ldt,
+			ci, ldc,
+			wrk, ldwrk)
+	}
+	work[0] = float64(lworkopt)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dormqr.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dormqr.go
@ -0,0 +1,177 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/lapack"
+)
+
+// Dormqr multiplies an m×n matrix C by an orthogonal matrix Q as
+//  C = Q * C,    if side == blas.Left  and trans == blas.NoTrans,
+//  C = Q^T * C,  if side == blas.Left  and trans == blas.Trans,
+//  C = C * Q,    if side == blas.Right and trans == blas.NoTrans,
+//  C = C * Q^T,  if side == blas.Right and trans == blas.Trans,
+// where Q is defined as the product of k elementary reflectors
+//  Q = H_0 * H_1 * ... * H_{k-1}.
+//
+// If side == blas.Left, A is an m×k matrix and 0 <= k <= m.
+// If side == blas.Right, A is an n×k matrix and 0 <= k <= n.
+// The ith column of A contains the vector which defines the elementary
+// reflector H_i and tau[i] contains its scalar factor. tau must have length k
+// and Dormqr will panic otherwise. Dgeqrf returns A and tau in the required
+// form.
+//
+// work is temporary storage, and lwork specifies the usable memory length.
+// work must have length at least max(1,lwork), and lwork must be at least
+// max(1,n) if side == blas.Left and at least max(1,m) if side == blas.Right,
+// otherwise Dormqr will panic. Larger values of lwork will generally give
+// better performance. On return, work[0] will contain the optimal value of
+// lwork, or 1 if any of m, n or k is zero.
+//
+// If lwork is -1, instead of performing Dormqr, the optimal workspace size will
+// be stored into work[0].
+func (impl Implementation) Dormqr(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
+	left := side == blas.Left
+	// nq is the number of rows of a that are referenced and nw is the minimum
+	// workspace length.
+	nq, nw := n, m
+	if left {
+		nq, nw = m, n
+	}
+	switch {
+	case !left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.NoTrans && trans != blas.Trans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case left && k > m:
+		panic(kGTM)
+	case !left && k > n:
+		panic(kGTN)
+	case lda < max(1, k):
+		panic(badLdA)
+	case ldc < max(1, n):
+		panic(badLdC)
+	case lwork < max(1, nw) && lwork != -1:
+		panic(badLWork)
+	case len(work) < max(1, lwork):
+		panic(shortWork)
+	}
+
+	// Quick return if possible.
+	if m == 0 || n == 0 || k == 0 {
+		work[0] = 1
+		return
+	}
+
+	const (
+		nbmax = 64
+		ldt   = nbmax
+		tsize = nbmax * ldt
+	)
+	opts := string(side) + string(trans)
+	nb := min(nbmax, impl.Ilaenv(1, "DORMQR", opts, m, n, k, -1))
+	lworkopt := max(1, nw)*nb + tsize
+	if lwork == -1 {
+		// Workspace query only.
+		work[0] = float64(lworkopt)
+		return
+	}
+
+	switch {
+	case len(a) < (nq-1)*lda+k:
+		panic(shortA)
+	case len(tau) != k:
+		panic(badLenTau)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	}
+
+	nbmin := 2
+	if 1 < nb && nb < k {
+		if lwork < nw*nb+tsize {
+			// Not enough workspace for the preferred block size: shrink it to
+			// what fits after reserving tsize elements for the T factor.
+			nb = (lwork - tsize) / nw
+			nbmin = max(2, impl.Ilaenv(2, "DORMQR", opts, m, n, k, -1))
+		}
+	}
+
+	if nb < nbmin || k <= nb {
+		// Call unblocked code.
+		impl.Dorm2r(side, trans, m, n, k, a, lda, tau, c, ldc, work)
+		work[0] = float64(lworkopt)
+		return
+	}
+
+	// The first tsize elements of work hold the triangular factor T, the
+	// remainder is scratch space for Dlarfb.
+	t := work[:tsize]
+	wrk := work[tsize:]
+	ldwork := nb
+
+	// Blocks start at 0, nb, 2*nb, ..., last. They are visited in increasing
+	// order for Left/Trans and Right/NoTrans and in decreasing order otherwise.
+	notrans := trans == blas.NoTrans
+	last := ((k - 1) / nb) * nb
+	i, step := 0, nb
+	if left == notrans {
+		i, step = last, -nb
+	}
+	for ; 0 <= i && i <= last; i += step {
+		ib := min(nb, k-i)
+
+		// The block acts on rows i..m-1 of C from the left, or on columns
+		// i..n-1 of C from the right.
+		var (
+			mi, ni int
+			ci     []float64
+		)
+		if left {
+			mi, ni, ci = m-i, n, c[i*ldc:]
+		} else {
+			mi, ni, ci = m, n-i, c[i:]
+		}
+
+		impl.Dlarft(lapack.Forward, lapack.ColumnWise, nq-i, ib,
+			a[i*lda+i:], lda,
+			tau[i:],
+			t, ldt)
+		impl.Dlarfb(side, trans, lapack.Forward, lapack.ColumnWise, mi, ni, ib,
+			a[i*lda+i:], lda,
+			t, ldt,
+			ci, ldc,
+			wrk, ldwork)
+	}
+	work[0] = float64(lworkopt)
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dormr2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dormr2.go
@ -0,0 +1,103 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dormr2 overwrites the general m×n matrix C with its product with the
+// orthogonal matrix Q of an RQ factorization as computed by Dgerqf:
+//  C = Q * C    if side == blas.Left and trans == blas.NoTrans
+//  C = Q^T * C  if side == blas.Left and trans == blas.Trans
+//  C = C * Q    if side == blas.Right and trans == blas.NoTrans
+//  C = C * Q^T  if side == blas.Right and trans == blas.Trans
+// The matrix a has size k×m when side == blas.Left and size k×n when
+// side == blas.Right.
+//
+// tau holds the Householder factors. Its length must be at least k, otherwise
+// Dormr2 panics.
+//
+// work is scratch space. Its length must be at least n when side == blas.Left
+// and at least m when side == blas.Right, otherwise Dormr2 panics.
+//
+// Dormr2 is an internal routine. It is exported for testing purposes.
+func (impl Implementation) Dormr2(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64) {
+	left := side == blas.Left
+	// nq is the number of columns of a, nw the required length of work.
+	nq, nw := n, m
+	if left {
+		nq, nw = m, n
+	}
+	switch {
+	case !left && side != blas.Right:
+		panic(badSide)
+	case trans != blas.NoTrans && trans != blas.Trans:
+		panic(badTrans)
+	case m < 0:
+		panic(mLT0)
+	case n < 0:
+		panic(nLT0)
+	case k < 0:
+		panic(kLT0)
+	case left && k > m:
+		panic(kGTM)
+	case !left && k > n:
+		panic(kGTN)
+	case lda < max(1, nq):
+		panic(badLdA)
+	case ldc < max(1, n):
+		panic(badLdC)
+	}
+
+	// Nothing to do for an empty C or an empty set of reflectors.
+	if m == 0 || n == 0 || k == 0 {
+		return
+	}
+
+	switch {
+	case len(a) < (k-1)*lda+nq:
+		panic(shortA)
+	case len(tau) < k:
+		panic(shortTau)
+	case len(c) < (m-1)*ldc+n:
+		panic(shortC)
+	case len(work) < nw:
+		panic(shortWork)
+	}
+
+	// The reflectors are visited with increasing index for Left/Trans and
+	// Right/NoTrans, and with decreasing index for the other two combinations.
+	forward := left != (trans == blas.NoTrans)
+	for cnt := 0; cnt < k; cnt++ {
+		i := cnt
+		if !forward {
+			i = k - 1 - cnt
+		}
+
+		// Reflector i acts on the leading nq-k+i+1 rows (Left) or columns
+		// (Right) of C.
+		span := nq - k + i + 1
+		rows, cols := m, span
+		if left {
+			rows, cols = span, n
+		}
+
+		// The last referenced entry of row i of a is overwritten with 1 for
+		// the duration of the Dlarf call and restored afterwards.
+		pivot := i*lda + span - 1
+		saved := a[pivot]
+		a[pivot] = 1
+		impl.Dlarf(side, rows, cols, a[i*lda:], 1, tau[i], c, ldc, work)
+		a[pivot] = saved
+	}
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dpbtf2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpbtf2.go
@ -0,0 +1,110 @@
+// Copyright ©2017 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dpbtf2 computes the Cholesky factorization of a symmetric positive banded
+// matrix ab. The matrix ab is n×n with kd diagonal bands. The Cholesky
+// factorization computed is
+//  A = U^T * U if ul == blas.Upper
+//  A = L * L^T if ul == blas.Lower
+// ul also specifies the storage of ab. If ul == blas.Upper, then
+// ab is stored as an upper-triangular banded matrix with kd super-diagonals,
+// and if ul == blas.Lower, ab is stored as a lower-triangular banded matrix
+// with kd sub-diagonals. On exit, the banded matrix U or L is stored in-place
+// into ab depending on the value of ul. Dpbtf2 returns whether the factorization
+// was successfully completed. If n == 0, Dpbtf2 returns true without
+// referencing ab.
+//
+// ldab must be at least kd+1 and, for n > 0, ab must have length at least
+// (n-1)*ldab+kd+1, otherwise Dpbtf2 will panic.
+//
+// The band storage scheme is illustrated below when n = 6, and kd = 2.
+// The resulting Cholesky decomposition is stored in the same elements as the
+// input band matrix (a11 becomes u11 or l11, etc.).
+//
+//  ul = blas.Upper
+//  a11 a12 a13
+//  a22 a23 a24
+//  a33 a34 a35
+//  a44 a45 a46
+//  a55 a56  *
+//  a66  *   *
+//
+//  ul = blas.Lower
+//   *   *  a11
+//   *  a21 a22
+//  a31 a32 a33
+//  a42 a43 a44
+//  a53 a54 a55
+//  a64 a65 a66
+//
+// Dpbtf2 is the unblocked version of the algorithm, see Dpbtrf for the blocked
+// version.
+//
+// Dpbtf2 is an internal routine, exported for testing purposes.
+func (Implementation) Dpbtf2(ul blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) {
+	switch {
+	case ul != blas.Upper && ul != blas.Lower:
+		panic(badUplo)
+	case n < 0:
+		panic(nLT0)
+	case kd < 0:
+		panic(kdLT0)
+	case ldab < kd+1:
+		panic(badLdA)
+	}
+
+	// Quick return if possible. An empty matrix is trivially factorized, so
+	// report success like Dpotf2 and Dpotrf do.
+	if n == 0 {
+		return true
+	}
+
+	// The last row of the band storage holds kd+1 elements; the Lower case
+	// reads ab[(n-1)*ldab+kd].
+	if len(ab) < (n-1)*ldab+kd+1 {
+		panic(shortAB)
+	}
+
+	bi := blas64.Implementation()
+
+	// kld is the stride used to walk along a column of the band (Lower) or
+	// along the trailing submatrix passed to Dsyr.
+	kld := max(1, ldab-1)
+	if ul == blas.Upper {
+		for j := 0; j < n; j++ {
+			// Compute U(J,J) and test for non positive-definiteness.
+			ajj := ab[j*ldab]
+			if ajj <= 0 {
+				return false
+			}
+			ajj = math.Sqrt(ajj)
+			ab[j*ldab] = ajj
+			// Compute elements j+1:j+kn of row J and update the trailing submatrix
+			// within the band.
+			kn := min(kd, n-j-1)
+			if kn > 0 {
+				bi.Dscal(kn, 1/ajj, ab[j*ldab+1:], 1)
+				bi.Dsyr(blas.Upper, kn, -1, ab[j*ldab+1:], 1, ab[(j+1)*ldab:], kld)
+			}
+		}
+		return true
+	}
+	for j := 0; j < n; j++ {
+		// Compute L(J,J) and test for non positive-definiteness.
+		ajj := ab[j*ldab+kd]
+		if ajj <= 0 {
+			return false
+		}
+		ajj = math.Sqrt(ajj)
+		ab[j*ldab+kd] = ajj
+
+		// Compute elements J+1:J+KN of column J and update the trailing submatrix
+		// within the band.
+		kn := min(kd, n-j-1)
+		if kn > 0 {
+			bi.Dscal(kn, 1/ajj, ab[(j+1)*ldab+kd-1:], kld)
+			bi.Dsyr(blas.Lower, kn, -1, ab[(j+1)*ldab+kd-1:], kld, ab[(j+1)*ldab+kd:], kld)
+		}
+	}
+	return true
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dpocon.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpocon.go
@ -0,0 +1,90 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dpocon returns an estimate of the reciprocal condition number of a
+// positive-definite matrix A, given the Cholesky factor of A stored in a.
+// The estimate is based on the 1-norm, which for A coincides with the ∞-norm.
+//
+// anorm is the 1-norm (equivalently the ∞-norm) of the original matrix A. It
+// must not be negative, otherwise Dpocon will panic.
+//
+// work is scratch space of length at least 3*n, and iwork is scratch space of
+// length at least n. Dpocon will panic if either is too short.
+func (impl Implementation) Dpocon(uplo blas.Uplo, n int, a []float64, lda int, anorm float64, work []float64, iwork []int) float64 {
+	switch {
+	case uplo != blas.Upper && uplo != blas.Lower:
+		panic(badUplo)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	case anorm < 0:
+		panic(negANorm)
+	}
+
+	// An empty matrix is reported with reciprocal condition number 1.
+	if n == 0 {
+		return 1
+	}
+
+	switch {
+	case len(a) < (n-1)*lda+n:
+		panic(shortA)
+	case len(work) < 3*n:
+		panic(shortWork)
+	case len(iwork) < n:
+		panic(shortIWork)
+	}
+
+	if anorm == 0 {
+		return 0
+	}
+
+	// Each iteration performs two triangular solves with the factor in a.
+	// For an upper factor the transposed solve comes first, for a lower
+	// factor the untransposed one.
+	first, second := blas.Trans, blas.NoTrans
+	if uplo == blas.Lower {
+		first, second = blas.NoTrans, blas.Trans
+	}
+
+	bi := blas64.Implementation()
+
+	var (
+		ainvnm float64
+		kase   int
+		isave  [3]int
+		// normin is false only for the very first Dlatrs call.
+		normin bool
+	)
+	for {
+		// Dlacn2 drives the iteration; kase == 0 signals that it is finished.
+		ainvnm, kase = impl.Dlacn2(n, work[n:], work, iwork, ainvnm, kase, &isave)
+		if kase == 0 {
+			break
+		}
+
+		sl := impl.Dlatrs(uplo, first, blas.NonUnit, normin, n, a, lda, work, work[2*n:])
+		normin = true
+		su := impl.Dlatrs(uplo, second, blas.NonUnit, normin, n, a, lda, work, work[2*n:])
+
+		scale := sl * su
+		if scale == 1 {
+			continue
+		}
+		// Rescale the vector by 1/scale unless that is not safely possible,
+		// in which case the estimate is abandoned and 0 is returned.
+		ix := bi.Idamax(n, work, 1)
+		if scale == 0 || scale < math.Abs(work[ix])*dlamchS {
+			return 0
+		}
+		impl.Drscl(n, scale, work, 1)
+	}
+
+	if ainvnm == 0 {
+		return 0
+	}
+	return (1 / ainvnm) / anorm
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dpotf2.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpotf2.go
@ -0,0 +1,82 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"math"
+
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dpotf2 computes, in place, the Cholesky decomposition of the symmetric
+// positive definite matrix a using the unblocked algorithm.
+//
+// If ul == blas.Upper, a is stored as an upper-triangular matrix and is
+// overwritten with U such that a = U^T U. If ul == blas.Lower, a is
+// overwritten with L such that a = L L^T. Dpotf2 returns false if a is found
+// not to be positive definite, and true otherwise.
+//
+// Dpotf2 is an internal routine. It is exported for testing purposes.
+func (Implementation) Dpotf2(ul blas.Uplo, n int, a []float64, lda int) (ok bool) {
+	switch {
+	case ul != blas.Upper && ul != blas.Lower:
+		panic(badUplo)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// An empty matrix is trivially factorized.
+	if n == 0 {
+		return true
+	}
+
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+
+	bi := blas64.Implementation()
+
+	if ul == blas.Upper {
+		for j := 0; j < n; j++ {
+			jj := j*lda + j
+			// Diagonal entry minus the squared norm of the part of column j
+			// above the diagonal.
+			d := a[jj]
+			if j > 0 {
+				above := a[j:]
+				d -= bi.Ddot(j, above, lda, above, lda)
+			}
+			if d <= 0 || math.IsNaN(d) {
+				// Store the offending value before reporting failure.
+				a[jj] = d
+				return false
+			}
+			d = math.Sqrt(d)
+			a[jj] = d
+			// Update and scale the remainder of row j.
+			if rest := n - j - 1; rest > 0 {
+				row := a[jj+1:]
+				bi.Dgemv(blas.Trans, j, rest,
+					-1, a[j+1:], lda, a[j:], lda,
+					1, row, 1)
+				bi.Dscal(rest, 1/d, row, 1)
+			}
+		}
+		return true
+	}
+
+	for j := 0; j < n; j++ {
+		jj := j*lda + j
+		// Diagonal entry minus the squared norm of the part of row j left of
+		// the diagonal.
+		d := a[jj]
+		if j > 0 {
+			before := a[j*lda:]
+			d -= bi.Ddot(j, before, 1, before, 1)
+		}
+		if d <= 0 || math.IsNaN(d) {
+			// Store the offending value before reporting failure.
+			a[jj] = d
+			return false
+		}
+		d = math.Sqrt(d)
+		a[jj] = d
+		// Update and scale the remainder of column j.
+		if rest := n - j - 1; rest > 0 {
+			col := a[(j+1)*lda+j:]
+			bi.Dgemv(blas.NoTrans, rest, j,
+				-1, a[(j+1)*lda:], lda, a[j*lda:], 1,
+				1, col, lda)
+			bi.Dscal(rest, 1/d, col, lda)
+		}
+	}
+	return true
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dpotrf.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpotrf.go
@ -0,0 +1,81 @@
+// Copyright ©2015 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import (
+	"gonum.org/v1/gonum/blas"
+	"gonum.org/v1/gonum/blas/blas64"
+)
+
+// Dpotrf computes the Cholesky decomposition of the symmetric positive definite
+// matrix a. If ul == blas.Upper, then a is stored as an upper-triangular matrix,
+// and a = U^T U is stored in place into a. If ul == blas.Lower, then a = L L^T
+// is computed and stored in-place into a. If a is not positive definite, false
+// is returned. This is the blocked version of the algorithm.
+//
+// Dpotrf panics if ul is neither blas.Upper nor blas.Lower, if n < 0, if
+// lda < max(1,n), or if n > 0 and a has fewer than (n-1)*lda+n elements.
+func (impl Implementation) Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok bool) {
+	switch {
+	case ul != blas.Upper && ul != blas.Lower:
+		panic(badUplo)
+	case n < 0:
+		panic(nLT0)
+	case lda < max(1, n):
+		panic(badLdA)
+	}
+
+	// Quick return if possible.
+	if n == 0 {
+		return true
+	}
+
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+
+	nb := impl.Ilaenv(1, "DPOTRF", string(ul), n, -1, -1, -1)
+	if nb <= 1 || n <= nb {
+		// The matrix fits in a single block: use the unblocked code.
+		return impl.Dpotf2(ul, n, a, lda)
+	}
+	bi := blas64.Implementation()
+	if ul == blas.Upper {
+		for j := 0; j < n; j += nb {
+			jb := min(nb, n-j)
+			// Update the jb×jb diagonal block with the j rows above it, then
+			// factorize it with the unblocked code.
+			bi.Dsyrk(blas.Upper, blas.Trans, jb, j,
+				-1, a[j:], lda,
+				1, a[j*lda+j:], lda)
+			if !impl.Dpotf2(blas.Upper, jb, a[j*lda+j:], lda) {
+				return false
+			}
+			if j+jb < n {
+				// Update and solve for the block row to the right of the
+				// diagonal block.
+				bi.Dgemm(blas.Trans, blas.NoTrans, jb, n-j-jb, j,
+					-1, a[j:], lda, a[j+jb:], lda,
+					1, a[j*lda+j+jb:], lda)
+				bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, jb, n-j-jb,
+					1, a[j*lda+j:], lda,
+					a[j*lda+j+jb:], lda)
+			}
+		}
+		return true
+	}
+	for j := 0; j < n; j += nb {
+		jb := min(nb, n-j)
+		// Update the jb×jb diagonal block with the j columns to its left, then
+		// factorize it with the unblocked code.
+		bi.Dsyrk(blas.Lower, blas.NoTrans, jb, j,
+			-1, a[j*lda:], lda,
+			1, a[j*lda+j:], lda)
+		if !impl.Dpotf2(blas.Lower, jb, a[j*lda+j:], lda) {
+			return false
+		}
+		if j+jb < n {
+			// Update and solve for the block column below the diagonal block.
+			bi.Dgemm(blas.NoTrans, blas.Trans, n-j-jb, jb, j,
+				-1, a[(j+jb)*lda:], lda, a[j*lda:], lda,
+				1, a[(j+jb)*lda+j:], lda)
+			bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, n-j-jb, jb,
+				1, a[j*lda+j:], lda,
+				a[(j+jb)*lda+j:], lda)
+		}
+	}
+	return true
+}
--- a/vendor/gonum.org/v1/gonum/lapack/gonum/dpotri.go
+++ b/vendor/gonum.org/v1/gonum/lapack/gonum/dpotri.go
@ -0,0 +1,44 @@
+// Copyright ©2019 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gonum
+
+import "gonum.org/v1/gonum/blas"
+
+// Dpotri computes the inverse of a real symmetric positive definite matrix A
+// from its Cholesky factorization.
+//
+// On entry, a must hold the triangular factor U or L of the factorization
+// A = U^T*U or A = L*L^T, as computed by Dpotrf. On return, the factor has
+// been overwritten by the upper or lower triangle of the (symmetric) inverse
+// of A.
+//
+// Dpotri returns false if the triangular factor could not be inverted by
+// Dtrtri, and true otherwise.
+func (impl Implementation) Dpotri(uplo blas.Uplo, n int, a []float64, lda int) (ok bool) {
+	if uplo != blas.Upper && uplo != blas.Lower {
+		panic(badUplo)
+	}
+	if n < 0 {
+		panic(nLT0)
+	}
+	if lda < max(1, n) {
+		panic(badLdA)
+	}
+
+	// Nothing to invert for an empty matrix.
+	if n == 0 {
+		return true
+	}
+
+	if len(a) < (n-1)*lda+n {
+		panic(shortA)
+	}
+
+	// Step 1: invert the triangular Cholesky factor U or L in place.
+	if !impl.Dtrtri(uplo, blas.NonUnit, n, a, lda) {
+		return false
+	}
+
+	// Step 2: form inv(U)*inv(U)^T or inv(L)^T*inv(L).
+	impl.Dlauum(uplo, n, a, lda)
+	return true
+}
--- a/Show more
+++ b/Show more