Tuesday, March 08, 2005

LCPSolver.java

Two classes are implemented and tested in this code. This is my first attempt of using Colt extensively. SlowLCP is a simple but slow implementation for LCP bookkeeping. In class SlowLCP, the index set C and N are implemented by BooleanArrayList. I don't know a good way to solve LDL' yet, so I just use a more generic but less efficient LUDecompositionQuick. It is very obvious that solveLCPBasic directly translates the pseudo-code from Baraff.
import cern.colt.Timer;
import cern.colt.matrix.*;
import cern.colt.matrix.impl.*;
import cern.colt.list.BooleanArrayList;
import cern.colt.matrix.linalg.LUDecompositionQuick;
import cern.colt.list.IntArrayList;


/*
THE ALGORITHM
-------------

solve A*x = b+w, with x and w subject to certain LCP conditions.
each x(i),w(i) must lie on one of the three line segments in the following
diagram. each line segment corresponds to one index set :

w(i)
/|\ | :
| | :
| |i in N :
w>0 | |state[i]=0 :
| | :
| | : i in C
w=0 + +-----------------------+
| : |
| : |
w<0 | : |i in N
| : |state[i]=1
| : |
| : |
+-------|-----------|-----------|----------> x(i)
lo 0 hi

the Dantzig algorithm proceeds as follows:
for i=1:n
* if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
negative towards the line. as this is done, the other (x(j),w(j))
for j<i are constrained to be on the line. if any (x,w) reaches the
end of a line segment then it is switched between index sets.
* i is added to the appropriate index set depending on what line segment
it hits.

we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
simpler, because the starting point for x(i),w(i) is always on the dotted
line x=0 and x will only ever increase in one direction, so it can only hit
two out of the three line segments.


NOTES
-----

this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
the implementation is split into an LCP problem object (dLCP) and an LCP
driver function. most optimization occurs in the dLCP object.

a naive implementation of the algorithm requires either a lot of data motion
or a lot of permutation-array lookup, because we are constantly re-ordering
rows and columns. to avoid this and make a more optimized algorithm, a
non-trivial data structure is used to represent the matrix A (this is
implemented in the fast version of the dLCP object).

during execution of this algorithm, some indexes in A are clamped (set C),
some are non-clamped (set N), and some are "don't care" (where x=0).
A,x,b,w (and other problem vectors) are permuted such that the clamped
indexes are first, the unclamped indexes are next, and the don't-care
indexes are last. this permutation is recorded in the array `p'.
initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
the corresponding elements of p are swapped.

because the C and N elements are grouped together in the rows of A, we can do
lots of work with a fast dot product function. if A,x,etc were not permuted
and we only had a permutation array, then those dot products would be much
slower as we would have a permutation array lookup in some inner loops.

A is accessed through an array of row pointers, so that element (i,j) of the
permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
we still have to actually move the data.

during execution of this algorithm we maintain an L*D*L' factorization of
the clamped submatrix of A (call it `AC') which is the top left nC*nC
submatrix of A. there are two ways we could arrange the rows/columns in AC.

(1) AC is always permuted such that L*D*L' = AC. this causes a problem
when a row/column is removed from C, because then all the rows/columns of A
between the deleted index and the end of C need to be rotated downward.
this results in a lot of data motion and slows things down.
(2) L*D*L' is actually a factorization of a *permutation* of AC (which is
itself a permutation of the underlying A). this is what we do - the
permutation is recorded in the vector C. call this permutation A[C,C].
when a row/column is removed from C, all we have to do is swap two
rows/columns and manipulate C.

*/

public class LCPSolver {

public void test1() {
int n = 100;
double tol = 1e-9;
System.out.println(
"Test solve()");

cern.jet.random.AbstractDistribution dist =
new cern.jet.random.Uniform( -1, 1, 12345);
cern.jet.math.Functions F = cern.jet.math.Functions.functions;

DoubleMatrix2D A2 =
new DenseDoubleMatrix2D(n, n);
DoubleMatrix2D A =
new DenseDoubleMatrix2D(n, n);
DoubleMatrix1D x =
new DenseDoubleMatrix1D(n);
DoubleMatrix1D b =
new DenseDoubleMatrix1D(n);
DoubleMatrix1D tmp1 =
new DenseDoubleMatrix1D(n);
DoubleMatrix1D
tmp2 = new DenseDoubleMatrix1D(n);
DoubleMatrix1D lo =
new DenseDoubleMatrix1D(n);
DoubleMatrix1D hi =
new DenseDoubleMatrix1D(n);

A2.assign(dist);
A2.zMult(A2, A,
1, 0, false, true);
x.assign(dist);
A.zMult(x, b);

b.assign(tmp1.assign(dist), F.chain(F.minus(
0.1), F.plusMult(0.2)));

// choose `nub' in the range 0..n-1

int nub = 50; //dRandInt (n);

// make limits
for (int i = 0; i < nub; i++)lo.setQuick(i, Double.NEGATIVE_INFINITY);
for (int i = 0; i < nub; i++)hi.setQuick(i, Double.POSITIVE_INFINITY);
for (int i = nub; i < n; i++)lo.setQuick(i, -dist.nextDouble() - 0.01);
for (int i = nub; i < n; i++)hi.setQuick(i, dist.nextDouble() + 0.01);

// solve the LCP. we must make copy of A,b,lo,hi (A2,b2,lo2,hi2) for

// SolveLCP() to permute. also, we'll clear the upper triangle of A2 to
// ensure that it doesn't get referenced (if it does, the answer will be
// wrong).

A2.assign(A);
DoubleMatrix1D b2 = b.copy();
DoubleMatrix1D lo2 = lo.copy();
DoubleMatrix1D hi2 = hi.copy();
x.assign(
0);
DoubleMatrix1D w =
new DenseDoubleMatrix1D(n);

solve(n, A2, x, b2, w, nub, lo2, hi2,
0);
}

public void test2() {
int n = 10;
double tol = 1e-9;
System.out.println(
"Test solveLCPBasic()");

cern.jet.random.AbstractDistribution dist =
new cern.jet.random.Uniform( -1, 1, 12345);
cern.jet.math.Functions F = cern.jet.math.Functions.functions;

DoubleMatrix2D A2 =
new DenseDoubleMatrix2D(n, n);
DoubleMatrix2D A =
new DenseDoubleMatrix2D(n, n);
DoubleMatrix1D x =
new DenseDoubleMatrix1D(n);
DoubleMatrix1D b =
new DenseDoubleMatrix1D(n);
DoubleMatrix1D tmp1 =
new DenseDoubleMatrix1D(n);

A2.assign(dist);
A2.zMult(A2, A,
1, 0, false, true);
x.assign(dist);
A.zMult(x, b);
System.out.println(
"b = "+b);

b.assign(tmp1.assign(dist), F.chain(F.minus(
0.1), F.plusMult(0.2)));

System.out.println(
"x = "+x);
System.out.println(
"b = "+b);

int nub = 0;
x.assign(
0);
DoubleMatrix1D w =
new DenseDoubleMatrix1D(n);

Timer stopwatch =
new Timer();
stopwatch.start();
try {
solveLCPBasic(n, A, x, b, w, nub);
}
catch (Exception e) {
System.err.println(
"Error in solveLCPBasic()");
e.printStackTrace();
}
stopwatch.stop();
stopwatch.display();
System.out.println();
System.out.println(
"**********************************");
System.out.println(
"x = " + x);
A.zMult(x,tmp1);
System.out.println(
"A*x = " + tmp1);
System.out.println();
System.out.println(
"b = " + b);
System.out.println(
"w = " + w);
b.assign(w, F.plus);
System.out.println(
"b+w = " + b);
}

public static void main(String[] arg) {
LCPSolver solver =
new LCPSolver();
solver.test2();
}

void solve(int n, DoubleMatrix2D A, DoubleMatrix1D x, DoubleMatrix1D b,
DoubleMatrix1D w,
int nub, DoubleMatrix1D lo, DoubleMatrix1D hi,
int findex) {

int i, k, hit_first_friction_index = 0;

// if all the variables are unbounded then we can just factor, solve,

// and return
if (nub >= n) {
//dFactorLDLT (A,w,n,nskip); // use w for d
//dSolveLDLT (A,w,b,n,nskip);
x.assign(b);
w.assign(
0);
return;
}
}

/**
* An unoptimized Dantzig LCP driver routine for the basic LCP problem.
* Must have lo=0, hi=Infinity, and nub=0
*
* @param n int
* @param A DoubleMatrix2D
* @param x DoubleMatrix1D
* @param b DoubleMatrix1D
* @param w DoubleMatrix1D
* @param nub int
* @param lo DoubleMatrix1D
* @param hi DoubleMatrix1D
* @throws LCPException
*/


void solveLCPBasic(int n, DoubleMatrix2D A, DoubleMatrix1D x,
DoubleMatrix1D b, DoubleMatrix1D w,
int nub) throws LCPException {

if(!(n>0 && A!=null && x!=null && b!=null && w!=null && nub==0))
throw new LCPException("Input error in solverLCPBasic()");

int i, k;
DoubleMatrix1D tmp =
new DenseDoubleMatrix1D(n);
DoubleMatrix1D delta_x =
new DenseDoubleMatrix1D(n);
DoubleMatrix1D delta_w =
new DenseDoubleMatrix1D(n);

SlowLCP lcp =
new SlowLCP(n, 0, A, x, b, w, tmp, tmp);
nub = lcp.getNub();
for (i = 0; i < n; i++) {
w.setQuick(i, lcp.AiC_times_qC(i, x) - b.getQuick(i));
if (w.getQuick(i) >= 0)
lcp.transfer_i_to_N(i);
else {
for (; ; ) {
// compute: delta_x(C)=-A(C,C)\A(C,i)

delta_x.assign(
0);
lcp.solve1(delta_x, i,
1, 0);
delta_x.setQuick(i,
1);

// compute: delta_w=A*delta_x
delta_w.assign(
0);
lcp.pN_equals_ANC_times_qC(delta_w, delta_x);
lcp.pN_plusequals_ANi(delta_w, i,
1);
delta_w.setQuick(i, lcp.AiC_times_qC(i, delta_x) + lcp.Aii(i));

// find index to switch

int si = i; // si = switch index
boolean si_in_N = false; // set to true if si in N
double s = -w.getQuick(i) / delta_w.getQuick(i);
if (s <= 0) {
System.err.println(
"LCP internal error, s <= 0 (s=" + s + ")");
if (i < n - 1) {
x.viewPart(i, n - i).assign(
0);
w.viewPart(i, n - i).assign(
0);
}
lcp.unpermute();
return;
}
for (k = 0; k < lcp.numN(); k++) {
int kk = lcp.indexN(k);
if (delta_w.getQuick(kk) < 0) {
double s2 = -w.getQuick(kk) / delta_w.getQuick(kk);
if (s2 < s) {
s = s2;
si = kk;
si_in_N =
true;
}
}
}
for (k = 0; k < lcp.numC(); k++) {
int kk = lcp.indexC(k);
if (delta_x.getQuick(kk) < 0) {
double s2 = -x.getQuick(kk) / delta_x.getQuick(kk);
if (s2 < s) {
s = s2;
si = kk;
si_in_N =
false;
}
}
}
// apply x = x + s * delta

lcp.pC_plusequals_s_times_qC(x, s, delta_x);
x.setQuick(i, x.getQuick(i) + s);
lcp.pN_plusequals_s_times_qN(w, s, delta_w);
w.setQuick(i, w.getQuick(i) + s * delta_w.getQuick(i));

// switch indexes between sets if necessary
if (si == i) {
w.setQuick(i,
0);
lcp.transfer_i_to_C(i);
break;
}
if(si_in_N) {
w.setQuick(si,
0);
lcp.transfer_i_from_N_to_C(si);
}
else {
x.setQuick(si,
0);
lcp.transfer_i_from_C_to_N(si);
}
}
}
}
}
}


class LCPException extends Exception {
public LCPException() {}

public LCPException(String msg) { super(msg); }
}

//***************************************************************************
// dLCP manipulator object. this represents an n*n LCP problem.

//
// two index sets C and N are kept. each set holds a subset of
// the variable indexes 0..n-1. an index can only be in one set.
// initially both sets are empty.
//
// the index set C is special: solutions to A(C,C)\A(C,i) can be generated.


// simple but slow implementation of dLCP, for testing the LCP drivers.

class SlowLCP {
int n, nub;
DoubleMatrix2D
A;
DoubleMatrix1D
x, b, w, lo, hi; // LCP problem data

//ATYPE A; // A rows
BooleanArrayList
C, N; // index sets
int last_i_for_solve1; // last i value given to solve1

// the constructor is given an initial problem description (A,x,b,w) and
// space for other working data (which the caller may allocate on the stack).
// some of this data is specific to the fast dLCP implementation.
// the matrices A and L have size n*n, vectors have size n*1.
// A represents a symmetric matrix but only the lower triangle is valid.
// `nub' is the number of unbounded indexes at the start. all the indexes

// 0..nub-1 will be put into C.
SlowLCP(
int n, int nub, DoubleMatrix2D A, DoubleMatrix1D x, DoubleMatrix1D b,
DoubleMatrix1D w, DoubleMatrix1D lo, DoubleMatrix1D hi) {
this.n = n;
this.nub = nub;
this.A = A;
this.x = x;
this.b = b;
this.w = w;
this.lo = lo;
this.hi = hi;
this.x.assign(0);
last_i_for_solve1 = -1;

int i, j;
C = new BooleanArrayList(n);
N = new BooleanArrayList(n);

// lets make A symmetric

for (i = 0; i < n; i++)
for (j = i + 1; j < n; j++)
A.setQuick(i, j, A.getQuick(j, i));

// if nub>0, put all indexes 0..nub-1 into C and solve for x

if (nub > 0) {
DoubleMatrix2D L = A.viewPart(
0,0,nub,nub).copy();
LUDecompositionQuick LU =
new LUDecompositionQuick();
LU.decompose(L);
LU.solve(x.viewPart(
0,nub));
w.viewPart(
0,nub).assign(0); // nub

for (i = 0; i < nub; i++) C.setQuick(i, true);
}

}

/**
* return the value of `nub'. the constructor may want to change it,
* so the caller should find out its new value.
*/

int getNub() { return nub; }

// transfer functions: transfer index i to the given set (C or N). indexes

// less than `nub' can never be given. A,x,b,w,etc may be permuted by these
// functions, the caller must be robust to this.

/**
* this assumes C and N span 1:i-1. this also assumes that solve1() has
* been recently called for the same i without any other transfer
* functions in between (thereby allowing some data reuse for the fast
* implementation).
*/

void transfer_i_to_C(int i) throws LCPException {
if (i < nub) throw new LCPException("bad i");
if (C.getQuick(i)) throw new LCPException("i already in C");
if (N.getQuick(i)) throw new LCPException("i already in N");
for (int k = 0; k < i; k++)
if (!(C.getQuick(k) ^ N.getQuick(k)))
throw new LCPException("assumptions for C and N violated");
for (int k = i; k < n; k++)
if (C.getQuick(k) || N.getQuick(k))
throw new LCPException("assumptions for C and N violated");
if (i != last_i_for_solve1)
throw new LCPException("assumptions for i violated");
last_i_for_solve1 = -1;
C.setQuick(i, true);
}

// this assumes C and N span 1:i-1.

void transfer_i_to_N(int i) throws LCPException {
if (i < nub) throw new LCPException("bad i");
if (C.getQuick(i)) throw new LCPException("i already in C");
if (N.getQuick(i)) throw new LCPException("i already in N");
for (int k = 0; k < i; k++)
if (!C.getQuick(k) && !N.getQuick(k))
throw new LCPException("assumptions for C and N violated");
for (int k = i; k < n; k++)
if (C.getQuick(k) || N.getQuick(k))
throw new LCPException("assumptions for C and N violated");
last_i_for_solve1 = -1;
N.setQuick(i, true);
}

void transfer_i_from_N_to_C(int i) throws LCPException {
if (i < nub) throw new LCPException("bad i");
if (C.getQuick(i)) throw new LCPException("i already in C");
if (!N.getQuick(i)) throw new LCPException("i not in N");
last_i_for_solve1 = -1;
N.setQuick(i, false);
C.setQuick(i, true);
}

void transfer_i_from_C_to_N(int i) throws LCPException {
if (i < nub) throw new LCPException("bad i");
if (N.getQuick(i)) throw new LCPException("i already in N");
if (!C.getQuick(i)) throw new LCPException("i not in C");
last_i_for_solve1 = -1;
C.setQuick(i, false);
N.setQuick(i, true);
}

// return the number of indexes in set C/N

int numC() {
int i, count = 0;
for (i = 0; i < n; i++)
if (C.getQuick(i)) count++;
return count;
}

int numN() {
int i, count = 0;
for (i = 0; i < n; i++)
if (N.getQuick(i) ) count++;
return count;
}

// return index i in set C/N.

int indexC(int i) throws LCPException {
int k, count = 0;
for (k = 0; k < n; k++) {
if (C.getQuick(k)) {
if (count == i)return k;
count++;
}
}
throw new LCPException("bad index C " + i);
//return 0;

}

int indexN(int i) throws LCPException {
int k, count = 0;
for (k = 0; k < n; k++) {
if (N.getQuick(k)) {
if (count == i)return k;
count++;
}
}
throw new LCPException("bad index into N");
//return 0;

}

// accessor and arithmetic functions. Aij translates as A(i,j), etc.
// make sure that only the lower triangle of A is ever referenced.

double Aii(int i) { return A.getQuick(i, i); }

double AiC_times_qC(int i, DoubleMatrix1D q) {
double sum = 0;
for (int k = 0; k < n; k++)
if (C.getQuick(k))
sum +=
A.getQuick(i, k) * q.getQuick(k);
return sum;
}

double AiN_times_qN(int i, DoubleMatrix1D q) {
double sum = 0;
for (int k = 0; k < n; k++)
if (N.getQuick(k))
sum +=
A.getQuick(i, k) * q.getQuick(k);
return sum;
}

void pN_equals_ANC_times_qC(DoubleMatrix1D p, DoubleMatrix1D q) {
double sum;
for (int ii = 0; ii < n; ii++)
if (N.getQuick(ii)) {
sum =
0;
for (int jj = 0; jj < n; jj++)
if (C.getQuick(jj))
sum +=
A.getQuick(ii, jj) * q.getQuick(jj);
p.setQuick(ii, sum);
}
}

void pN_plusequals_ANi(DoubleMatrix1D p, int i, int sign) throws LCPException {
int k;
for (k = 0; k < n; k++)
if (N.getQuick(k) && k >= i)
throw new LCPException("N assumption violated");
for (k = 0; k < n; k++)
if (N.getQuick(k))
p.setQuick(k, p.getQuick(k) + sign*
A.getQuick(i, k));
}

// for all Nj. sign = +1,-1. assumes i > maximum index in N.

void pC_plusequals_s_times_qC(DoubleMatrix1D p, double s, DoubleMatrix1D q) {
for (int k = 0; k < n; k++)
if (C.getQuick(k))
p.setQuick(k, p.getQuick(k) + s * q.getQuick(k));
}

void pN_plusequals_s_times_qN(DoubleMatrix1D p, double s, DoubleMatrix1D q) {
for (int k = 0; k < n; k++)
if (N.getQuick(k))
p.setQuick(k, p.getQuick(k) + s * q.getQuick(k));
}

/* get a(C) = - dir * A(C,C) \ A(C,i). dir must be +/- 1.
the fast version of this function computes some data that is needed by
transfer_i_to_C(). if only_transfer is nonzero then this function
*only* computes that data, it does not set a(C).
*/


void solve1(DoubleMatrix1D a, int i, int dir, int only_transfer) {

DoubleMatrix2D AA =
new DenseDoubleMatrix2D(n, n);
//DoubleMatrix1D dd = new DenseDoubleMatrix1D(n);

DoubleMatrix1D bb =
new DenseDoubleMatrix1D(n);
int ii, jj, AAi, AAj;

last_i_for_solve1 = i;

AAi =
0;
for (ii = 0; ii < n; ii++)
if (C.getQuick(ii)) {
AAj =
0;
for (jj = 0; jj < n; jj++)
if (C.getQuick(jj)) {
AA.setQuick(AAi, AAj,
A.getQuick(ii, jj));
AAj++;
}
bb.setQuick(AAi,
A.getQuick(i, ii));
AAi++;
}
if (AAi == 0) return;

LUDecompositionQuick LU =
new LUDecompositionQuick();
LU.decompose(AA.viewPart(
0,0,AAi,AAi));
LU.solve(bb.viewPart(
0,AAi));
//dFactorLDLT (AA,dd,AAi,nskip);

//dSolveLDLT (AA,dd,bb,AAi,nskip);

AAi =
0;
for (ii = 0; ii < n; ii++)
if (C.getQuick(ii)) a.setQuick(ii, -dir*bb.getQuick(AAi++));

}

// call this at the end of the LCP function. if the x/w values have been

// permuted then this will unscramble them.
void unpermute() {}
}