/* * Copy from http://www.netlib.org/benchmark/linpackc.new * Version: V1.0 * / /* * ** * ** LINPACK.C Linpack benchmark, calculates FLOPS. * ** (FLoating Point Operations Per Second) * ** * ** Translated to C by Bonnie Toy 5/88 * ** * ** Modified by Will Menninger, 10/93, with these features: * ** (modified on 2/25/94 to fix a problem with daxpy for * ** unequal increments or equal increments not equal to 1. * ** Jack Dongarra) * ** * ** - Defaults to double precision. * ** - Averages ROLLed and UNROLLed performance. * ** - User selectable array sizes. * ** - Automatically does enough repetitions to take at least 10 CPU seconds. * ** - Prints machine precision. * ** - ANSI prototyping. * ** * ** To compile: cc -O -o linpack linpack.c -lm * ** * ** * */ #include #include #include #include #include #define DP #ifdef SP #define ZERO 0.0 #define ONE 1.0 #define PREC "Single" #define BASE10DIG FLT_DIG typedef float REAL; #endif #ifdef DP #define ZERO 0.0e0 #define ONE 1.0e0 #define PREC "Double" #define BASE10DIG DBL_DIG typedef double REAL; #endif static REAL linpack(long nreps, int arsize); static void matgen(REAL *a, int lda, int n, REAL *b, REAL *norma); static void dgefa(REAL *a, int lda, int n, int *ipvt, int *info, int roll); static void dgesl(REAL *a, int lda, int n, int *ipvt, REAL *b, int job, int roll); static void daxpy_r(int n, REAL da, REAL *dx, int incx, REAL *dy, int incy); static REAL ddot_r(int n, REAL *dx, int incx, REAL *dy, int incy); static void dscal_r(int n, REAL da, REAL *dx, int incx); static void daxpy_ur(int n, REAL da, REAL *dx, int incx, REAL *dy, int incy); static REAL ddot_ur(int n, REAL *dx, int incx, REAL *dy, int incy); static void dscal_ur(int n, REAL da, REAL *dx, int incx); static int idamax(int n, REAL *dx, int incx); static REAL second(void); static void *mempool; void linpack_main(int argc, char *argv[]) { char buf[80]; int arsize; long arsize2d, memreq, nreps; size_t malloc_arg; arsize = 12; while (1) { #if 0 if (argc != 2) { printf("Enter array size (q to quit) [200]: "); fflush(stdout); fgets(buf, 79, stdin); if (buf[0] == 'q' || buf[0] == 'Q') { break; } if (buf[0] == '\0' || buf[0] == '\n') { arsize = 200; } else { arsize = atoi(buf); } } else { arsize = atoi(argv[1]); } #endif arsize /= 2; arsize *= 2; if (arsize < 10) { printf("Too small.\n"); continue; } arsize2d = (long)arsize * (long)arsize; memreq = arsize2d * sizeof(REAL) + (long)arsize * sizeof(REAL) + (long)arsize * sizeof(int); printf("Memory required: %ldK.\n", (memreq + 512L) >> 10); malloc_arg = (size_t)memreq; if (malloc_arg != memreq || (mempool = malloc(malloc_arg)) == NULL) { printf("Not enough memory available for given array size.\n\n"); continue; } printf("\n\nLINPACK benchmark, %s precision.\n", PREC); printf("Machine precision: %d digits.\n", BASE10DIG); printf("Array size %d X %d.\n", arsize, arsize); printf("Average rolled and unrolled performance:\n\n"); printf(" Reps Time(s) DGEFA DGESL OVERHEAD KFLOPS\n"); printf("----------------------------------------------------\n"); nreps = 1; while (linpack(nreps, arsize) < 10.) { nreps *= 2; } free(mempool); printf("\n"); } } static REAL linpack(long nreps, int arsize) { REAL *a, *b; REAL norma, t1, kflops, tdgesl, tdgefa, totalt, toverhead, ops; int *ipvt, n, info, lda; long i, arsize2d; lda = arsize; n = arsize / 2; arsize2d = (long)arsize * (long)arsize; ops = ((2.0 * n * n * n) / 3.0 + 2.0 * n * n); a = (REAL *)mempool; b = a + arsize2d; ipvt = (int *)&b[arsize]; tdgesl = 0; tdgefa = 0; totalt = second(); for (i = 0; i < nreps; i++) { matgen(a, lda, n, b, &norma); t1 = second(); dgefa(a, lda, n, ipvt, &info, 1); tdgefa += second() - t1; t1 = second(); dgesl(a, lda, n, ipvt, b, 0, 1); tdgesl += second() - t1; } for (i = 0; i < nreps; i++) { matgen(a, lda, n, b, &norma); t1 = second(); dgefa(a, lda, n, ipvt, &info, 0); tdgefa += second() - t1; t1 = second(); dgesl(a, lda, n, ipvt, b, 0, 0); tdgesl += second() - t1; } totalt = second() - totalt; if (totalt < 0.5 || tdgefa + tdgesl < 0.2) { return (0.); } kflops = 2.*nreps * ops / (1000.*(tdgefa + tdgesl)); toverhead = totalt - tdgefa - tdgesl; if (tdgefa < 0.) { tdgefa = 0.; } if (tdgesl < 0.) { tdgesl = 0.; } if (toverhead < 0.) { toverhead = 0.; } printf("%8ld %6.2f %6.2f%% %6.2f%% %6.2f%% %9.3f\n", nreps, totalt, 100.*tdgefa / totalt, 100.*tdgesl / totalt, 100.*toverhead / totalt, kflops); return (totalt); } /* * ** For matgen, * ** We would like to declare a[][lda], but c does not allow it. In this * ** function, references to a[i][j] are written a[lda*i+j]. * */ static void matgen(REAL *a, int lda, int n, REAL *b, REAL *norma) { int init, i, j; init = 1325; *norma = 0.0; for (j = 0; j < n; j++) for (i = 0; i < n; i++) { init = (int)((long)3125 * (long)init % 65536L); a[lda * j + i] = (init - 32768.0) / 16384.0; *norma = (a[lda * j + i] > *norma) ? a[lda * j + i] : *norma; } for (i = 0; i < n; i++) { b[i] = 0.0; } for (j = 0; j < n; j++) for (i = 0; i < n; i++) { b[i] = b[i] + a[lda * j + i]; } } /* * ** * ** DGEFA benchmark * ** * ** We would like to declare a[][lda], but c does not allow it. In this * ** function, references to a[i][j] are written a[lda*i+j]. * ** * ** dgefa factors a double precision matrix by gaussian elimination. * ** * ** dgefa is usually called by dgeco, but it can be called * ** directly with a saving in time if rcond is not needed. * ** (time for dgeco) = (1 + 9/n)*(time for dgefa) . * ** * ** on entry * ** * ** a REAL precision[n][lda] * ** the matrix to be factored. * ** * ** lda integer * ** the leading dimension of the array a . * ** * ** n integer * ** the order of the matrix a . * ** * ** on return * ** * ** a an upper triangular matrix and the multipliers * ** which were used to obtain it. * ** the factorization can be written a = l*u where * ** l is a product of permutation and unit lower * ** triangular matrices and u is upper triangular. * ** * ** ipvt integer[n] * ** an integer vector of pivot indices. * ** * ** info integer * ** = 0 normal value. * ** = k if u[k][k] .eq. 0.0 . this is not an error * ** condition for this subroutine, but it does * ** indicate that dgesl or dgedi will divide by zero * ** if called. use rcond in dgeco for a reliable * ** indication of singularity. * ** * ** linpack. this version dated 08/14/78 . * ** cleve moler, university of New Mexico, argonne national lab. * ** * ** functions * ** * ** blas daxpy,dscal,idamax * ** * */ static void dgefa(REAL *a, int lda, int n, int *ipvt, int *info, int roll) { REAL t; int j, k, kp1, l, nm1; /* gaussian elimination with partial pivoting */ if (roll) { *info = 0; nm1 = n - 1; if (nm1 >= 0) for (k = 0; k < nm1; k++) { kp1 = k + 1; /* find l = pivot index */ l = idamax(n - k, &a[lda * k + k], 1) + k; ipvt[k] = l; /* zero pivot implies this column already * triangularized */ if (a[lda * k + l] != ZERO) { /* interchange if necessary */ if (l != k) { t = a[lda * k + l]; a[lda * k + l] = a[lda * k + k]; a[lda * k + k] = t; } /* compute multipliers */ t = -ONE / a[lda * k + k]; dscal_r(n - (k + 1), t, &a[lda * k + k + 1], 1); /* row elimination with column indexing */ for (j = kp1; j < n; j++) { t = a[lda * j + l]; if (l != k) { a[lda * j + l] = a[lda * j + k]; a[lda * j + k] = t; } daxpy_r(n - (k + 1), t, &a[lda * k + k + 1], 1, &a[lda * j + k + 1], 1); } } else { (*info) = k; } } ipvt[n - 1] = n - 1; if (a[lda * (n - 1) + (n - 1)] == ZERO) { (*info) = n - 1; } } else { *info = 0; nm1 = n - 1; if (nm1 >= 0) for (k = 0; k < nm1; k++) { kp1 = k + 1; /* find l = pivot index */ l = idamax(n - k, &a[lda * k + k], 1) + k; ipvt[k] = l; /* zero pivot implies this column already * triangularized */ if (a[lda * k + l] != ZERO) { /* interchange if necessary */ if (l != k) { t = a[lda * k + l]; a[lda * k + l] = a[lda * k + k]; a[lda * k + k] = t; } /* compute multipliers */ t = -ONE / a[lda * k + k]; dscal_ur(n - (k + 1), t, &a[lda * k + k + 1], 1); /* row elimination with column indexing */ for (j = kp1; j < n; j++) { t = a[lda * j + l]; if (l != k) { a[lda * j + l] = a[lda * j + k]; a[lda * j + k] = t; } daxpy_ur(n - (k + 1), t, &a[lda * k + k + 1], 1, &a[lda * j + k + 1], 1); } } else { (*info) = k; } } ipvt[n - 1] = n - 1; if (a[lda * (n - 1) + (n - 1)] == ZERO) { (*info) = n - 1; } } } /* * ** * ** DGESL benchmark * ** * ** We would like to declare a[][lda], but c does not allow it. In this * ** function, references to a[i][j] are written a[lda*i+j]. * ** * ** dgesl solves the double precision system * ** a * x = b or trans(a) * x = b * ** using the factors computed by dgeco or dgefa. * ** * ** on entry * ** * ** a double precision[n][lda] * ** the output from dgeco or dgefa. * ** * ** lda integer * ** the leading dimension of the array a . * ** * ** n integer * ** the order of the matrix a . * ** * ** ipvt integer[n] * ** the pivot vector from dgeco or dgefa. * ** * ** b double precision[n] * ** the right hand side vector. * ** * ** job integer * ** = 0 to solve a*x = b , * ** = nonzero to solve trans(a)*x = b where * ** trans(a) is the transpose. * ** * ** on return * ** * ** b the solution vector x . * ** * ** error condition * ** * ** a division by zero will occur if the input factor contains a * ** zero on the diagonal. technically this indicates singularity * ** but it is often caused by improper arguments or improper * ** setting of lda . it will not occur if the subroutines are * ** called correctly and if dgeco has set rcond .gt. 0.0 * ** or dgefa has set info .eq. 0 . * ** * ** to compute inverse(a) * c where c is a matrix * ** with p columns * ** dgeco(a,lda,n,ipvt,rcond,z) * ** if (!rcond is too small){ * ** for (j=0,j= 1) for (k = 0; k < nm1; k++) { l = ipvt[k]; t = b[l]; if (l != k) { b[l] = b[k]; b[k] = t; } daxpy_r(n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1); } /* now solve u*x = y */ for (kb = 0; kb < n; kb++) { k = n - (kb + 1); b[k] = b[k] / a[lda * k + k]; t = -b[k]; daxpy_r(k, t, &a[lda * k + 0], 1, &b[0], 1); } } else { /* job = nonzero, solve trans(a) * x = b */ /* first solve trans(u)*y = b */ for (k = 0; k < n; k++) { t = ddot_r(k, &a[lda * k + 0], 1, &b[0], 1); b[k] = (b[k] - t) / a[lda * k + k]; } /* now solve trans(l)*x = y */ if (nm1 >= 1) for (kb = 1; kb < nm1; kb++) { k = n - (kb + 1); b[k] = b[k] + ddot_r(n - (k + 1), &a[lda * k + k + 1], 1, &b[k + 1], 1); l = ipvt[k]; if (l != k) { t = b[l]; b[l] = b[k]; b[k] = t; } } } } else { nm1 = n - 1; if (job == 0) { /* job = 0 , solve a * x = b */ /* first solve l*y = b */ if (nm1 >= 1) for (k = 0; k < nm1; k++) { l = ipvt[k]; t = b[l]; if (l != k) { b[l] = b[k]; b[k] = t; } daxpy_ur(n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1); } /* now solve u*x = y */ for (kb = 0; kb < n; kb++) { k = n - (kb + 1); b[k] = b[k] / a[lda * k + k]; t = -b[k]; daxpy_ur(k, t, &a[lda * k + 0], 1, &b[0], 1); } } else { /* job = nonzero, solve trans(a) * x = b */ /* first solve trans(u)*y = b */ for (k = 0; k < n; k++) { t = ddot_ur(k, &a[lda * k + 0], 1, &b[0], 1); b[k] = (b[k] - t) / a[lda * k + k]; } /* now solve trans(l)*x = y */ if (nm1 >= 1) for (kb = 1; kb < nm1; kb++) { k = n - (kb + 1); b[k] = b[k] + ddot_ur(n - (k + 1), &a[lda * k + k + 1], 1, &b[k + 1], 1); l = ipvt[k]; if (l != k) { t = b[l]; b[l] = b[k]; b[k] = t; } } } } } /* * ** Constant times a vector plus a vector. * ** Jack Dongarra, linpack, 3/11/78. * ** ROLLED version * */ static void daxpy_r(int n, REAL da, REAL *dx, int incx, REAL *dy, int incy) { int i, ix, iy; if (n <= 0) { return; } if (da == ZERO) { return; } if (incx != 1 || incy != 1) { /* code for unequal increments or equal increments != 1 */ ix = 1; iy = 1; if (incx < 0) { ix = (-n + 1) * incx + 1; } if (incy < 0) { iy = (-n + 1) * incy + 1; } for (i = 0; i < n; i++) { dy[iy] = dy[iy] + da * dx[ix]; ix = ix + incx; iy = iy + incy; } return; } /* code for both increments equal to 1 */ for (i = 0; i < n; i++) { dy[i] = dy[i] + da * dx[i]; } } /* * ** Forms the dot product of two vectors. * ** Jack Dongarra, linpack, 3/11/78. * ** ROLLED version * */ static REAL ddot_r(int n, REAL *dx, int incx, REAL *dy, int incy) { REAL dtemp; int i, ix, iy; dtemp = ZERO; if (n <= 0) { return (ZERO); } if (incx != 1 || incy != 1) { /* code for unequal increments or equal increments != 1 */ ix = 0; iy = 0; if (incx < 0) { ix = (-n + 1) * incx; } if (incy < 0) { iy = (-n + 1) * incy; } for (i = 0; i < n; i++) { dtemp = dtemp + dx[ix] * dy[iy]; ix = ix + incx; iy = iy + incy; } return (dtemp); } /* code for both increments equal to 1 */ for (i = 0; i < n; i++) { dtemp = dtemp + dx[i] * dy[i]; } return (dtemp); } /* * ** Scales a vector by a constant. * ** Jack Dongarra, linpack, 3/11/78. * ** ROLLED version * */ static void dscal_r(int n, REAL da, REAL *dx, int incx) { int i, nincx; if (n <= 0) { return; } if (incx != 1) { /* code for increment not equal to 1 */ nincx = n * incx; for (i = 0; i < nincx; i = i + incx) { dx[i] = da * dx[i]; } return; } /* code for increment equal to 1 */ for (i = 0; i < n; i++) { dx[i] = da * dx[i]; } } /* * ** constant times a vector plus a vector. * ** Jack Dongarra, linpack, 3/11/78. * ** UNROLLED version * */ static void daxpy_ur(int n, REAL da, REAL *dx, int incx, REAL *dy, int incy) { int i, ix, iy, m; if (n <= 0) { return; } if (da == ZERO) { return; } if (incx != 1 || incy != 1) { /* code for unequal increments or equal increments != 1 */ ix = 1; iy = 1; if (incx < 0) { ix = (-n + 1) * incx + 1; } if (incy < 0) { iy = (-n + 1) * incy + 1; } for (i = 0; i < n; i++) { dy[iy] = dy[iy] + da * dx[ix]; ix = ix + incx; iy = iy + incy; } return; } /* code for both increments equal to 1 */ m = n % 4; if (m != 0) { for (i = 0; i < m; i++) { dy[i] = dy[i] + da * dx[i]; } if (n < 4) { return; } } for (i = m; i < n; i = i + 4) { dy[i] = dy[i] + da * dx[i]; dy[i + 1] = dy[i + 1] + da * dx[i + 1]; dy[i + 2] = dy[i + 2] + da * dx[i + 2]; dy[i + 3] = dy[i + 3] + da * dx[i + 3]; } } /* * ** Forms the dot product of two vectors. * ** Jack Dongarra, linpack, 3/11/78. * ** UNROLLED version * */ static REAL ddot_ur(int n, REAL *dx, int incx, REAL *dy, int incy) { REAL dtemp; int i, ix, iy, m; dtemp = ZERO; if (n <= 0) { return (ZERO); } if (incx != 1 || incy != 1) { /* code for unequal increments or equal increments != 1 */ ix = 0; iy = 0; if (incx < 0) { ix = (-n + 1) * incx; } if (incy < 0) { iy = (-n + 1) * incy; } for (i = 0; i < n; i++) { dtemp = dtemp + dx[ix] * dy[iy]; ix = ix + incx; iy = iy + incy; } return (dtemp); } /* code for both increments equal to 1 */ m = n % 5; if (m != 0) { for (i = 0; i < m; i++) { dtemp = dtemp + dx[i] * dy[i]; } if (n < 5) { return (dtemp); } } for (i = m; i < n; i = i + 5) { dtemp = dtemp + dx[i] * dy[i] + dx[i + 1] * dy[i + 1] + dx[i + 2] * dy[i + 2] + dx[i + 3] * dy[i + 3] + dx[i + 4] * dy[i + 4]; } return (dtemp); } /* * ** Scales a vector by a constant. * ** Jack Dongarra, linpack, 3/11/78. * ** UNROLLED version * */ static void dscal_ur(int n, REAL da, REAL *dx, int incx) { int i, m, nincx; if (n <= 0) { return; } if (incx != 1) { /* code for increment not equal to 1 */ nincx = n * incx; for (i = 0; i < nincx; i = i + incx) { dx[i] = da * dx[i]; } return; } /* code for increment equal to 1 */ m = n % 5; if (m != 0) { for (i = 0; i < m; i++) { dx[i] = da * dx[i]; } if (n < 5) { return; } } for (i = m; i < n; i = i + 5) { dx[i] = da * dx[i]; dx[i + 1] = da * dx[i + 1]; dx[i + 2] = da * dx[i + 2]; dx[i + 3] = da * dx[i + 3]; dx[i + 4] = da * dx[i + 4]; } } /* * ** Finds the index of element having max. absolute value. * ** Jack Dongarra, linpack, 3/11/78. * */ static int idamax(int n, REAL *dx, int incx) { REAL dmax; int i, ix, itemp; if (n < 1) { return (-1); } if (n == 1) { return (0); } if (incx != 1) { /* code for increment not equal to 1 */ ix = 1; dmax = fabs((double)dx[0]); ix = ix + incx; for (i = 1; i < n; i++) { if (fabs((double)dx[ix]) > dmax) { itemp = i; dmax = fabs((double)dx[ix]); } ix = ix + incx; } } else { /* code for increment equal to 1 */ itemp = 0; dmax = fabs((double)dx[0]); for (i = 1; i < n; i++) if (fabs((double)dx[i]) > dmax) { itemp = i; dmax = fabs((double)dx[i]); } } return (itemp); } static REAL second(void) { return ((REAL)((REAL)clock() / (REAL)CLOCKS_PER_SEC)); }