/* test.c - MemTest-86 Version 3.4
*
* Released under version 2 of the GNU Public License.
* By Chris Brady
*/
#include "test.h"
#include "config.h"
#include "smp.h"
extern volatile int mstr_cpu;
extern volatile int run_cpus;
extern volatile int test;
extern volatile int segs, bail;
extern int test_ticks, nticks;
extern struct tseq tseq[];
extern void update_err_counts(void);
extern void print_err_counts(void);
void rand_seed(unsigned int seed1, unsigned int seed2, int me);
ulong rand(int me);
void poll_errors(void);
int ecount = 0;
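/*
 * Round value up to the next multiple of (mask + 1).  The mask must be
 * one less than a power of two, e.g. roundup(0x12345, 0x1ffff) == 0x20000.
 */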
static inline ulong roundup(ulong value, ulong mask)
{
return (value + mask) & ~mask;
}
/*
* Memory address test, walking ones
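* A single one bit is walked across the address lines: for each power of
* two offset the complement pattern is written and the base location is
* re-read, so a stuck or shorted address line shows up immediately.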
*/
void addr_tst1(int me)
{
int i, j, k;
volatile ulong *p, *pt, *end;
ulong bad, mask, bank, p1;
/* Test the global address bits */
for (p1=0, j=0; j<2; j++) {
hprint(LINE_PAT, COL_PAT, p1);
/* Set pattern in our lowest multiple of 0x20000 */
p = (ulong *)roundup((ulong)v->map[0].start, 0x1ffff);
*p = p1;
/* Now write the pattern complement */
p1 = ~p1;
end = v->map[segs-1].end;
for (i=0; i<100; i++) {
mask = 4;
do {
pt = (ulong *)((ulong)p | mask);
if (pt == p) {
mask = mask << 1;
continue;
}
if (pt >= end) {
break;
}
*pt = p1;
if ((bad = *p) != ~p1) {
ad_err1((ulong *)p, (ulong *)mask,
bad, ~p1);
i = 1000;
}
mask = mask << 1;
} while(mask);
}
do_tick(me);
BAILR
}
/* Now check the address bits in each bank */
/* If we have more than 8MB of memory then the bank size must be */
/* bigger than 256KB. If so use 1MB for the bank size. */
if (v->pmap[v->msegs - 1].end > (0x800000 >> 12)) {
bank = 0x100000;
} else {
bank = 0x40000;
}
for (p1=0, k=0; k<2; k++) {
hprint(LINE_PAT, COL_PAT, p1);
for (j=0; j<segs; j++) {
p = v->map[j].start;
/* Force the start address to be a multiple of the bank size */
p = (ulong *)roundup((ulong)p, bank - 1);
end = v->map[j].end;
/* Redundant checks for overflow */
while (p < end && p > v->map[j].start && p != 0) {
*p = p1;
p1 = ~p1;
for (i=0; i<50; i++) {
mask = 4;
do {
pt = (ulong *)
((ulong)p | mask);
if (pt == p) {
mask = mask << 1;
continue;
}
if (pt >= end) {
break;
}
*pt = p1;
if ((bad = *p) != ~p1) {
ad_err1((ulong *)p,
(ulong *)mask,
bad,~p1);
i = 200;
}
mask = mask << 1;
} while(mask);
}
if (p + bank > p) {
p += bank;
} else {
p = end;
}
p1 = ~p1;
}
}
do_tick(me);
BAILR
p1 = ~p1;
}
}
/*
* Memory address test, own address
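* Every location is written with its own address and then read back.
* An address decoding fault shows up as a location that returns the
* address of some other cell.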
*/
void addr_tst2(int me)
{
int j, done;
ulong *p, *pe, *end, *start;
cprint(LINE_PAT, COL_PAT, "address ");
/* Write each address with its own address */
for (j=0; j<segs; j++) {
start = v->map[j].start;
end = v->map[j].end;
pe = (ulong *)start;
p = start;
done = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* for (; p <= pe; p++) {
* *p = (ulong)p;
* }
*/
asm __volatile__ (
"jmp L91\n\t"
".p2align 4,,7\n\t"
"L90:\n\t"
"addl $4,%%edi\n\t"
"L91:\n\t"
"movl %%edi,(%%edi)\n\t"
"cmpl %%edx,%%edi\n\t"
"jb L90\n\t"
: : "D" (p), "d" (pe)
: "cc", "memory"
);
p = pe + 1;
} while (!done);
}
/* Each address should have its own address */
for (j=0; j<segs; j++) {
start = v->map[j].start;
end = v->map[j].end;
pe = (ulong *)start;
p = start;
done = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* for (; p <= pe; p++) {
* if((bad = *p) != (ulong)p) {
* ad_err2((ulong)p, bad);
* }
* }
*/
asm __volatile__ (
"jmp L95\n\t"
".p2align 4,,7\n\t"
"L99:\n\t"
"addl $4,%%edi\n\t"
"L95:\n\t"
"movl (%%edi),%%ecx\n\t"
"cmpl %%edi,%%ecx\n\t"
"jne L97\n\t"
"L96:\n\t"
"cmpl %%edx,%%edi\n\t"
"jb L99\n\t"
"jmp L98\n\t"
"L97:\n\t"
"pushl %%edx\n\t"
"pushl %%ecx\n\t"
"pushl %%edi\n\t"
"call ad_err2\n\t"
"popl %%edi\n\t"
"popl %%ecx\n\t"
"popl %%edx\n\t"
"jmp L96\n\t"
"L98:\n\t"
: : "D" (p), "d" (pe)
: "ecx", "cc", "memory"
);
p = pe + 1;
} while (!done);
}
}
/*
* Test all of memory using a "half moving inversions" algorithm using random
* numbers and their complement as the data pattern. Since we are not able to
* produce random numbers in reverse order, testing is only done in the forward
* direction.
*/
void movinvr(int me)
{
int i, j, done, seed1, seed2;
ulong *p;
ulong *pe;
ulong *start,*end;
ulong num, chunk;
/* Initialize memory with initial sequence of random numbers. */
if (v->rdtsc) {
asm __volatile__ ("rdtsc":"=a" (seed1),"=d" (seed2));
} else {
seed1 = 521288629 + v->pass;
seed2 = 362436069 - v->pass;
}
/* Display the current seed */
if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, seed1);
rand_seed(seed1, seed2, me);
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
chunk++;
start = v->map[j].start+(chunk*me);
/* Set end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
pe = start;
p = start;
done = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code */
/*
for (; p <= pe; p++) {
*p = rand();
}
*/
asm __volatile__ (
"jmp L200\n\t"
".p2align 4,,7\n\t"
"L201:\n\t"
"addl $4,%%edi\n\t"
"L200:\n\t"
"pushl %%edx\n\t"
"pushl %%ecx\n\t" \
"call rand\n\t"
"popl %%ecx\n\t" \
"popl %%edx\n\t"
"movl %%eax,(%%edi)\n\t"
"cmpl %%esi,%%edi\n\t"
"jb L201\n\t"
: : "D" (p), "S" (pe), "c" (me)
: "eax", "cc", "memory"
);
p = pe + 1;
} while (!done);
}
/* Do moving inversions test. Check for initial pattern and then
* write the complement for each memory location.
*/
for (i=0; i<2; i++) {
rand_seed(seed1, seed2, me);
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
chunk++;
start = v->map[j].start+(chunk*me);
/* Set end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
pe = start;
p = start;
done = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code */
/*
for (; p <= pe; p++) {
num = rand();
if (i) {
num = ~num;
}
if ((bad=*p) != num) {
error((ulong*)p, num, bad);
}
*p = ~num;
}
*/
if (i) {
num = 0xffffffff;
} else {
num = 0;
}
asm __volatile__ (
"pushl %%ebp\n\t"
"jmp L26\n\t" \
".p2align 4,,7\n\t" \
"L27:\n\t" \
"addl $4,%%edi\n\t" \
"L26:\n\t" \
"pushl %%ecx\n\t" \
"pushl %%edx\n\t" \
"call rand\n\t"
"popl %%edx\n\t" \
"popl %%ecx\n\t" \
"xorl %%ecx,%%eax\n\t" \
"cmpl %%eax,(%%edi)\n\t" \
"jne L23\n\t" \
"L25:\n\t" \
"movl $0xffffffff,%%ebp\n\t" \
"xorl %%ebp,%%eax\n\t" \
"movl %%eax,(%%edi)\n\t" \
"cmpl %%esi,%%edi\n\t" \
"jb L27\n\t" \
"jmp L24\n" \
"L23:\n\t" \
"pushl %%edx\n\t" \
"pushl %%ecx\n\t" \
"pushl %%eax\n\t" \
"pushl %%edi\n\t" \
"call error\n\t" \
"popl %%edi\n\t" \
"popl %%eax\n\t" \
"popl %%ecx\n\t" \
"popl %%edx\n\t" \
"jmp L25\n" \
"L24:\n\t" \
"popl %%ebp\n\t"
:: "D" (p), "S" (pe), "c" (num),
"d" (me)
: "eax", "cc", "memory"
);
p = pe + 1;
} while (!done);
}
}
}
/*
* Test all of memory using a "moving inversions" algorithm using the
* pattern in p1 and its complement in p2.
*/
void movinv1(int iter, ulong p1, ulong p2, int me)
{
int i, j, done;
ulong *p, *pe, len, chunk, *start, *end;
/* Display the current pattern */
if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, p1);
/* Initialize memory with the initial pattern. */
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
chunk++;
start = v->map[j].start + (chunk * me);
/* Set the end addrs for the highest numbered CPU to the
* end of the segment to take care of rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
pe = start;
p = start;
done = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
len = pe - p + 1;
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* for (; p <= pe; p++) {
* *p = p1;
* }
*/
asm __volatile__ (
"rep\n\t" \
"stosl\n\t"
: : "c" (len), "D" (p), "a" (p1)
: "memory"
);
p = pe + 1;
} while (!done);
}
/* Do moving inversions test. Check for initial pattern and then
* write the complement for each memory location. Test from bottom
* up and then from the top down. */
for (i=0; i<iter; i++) {
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
chunk++;
start = v->map[j].start + (chunk * me);
/* Set the end addrs for the highest numbered CPU to the
* end of the segment to take care of rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
pe = start;
p = start;
done = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* for (; p <= pe; p++) {
* if ((bad=*p) != p1) {
* error((ulong*)p, p1, bad);
* }
* *p = p2;
* }
*/
asm __volatile__ (
"jmp L2\n\t" \
".p2align 4,,7\n\t" \
"L0:\n\t" \
"addl $4,%%edi\n\t" \
"L2:\n\t" \
"cmpl %%eax,(%%edi)\n\t" \
"jne L3\n\t" \
"L5:\n\t" \
"movl %%ecx,(%%edi)\n\t" \
"cmpl %%edx,%%edi\n\t" \
"jb L0\n\t" \
"jmp L4\n" \
"L3:\n\t" \
"pushl %%edx\n\t" \
"pushl %%ebx\n\t" \
"pushl %%ecx\n\t" \
"pushl %%eax\n\t" \
"pushl %%edi\n\t" \
"call error\n\t" \
"popl %%edi\n\t" \
"popl %%eax\n\t" \
"popl %%ecx\n\t" \
"popl %%ebx\n\t" \
"popl %%edx\n\t" \
"jmp L5\n" \
"L4:\n\t" \
:: "a" (p1), "D" (p), "d" (pe), "c" (p2)
: "cc", "memory"
);
p = pe + 1;
} while (!done);
}
for (j=segs-1; j>=0; j--) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
chunk++;
start = v->map[j].start + (chunk * me);
/* Set the end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
pe = end;
p = end;
done = 0;
do {
do_tick(me);
BAILR
/* Check for underflow */
if (pe - SPINSZ < pe && pe != 0) {
pe -= SPINSZ;
} else {
pe = start;
done++;
}
/* Since we are using unsigned addresses a
* redundant check is required */
if (pe < start || pe > end) {
pe = start;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* do {
* if ((bad=*p) != p2) {
* error((ulong*)p, p2, bad);
* }
* *p = p1;
* } while (p-- >= pe);
*/
asm __volatile__ (
"jmp L9\n\t"
".p2align 4,,7\n\t"
"L11:\n\t"
"subl $4, %%edi\n\t"
"L9:\n\t"
"cmpl %%ecx,(%%edi)\n\t"
"jne L6\n\t"
"L10:\n\t"
"movl %%eax,(%%edi)\n\t"
"cmpl %%edi, %%edx\n\t"
"jne L11\n\t"
"jmp L7\n\t"
"L6:\n\t"
"pushl %%edx\n\t"
"pushl %%eax\n\t"
"pushl %%ecx\n\t"
"pushl %%ebx\n\t"
"pushl %%edi\n\t"
"call error\n\t"
"popl %%edi\n\t"
"popl %%ebx\n\t"
"popl %%ecx\n\t"
"popl %%eax\n\t"
"popl %%edx\n\t"
"jmp L10\n"
"L7:\n\t"
:: "a" (p1), "D" (p), "d" (pe), "c" (p2)
: "cc", "memory"
);
p = pe - 1;
} while (!done);
}
}
}
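/*
 * Test all of memory using a moving inversions algorithm with a shifting
 * 32 bit wide pattern.  p1 is the starting pattern and sval is the bit
 * shifted in as the pattern moves.  When the shift count wraps, lb
 * restarts the pattern on the upward pass and hb restarts it on the
 * downward pass.  off is the initial bit offset of the pattern.
 */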
void movinv32(int iter, ulong p1, ulong lb, ulong hb, int sval, int off,int me)
{
int i, j, k=0, n=0, done;
ulong *p, *pe, *start, *end, chunk, pat = 0, p3;
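/* p3 is the bit that gets OR'd into the top of the pattern as it is
 * shifted right on the downward pass */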
p3 = sval << 31;
/* Display the current pattern */
if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, p1);
/* Initialize memory with the initial pattern. */
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
/* Force chunk to be a multiple of 64 */
chunk = (chunk + 63) & 0xffffffc0;
start = v->map[j].start+(chunk*me);
/* Set end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
pe = start;
p = start;
done = 0;
k = off;
pat = p1;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Do a SPINSZ section of memory */
/* Original C code replaced with hand tuned assembly code
* while (p <= pe) {
* *p = pat;
* if (++k >= 32) {
* pat = lb;
* k = 0;
* } else {
* pat = pat << 1;
* pat |= sval;
* }
* p++;
* }
*/
asm __volatile__ (
"jmp L20\n\t"
".p2align 4,,7\n\t"
"L923:\n\t"
"addl $4,%%edi\n\t"
"L20:\n\t"
"movl %[pat],(%%edi)\n\t"
"addl $1,%%ecx\n\t"
"cmpl $32,%%ecx\n\t"
"jne L21\n\t"
"movl %%esi,%[pat]\n\t"
"xorl %%ecx,%%ecx\n\t"
"jmp L22\n"
"L21:\n\t"
"shll $1,%[pat]\n\t"
"orl %%eax,%[pat]\n\t"
"L22:\n\t"
"cmpl %%edx,%%edi\n\t"
"jb L923\n\t"
: "=c" (k), [pat] "=r" (pat)
: "D" (p),"d" (pe),"c" (k), "[pat]" (pat),
"a" (sval), "S" (lb)
: "cc", "memory"
);
p = pe + 1;
} while (!done);
}
/* Do moving inversions test. Check for initial pattern and then
* write the complement for each memory location. Test from bottom
* up and then from the top down. */
for (i=0; i<iter; i++) {
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
/* Force chunk to be a multiple of 64 */
chunk = (chunk + 63) & 0xffffffc0;
start = v->map[j].start+(chunk*me);
/* Set end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
pe = start;
p = start;
done = 0;
k = off;
pat = p1;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* while (1) {
* if ((bad=*p) != pat) {
* error((ulong*)p, pat, bad);
* }
* *p = ~pat;
* if (p >= pe) break;
* p++;
*
* if (++k >= 32) {
* pat = lb;
* k = 0;
* } else {
* pat = pat << 1;
* pat |= sval;
* }
* }
*/
asm __volatile__ (
"jmp L30\n\t"
".p2align 4,,7\n\t"
"L930:\n\t"
"addl $4,%%edi\n\t"
"L30:\n\t"
"cmpl %[pat],(%%edi)\n\t"
"jne L34\n\t"
"L35:\n\t"
"notl %[pat]\n\t"
"movl %[pat],(%%edi)\n\t"
"notl %[pat]\n\t"
"incl %%ecx\n\t"
"cmpl $32,%%ecx\n\t"
"jne L31\n\t"
"movl %%esi,%[pat]\n\t"
"xorl %%ecx,%%ecx\n\t"
"jmp L32\n"
"L31:\n\t"
"shll $1,%[pat]\n\t"
"orl %%eax,%[pat]\n\t"
"L32:\n\t"
"cmpl %%edx,%%edi\n\t"
"jb L930\n\t"
"jmp L33\n\t"
"L34:\n\t" \
"pushl %%esi\n\t"
"pushl %%eax\n\t"
"pushl %%ebx\n\t"
"pushl %%edx\n\t"
"pushl %%ebp\n\t"
"pushl %%ecx\n\t"
"pushl %%edi\n\t"
"call error\n\t"
"popl %%edi\n\t"
"popl %%ecx\n\t"
"popl %%ebp\n\t"
"popl %%edx\n\t"
"popl %%ebx\n\t"
"popl %%eax\n\t"
"popl %%esi\n\t"
"jmp L35\n"
"L33:\n\t"
: "=c" (k), [pat] "=r" (pat)
: "D" (p), "d" (pe), "c" (k),
"[pat]" (pat), "a" (sval), "S" (lb)
: "cc", "memory"
);
p = pe + 1;
} while (!done);
}
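/* Recompute the shift count and pattern that the upward pass ended
 * with so the downward pass can walk the same sequence in reverse.
 */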
if (--k < 0) {
k = 31;
}
for (pat = lb, n = 0; n < k; n++) {
pat = pat << 1;
pat |= sval;
}
k++;
for (j=segs-1; j>=0; j--) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
/* Force chunk to be a multiple of 64 */
chunk = (chunk + 63) & 0xffffffc0;
start = v->map[j].start+(chunk*me);
/* Set end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
p = end;
pe = end;
done = 0;
do {
do_tick(me);
BAILR
/* Check for underflow */
if (pe - SPINSZ < pe && pe != 0) {
pe -= SPINSZ;
} else {
pe = start;
done++;
}
/* We need this redundant check because we are
* using unsigned longs for the address.
*/
if (pe < start || pe > end) {
pe = start;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* while(1) {
* if ((bad=*p) != ~pat) {
* error((ulong*)p, ~pat, bad);
* }
* *p = pat;
* if (p >= pe) break;
* p++;
* if (--k <= 0) {
* pat = hb;
* k = 32;
* } else {
* pat = pat >> 1;
* pat |= p3;
* }
* };
*/
asm __volatile__ (
"jmp L40\n\t"
".p2align 4,,7\n\t"
"L49:\n\t"
"subl $4,%%edi\n\t"
"L40:\n\t"
"notl %[pat]\n\t"
"cmpl %[pat],(%%edi)\n\t"
"jne L44\n\t"
"L45:\n\t"
"notl %[pat]\n\t"
"movl %[pat],(%%edi)\n\t"
"decl %%ecx\n\t"
"cmpl $0,%%ecx\n\t"
"jg L41\n\t"
"movl %%esi,%[pat]\n\t"
"movl $32,%%ecx\n\t"
"jmp L42\n"
"L41:\n\t"
"shrl $1,%[pat]\n\t"
"orl %%eax,%[pat]\n\t"
"L42:\n\t"
"cmpl %%edx,%%edi\n\t"
"ja L49\n\t"
"jmp L43\n\t"
"L44:\n\t" \
"pushl %%esi\n\t"
"pushl %%eax\n\t"
"pushl %%ebx\n\t"
"pushl %%edx\n\t"
"pushl %%ebp\n\t"
"pushl %%ecx\n\t"
"pushl %%edi\n\t"
"call error\n\t"
"popl %%edi\n\t"
"popl %%ecx\n\t"
"popl %%ebp\n\t"
"popl %%edx\n\t"
"popl %%ebx\n\t"
"popl %%eax\n\t"
"popl %%esi\n\t"
"jmp L45\n"
"L43:\n\t"
: "=c" (k), [pat] "=r" (pat)
: "D" (p), "d" (pe), "c" (k),
"[pat]" (pat), "a" (p3), "S" (hb)
: "cc", "memory"
);
p = pe - 1;
} while (!done);
}
}
}
/*
* Test all of memory using modulo X access pattern.
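* Every MOD_SZ'th location starting at 'offset' is written with p1, all
* other locations are then written with p2 'iter' times, and finally the
* p1 locations are checked.  This catches cells that are disturbed by
* writes to neighboring addresses.  Note that the assembly below assumes
* MOD_SZ == 20 (the 80 byte stride and the compare with 19).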
*/
void modtst(int offset, int iter, ulong p1, ulong p2, int me)
{
int j, k, l, done;
ulong *p;
ulong *pe;
ulong *start, *end, chunk;
/* Display the current pattern */
if (mstr_cpu == me) {
hprint(LINE_PAT, COL_PAT-2, p1);
cprint(LINE_PAT, COL_PAT+6, "-");
dprint(LINE_PAT, COL_PAT+7, offset, 2, 1);
}
/* Write every nth location with pattern */
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
chunk++;
start = v->map[j].start+(chunk*me);
/* Set end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
end -= MOD_SZ; /* adjust the ending address */
pe = (ulong *)start;
p = start+offset;
done = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* for (; p <= pe; p += MOD_SZ) {
* *p = p1;
* }
*/
asm __volatile__ (
"jmp L60\n\t" \
".p2align 4,,7\n\t" \
"L60:\n\t" \
"movl %%eax,(%%edi)\n\t" \
"addl $80,%%edi\n\t" \
"cmpl %%edx,%%edi\n\t" \
"jb L60\n\t" \
: "=D" (p)
: "D" (p), "d" (pe), "a" (p1)
: "cc", "memory"
);
} while (!done);
}
/* Write the rest of memory "iter" times with the pattern complement */
for (l=0; l<iter; l++) {
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
chunk++;
start = v->map[j].start+(chunk*me);
/* Set end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
pe = (ulong *)start;
p = start;
done = 0;
k = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* for (; p <= pe; p++) {
* if (k != offset) {
* *p = p2;
* }
* if (++k > MOD_SZ-1) {
* k = 0;
* }
* }
*/
asm __volatile__ (
"jmp L50\n\t" \
".p2align 4,,7\n\t" \
"L54:\n\t" \
"addl $4,%%edi\n\t" \
"L50:\n\t" \
"cmpl %%esi,%%ecx\n\t" \
"je L52\n\t" \
"movl %%eax,(%%edi)\n\t" \
"L52:\n\t" \
"incl %%esi\n\t" \
"cmpl $19,%%esi\n\t" \
"jle L53\n\t" \
"xorl %%esi,%%esi\n\t" \
"L53:\n\t" \
"cmpl %%edx,%%edi\n\t" \
"jb L54\n\t" \
: "=S" (k)
: "D" (p), "d" (pe), "a" (p2),
"S" (k), "c" (offset)
: "cc", "memory"
);
p = pe + 1;
} while (!done);
}
}
/* Now check every nth location */
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
chunk++;
start = v->map[j].start+(chunk*me);
/* Set end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
pe = (ulong *)start;
p = start+offset;
done = 0;
end -= MOD_SZ; /* adjust the ending address */
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* for (; p <= pe; p += MOD_SZ) {
* if ((bad=*p) != p1) {
* error((ulong*)p, p1, bad);
* }
* }
*/
asm __volatile__ (
"jmp L70\n\t" \
".p2align 4,,7\n\t" \
"L70:\n\t" \
"movl (%%edi),%%ecx\n\t" \
"cmpl %%eax,%%ecx\n\t" \
"jne L71\n\t" \
"L72:\n\t" \
"addl $80,%%edi\n\t" \
"cmpl %%edx,%%edi\n\t" \
"jb L70\n\t" \
"jmp L73\n\t" \
"L71:\n\t" \
"pushl %%edx\n\t"
"pushl %%ecx\n\t"
"pushl %%eax\n\t"
"pushl %%edi\n\t"
"call error\n\t"
"popl %%edi\n\t"
"popl %%eax\n\t"
"popl %%ecx\n\t"
"popl %%edx\n\t"
"jmp L72\n"
"L73:\n\t" \
: "=D" (p)
: "D" (p), "d" (pe), "a" (p1)
: "ecx", "cc", "memory"
);
} while (!done);
}
}
/*
* Test memory using block moves
* Adapted from Robert Redelmeier's burnBX test
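* The test runs in three phases: memory is filled with a rotating
* pattern of words and their complements, the data is then shuffled
* with block moves (the lower half of each range is copied up, then
* copied back down offset by 32 bytes), and finally adjacent word
* pairs are checked for equality.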
*/
void block_move(int iter, int me)
{
int i, j, done;
ulong len;
ulong *p, *pe, pp;
ulong *start, *end, chunk;
cprint(LINE_PAT, COL_PAT-2, " ");
/* Initialize memory with the initial pattern. */
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
/* Force chunk to be a multiple of 64 */
chunk = (chunk + 63) & 0xffffffc0;
start = v->map[j].start + (chunk * me);
/* Set end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
#ifdef USB_WAR
/* We can't do the block move test on low memory because
* BIOS USB support clobbers locations 0x410 and 0x4e0
*/
if (start < (ulong *)0x500) {
start = (ulong *)0x500;
}
#endif
pe = start;
p = start;
done = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
len = ((ulong)pe - (ulong)p) / 64;
len++;
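/* Fill each 64 byte block with a mix of the pattern in eax and its
 * complement (dwords 4-5, 10-11 and 14-15 get the complement), then
 * rotate the pattern left through the carry flag for the next block.
 * A rough C sketch of one block, assuming 'pat' holds the pattern:
 *	for (i=0; i<16; i++) {
 *		p[i] = (i==4||i==5||i==10||i==11||i==14||i==15) ? ~pat : pat;
 *	}
 */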
asm __volatile__ (
"jmp L100\n\t"
".p2align 4,,7\n\t"
"L100:\n\t"
"movl %%eax, %%edx\n\t"
"notl %%edx\n\t"
"movl %%eax,0(%%edi)\n\t"
"movl %%eax,4(%%edi)\n\t"
"movl %%eax,8(%%edi)\n\t"
"movl %%eax,12(%%edi)\n\t"
"movl %%edx,16(%%edi)\n\t"
"movl %%edx,20(%%edi)\n\t"
"movl %%eax,24(%%edi)\n\t"
"movl %%eax,28(%%edi)\n\t"
"movl %%eax,32(%%edi)\n\t"
"movl %%eax,36(%%edi)\n\t"
"movl %%edx,40(%%edi)\n\t"
"movl %%edx,44(%%edi)\n\t"
"movl %%eax,48(%%edi)\n\t"
"movl %%eax,52(%%edi)\n\t"
"movl %%edx,56(%%edi)\n\t"
"movl %%edx,60(%%edi)\n\t"
"rcll $1, %%eax\n\t"
"leal 64(%%edi), %%edi\n\t"
"decl %%ecx\n\t"
"jnz L100\n\t"
: "=D" (p)
: "D" (p), "c" (len), "a" (1)
: "edx", "cc", "memory"
);
} while (!done);
}
s_barrier();
/* Now move the data around
* First move the data up half of the segment size we are testing
* Then move the data to the original location + 32 bytes
*/
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
/* Force chunk to be a multiple of 64 */
chunk = (chunk + 63) & 0xffffffc0;
start = v->map[j].start + (chunk * me);
/* Set end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
#ifdef USB_WAR
/* We can't do the block move test on low memory because
* BIOS USB support clobbers locations 0x410 and 0x4e0
*/
if (start < (ulong *)0x500) {
start = (ulong *)0x500;
}
#endif
pe = start;
p = start;
done = 0;
do {
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = (ulong *)((ulong)end & 0xfffffff0);
}
if (pe >= end) {
pe = (ulong *)((ulong)end & 0xfffffff0);
done++;
}
if (p == pe ) {
break;
}
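/* pp points at the middle of the range and len is the number of
 * 32 bit words in each half */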
pp = (ulong)p + (((ulong)pe - (ulong)p) / 2);
len = ((ulong)pe - (ulong)p) / 8;
for(i=0; i<iter; i++) {
do_tick(me);
BAILR
asm __volatile__ (
"cld\n"
"jmp L110\n\t"
".p2align 4,,7\n\t"
"L110:\n\t"
"movl %1,%%edi\n\t"
"movl %0,%%esi\n\t"
"movl %2,%%ecx\n\t"
"rep\n\t"
"movsl\n\t"
"movl %0,%%edi\n\t"
"addl $32,%%edi\n\t"
"movl %1,%%esi\n\t"
"movl %2,%%ecx\n\t"
"subl $8,%%ecx\n\t"
"rep\n\t"
"movsl\n\t"
"movl %0,%%edi\n\t"
"movl $8,%%ecx\n\t"
"rep\n\t"
"movsl\n\t"
:: "g" (p), "g" (pp), "g" (len)
: "edi", "esi", "ecx", "cc", "memory"
);
}
p = pe;
} while (!done);
}
s_barrier();
/* Now check the data
* The error checking is rather crude. We just check that the
* adjacent words are the same.
*/
for (j=0; j<segs; j++) {
if (run_cpus == 1) {
start = v->map[j].start;
end = v->map[j].end;
} else {
chunk = v->map[j].end - v->map[j].start;
chunk /= run_cpus;
/* Force chunk to be a multiple of 64 */
chunk = (chunk + 63) & 0xffffffc0;
start = v->map[j].start + (chunk * me);
/* Set end addrs for the highest CPU num to the
* end of the segment for rounding errors */
if (me == mstr_cpu) {
end = v->map[j].end;
} else {
end = start + chunk - 1;
}
}
#ifdef USB_WAR
/* We can't do the block move test on low memory because
* BIOS USB support clobbers locations 0x410 and 0x4e0
*/
if (start < (ulong *)0x500) {
start = (ulong *)0x500;
}
#endif
pe = start;
p = start;
done = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
pe--; /* adjust the end since we are testing pe+1 */
asm __volatile__ (
"jmp L120\n\t"
".p2align 4,,7\n\t"
"L124:\n\t"
"addl $8,%%edi\n\t"
"L120:\n\t"
"movl (%%edi),%%ecx\n\t"
"cmpl 4(%%edi),%%ecx\n\t"
"jnz L121\n\t"
"L122:\n\t"
"cmpl %%edx,%%edi\n\t"
"jb L124\n"
"jmp L123\n\t"
"L121:\n\t"
"pushl %%edx\n\t"
"pushl 4(%%edi)\n\t"
"pushl %%ecx\n\t"
"pushl %%edi\n\t"
"call error\n\t"
"popl %%edi\n\t"
"popl %%ecx\n\t"
"addl $4,%%esp\n\t"
"popl %%edx\n\t"
"jmp L122\n"
"L123:\n\t"
: "=D" (p)
: "D" (p), "d" (pe)
: "ecx", "cc", "memory"
);
} while (!done);
}
}
/*
* Test memory for bit fade, fill memory with pattern.
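* bit_fade_fill() writes the pattern everywhere; after the test sequence
* lets the memory sit idle for a while, bit_fade_chk() verifies that no
* bits have decayed.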
*/
void bit_fade_fill(ulong p1, int me)
{
int j, done;
ulong *p, *pe;
ulong *start,*end;
/* Display the current pattern */
hprint(LINE_PAT, COL_PAT, p1);
/* Initialize memory with the initial pattern. */
for (j=0; j<segs; j++) {
start = v->map[j].start;
end = v->map[j].end;
pe = (ulong *)start;
p = start;
done = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
for (; p < pe;) {
*p = p1;
p++;
}
p = pe + 1;
} while (!done);
}
}
void bit_fade_chk(ulong p1, int me)
{
int j, done;
ulong *p, *pe, bad;
ulong *start,*end;
/* Make sure that nothing changed while sleeping */
for (j=0; j<segs; j++) {
start = v->map[j].start;
end = v->map[j].end;
pe = (ulong *)start;
p = start;
done = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
for (; p < pe;) {
if ((bad=*p) != p1) {
error((ulong*)p, p1, bad);
}
p++;
}
p = pe + 1;
} while (!done);
}
}
/* Sleep for N seconds */
void sleep(long n, int flag, int me)
{
ulong sh, sl, l, h, t, ip=0;
/* save the starting time */
asm __volatile__(
"rdtsc":"=a" (sl),"=d" (sh));
/* loop for n seconds */
while (1) {
asm __volatile__(
"rep ; nop\n\t"
"rdtsc":"=a" (l),"=d" (h));
asm __volatile__ (
"subl %2,%0\n\t"
"sbbl %3,%1"
:"=a" (l), "=d" (h)
:"g" (sl), "g" (sh),
"0" (l), "1" (h)
:"cc");
t = h * ((unsigned)0xffffffff / v->clks_msec) / 1000;
t += (l / v->clks_msec) / 1000;
/* Is the time up? */
if (t >= n) {
break;
}
/* Only display elapsed time if flag is set */
if (flag == 0) {
continue;
}
if (t != ip) {
do_tick(me);
BAILR
ip = t;
}
}
}