/* blob: 84926e86c0f6dcd5044f329126a01de3834a033c (source-viewer header; not part of the program) */
/* init.c - MemTest-86 Version 3.6
*
* Released under version 2 of the Gnu Public License.
* By Chris Brady
* ----------------------------------------------------
* MemTest86+ V1.11 Specific code (GPL V2.0)
* By Samuel DEMEULEMEESTER, sdemeule@memtest.org
* http://www.x86-secret.com - http://www.memtest.org
*/
#include "stddef.h"
#include "stdin.h"
#include "cpuid.h"
#include "test.h"
#include "defs.h"
#include "config.h"
#include "smp.h"
#include "io.h"
/* Test sequence table. init() walks it up to the entry whose msg is NULL
 * and clears every entry's error counter. */
extern struct tseq tseq[];
extern short memsz_mode;
/* CPU counts printed in the status header that init() draws */
extern int num_cpus;
extern int found_cpus;
/* Here we store all of the cpuid data */
extern struct cpu_ident cpu_id;
/* Cache sizes per level in KB (they are printed with a "K" suffix and
 * multiplied by 1024 to get byte counts). 0 means no size has been
 * determined for that level. */
int l1_cache=0, l2_cache=0, l3_cache=0;
/* Only referenced from commented-out TSC correction code in the visible
 * part of this file. */
int tsc_invariable = 0;
/* CPU clock in kHz as stored by cpu_cache_speed(); the stored value
 * includes the rounding offset added for display. */
ulong extclock;
/* Prototypes for the helpers defined further down in this file */
ulong memspeed(ulong src, ulong len, int iter);
static void cpu_type(void);
static int cpuspeed(void);
static void get_cache_size();
static void cpu_cache_speed();
/* Not defined in the visible part of this file; called from init() */
void get_cpuid();
/*
 * Prepare the serial console and paint the static parts of the text screen:
 * rows 0-23 blank on blue, the title cells in red, and the bottom row in
 * reverse video.
 *
 * Each screen cell is two bytes: the character followed by its attribute.
 *
 * NOTE(review): the strings passed to serial_echo_print() look like terminal
 * control sequences whose escape bytes are missing (several are empty and the
 * first starts mid-word).  They are reproduced exactly as found -- confirm
 * against the upstream file.
 */
static void display_init(void)
{
	volatile char *cell;
	int n;

	serial_echo_init();
	serial_echo_print("INE_SCROLL;24r");	/* Set scroll area row 7-23 */
	serial_echo_print("");			/* Clear Screen */
	serial_echo_print("");
	serial_echo_print("");
	serial_echo_print("");

	/* Clear screen & set background to blue */
	cell = (char *)(SCREEN_ADR);
	n = 0;
	while (n < 80*24) {
		*cell++ = ' ';
		*cell++ = 0x17;
		n++;
	}

	/* Make the name background red (attribute bytes only) */
	cell = (char *)(SCREEN_ADR+1);
	for (n = 0; n < TITLE_WIDTH; n++) {
		cell[2 * n] = 0x47;
	}
	cprint(0, 0, " Memtest-86 v4.0a ");

	/* Do reverse video for the bottom display line */
	cell = (char *)(SCREEN_ADR+1+(24 * 160));
	for (n = 0; n < 80; n++) {
		cell[2 * n] = 0x71;
	}

	serial_echo_print("");
}
/*
 * Initialize the tester: stop the floppy motor, enable the cache, draw the
 * static screen layout, reset the shared test state in *v and the per-test
 * error counters, then identify the CPU and measure CPU/cache speeds.
 *
 * The TSC value read after the measurements is stored as the start time.
 * Cache sizes that are still 0 afterwards are replaced by the fallback
 * values 66 (L1) and 666 (L2) so later code never sees a zero size.
 */
void init(void)
{
	int i;

	outb(0x8, 0x3f2);	/* Kill Floppy Motor */

	/* Turn on cache */
	set_cache(1);

	/* Setup the display */
	display_init();

	cprint(1, COL_MID,"Pass %");
	cprint(2, COL_MID,"Test %");
	cprint(3, COL_MID,"Test #");
	cprint(4, COL_MID,"Testing: ");
	cprint(5, COL_MID,"Pattern: ");
	cprint(1, 0, "CPU Clk : ");
	cprint(2, 0, "L1 Cache: Unknown ");
	cprint(3, 0, "L2 Cache: Unknown ");
	cprint(4, 0, "L3 Cache: None ");
	cprint(5, 0, "Memory : ");
	cprint(6, 0, "------------------------------------------------------------------------------");
	cprint(7, 0, "CPU:");
	cprint(8, 0, "State:");
	cprint(7, 39, "| CPUs_Started: CPU_Select: All");
	cprint(8, 39, "| CPUs_Active: CPUs_Found: ");

	/* One column pair per CPU: its number on row 7, state "S" on row 8 */
	for (i = 0; i <num_cpus; i++) {
		dprint(7, 2*i+7, i, 1, 0);
		cprint(8, 2*i+7, "S");
	}
	dprint(7, 54, num_cpus, 2, 0);
	dprint(8, 72, found_cpus, 2, 0);
	cprint(9, 0, "------------------------------------------------------------------------------");

	/* Vertical separator left of the middle column on rows 1-5 */
	for(i=1; i < 6; i++) {
		cprint(i, COL_MID-2, "| ");
	}
	cprint(LINE_INFO, 0,
		"Time: 0:00:00 Iterations: Test_Sel: Std Pass: 0 Errors: 0");
	footer();

	aprint(5, 10, v->test_pages);

	/* Reset the shared test state */
	v->pass = 0;
	v->ecount = 0;
	v->ecc_ecount = 0;
	v->testsel = -1;
	v->msg_line = LINE_SCROLL-1;
	v->scroll_start = v->msg_line * 160;

	/* Error summary: low/min start at their maximum and high/max at zero,
	 * so the first recorded error replaces both ends. */
	v->erri.low_addr.page = 0x7fffffff;
	v->erri.low_addr.offset = 0xfff;
	v->erri.high_addr.page = 0;
	v->erri.high_addr.offset = 0;
	v->erri.min_bits = 32;
	v->erri.max_bits = 0;
	v->erri.maxl = 0;
	v->erri.cor_err = 0;
	v->erri.ebits = 0;
	v->erri.hdr_flag = 0;
	v->erri.tbits = 0;

	for (i=0; tseq[i].msg != NULL; i++) {
		tseq[i].errors = 0;
	}

	/* Get the cpu and cache information */
	get_cpuid();
	get_cache_size();
	cpu_type();
	cpu_cache_speed();

	/* Record the start time */
	asm __volatile__ ("rdtsc":"=a" (v->startl),"=d" (v->starth));
	v->snapl = v->startl;
	v->snaph = v->starth;

	/* Fallback sizes for cache levels that could not be determined.
	 * Each level gets its own default; the L2 default must not
	 * overwrite a detected L1 size. */
	if (l1_cache == 0) { l1_cache = 66; }
	if (l2_cache == 0) { l2_cache = 666; }

	v->printmode=PRINTMODE_ADDRESSES;
	v->numpatn=0;
}
/*
 * Get cache sizes for most AMD and Intel CPUs; exceptions for old CPUs are
 * handled in CPU detection (cpu_type()).
 *
 * Results are stored in the globals l1_cache, l2_cache and l3_cache, in KB.
 * The vendor is selected by the first character of the vendor string:
 *   'A' - sizes are taken from the AMD cache info already held in cpu_id
 *   'G' - CPUID(4) is used when cpu_id.max_cpuid > 3, otherwise the one-byte
 *         cache descriptors returned by CPUID(2) are decoded
 * Any other vendor leaves the globals untouched.
 */
static void get_cache_size(void)
{
	int i, j, n, size;
	unsigned int regs[4];	/* eax, ebx, ecx, edx of the last CPUID call */
	unsigned char *dp = (unsigned char *)regs;
	struct cpuid4_eax *eax = (struct cpuid4_eax *)&regs[0];
	struct cpuid4_ebx *ebx = (struct cpuid4_ebx *)&regs[1];
	struct cpuid4_ecx *ecx = (struct cpuid4_ecx *)&regs[2];

	switch(cpu_id.vend_id.char_array[0]) {
	/* AMD Processors */
	case 'A':
		l1_cache = cpu_id.cache_info.amd.l1_i_sz;
		l1_cache += cpu_id.cache_info.amd.l1_d_sz;
		l2_cache = cpu_id.cache_info.amd.l2_sz;
		l3_cache = cpu_id.cache_info.amd.l3_sz;
		l3_cache *= 512;
		break;

	/* Intel Processors */
	case 'G':
		l1_cache = 0;
		l2_cache = 0;
		l3_cache = 0;

		/* Use CPUID(4) if it is available */
		if (cpu_id.max_cpuid > 3) {
			/* Walk the cache leaves; a leaf with cache type 0
			 * terminates the list. */
			for (i = 0; ; i++) {
				cpuid_count(4, i, &regs[0], &regs[1], &regs[2], &regs[3]);
				if (eax->ctype == 0) {
					break;
				}

				/* Skip anything that is not a valid cache type */
				if (eax->ctype >= 4) {
					continue;
				}

				/* Compute the cache size in KB */
				size = (ecx->number_of_sets + 1) *
					(ebx->coherency_line_size + 1) *
					(ebx->physical_line_partition + 1) *
					(ebx->ways_of_associativity + 1);
				size /= 1024;

				switch (eax->level) {
				case 1:
					l1_cache += size;
					break;
				case 2:
					l2_cache += size;
					break;
				case 3:
					l3_cache += size;
					break;
				}
			}
			return;
		}

		/* No CPUID(4) so we use the older CPUID(2) method */
		/* Get number of times to iterate */
		cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
		n = regs[0] & 0xff;
		for (i=0 ; i<n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, the register holds no valid
			 * descriptors.  All four registers must be checked,
			 * otherwise bytes of an invalid EDX would be decoded
			 * below. */
			for (j=0 ; j<4 ; j++) {
				if (regs[j] & 0x80000000u) {
					regs[j] = 0;
				}
			}

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				switch(dp[j]) {
				/* L1 descriptors */
				case 0x6: case 0xa: case 0x66:
					l1_cache += 8;
					break;
				case 0x8: case 0xc: case 0xd: case 0x60: case 0x67:
					l1_cache += 16;
					break;
				case 0xe:
					l1_cache += 24;
					break;
				case 0x9: case 0x2c: case 0x30: case 0x68:
					l1_cache += 32;
					break;

				/* L2 descriptors */
				case 0x39: case 0x3b: case 0x41: case 0x79:
					l2_cache += 128;
					break;
				case 0x3a:
					l2_cache += 192;
					break;
				case 0x21: case 0x3c: case 0x3f: case 0x42:
				case 0x7a: case 0x82:
					l2_cache += 256;
					break;
				case 0x3d:
					l2_cache += 384;
					break;
				case 0x3e: case 0x43: case 0x7b: case 0x7f:
				case 0x80: case 0x83: case 0x86:
					l2_cache += 512;
					break;
				case 0x44: case 0x78: case 0x7c: case 0x84: case 0x87:
					l2_cache += 1024;
					break;
				case 0x45: case 0x7d: case 0x85:
					l2_cache += 2048;
					break;
				case 0x48:
					l2_cache += 3072;
					break;
				case 0x4e:
					l2_cache += 6144;
					break;

				/* L3 descriptors */
				case 0x23: case 0xd0:
					l3_cache += 512;
					break;
				case 0xd1: case 0xd6:
					l3_cache += 1024;
					break;
				case 0x25: case 0xd2: case 0xd7: case 0xdc: case 0xe2:
					l3_cache += 2048;
					break;
				case 0x29: case 0x46: case 0x49: case 0xd8:
				case 0xdd: case 0xe3:
					l3_cache += 4096;
					break;
				case 0x4a:
					l3_cache += 6144;
					break;
				case 0x47: case 0x4b: case 0xde: case 0xe4:
					l3_cache += 8192;
					break;
				case 0x4c: case 0xea:
					l3_cache += 12288;
					break;
				case 0x4d:
					l3_cache += 16384;
					break;
				case 0xeb:
					l3_cache += 18432;
					break;
				case 0xec:
					l3_cache += 24576;
					break;
				} /* end switch */
			} /* end for 1-16 */
		} /* end for 0 - n */
		break;
	}
}
/*
 * Find CPU type.
 *
 * Records PAE and RDTSC support in v->pae / v->rdtsc, then prints a CPU name
 * at (0, COL_MID).  When cpu_id.max_cpuid >= 4 the brand string held in
 * cpu_id is printed and nothing else is done.  Otherwise the name is derived
 * from vendor, family, model and (for a few parts) stepping or the L2 size
 * found by get_cache_size(), and for some old parts l1_cache / l2_cache are
 * set or corrected here.
 *
 * NOTE(review): the brand-string test uses cpu_id.max_cpuid; brand strings
 * are normally reported through the extended CPUID leaves -- confirm that
 * this is the intended field.
 */
static void cpu_type(void)
{
	v->rdtsc = 0;
	v->pae = 0;

	/* See if we have pae support */
	if (cpu_id.fid.bits.pae) {
		v->pae = 1;
	}

	/* See if we have rdtsc instruction support */
	if (cpu_id.fid.bits.tsc) {
		v->rdtsc = 1;
	}

	/* If we can get a brand string use it, and we are done */
	if (cpu_id.max_cpuid >= 4) {
		cprint(0, COL_MID, cpu_id.brand_id.char_array);
		return;
	}

	/* The brand string is not available so we need to figure out
	 * what CPU we have */
	switch(cpu_id.vend_id.char_array[0]) {
	/* AMD Processors */
	case 'A':
		switch(cpu_id.vers.bits.family) {
		case 4:
			switch(cpu_id.vers.bits.model) {
			case 3:
				cprint(0, COL_MID, "AMD 486DX2");
				break;
			case 7:
				cprint(0, COL_MID, "AMD 486DX2-WB");
				break;
			case 8:
				cprint(0, COL_MID, "AMD 486DX4");
				break;
			case 9:
				cprint(0, COL_MID, "AMD 486DX4-WB");
				break;
			case 14:
				cprint(0, COL_MID, "AMD 5x86-WT");
				break;
			case 15:
				cprint(0, COL_MID, "AMD 5x86-WB");
				break;
			}
			/* Since we can't get CPU speed or cache info return */
			return;
		case 5:
			switch(cpu_id.vers.bits.model) {
			case 0:
			case 1:
			case 2:
			case 3:
				cprint(0, COL_MID, "AMD K5");
				l1_cache = 8;
				break;
			case 6:
			case 7:
				cprint(0, COL_MID, "AMD K6");
				break;
			case 8:
				cprint(0, COL_MID, "AMD K6-2");
				break;
			case 9:
				cprint(0, COL_MID, "AMD K6-III");
				break;
			case 13:
				cprint(0, COL_MID, "AMD K6-III+");
				break;
			}
			break;
		case 6:
			switch(cpu_id.vers.bits.model) {
			case 1:
				cprint(0, COL_MID, "AMD Athlon (0.25)");
				break;
			case 2:
			case 4:
				cprint(0, COL_MID, "AMD Athlon (0.18)");
				break;
			case 6:
				if (l2_cache == 64) {
					cprint(0, COL_MID, "AMD Duron (0.18)");
				} else {
					cprint(0, COL_MID, "Athlon XP (0.18)");
				}
				break;
			case 8:
			case 10:
				if (l2_cache == 64) {
					cprint(0, COL_MID, "AMD Duron (0.13)");
				} else {
					cprint(0, COL_MID, "Athlon XP (0.13)");
				}
				break;
			case 3:
			case 7:
				cprint(0, COL_MID, "AMD Duron");
				/* Duron stepping 0 CPUID for L2 is broken */
				/* (AMD errata T13)*/
				if (cpu_id.vers.bits.stepping == 0) {
					/* Hard code the right L2 size */
					l2_cache = 64;
				}
				break;
			}
			break;
		/* All AMD family values >= 10 have the Brand ID
		 * feature so we don't need to find the CPU type */
		}
		break;

	/* Intel or Transmeta Processors */
	case 'G':
		if ( cpu_id.vend_id.char_array[7] == 'T' ) { /* GenuineTMx86 */
			if (cpu_id.vers.bits.family == 5) {
				cprint(0, COL_MID, "TM 5x00");
			} else if (cpu_id.vers.bits.family == 15) {
				cprint(0, COL_MID, "TM 8x00");
			}
			l1_cache = cpu_id.cache_info.ch[3] + cpu_id.cache_info.ch[7];
			l2_cache = (cpu_id.cache_info.ch[11]*256) + cpu_id.cache_info.ch[10];
		} else { /* GenuineIntel */
			if (cpu_id.vers.bits.family == 4) {
				switch(cpu_id.vers.bits.model) {
				case 0:
				case 1:
					cprint(0, COL_MID, "Intel 486DX");
					break;
				case 2:
					cprint(0, COL_MID, "Intel 486SX");
					break;
				case 3:
					cprint(0, COL_MID, "Intel 486DX2");
					break;
				case 4:
					cprint(0, COL_MID, "Intel 486SL");
					break;
				case 5:
					cprint(0, COL_MID, "Intel 486SX2");
					break;
				case 7:
					cprint(0, COL_MID, "Intel 486DX2-WB");
					break;
				case 8:
					cprint(0, COL_MID, "Intel 486DX4");
					break;
				case 9:
					cprint(0, COL_MID, "Intel 486DX4-WB");
					break;
				}
				/* Since we can't get CPU speed or cache info return */
				return;
			}

			switch(cpu_id.vers.bits.family) {
			case 5:
				switch(cpu_id.vers.bits.model) {
				case 0:
				case 1:
				case 2:
				case 3:
				case 7:
					cprint(0, COL_MID, "Pentium");
					if (l1_cache == 0) {
						l1_cache = 8;
					}
					break;
				case 4:
				case 8:
					cprint(0, COL_MID, "Pentium-MMX");
					if (l1_cache == 0) {
						l1_cache = 16;
					}
					break;
				}
				break;
			case 6:
				/* Every label up to the closing brace of this
				 * switch is a family 6 *model*.  The switch must
				 * stay open through model 15; closing it after
				 * model 6 would turn the later labels into family
				 * values and detach the family 15 block below. */
				switch(cpu_id.vers.bits.model) {
				case 0:
				case 1:
					cprint(0, COL_MID, "Pentium Pro");
					break;
				case 3:
				case 4:
					cprint(0, COL_MID, "Pentium II");
					break;
				case 5:
					if (l2_cache == 0) {
						cprint(0, COL_MID, "Celeron");
					} else {
						cprint(0, COL_MID, "Pentium II");
					}
					break;
				case 6:
					if (l2_cache == 128) {
						cprint(0, COL_MID, "Celeron");
					} else {
						cprint(0, COL_MID, "Pentium II");
					}
					break;
				case 7:
				case 8:
				case 11:
					if (l2_cache == 128) {
						cprint(0, COL_MID, "Celeron");
					} else {
						cprint(0, COL_MID, "Pentium III");
					}
					break;
				case 9:
					if (l2_cache == 512) {
						cprint(0, COL_MID, "Celeron M (0.13)");
					} else {
						cprint(0, COL_MID, "Pentium M (0.13)");
					}
					break;
				case 10:
					cprint(0, COL_MID, "Pentium III Xeon");
					break;
				case 12:
					l1_cache = 24;
					cprint(0, COL_MID, "Atom (0.045)");
					break;
				case 13:
					if (l2_cache == 1024) {
						cprint(0, COL_MID, "Celeron M (0.09)");
					} else {
						cprint(0, COL_MID, "Pentium M (0.09)");
					}
					break;
				case 14:
					cprint(0, COL_MID, "Intel Core");
					break;
				case 15:
					if (l2_cache == 1024) {
						cprint(0, COL_MID, "Pentium E");
					} else {
						cprint(0, COL_MID, "Intel Core 2");
					}
					break;
				}
				break;
			case 15:
				switch(cpu_id.vers.bits.model) {
				case 0:
				case 1:
				case 2:
					if (l2_cache == 128) {
						cprint(0, COL_MID, "Celeron");
					} else {
						cprint(0, COL_MID, "Pentium 4");
					}
					break;
				case 3:
				case 4:
					if (l2_cache == 256) {
						cprint(0, COL_MID, "Celeron (0.09)");
					} else {
						cprint(0, COL_MID, "Pentium 4 (0.09)");
					}
					break;
				case 6:
					cprint(0, COL_MID, "Pentium D (65nm)");
					break;
				default:
					cprint(0, COL_MID, "Unknown Intel");
					break;
				}
				break;
			}
		}
		break;

	/* VIA/Cyrix/Centaur Processors with CPUID */
	case 'C':
		if ( cpu_id.vend_id.char_array[1] == 'e' ) { /* CentaurHauls */
			l1_cache = cpu_id.cache_info.ch[3] + cpu_id.cache_info.ch[7];
			l2_cache = cpu_id.cache_info.ch[11];
			switch(cpu_id.vers.bits.family){
			case 5:
				cprint(0, COL_MID, "Centaur 5x86");
				break;
			case 6: // VIA C3
				switch(cpu_id.vers.bits.model){
				default:
					if (cpu_id.vers.bits.stepping < 8) {
						cprint(0, COL_MID, "VIA C3 Samuel2");
					} else {
						cprint(0, COL_MID, "VIA C3 Eden");
					}
					break;
				case 10:
					cprint(0, COL_MID, "VIA C7 (C5J)");
					l1_cache = 64;
					l2_cache = 128;
					break;
				case 13:
					cprint(0, COL_MID, "VIA C7 (C5R)");
					l1_cache = 64;
					l2_cache = 128;
					break;
				case 15:
					cprint(0, COL_MID, "VIA Isaiah (CN)");
					l1_cache = 64;
					l2_cache = 128;
					break;
				}
			}
		} else { /* CyrixInstead */
			switch(cpu_id.vers.bits.family) {
			case 5:
				switch(cpu_id.vers.bits.model) {
				case 0:
					cprint(0, COL_MID, "Cyrix 6x86MX/MII");
					break;
				case 4:
					cprint(0, COL_MID, "Cyrix GXm");
					break;
				}
				return;
			case 6: // VIA C3
				switch(cpu_id.vers.bits.model) {
				case 6:
					cprint(0, COL_MID, "Cyrix III");
					break;
				case 7:
					if (cpu_id.vers.bits.stepping < 8) {
						cprint(0, COL_MID, "VIA C3 Samuel2");
					} else {
						cprint(0, COL_MID, "VIA C3 Ezra-T");
					}
					break;
				case 8:
					cprint(0, COL_MID, "VIA C3 Ezra-T");
					break;
				case 9:
					cprint(0, COL_MID, "VIA C3 Nehemiah");
					break;
				}
				// L1 = L2 = 64 KB from Cyrix III to Nehemiah
				l1_cache = 64;
				l2_cache = 64;
				break;
			}
		}
		break;

	/* Unknown processor */
	default:
		/* Make a guess at the family.  Each case needs its own break:
		 * all three strings are printed at the same position, so falling
		 * through would always leave the last one on screen. */
		switch(cpu_id.vers.bits.family) {
		case 5:
			cprint(0, COL_MID, "586");
			break;
		case 6:
			cprint(0, COL_MID, "686");
			break;
		default:
			cprint(0, COL_MID, "Unidentified Processor");
			break;
		}
		break;
	}
}
#define STEST_ADDR 0x100000 /* Measure memory speed starting at 1MB */

/*
 * Tell a usable memspeed() result from a failure.  memspeed() reports
 * failures as -1 .. -4 converted to ulong, i.e. the four largest ulong
 * values; 0 is not a usable speed either.
 */
static int memspeed_ok(ulong speed)
{
	return speed != 0 && speed < (ulong)-4;
}

/*
 * Measure and display the CPU clock and the size and speed of each cache
 * level.
 *
 * The CPU clock (kHz, from cpuspeed()) is printed on row 1 and stored in
 * extclock.  For every cache level with a non-zero size the size is printed
 * in KB, followed by the memspeed() result labelled MB/s.  A measurement that
 * fails is skipped, leaving that part of the screen as init() drew it.
 */
static void cpu_cache_speed(void)
{
	int i, off = 10;
	int khz;
	ulong speed;

	/* Print CPU speed.  cpuspeed() returns a signed value (-1 on failure),
	 * so it must be tested before being widened to an unsigned type. */
	khz = cpuspeed();
	if (khz > 0) {
		speed = khz;
		if (speed < 999499) {
			speed += 50; /* for rounding */
			cprint(1, off, " . MHz");
			dprint(1, off+1, speed/1000, 3, 1);
			dprint(1, off+5, (speed/100)%10, 1, 0);
		} else {
			speed += 500; /* for rounding */
			cprint(1, off, " MHz");
			dprint(1, off, speed/1000, 5, 0);
		}
		extclock = speed;
	}

	/* Print out L1 cache info */
	/* To measure L1 cache speed we use a block size that is 1/4th */
	/* of the total L1 cache size since half of it is for instructions */
	if (l1_cache) {
		cprint(2, 0, "L1 Cache: K ");
		dprint(2, 11, l1_cache, 3, 0);
		speed = memspeed(STEST_ADDR, (l1_cache/4)*1024, 200);
		if (memspeed_ok(speed)) {
			cprint(2, 16, " MB/s");
			dprint(2, 16, speed, 6, 0);
		}
	}

	/* Print out L2 cache info */
	/* We measure the L2 cache speed by using a block size that is */
	/* the size of the L1 cache. We have to fudge if the L1 */
	/* cache is bigger than the L2 */
	if (l2_cache) {
		cprint(3, 0, "L2 Cache: K ");
		dprint(3, 10, l2_cache, 4, 0);

		if (l2_cache < l1_cache) {
			i = l1_cache / 4 + l2_cache / 4;
		} else {
			i = l1_cache;
		}
		speed = memspeed(STEST_ADDR, i*1024, 200);
		if (memspeed_ok(speed)) {
			cprint(3, 16, " MB/s");
			dprint(3, 16, speed, 6, 0);
		}
	}

	/* Print out L3 cache info */
	/* We measure the L3 cache speed by using a block size that is */
	/* 2X the size of the L2 cache. */
	if (l3_cache) {
		cprint(4, 0, "L3 Cache: K ");
		dprint(4, 10, l3_cache, 4, 0);

		/* With no L2 size the block length is 0; memspeed() rejects
		 * that and the speed is simply not shown. */
		i = l2_cache*2;
		speed = memspeed(STEST_ADDR, i*1024, 150);
		if (memspeed_ok(speed)) {
			cprint(4, 16, " MB/s");
			dprint(4, 16, speed, 6, 0);
		}
	}
}
/* Measure and display memory speed, multitasked using all CPUs */
ulong spd[MAX_CPUS];
void get_mem_speed(int me, int ncpus)
{
int i;
ulong speed=0;
ulong start, len;
/* Determine memory speed. To find the memory speed we use
* A block size that is the sum of all the L1, L2 & L3 caches
* in all cpus * 6 */
i = (l3_cache + l2_cache*ncpus + l1_cache*ncpus) * 6;
/* Make sure that we have enough memory to do the test */
/* If not use all we have */
if ((1 + (i * 2)) > (v->plim_upper << 2)) {
i = ((v->plim_upper <<2) - 1) / 2;
}
/* Divide up the memory block among the CPUs */
len = i * 1024 / ncpus;
start = STEST_ADDR + (len * me);
barrier();
spd[me] = memspeed(start, len, 50);
barrier();
if (me == 0) {
for (i=0; i<ncpus; i++) {
speed += spd[i];
}
cprint(5, 16, " MB/s");
dprint(5, 16, speed, 6, 0);
}
}
/* #define TICKS 5 * 11832 (count = 6376)*/
/* #define TICKS (65536 - 12752) */
/* Timer count loaded by cpuspeed(); the measurement window it gives is 50 ms */
#define TICKS 59659 /* 50 ms */
/* TSC value at the start of the measurement window. These are file-scope
 * objects because the inline assembly in cpuspeed() refers to them by
 * symbol name. */
ulong stlow, sthigh;
/*
 * Measure the CPU clock by counting TSC ticks over a 50 ms timer interval.
 *
 * Returns the CPU clock in kHz (TSC ticks per millisecond), which is also
 * stored in v->clks_msec. Returns -1 when v->rdtsc is 0 or when the
 * measurement is not credible (the wait loop ran fewer than 4 times or fewer
 * than 50000 ticks elapsed).
 */
static int cpuspeed(void)
{
int loops;
ulong end_low, end_high;
if (v->rdtsc == 0 ) {
return(-1);
}
/* Setup timer */
/* NOTE(review): ports 0x61/0x43/0x42 are presumably the PC timer gate and
 * PIT channel 2 registers, programmed here to count down TICKS -- confirm
 * against the hardware documentation. */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
outb(0xb0, 0x43);
/* Load the 16-bit count, low byte first */
outb(TICKS & 0xff, 0x42);
outb(TICKS >> 8, 0x42);
/* Timestamp the start of the window */
asm __volatile__ ("rdtsc":"=a" (stlow),"=d" (sthigh));
/* Busy-wait until bit 5 of port 0x61 becomes set, counting iterations so
 * a window that ended immediately can be detected below */
loops = 0;
do {
loops++;
} while ((inb(0x61) & 0x20) == 0);
/* Read the TSC again and subtract the 64-bit start value in place */
asm __volatile__ (
"rdtsc\n\t" \
"subl stlow,%%eax\n\t" \
"sbbl sthigh,%%edx\n\t" \
:"=a" (end_low), "=d" (end_high)
);
/* Make sure we have a credible result */
/* (only the low 32 bits of the difference are examined) */
if (loops < 4 || end_low < 50000) {
return(-1);
}
/* Ticks in 50 ms divided by 50 gives ticks per millisecond */
v->clks_msec = end_low/50;
/*
if (tsc_invariable) end_low = correct_tsc(end_low);
*/
return(v->clks_msec);
}
/*
 * Measure cache or memory speed by copying a block of memory.
 *
 * src  - start address of the block to copy from
 * len  - block length in bytes; the destination is the len bytes that follow
 *        the source block, so 2 * len bytes starting at src are touched
 * iter - number of timed copy passes
 *
 * The result is v->clks_msec divided by the measured clocks per KB, i.e.
 * KB per millisecond; the callers in this file print it labelled "MB/s".
 *
 * Failures are returned as negative codes converted to ulong:
 *   -1  v->rdtsc is 0
 *   -2  len is 0
 *   -3  the elapsed tick count does not fit in 32 bits
 *   -4  the computed clocks per KB is 0
 * Callers must therefore not treat every non-zero return as a speed.
 * iter must be non-zero: it is used as a divisor without being checked.
 */
ulong memspeed(ulong src, ulong len, int iter)
{
int i;
ulong dst, wlen;
ulong st_low, st_high;
ulong end_low, end_high;
ulong cal_low, cal_high;
if (v->rdtsc == 0 ) {
return(-1);
}
if (len == 0) return(-2);
dst = src + len;
wlen = len / 4; /* len is in bytes; the copy moves 4-byte words */
/* Calibrate the overhead with a zero word copy */
asm __volatile__ ("rdtsc":"=a" (st_low),"=d" (st_high));
for (i=0; i<iter; i++) {
asm __volatile__ (
"movl %0,%%esi\n\t" \
"movl %1,%%edi\n\t" \
"movl %2,%%ecx\n\t" \
"cld\n\t" \
"rep\n\t" \
"movsl\n\t" \
:: "g" (src), "g" (dst), "g" (0)
: "esi", "edi", "ecx"
);
}
asm __volatile__ ("rdtsc":"=a" (cal_low),"=d" (cal_high));
/* Compute the overhead time */
/* (64-bit subtract: cal = cal - st) */
asm __volatile__ (
"subl %2,%0\n\t"
"sbbl %3,%1"
:"=a" (cal_low), "=d" (cal_high)
:"g" (st_low), "g" (st_high),
"0" (cal_low), "1" (cal_high)
);
/* Now measure the speed */
/* Do the first copy to prime the cache */
asm __volatile__ (
"movl %0,%%esi\n\t" \
"movl %1,%%edi\n\t" \
"movl %2,%%ecx\n\t" \
"cld\n\t" \
"rep\n\t" \
"movsl\n\t" \
:: "g" (src), "g" (dst), "g" (wlen)
: "esi", "edi", "ecx"
);
/* Timed passes: same copy as above, repeated iter times */
asm __volatile__ ("rdtsc":"=a" (st_low),"=d" (st_high));
for (i=0; i<iter; i++) {
asm __volatile__ (
"movl %0,%%esi\n\t" \
"movl %1,%%edi\n\t" \
"movl %2,%%ecx\n\t" \
"cld\n\t" \
"rep\n\t" \
"movsl\n\t" \
:: "g" (src), "g" (dst), "g" (wlen)
: "esi", "edi", "ecx"
);
}
asm __volatile__ ("rdtsc":"=a" (end_low),"=d" (end_high));
/* Compute the elapsed time */
/* (64-bit subtract: end = end - st) */
asm __volatile__ (
"subl %2,%0\n\t"
"sbbl %3,%1"
:"=a" (end_low), "=d" (end_high)
:"g" (st_low), "g" (st_high),
"0" (end_low), "1" (end_high)
);
/* Subtract the overhead time */
asm __volatile__ (
"subl %2,%0\n\t"
"sbbl %3,%1"
:"=a" (end_low), "=d" (end_high)
:"g" (cal_low), "g" (cal_high),
"0" (end_low), "1" (end_high)
);
/* Make sure that the result fits in 32 bits */
if (end_high) {
return(-3);
}
/* NOTE(review): the halving presumably accounts for each byte being both
 * read and written by the copy -- confirm */
end_low /= 2;
/* Convert to clocks/KB */
/* The integer division by len comes first, so a total below len
 * truncates to 0 and is reported as error -4 below */
end_low /= len;
end_low *= 1024;
end_low /= iter;
if (end_low == 0) {
return(-4);
}
/* Convert to KB per millisecond: clocks per msec / clocks per KB */
/*
if (tsc_invariable) end_low = correct_tsc(end_low);
*/
return((v->clks_msec)/end_low);
}
/*
 * Execute the rdmsr instruction with msr in ecx and store the resulting
 * eax in val1 and edx in val2. val1 and val2 must be lvalues.
 * In the visible part of this file the macro is only used by the
 * commented-out correct_tsc() below.
 */
#define rdmsr(msr,val1,val2) \
__asm__ __volatile__("rdmsr" \
: "=a" (val1), "=d" (val2) \
: "c" (msr))
/*
ulong correct_tsc(ulong el_org)
{
float coef_now, coef_max;
int msr_lo, msr_hi, is_xe;
rdmsr(0x198, msr_lo, msr_hi);
is_xe = (msr_lo >> 31) & 0x1;
if(is_xe){
rdmsr(0x198, msr_lo, msr_hi);
coef_max = ((msr_hi >> 8) & 0x1F);
if ((msr_hi >> 14) & 0x1) { coef_max = coef_max + 0.5f; }
} else {
rdmsr(0x17, msr_lo, msr_hi);
coef_max = ((msr_lo >> 8) & 0x1F);
if ((msr_lo >> 14) & 0x1) { coef_max = coef_max + 0.5f; }
}
if((cpu_id.feature_flag >> 7) & 1) {
rdmsr(0x198, msr_lo, msr_hi);
coef_now = ((msr_lo >> 8) & 0x1F);
if ((msr_lo >> 14) & 0x1) { coef_now = coef_now + 0.5f; }
} else {
rdmsr(0x2A, msr_lo, msr_hi);
coef_now = (msr_lo >> 22) & 0x1F;
}
if(coef_max && coef_now) {
el_org = (ulong)(el_org * coef_now / coef_max);
}
return el_org;
}
*/