memtest source code

Original submission, untarred
http://www.memtest86.com/memtest86-4.0a.tar.gz.

BUG=chromium-os:26458
TEST=none
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..78bbdf6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,74 @@
+# Makefile for MemTest86+
+#
+# Author:		Chris Brady
+# Created:		January 1, 1996
+
+#
+# Path for the floppy disk device
+#
+FDISK=/dev/fd0
+
+AS=as -32
+CC=gcc
+
+CFLAGS= -Wall -march=i486 -m32 -O1 -fomit-frame-pointer -fno-builtin \
+	-ffreestanding -fPIC $(SMP_FL) -fno-stack-protector
+
+OBJS= head.o reloc.o main.o test.o init.o lib.o patn.o screen_buffer.o \
+      config.o linuxbios.o memsize.o error.o smp.o cpuid.o vmem.o random.o
+
+all: clean memtest.bin memtest
+
+# Link it statically once so I know I don't have undefined
+# symbols and then link it dynamically so I have full
+# relocation information
+memtest_shared: $(OBJS) memtest_shared.lds Makefile
+	$(LD) --warn-constructors --warn-common -static -T memtest_shared.lds \
+	-o $@ $(OBJS) && \
+	$(LD) -shared -Bsymbolic -T memtest_shared.lds -o $@ $(OBJS)
+
+memtest_shared.bin: memtest_shared
+	objcopy -O binary $< memtest_shared.bin
+
+memtest: memtest_shared.bin memtest.lds
+	$(LD) -s -T memtest.lds -b binary memtest_shared.bin -o $@
+
+head.s: head.S config.h defs.h test.h
+	$(CC) -E -traditional $< -o $@
+
+bootsect.s: bootsect.S config.h defs.h
+	$(CC) -E -traditional $< -o $@
+
+setup.s: setup.S config.h defs.h
+	$(CC) -E -traditional $< -o $@
+
+memtest.bin: memtest_shared.bin bootsect.o setup.o memtest.bin.lds
+	$(LD) -T memtest.bin.lds bootsect.o setup.o -b binary \
+	memtest_shared.bin -o memtest.bin
+
+reloc.o: reloc.c
+	$(CC) -c $(CFLAGS) -fno-strict-aliasing reloc.c
+
+test.o: test.c
+	$(CC) -c -Wall -march=i486 -m32 -O0 -fomit-frame-pointer -fno-builtin -ffreestanding test.c
+
+random.o: random.c
+	$(CC) -c -Wall -march=i486 -m32 -O3 -fomit-frame-pointer -fno-builtin -ffreestanding random.c
+
+clean:
+	rm -f *.o *.s *.iso memtest.bin memtest memtest_shared \
+		memtest_shared.bin memtest.iso
+
+iso:
+	make all
+	./makeiso.sh
+
+install: all
+	dd <memtest.bin >$(FDISK) bs=8192
+
+install-precomp:
+	dd <precomp.bin >$(FDISK) bs=8192
+	
+dos: all
+	cat mt86+_loader memtest.bin > memtest.exe
+
diff --git a/README b/README
new file mode 100644
index 0000000..f9df887
--- /dev/null
+++ b/README
@@ -0,0 +1,925 @@
+			====================
+			= MemTest-86 v4.0  =
+			=   28 Mar, 2011   =
+			=   Chris Brady    =
+			====================
+Table of Contents
+=================
+  1) Introduction
+  2) Licensing
+  3) Installation
+  4) Serial Port Console
+  5) Online Commands
+  6) Error Information
+  7) Trouble-shooting Memory Errors
+  8) Execution Time
+  9) Memory Testing Philosophy
+ 10) Memtest86 Test Algorithms
+ 11) Individual Test Descriptions
+ 12) Problem Reporting - Contact Information
+ 13) Known Problems
+ 14) Planned Features List
+ 15) Change Log
+ 16) Acknowledgments
+
+
+1) Introduction
+===============
+Memtest86 is a thorough, stand-alone memory test for Intel/AMD x86 architecture
+systems.  BIOS based memory tests are only a quick check and often miss
+failures that are detected by Memtest86.
+
+For updates go to the Memtest86 web page:
+
+	http://www.memtest86.com
+
+
+2) Licensing
+============
+Memtest86 is released under the terms of the GNU General Public License (GPL). Other
+than the provisions of the GPL there are no restrictions for use, private or
+commercial.  See: http://www.gnu.org/licenses/gpl.html for details.
+
+
+3) Linux Installation
+=====================
+Memtest86 is a stand alone program and can be loaded from either a disk
+partition or from a floppy disk.
+
+To build Memtest86:
+   1) Review the Makefile and adjust options as needed.
+   2) Type "make"
+
+This creates a file named "memtest.bin" which is a bootable image.  This
+image file may be copied to a floppy disk or may be booted from a hard
+disk partition via Lilo or Grub.
+
+  To create a Memtest86 bootdisk
+   1) Insert a blank write enabled floppy disk.
+   2) As root, Type "make install"
+
+  To boot from a disk partition via Grub
+   1) Copy the image file to a permanent location (e.g. /boot/memtest.bin).
+   2) Add an entry in the Grub config file (/boot/grub/menu.lst) to boot
+      memtest86.  Only the title and kernel fields need to be specified. 
+      The following is a sample Grub entry for booting memtest86:
+
+	title Memtest86
+	    kernel (hd0,0)/memtest.bin
+
+  To boot from a disk partition via Lilo
+   1) Copy the image file to a permanent location (e.g. /boot/memtest.bin).
+   2) Add an entry in the lilo config file (usually /etc/lilo.conf) to boot
+      memtest86.  Only the image and label fields need to be specified.
+      The following is a sample Lilo entry for booting memtest86:
+
+        image = /boot/memtest.bin
+        label = memtest86
+
+   3) As root,  type "lilo"
+
+If you encounter build problems a binary image has been included (precomp.bin).
+To create a boot-disk with this pre-built image do the following:
+   1) Insert a blank write enabled floppy disk.
+   2) Type "make install-precomp"
+
+
+4) Serial Console
+=================
+Memtest86 can be used on PCs equipped with a serial port for the console.
+By default serial port console support is not enabled since it slows
+down testing.  To enable it, change the SERIAL_CONSOLE_DEFAULT define in
+config.h from a zero to a one.  The serial console baud rate may also
+be set in config.h with the SERIAL_BAUD_RATE define.  The other serial
+port settings are no parity, 8 data bits, 1 stop bit.  All of the features
+used by memtest86 are accessible via the serial console.  However, the
+screen is sometimes garbled when the online commands are used.
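+
+For example, after editing config.h the two defines might look like the
+following (the exact defaults are in config.h; the baud rate shown here is
+only an example):
+
+  #define SERIAL_CONSOLE_DEFAULT  1       /* 1 == serial console enabled */
+  #define SERIAL_BAUD_RATE        115200  /* console baud rate */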
+
+
+5) Online Commands
+==================
+Memtest86 has a limited number of online commands.  Online commands
+provide control over caching, test selection, address range and error
+scrolling.  A help bar is displayed at the bottom of the screen listing
+the available on-line commands. 
+
+  Command  Description
+
+  ESC   Exits the test and does a warm restart via the BIOS.
+
+  c     Enters test configuration menu
+	    Menu options are:
+               1) Test selection
+	       2) Address Range
+	       3) Error Report Mode
+	       4) CPU Selection Mode
+	       5) Refresh Screen 
+
+  SP    Set scroll lock (Stops scrolling of error messages)
+	Note: Testing is stalled when the scroll lock is
+	set and the scroll region is full.
+
+  CR    Clear scroll lock (Enables error message scrolling)
+
+
+6) Error Information
+======================
+Memtest has three options for reporting errors.  The default is an error
+summary that displays the most relevant error information. The second option
+is reporting of individual errors.  In BadRAM Patterns mode patterns are
+created for use with the Linux BadRAM feature. This slick feature allows
+Linux to avoid bad memory pages. Details about the BadRAM feature can be
+found at:
+
+	http://home.zonnet.nl/vanrein/badram
+
+The error summary mode displays the following information:
+
+  Error Confidence Value:
+     A value that indicates the validity of the errors being reported with
+     larger values indicating greater validity.  There is a high probability
+     that all errors reported are valid regardless of this value.  However,
+     when this value exceeds 100 it is nearly certain that the reported
+     errors are valid.
+
+  Lowest Error Address:
+     The lowest address where an error has been reported.
+
+  Highest Error Address:
+     The highest address where an error has been reported.
+
+  Bits in Error Mask:
+     A mask of all bits that have been in error (hexadecimal).
+
+  Bits in Error:
+     Total bits in error for all error instances and the min, max and average
+     bits in error of each individual occurrence.
+
+  Max Contiguous Errors:
+     The maximum number of contiguous addresses with errors.
+
+  ECC Correctable Errors:
+     The number of errors that have been corrected by ECC hardware.
+
+  Test  Errors:
+     On the right hand side of the screen the number of errors for each test
+     are displayed.
+
+For individual errors the following information is displayed when a memory
+error is detected.  An error message is only displayed for errors with a
+different address or failing bit pattern.  All displayed values are in
+hexadecimal.
+
+  Tst:			Test number
+  Failing Address:	Failing memory address 
+  Good:			Expected data pattern 
+  Bad:			Failing data pattern 
+  Err-Bits:		Exclusive or of good and bad data (this shows the
+			position of the failing bit(s))
+  Count:		Number of consecutive errors with the same address
+			and failing bits
+  CPU:			CPU that detected the error
+
+In BadRAM Patterns mode, lines are printed in the form badram=F1,M1,F2,M2.
+In each F/M pair, the F represents a fault address, and the corresponding M
+is a bitmask for that address.  These patterns state that faults have
+occurred in addresses that equal F on all "1" bits in M.  Such a pattern may
+capture more errors than actually exist, but at least all the errors are
+captured.  These patterns have been designed to capture regular patterns of
+errors caused by the hardware structure in a terse syntax.
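+
+As an illustration, the matching rule for a single F/M pair can be written
+as a few lines of C (a sketch only, not code from Memtest86):
+
+  /* An address is covered by a badram F/M pair when it agrees with the
+   * fault address F on every bit that is set in the mask M. */
+  int badram_match(unsigned long addr, unsigned long f, unsigned long m)
+  {
+          return (addr & m) == (f & m);
+  }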
+
+The BadRAM patterns are `grown' incrementally rather than `designed' from an
+overview of all errors. The number of pairs is constrained to five for a
+number of practical reasons. As a result, handcrafting patterns from the
+output in address printing mode may, in exceptional cases, yield better
+results.
+
+
+7) Trouble-shooting Memory Errors
+================================
+Please be aware that not all errors reported by Memtest86 are due to
+bad memory. The test implicitly tests the CPU, L1 and L2 caches as well as
+the motherboard.  It is impossible for the test to determine what causes
+the failure to occur.  Most failures will be due to a problem with memory.
+When it is not, the only option is to replace parts until the failure is
+corrected.  
+
+Once a memory error has been detected, determining the failing
+module is not a clear cut procedure.  With the large number of motherboard
+vendors and possible combinations of simm slots it would be difficult if
+not impossible to assemble complete information about how a particular
+error would map to a failing memory module.  However, there are steps
+that may be taken to determine the failing module.  Here are four
+techniques that you may wish to use:
+
+1) Removing modules
+This is the simplest method for isolating a failing module, but it may only be
+employed when one or more modules can be removed from the system.  By
+selectively removing modules from the system and then running the test
+you will be able to find the bad module(s).  Be sure to note exactly which
+modules are in the system when the test passes and when the test fails.
+
+2) Rotating modules
+When none of the modules can be removed then you may wish to rotate modules
+to find the failing one.  This technique can only be used if there are
+three or more modules in the system.  Change the location of two modules
+at a time.  For example put the module from slot 1 into slot 2 and put
+the module from slot 2 in slot 1.  Run the test and if either the failing
+bit or address changes then you know that the failing module is one of the
+ones just moved. By using several combinations of module movement you
+should be able to determine which module is failing.
+
+3) Replacing modules
+If you are unable to use either of the previous techniques then you are
+left to selective replacement of modules to find the failure.  
+
+4) Avoiding allocation
+The printing mode for BadRAM patterns is intended to construct boot time
+parameters for a Linux kernel that is compiled with BadRAM support. This
+work-around makes it possible for Linux to reliably run on defective
+RAM.  For more information on BadRAM support for Linux, see:
+
+       http://home.zonnet.nl/vanrein/badram
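+
+For illustration, a reported pattern is passed to a BadRAM-patched kernel
+on its boot command line, for example in a Grub entry (the address and
+mask values below are made up):
+
+	kernel (hd0,0)/vmlinuz root=/dev/hda1 badram=0x01234000,0xfffff000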
+
+Sometimes memory errors show up due to component incompatibility.  A memory
+module may work fine in one system and not in another.  This is not
+uncommon and is a source of confusion.  The components are not necessarily
+bad but certain combinations may need to be avoided.
+
+I am often asked about the reliability of errors reported by Memtest86.
+In the vast majority of cases errors reported by the test are valid.
+There are some systems that cause Memtest86 to be confused about the size of
+memory and it will try to test non-existent memory.  This will cause a large
+number of consecutive addresses to be reported as bad and generally there
+will be many bits in error.  If you have a relatively small number of
+failing addresses and only one or two bits in error you can be certain
+that the errors are valid.  Also intermittent errors are always valid.
+
+All valid memory errors should be corrected.  It is possible that a
+particular error will never show up in normal operation. However, operating
+with marginal memory is risky and can result in data loss and even
+disk corruption.  You can be sure that Murphy will get you if you know
+about a memory error and ignore it.
+
+Memtest86 can not diagnose many types of PC failures.  For example a
+faulty CPU that causes Windows to crash will most likely just cause
+Memtest86 to crash in the same way.
+
+
+8) Execution Time
+==================
+The time required for a complete pass of Memtest86 will vary greatly
+depending on CPU speed, memory speed and memory size. Memtest86 executes 
+indefinitely.  The pass counter increments each time that all of the 
+selected tests have been run.  Generally a single pass is sufficient to 
+catch all but the most obscure errors. However, for complete confidence 
+when intermittent errors are suspected testing for a longer period is advised.
+
+9) Memory Testing Philosophy
+=============================
+There are many good approaches for testing memory.  However, many tests
+simply throw some patterns at memory without much thought or knowledge
+of memory architecture or how errors can best be detected. This
+works fine for hard memory failures but does little to find intermittent
+errors. BIOS based memory tests are useless for finding intermittent
+memory errors.
+
+Memory chips consist of a large array of tightly packed memory cells,
+one for each bit of data.  The vast majority of the intermittent failures
+are a result of interaction between these memory cells.  Often writing a
+memory cell can cause one of the adjacent cells to be written with the
+same data. An effective memory test attempts to test for this
+condition. Therefore, an ideal strategy for testing memory would be
+the following:
+
+  1) write a cell with a zero
+  2) write all of the adjacent cells with a one, one or more times
+  3) check that the first cell still has a zero
+
+It should be obvious that this strategy requires an exact knowledge
+of how the memory cells are laid out on the chip.  In addition there is a
+never ending number of possible chip layouts for different chip types
+and manufacturers making this strategy impractical.  However, there
+are testing algorithms that can approximate this ideal strategy. 
+
+
+10) Memtest86 Test Algorithms
+=============================
+Memtest86 uses two algorithms that provide a reasonable approximation
+of the ideal test strategy above.  The first of these strategies is called
+moving inversions.  The moving inversion test works as follows:
+
+  1) Fill memory with a pattern
+  2) Starting at the lowest address
+	2a check that the pattern has not changed
+	2b write the pattern's complement
+	2c increment the address
+	repeat 2a - 2c
+  3) Starting at the highest address
+	3a check that the pattern has not changed
+	3b write the pattern's complement
+	3c decrement the address
+	repeat 3a - 3c
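+
+For illustration, one moving inversions pass over an array of words could
+be sketched in C as follows.  The real test operates on the mapped test
+segments and is written in optimized assembler; this only restates the
+steps above:
+
+  static int moving_inversions(unsigned long *start, unsigned long *end,
+                               unsigned long pattern)
+  {
+          unsigned long *p;
+          int errors = 0;
+
+          for (p = start; p < end; p++)         /* 1) fill with the pattern */
+                  *p = pattern;
+
+          for (p = start; p < end; p++) {       /* 2) lowest to highest */
+                  if (*p != pattern)            /* 2a) check the pattern */
+                          errors++;
+                  *p = ~pattern;                /* 2b) write the complement */
+          }
+
+          p = end;
+          while (p != start) {                  /* 3) highest to lowest */
+                  p--;
+                  if (*p != ~pattern)           /* 3a) check the complement */
+                          errors++;
+                  *p = pattern;                 /* 3b) write the complement */
+          }
+          return errors;
+  }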
+
+This algorithm is a good approximation of an ideal memory test but
+there are some limitations.  Most high density chips today store data
+4 to 16 bits wide.  With chips that are more than one bit wide it
+is impossible to selectively read or write just one bit.  This means
+that we cannot guarantee that all adjacent cells have been tested
+for interaction.  In this case the best we can do is to use some
+patterns to ensure that all adjacent cells have at least been written
+with all possible one and zero combinations.
+
+It can also be seen that caching, buffering and out of order execution
+will interfere with the moving inversions algorithm and make it less effective.
+It is possible to turn off cache but the memory buffering in new high
+performance chips can not be disabled.  To address this limitation a new
+algorithm I call Modulo-X was created.  This algorithm is not affected by
+cache or buffering.  The algorithm works as follows:
+  1) For starting offsets of 0 - 20 do
+	1a write every 20th location with a pattern
+	1b write all other locations with the pattern's complement
+	   repeat 1b one or more times
+	1c check every 20th location for the pattern
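+
+A minimal C sketch of the same idea with a stride of 20 (illustrative
+only, not the production test code):
+
+  static int modulo20_test(unsigned long *buf, unsigned long n,
+                           unsigned long pattern)
+  {
+          unsigned long i, off;
+          int errors = 0;
+
+          for (off = 0; off < 20; off++) {
+                  for (i = off; i < n; i += 20)   /* 1a) every 20th location */
+                          buf[i] = pattern;
+                  for (i = 0; i < n; i++)         /* 1b) all other locations */
+                          if (i % 20 != off)
+                                  buf[i] = ~pattern;
+                  for (i = off; i < n; i += 20)   /* 1c) check every 20th */
+                          if (buf[i] != pattern)
+                                  errors++;
+          }
+          return errors;
+  }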
+
+This algorithm accomplishes nearly the same level of adjacency testing
+as moving inversions but is not affected by caching or buffering.  Since
+separate write passes (1a, 1b) and the read pass (1c) are done for all of
+memory we can be assured that all of the buffers and cache have been
+flushed between passes.  The selection of 20 as the stride size was somewhat
+arbitrary.  Larger strides may be more effective but would take longer to
+execute.  The choice of 20 seemed to be a reasonable compromise between
+speed and thoroughness.
+
+
+11) Individual Test Descriptions
+================================
+Memtest86 executes a series of numbered test sections to check for
+errors.  These test sections consist of a combination of test
+algorithm, data pattern and caching. The execution order for these tests
+was arranged so that errors will be detected as rapidly as possible.
+A description of each of the test sections follows:
+
+Test 0 [Address test, walking ones, no cache]
+  Tests all address bits in all memory banks by using a walking ones
+  address pattern.  Errors from this test are not used to calculate
+  BadRAM patterns.
+
+Test 1 [Address test, own address Sequential]
+  Each address is written with its own address and then is checked
+  for consistency.  In theory previous tests should have caught any
+  memory addressing problems.  This test should catch any addressing
+  errors that somehow were not previously detected. This test is done
+  sequentially with each available CPU.
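+
+  The idea in miniature (an illustrative C sketch only, not the actual
+  test code):
+
+    static int own_address_test(unsigned long *start, unsigned long *end)
+    {
+            unsigned long *p;
+            int errors = 0;
+
+            for (p = start; p < end; p++)   /* write each word with its own address */
+                    *p = (unsigned long) p;
+            for (p = start; p < end; p++)   /* then check it for consistency */
+                    if (*p != (unsigned long) p)
+                            errors++;
+            return errors;
+    }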
+
+Test 2 [Address test, own address Parallel]
+  Same as test 1 but the testing is done in parallel using all CPUs using
+  overlapping addresses. 
+
+Test 3 [Moving inversions, ones&zeros Sequential]
+  This test uses the moving inversions algorithm with patterns of all
+  ones and zeros.  Cache is enabled even though it interferes to some
+  degree with the test algorithm.  With cache enabled this test does not
+  take long and should quickly find all "hard" errors and some more
+  subtle errors.  This test is done sequentially with each available CPU.
+
+Test 4 [Moving inversions, ones&zeros Parallel]
+  Same as test 3 but the testing is done in parallel using all CPUs.
+ 
+Test 5 [Moving inversions, 8 bit pat]
+  This is the same as test 4 but uses an 8 bit wide pattern of
+  "walking" ones and zeros.  This test will better detect subtle errors
+  in "wide" memory chips.  A total of 20 data patterns are used.
+
+Test 6 [Moving inversions, random pattern]
+  Test 6 uses the same algorithm as test 4 but the data pattern is a
+  random number and its complement. This test is particularly effective
+  in finding difficult to detect data sensitive errors. 
+  The random number sequence is different with each pass 
+  so multiple passes increase effectiveness. 
+  
+Test 7 [Block move, 64 moves]
+  This test stresses memory by using block move (movsl) instructions
+  and is based on Robert Redelmeier's burnBX test.  Memory is initialized
+  with shifting patterns that are inverted every 8 bytes.  Then 4MB blocks
+  of memory are moved around using the movsl instruction.  After the moves
+  are completed the data patterns are checked.  Because the data is checked
+  only after the memory moves are completed it is not possible to know
+  where the error occurred.  The addresses reported are only for where the
+  bad pattern was found.  Since the moves are constrained to an 8MB segment
+  of memory the failing address will always be less than 8MB away from the
+  reported address.  Errors from this test are not used to calculate
+  BadRAM patterns.
+
+Test 8 [Moving inversions, 32 bit pat]
+  This is a variation of the moving inversions algorithm that shifts the data
+  pattern left one bit for each successive address. The starting bit position
+  is shifted left for each pass. To use all possible data patterns 32 passes
+  are required.  This test is quite effective at detecting data sensitive
+  errors but the execution time is long.
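+
+  One way to picture the pattern stream for a single pass (a simplified
+  sketch of the idea, not the actual pattern generation code):
+
+    /* Fill a buffer for one pass; 'pass' selects the starting bit (0..31). */
+    static void fill_shifting_pattern(unsigned int *buf, unsigned long n, int pass)
+    {
+            unsigned int pat = 1u << pass;  /* single bit at the starting position */
+            unsigned long i;
+
+            for (i = 0; i < n; i++) {
+                    buf[i] = pat;
+                    /* shift left one bit for each successive address,
+                       wrapping the bit around when it falls off the top */
+                    pat = (pat << 1) | (pat >> 31);
+            }
+    }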
+ 
+Test 9 [Random number sequence]
+ This test writes a series of random numbers into memory. By resetting the
+ seed for the random number the same sequence of numbers can be created for
+ a reference. The initial pattern is checked and then complemented and
+ checked again on the next pass. However, unlike the moving inversions test
+ writing and checking can only be done in the forward direction.
+
+Test 10 [Modulo 20, random pattern]
+  Using the Modulo-X algorithm should uncover errors that are not
+  detected by moving inversions due to cache and buffering interference
+  with the algorithm.  A 32 bit random pattern is used.
+
+Test 11 [Bit fade test, 2 patterns]
+  The bit fade test initializes all of memory with a pattern and then
+  sleeps for 5 minutes. Then memory is examined to see if any memory bits
+  have changed. All ones and all zero patterns are used.
+
+
+12) Problem Reporting - Contact Information
+===========================================
+Due to the growing popularity of Memtest86 (more than 200,000 downloads per
+month) I have been inundated by questions, feedback, problem reports and
+requests for enhancements. I simply do not have time to respond to ANY Memtest86
+emails. Bug reports and suggestions are welcome but will typically not be
+responded to.
+
+*** NOTE: *** The Keyword MEM86 must appear in the subject of all emails or
+the message will be automatically deleted before it gets to me.  This is
+thanks to spam and viruses!
+
+Problems/Bugs:
+Before submitting a problem report please check the Known Problems section
+to see if this problem has already been reported.  Be sure to include the
+version number and also any details that may be relevant.
+
+Chris Brady, Email: bugs@memtest86.com
+
+With some PC's Memtest86 will just die with no hints as to what went wrong.
+Without any details it is impossible to fix these failures.  Fixing these
+problems will require debugging on your part. There is no point in reporting
+these failures unless you have a Linux system and would be willing to debug
+the failure.
+
+Enhancements:
+If you would like to request an enhancement please see if it is already on
+the Planned Features List before sending your request.  All requests will
+be considered, but not all can be implemented.  If you are interested in
+contributing code please contact me so that the integration can be
+co-ordinated.
+
+Chris Brady, Email: enhance@memtest86.com
+
+Questions:
+Unfortunately, I do not have time to respond to any questions or provide
+assistance with troubleshooting problems. Please read the Troubleshooting
+and Known Problems sections for assistance with problems.  These sections
+contain the answers to the questions that I am able to answer.  If there is
+no answer for your problem in these sections it is probably not something I
+can help you with.
+
+
+13) Known Problems
+==================
+Sometimes when booting from a floppy disk the following messages scroll up
+on the screen:
+        X:8000
+        AX:0212
+        BX:8600
+        CX:0201
+        DX:0000
+This is the BIOS reporting floppy disk read errors.  Either re-write or toss
+the floppy disk.
+
+Memtest86 can not diagnose many types of PC failures.  For example a
+faulty CPU that causes Windows to crash will most likely just cause
+Memtest86 to crash in the same way.
+
+There have been numerous reports of errors in only the block move test. Often
+the memory works in a different system or the vendor insists that it is good.
+In these cases the memory is not necessarily bad but is not able to operate
+reliably at high speeds.  Sometimes more conservative memory timings on the
+motherboard will correct these errors.  In other cases the only option is to
+replace the memory with better quality, higher speed memory.  Don't buy cheap
+memory and expect it to work at full speed.
+
+Memtest86 supports all types of memory.  In fact the test has absolutely
+no knowledge of the memory type nor does it need to.  This is not a problem
+or bug but is listed here due to the many questions I get about this issue.
+
+Changes in the compiler and loader have caused problems with
+Memtest86 resulting in both build failures and errors in execution.  A
+binary image (precomp.bin) of the test is included and may be used if
+problems are encountered.
+
+
+14) Planned Features List
+=========================
+This is a list of enhancements planned for future releases of Memtest86.
+There is no timetable for when these will be implemented.
+
+  - Testing in 64 bit mode with 64 data patterns
+  - Support for reporting ECC errors was removed in the 4.0 release. A
+    simplified implementation of ECC reporting is planned for a future release.
+
+
+15) Change Log
+==============
+Enhancements in v4.0 (28/Mar/2011)
+
+  Full support for testing with multiple CPUs. All tests except for #11 (Bit
+  Fade) have been multi-threaded. A maximum of 16 CPUs will be used for testing.
+
+  CPU detection has been completely re-written to use the brand ID string
+  rather than the cumbersome, difficult to maintain and often out of date
+  CPUID family information. All new processors will now be correctly
+  identified without requiring code support. 
+
+  All code related to controller identification, PCI and DMI has been removed.
+  This may be a controversial decision and was not made lightly. The following
+  are justifications for the decision:
+
+    1. Controller identification has nothing to do with actual testing of
+       memory, the core purpose of Memtest86. 
+    2. This code needed to be updated with every new chipset. With the ever
+       growing number of chipsets it is not possible to keep up with the
+       changes. The result is that new chipsets were more often than not
+       reported incorrectly. In the author's opinion incorrect information is
+       worse than no information.
+    3. Probing for chipset information carries the risk of making the program
+       crash. 
+    4. The amount of code involved with controller identification was quite
+       large, making support more difficult.
+
+  Removing this code also had the unfortunate effect of removing reporting of
+  correctable ECC errors. The code to support ECC was hopelessly intertwined
+  with the controller identification code. A fresh, streamlined implementation of
+  ECC reporting is planned for a future release.
+    
+  A surprising number of conditions existed that potentially cause problems
+  when testing more than 4 GB of memory. Most if not all of these conditions
+  have been identified and corrected.
+
+  A number of cases were corrected where not all of memory was being tested.
+  For most tests the last word of each test block was not tested. In addition
+  an error in the paging code was fixed that omitted the last 256 bytes of
+  each block above 2 GB from testing.
+
+  The information display has been simplified and a number of details that were
+  not relevant to testing were removed.
+
+  Memory speed reporting has been parallelized for more accurate reporting for
+  multi channel memory controllers.
+
+  This is a major re-write of Memtest86 with a large number of minor
+  bug-fixes and substantial cleanup and re-organization of the code. 
+
+
+Enhancements in v3.5 (3/Jan/2008)
+
+  Limited support for execution with multiple CPUs. CPUs are selected
+  round-robin or sequential for each test.
+
+  Support for additional chipsets. (from Memtest86+ v2.11).
+
+  Additions and corrections for CPU detection including reporting of L3 cache.
+
+  Reworked information display for better readability and new information.
+
+  Abbreviated iterations for first pass.
+
+  Enhancements to memory sizing.
+
+  Misc fixes.
+
+Enhancements in v3.4 (2/Aug/2007)
+
+  A new error summary display with error confidence analysis.
+
+  Support for additional chipsets. (from Memtest86+ v1.70).
+
+  Additions and corrections for CPU detection.
+
+  Support for memory module information reporting.
+
+  Misc bug fixes.
+
+Enhancements in v3.3 (12/Jan/2007)
+
+  Added support for additional chipsets. (from Memtest86+ v1.60)
+
+  Changed Modulo 20 test (#8) to use a more effective random pattern rather
+  than simple ones and zeros.
+
+  Fixed a bug that prevented testing of low memory.
+
+  Added an advanced menu option to display SPD info (only for selected
+  chipsets).
+
+  Updated CPU detection for new CPUs and corrected some bugs.
+
+  Reworked online command text for better clarity.
+
+  Added a fix to correct a Badram pattern bug.
+
+
+Enhancements in v3.2 (11/Nov/2004)
+
+  Added two new, highly effective tests that use random number patterns
+  (tests 4 and 6).
+
+  Reworked the online commands:
+	- Changed wording for better clarity
+	- Dropped Cache Mode menu
+
+  Updated CPU detection for newer AMD, Intel and Cyrix CPUs.
+
+  Reworked test sequence:
+	- Dropped ineffective non cached tests (Numbers 7-11) 
+	- Changed cache mode to "cached" for test 2 
+	- Fixed bug that did not allow some tests to be skipped
+	- Added bailout for Bit fade test
+
+  Error reports are highlighted in red to provide a more vivid error 
+  indication.
+	    
+  Added support for a large number of additional chipsets. (from Memtest86+
+  v1.30)
+
+  Added an advanced setup feature that with newer chipsets allows memory timings
+  to be altered from inside Memtest86. (from Memtest86+ v1.30)
+
+
+Enhancements in v3.1 (11/Mar/2004)
+
+   Added processor detection for newer AMD processors.
+
+   Added new "Bit Fade" extended test.
+
+   Fixed a compile time bug with gcc version 3.x.
+
+   E7500 memory controller ECC support
+
+   Added support for 16bit ECC syndromes
+
+   Option to keep the serial port baud rate of the boot loader
+
+
+Enhancements in v3.0 (22/May/2002) Provided by Eric Biederman
+
+   Testing of more than 2gb of memory is at last fixed (tested with 6Gb)
+
+   The infrastructure to poll ECC error reporting chipset registers is in
+   place, and support has been added for some chipsets.
+
+   Uses dynamic relocation information records to make itself PIC
+   instead of requiring 2 copies of memtest86 in the binary.
+
+   The serial console code does not do redundant writes to the serial port,
+   so there is very little slowdown at 9600 baud.
+
+   You can press ^l or just l to get a screen refresh when you are
+   connecting and disconnecting a serial cable.
+
+   Net-booting is working again
+
+   Linux-BIOS support (To get the memory size)
+
+   Many bug-fixes and code cleanup.
+
+Enhancements in v2.9 (29/Feb/2002)
+
+   The memory sizing code has been completely rewritten.  By default
+   Memtest86 gets a memory map from the BIOS that is now used to find 
+   available memory. A new online configuration option provides three
+   choices for how memory will be sized, including the old "probe" method.
+   The default mode generally will not test all of memory, but should be more
+   stable. See the "Memory Sizing" section for details.
+
+   Testing of more than 2gb of memory should now work.  A number of bugs
+   were found and corrected that prevented testing above 2gb.  Testing
+   with more than 2gb has been limited and there could be problems with a
+   full 4gb of memory.
+
+   Memory is divided into segments for testing.  This allows for frequent
+   progress updates and responsiveness to interactive commands.  The
+   memory segment size has been increased from 8 to 32mb.  This should
+   improve testing effectiveness but progress reports will be less frequent.
+
+   Minor bug fixes.
+
+Enhancements in v2.8 (18/Oct/2001)
+   Eric Biederman reworked the build process, making it far simpler and
+   enabling it to produce a network bootable ELF image.
+
+   Re-wrote the memory and cache speed detection code.  Previously the
+   reported numbers were inaccurate for Intel CPU's and completely wrong
+   for Athlon/Duron CPU's.
+
+   By default the serial console is disabled since this was slowing
+   down testing.
+
+   Added CPU detection for Pentium 4.
+
+   
+Enhancements in v2.7 (12/Jul/2001)
+   Expanded workaround for errors caused by BIOS USB keyboard support to
+   include test #5.
+
+   Re-worked L1 / L2 cache detection code to provide clearer reporting.
+
+   Fixed an obvious bug in the computation of cache and memory speeds.
+
+   Changed on-line menu to stay in the menu between option selections.
+
+   Fixed bugs in the test restart and redraw code.
+
+   Adjusted code size to fix compilation problems with RedHat 7.1.
+
+   Misc updates to the documentation.
+
+Enhancements in v2.6 (25/May/2001)
+   Added workaround for errors caused by BIOS USB keyboard support.
+
+   Fixed problems with reporting of 1 GHZ + processor speeds.
+
+   Fixed Duron cache detection.
+
+   Added screen buffer so that menus will work correctly from a serial
+   console.
+
+   The Memtest86 image is now built in ELF format.
+
+Enhancements in v2.5 (14/Dec/00)
+   Enhanced CPU and cache detection to correctly identify Duron CPU
+   and K6-III 1MB cache.
+
+   Added code to report cache-able memory size.
+
+   Added limited support for parity memory.
+
+   Support was added to allow use of on-line commands from a serial
+   port.
+
+   Dropped option for changing refresh rates.  This was not useful
+   and did not work on newer motherboards.
+
+   Improved fatal exception reporting to include a register and stack
+   dump.
+
+   The pass number is now displayed in the error report.
+
+   Fixed a bug that crashed the test when selecting one of the extended
+   tests.
+
+Enhancements in v2.4
+   The error report format was reworked for better clarity and now
+   includes a decimal address in megabytes.
+
+   A new memory move test was added (from Robert Redelmeier's CPU-Burn)
+
+   The test sequence and iterations were modified.
+
+   Fixed scrolling problems with the BadRAM patterns.
+
+
+Enhancements in v2.3
+   A progress meter was added to replace the spinner and dots.
+
+   Measurement and reporting of memory and cache performance  
+   was added.
+
+   Support for creating BadRAM patterns was added.
+
+   All of the test routines were rewritten in assembler to
+   improve both test performance and speed.
+
+   The screen layout was reworked to hopefully be more readable.
+
+   An error summary option was added to the online commands.
+
+
+Enhancements in v2.2
+   Added two new address tests
+
+   Added an on-line command for setting test address range
+
+   Optimized test code for faster execution (-O3, -funroll-loops and
+	-fomit-frame-pointer)
+
+   Added an elapsed time counter.
+
+   Adjusted menu options for better consistency
+
+
+Enhancements in v2.1
+   Fixed a bug in the CPU detection that caused the test to
+   hang or crash with some 486 and Cyrix CPU's
+
+   Added CPU detection for Cyrix CPU's
+
+   Extended and improved CPU detection for Intel and AMD CPU's
+
+   Added a compile time option (BIOS_MEMSZ) for obtaining the last
+   memory address from the BIOS.  This should fix problems with memory
+   sizing on certain motherboards.  This option is not enabled by default.
+   It may be enabled by default in a future release.
+
+Enhancements in v2.0
+   Added new Modulo-20 test algorithm.
+
+   Added a 32 bit shifting pattern to the moving inversions algorithm.
+
+   Created test sections to specify algorithm, pattern and caching.
+
+   Improved test progress indicators.
+
+   Created popup menus for configuration.
+
+   Added menu for test selection.
+
+   Added CPU and cache identification.
+
+   Added a "bail out" feature to quit the current test when it does not
+   fit the test selection parameters.
+
+   Re-arranged the screen layout and colors.
+
+   Created local include files for I/O and serial interface definitions
+   rather than using the sometimes incompatible system include files. 
+
+   Broke up the "C" source code into four separate source modules.
+
+Enhancements in v1.5
+   Some additional changes were made to fix obscure memory sizing
+   problems.
+
+   The 4 bit wide data pattern was increased to 8 bits since 8 bit
+   wide memory chips are becoming more common.
+
+   A new test algorithm was added to improve detection of data
+   pattern sensitive errors. 
+
+
+Enhancements in v1.4
+   Changes to the memory sizing code to avoid problems with some
+   motherboards where memtest would find more memory than actually
+   exists.
+
+   Added support for a console serial port. (thanks to Doug Sisk)
+
+   On-line commands are now available for configuring Memtest86 on
+   the fly (see On-line Commands).
+	
+
+Enhancements in v1.3
+   Scrolling of memory errors is now provided.  Previously, only one screen
+   of error information was displayed.
+
+   Memtest86 can now be booted from any disk via lilo.
+
+   Testing of up to 4gb of memory has been fixed and is now enabled by default.
+   This capability was clearly broken in v1.2a and should work correctly
+   now but has not been fully tested (4gb PC's are a bit rare).
+
+   The maximum memory size supported by the motherboard is now being
+   calculated correctly.  In previous versions there were cases where not
+   all of memory would be tested and the maximum memory size supported
+   was incorrect.
+
+   For some types of failures the good and bad values were reported to be
+   the same with an Xor value of 0.  This has been fixed by retaining the data
+   read from memory and not re-reading the bad data in the error reporting
+   routine.
+
+   APM (advanced power management) is now disabled by Memtest86.  This
+   keeps the screen from blanking while the test is running.
+
+   Problems with enabling & disabling cache on some motherboards have been
+   corrected.
+
+
+16) Acknowledgments
+===================
+Memtest86 was developed by Chris Brady with the resources and assistance
+listed below:
+
+- The initial versions of the source files bootsect.S, setup.S, head.S and
+  build.c are from the Linux 1.2.1 kernel and have been heavily modified.
+
+- Doug Sisk provided code to support a console connected via a serial port.
+
+- Code to create BadRAM patterns was provided by Rick van Rein.
+
+- Tests 5 and 8 are based on Robert Redelmeier's burnBX test.
+
+- Screen buffer code was provided by Jani Averbach.
+
+- Eric Biederman provided all of the feature content for version 3.0
+  plus many bugfixes and significant code cleanup.
+
+- Major enhancements to hardware detection and reporting in version 3.2,
+  3.3 and 3.4 provided by Samuel Demeulemeester (from Memtest86+ v1.11, v1.60
+  and v1.70).
diff --git a/README.background b/README.background
new file mode 100644
index 0000000..9c35250
--- /dev/null
+++ b/README.background
@@ -0,0 +1,156 @@
+                       The Anatomy & Physiology of Memtest86-SMP
+                       -----------------------------------------
+
+1. Binary layout
+                                            
+       ---------------------------------------------------------------
+       | bootsect.o      | setup.o          | head.o memtest_shared  |
+       ---------------------------------------------------------------
+Labels                                _start<-------memtest---------->_end
+       -----------------------------------------------------------
+addr   0               512        512+4*512 |
+       -----------------------------------------------------------
+
+2. The following steps occur after we power on.
+   a. The bootsect.o code gets loaded at 0x7c00 
+      and copies 
+      i.   itself to 0x90000
+      ii.  setup.o to 0x90200
+      iii. everything between _start and _end i.e. memtest
+           to 0x10000
+   b. jumps somewhere into the copied bootsect.o code at 0x90000,
+      does some trivial stuff and jumps to setup.o
+   c. setup.o puts the processor in protected mode, with a basic
+      gdt and idt and does a long jump to the start of the 
+      memtest code (startup_32, see 4 below). The code and data 
+      segment base address are all set to 0x0. So a linear 
+      address range and no paging is enabled.
+   d. From now on we no longer require the bootsect.o and setup.o
+      code.
+3. The code in memtest is compiled as position independent
+   code, which implies that the code can be moved dynamically in
+   the address space and still work. Since we are now in head.o,
+   which is compiled as PIC, we should no longer use absolute
+   address references while accessing functions or globals.
+   All symbols are stored in a table called the Global Offset Table (GOT)
+   and %ebx is set to point to the base of that table. So to get/set
+   the value of a symbol we need to read (%ebx + symbolOffsetIntoGOT) to
+   get the symbol value. For example, if foo is a global variable the
+   assembly code to store the value of %eax into foo will be changed from
+                    mov %eax, foo
+                        to 
+                    mov %eax, foo@GOTOFF(%ebx)
+4. (startup_32) The first step done in head.o is to change   
+   the gdtr and idtr register values to point to the final(!) 
+   gdt and idt tables in head.o, since we can no longer use the
+   gdt and idt tables in setup.o, and call the dynamic linker
+   stub in memtest_shared (see call _dl_start in head.S). This
+   dynamic linker stub relocates all the code in memtest w.r.t.
+   the new base location, i.e. 0x1000. Finally we call the test_start()
+   'C' routine.
+5. The test_start() C routine is the main routine which lets the BSP 
+   bring up the APs from their halt state, relocate the code 
+   (if necessary) to a new address, move the APs to the newly
+   relocated address and execute the tests. The BSP is the
+   master which controls the execution of the APs, and mostly
+   it is the one which manipulates the global variables.
+   i.  we change the stack to a private per cpu stack.
+       (this step happens every time we move to a new location)
+   ii. We kick start the APs in the system by
+      a. Putting a temporary real mode code 
+         (_ap_trampoline_start - _ap_trampoline_protmode) 
+         at 0x9000, which puts the AP in protected mode and jumps 
+         to _ap_trampoline_protmode in head.o. The code in 
+         _ap_trampoline_protmode calls start_32 in head.o which 
+         reinitialises the AP's gdt and idt to point to the
+         final(!) gdt and idt. (see step 4 above)
+      b. Since the APs also traverse through the same initialisation
+         code (startup_32 in head.o), the APs also call test_start().
+         The APs just spin wait (see AP_SpinWaitStart) till they
+         are instructed by the BSP to jump to a new location, 
+         which can either be a test execution or spin wait at a 
+         new location.
+  iii. The base address at which memtest tries to execute as far
+       as possible is 0x2000. This is the lowest possible address
+       memtest can put itself at. So the next step is to
+       move to 0x2000, which it cannot do directly, since copying
+       code to 0x2000 would overwrite the existing code at 0x1000
+       (0x2000 + sizeof(memtest) will usually be greater than 0x1000).
+       So we temporarily relocate to 0x200000 and then relocate
+       back to 0x2000. Every time the BSP relocates the code to the
+       new location, it pulls up the APs spin waiting at the old
+       location to spin wait at the corresponding relocated
+       spin wait location, by making them jump to the new
+       startup_32 relocated location (see 4 above).
+       Henceforth during the tests 0x200000 is the only place
+       we relocate to if we need to test a memory window
+       (see v. below to get a description of what a window is)
+       which includes address range 0x2000.
+
+   Address map during normal execution.
+       --------------------------------------------------------------------
+             | head.o memtest_shared  |                                   |RAM_END
+       --------------------------------------------------------------------
+Labels _start<-------memtest---------->_end
+       --------------------------------------------------------------------
+addr   0x0   0x2000                   | Memory that is being tested..     |RAM_END
+       --------------------------------------------------------------------
+
+   Address map during relocated state.
+       --------------------------------------------------------------------
+                                      | head.o memtest_shared  |          |RAM_END
+       --------------------------------------------------------------------
+Labels                          _start<-------memtest---------->_end
+       --------------------------------------------------------------------
+addr   memory that is being tested... |0x200000                 |         |RAM_END
+       --------------------------------------------------------------------
+
+   iv. Once we are at 0x2000 we initialise the system and
+       determine the memory map, usually via the bios e820 map.
+       The sorted and non-overlapping RAM page ranges are
+       placed in the v->pmap[] array. This array is the reference
+       of the RAM memory map on the system.
+    v. The memory range (in page numbers) which memtest86 can test
+       is partitioned into windows. The current version of
+       memtest86-smp has the capability to test the memory from
+       0x0 - 0xFFFFFFFFF (the max address when pae mode is enabled).
+       We then compute the linear memory address ranges (called
+       segments) for the window we are currently about to
+       test. The windows are
+          a. 0  - 640K
+          b. (0x2000 + (_end - _start))  - 4G (since the code is at 0x2000).
+          c. >4G to test the pae address range, each window with a size
+             of 0x80000 pages (2G), large enough to be mapped in one page
+             directory entry. So a window size of 0x80000 means we can map
+             1024 page table entries, with a page size of 2M (pae mode),
+             with one page directory entry. Something similar to the kseg
+             entry in linux. The upper bound page number is 0x1000000 which
+             corresponds to linear address 0xFFFFFFFFF + 1 which uses
+             all the 36 address bits.
+       Each window is compared against the sorted & non-overlapping
+       e820 map which we have stored in the v->pmap[] array, since not
+       all memory in the selected window address range corresponds to
+       RAM or is usable. A list of segments within the window is
+       created, which contains the usable portions of the window.
+       This is stored in the v->mmap[] array (see the sketch after
+       this list).
+   vi. Once the v->mmap[] array is populated, we have the list of
+       non-overlapping segments in the current window which are the
+       final address ranges that can be tested. The BSP executes the
+       test first and lets each AP execute the test one by one. Once
+       all the APs finish executing the same test, the BSP moves to the
+       next window and follows the same procedure till all the windows
+       are done. Once all the windows are done, the BSP moves to the
+       next test. Before executing in any window the BSP checks if
+       the window overlaps with the code/data of memtest86, and if so
+       it tries to relocate to 0x200000. If the window includes both
+       0x2000 as well as 0x200000 the BSP skips that window.
+       Looking at the window values, the only time memtest
+       relocates is when testing the 0 - 640K window.
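+
+   For illustration, building the segment list for one window can be
+   sketched as follows (structure and function names here are made up,
+   not the actual memtest86 code):
+
+     struct prange { unsigned long start, end; };   /* page numbers */
+
+     static int build_segments(struct prange win, const struct prange *pmap,
+                               int np, struct prange *mmap)
+     {
+             int i, nseg = 0;
+
+             for (i = 0; i < np; i++) {
+                     unsigned long s = pmap[i].start > win.start ?
+                                       pmap[i].start : win.start;
+                     unsigned long e = pmap[i].end < win.end ?
+                                       pmap[i].end : win.end;
+                     if (s < e) {            /* overlap -> one usable segment */
+                             mmap[nseg].start = s;
+                             mmap[nseg].end = e;
+                             nseg++;
+                     }
+             }
+             return nseg;                    /* segments found in this window */
+     }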
+
+Known Issues:
+* Memtest86-smp does not work on IBM-NUMA machines, x440 and friends.
+
+email comments to:
+Kalyan Rajasekharuni<kc_rajasekharuni@yahoo.com>
+Sub: Memtest86-SMP
diff --git a/README.build-process b/README.build-process
new file mode 100644
index 0000000..19edfcf
--- /dev/null
+++ b/README.build-process
@@ -0,0 +1,39 @@
+During memory testing memtest86 relocates itself in memory so it can test the
+memory it was previously running from.  memtest86 is compiled as mostly position
+independent code.  Some relocation records must be processed to achieve the
+effect of position independent code.  A 16 bit loader is prepended to memtest86
+so it can be loaded from a floppy, or from lilo.
+
+In restructuring the build process I had several goals: maintainability and
+comprehensibility of the build process, simplicity of the toolset, and the
+ability to build images bootable by both the legacy x86 bootloader and by
+bootloaders that directly load static ELF images.
+
+With the ability to process relocation records, memtest.bin has been
+reduced in size from 84480 bytes to 49308 bytes, a reduction of about 35K,
+and now requires only one copy of memtest86.  The build process can now
+ignore the size of memtest86.
+
+BIOS calls have been moved from setup.S to head.S making bootsect.S and
+setup.S exclusively for booting.
+
+memtest86 is built in three stages.  In the first stage the relocatable object
+files are built as with any program.  In the second stage the relocatable object
+files are linked together into memtest_shared, a shared library version
+of memtest86.  In the third stage a raw memory image of memtest_shared is formed
+and linked into memtest.bin, and memtest.
+
+memtest.bin is the floppy/lilo bootable target.
+
+memtest is the ELF bootable target.
+
+Another major change is that data in the bss segment is now also preserved
+when memtest86 is relocated, and memtest86 can be relocated to any address.
+
+The one thing to watch out for is pointers to data inside of memtest86.  Except
+for constant pointers to static data there is not enough information to generate
+relocation records for pointers, so they will not change when memtest86 is
+relocated, which might lead to nasty surprises.
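+
+A contrived illustration of the distinction (not taken from the memtest86
+sources):
+
+  static int counter;
+
+  /* Constant pointer to static data: a relocation record is emitted for
+   * the initializer, so it is fixed up when memtest86 moves. */
+  static int * const good_ptr = &counter;
+
+  /* Pointer assigned at run time: the stored value has no relocation
+   * record, so after the next relocation it still points at the old
+   * copy of counter. */
+  static int *stale_ptr;
+
+  static void remember(void)
+  {
+          stale_ptr = &counter;
+  }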
+
+Eric Biederman <ebiederman@lnxi.com>
+
diff --git a/bootsect.S b/bootsect.S
new file mode 100644
index 0000000..23718e3
--- /dev/null
+++ b/bootsect.S
@@ -0,0 +1,383 @@
+/*

+ *	 bootsect.s		Copyright (C) 1991, 1992 Linus Torvalds

+ *

+ * bootsect.s is loaded at 0x7c00 by the bios-startup routines, and moves

+ * itself out of the way to address 0x90000, and jumps there.

+ *

+ * It then loads 'setup' directly after itself (0x90200), and the system

+ * at 0x10000, using BIOS interrupts.

+ *

+ * The loader has been made as simple as possible, and continuos

+ * read errors will result in a unbreakable loop. Reboot by hand. It

+ * loads pretty fast by getting whole tracks at a time whenever possible.

+ *

+ * 1-Jan-96 Modified by Chris Brady for use as a boot loader for MemTest-86.

+ */

+

+#include "defs.h"

+

+ROOT_DEV = 0

+

+.code16

+.section ".bootsect", "ax", @progbits

+_boot:

+

+

+# ld86 requires an entry symbol. This may as well be the usual one.

+.globl	_main

+_main:

+	movw	$BOOTSEG, %ax

+	movw	%ax, %ds

+	movw	$INITSEG, %ax

+	movw	%ax, %es

+	movw	$256, %cx

+	subw	%si, %si

+	subw	%di, %di

+	cld

+	rep

+	movsw

+	ljmp	$INITSEG, $go - _boot

+

+go:

+	movw	%cs, %ax

+	movw	$(0x4000-12), %dx	# 0x4000 is arbitrary value >= length of

+					# bootsect + length of setup + room for stack

+					# 12 is disk parm size

+

+# bde - changed 0xff00 to 0x4000 to use debugger at 0x6400 up (bde).  We

+# wouldn't have to worry about this if we checked the top of memory.  Also

+# my BIOS can be configured to put the wini drive tables in high memory

+# instead of in the vector table.  The old stack might have clobbered the

+# drive table.

+

+	movw	%ax, %ds

+	movw	%ax, %es

+	movw	%ax, %ss		# put stack at INITSEG:0x4000-12.

+	movw	%dx, %sp

+

+/*

+ *	Many BIOS's default disk parameter tables will not

+ *	recognize multi-sector reads beyond the maximum sector number

+ *	specified in the default diskette parameter tables - this may

+ *	mean 7 sectors in some cases.

+ *

+ *	Since single sector reads are slow and out of the question,

+ *	we must take care of this by creating new parameter tables

+ *	(for the first disk) in RAM.  We will set the maximum sector

+ *	count to 18 - the most we will encounter on an HD 1.44.

+ *

+ *	High doesn't hurt.  Low does.

+ *

+ *	Segments are as follows: ds=es=ss=cs - INITSEG,

+ *		fs = 0, gs = parameter table segment

+ */

+	pushw	$0

+	popw	%fs
+	movw	$0x78, %bx		# fs:bx is parameter table address
+	lgs	%fs:(%bx),%si		# gs:si is source
+
+	movw	%dx, %di		# es:di is destination
+	movw	$6, %cx			# copy 12 bytes
+	cld
+
+	rep	movsw %gs:(%si), (%di)
+
+	movw	%dx, %di
+	movb	$18, 4(%di)		# patch sector count
+
+	movw	%di, %fs:(%bx)
+	movw	%es, %fs:2(%bx)
+
+	movw	%cs, %ax
+	movw	%ax, %fs
+	movw	%ax, %gs
+
+	xorb	%ah, %ah		# reset FDC
+	xorb	%dl, %dl
+	int	$0x13
+
+# load the setup-sectors directly after the bootblock.
+# Note that 'es' is already set up.
+
+load_setup:
+	xorw	%dx, %dx			# drive 0, head 0
+	movw	$0x0002, %cx			# sector 2, track 0
+	movw	$0x0200, %bx			# address = 512, in INITSEG
+	movw	$(0x0200 + SETUPSECS), %ax	# service 2, nr of sectors
+						# (assume all on head 0, track 0)
+	int	$0x13				# read it
+	jnc	ok_load_setup			# ok - continue
+
+	pushw	%ax			# dump error code
+	call	print_nl
+	movw	%sp, %bp
+	call	print_hex
+	popw	%ax
+
+	xorb	%dl, %dl		# reset FDC
+	xorb	%ah, %ah
+	int	$0x13
+	jmp	load_setup
+
+ok_load_setup:
+
+# Get disk drive parameters, specifically nr of sectors/track
+
+
+/* It seems that there is no BIOS call to get the number of sectors.  Guess
+ * 18 sectors if sector 18 can be read, 15 if sector 15 can be read.
+ * Otherwise guess 9
+ */
+
+	xorw	%dx, %dx			# drive 0, head 0
+	movw	$0x0012, %cx			# sector 18, track 0
+	movw	$(0x200+(SETUPSECS*0x200)), %bx	# address after setup (es = cs)
+	movw	$0x0201, %ax			# service 2, 1 sector
+	int	$0x13
+	jnc	got_sectors
+	movb	$0x0f, %cl			# sector 15
+	movw	$0x0201, %ax			# service 2, 1 sector
+	int	$0x13
+	jnc	got_sectors
+	movb	$0x09, %cl
+
+got_sectors:
+	movw	%cx, %cs:sectors - _boot
+	movw	$INITSEG, %ax
+	movw	%ax, %es
+
+# Print some inane message
+
+	movb	$0x03, %ah		# read cursor pos
+	xorb	%bh, %bh
+	int	$0x10
+
+	movw	$9, %cx
+	movw	$0x0007, %bx		# page 0, attribute 7 (normal)
+	movw	$msg1 - _boot, %bp
+	movw	$0x1301, %ax		# write string, move cursor
+	int	$0x10
+
+# ok, we've written the message, now
+# we want to load the system (at 0x10000)
+
+	movw	$TSTLOAD, %ax
+	movw	%ax, %es		# segment of 0x010000
+	call	read_it
+	call	kill_motor
+	call    turnoffcursor
+	call	print_nl
+
+# after that (everything loaded), we jump to
+# the setup-routine loaded directly after
+# the bootblock:
+
+	ljmp	$SETUPSEG,$0
+
+# This routine loads the system at address 0x10000, making sure
+# no 64kB boundaries are crossed. We try to load it as fast as
+# possible, loading whole tracks whenever we can.
+#
+# in:	es - starting address segment (normally 0x1000)
+#
+sread:	.word 1+SETUPSECS	# sectors read of current track
+head:	.word 0			# current head
+track:	.word 0			# current track
+
+read_it:
+	movw	%es, %ax
+	testw	$0x0fff, %ax
+die:
+	jne	die		# es must be at 64kB boundary
+	xorw	%bx,%bx		# bx is starting address within segment
+rp_read:
+	movw	%es, %ax
+	subw	$TSTLOAD, %ax	# have we loaded all yet?
+	cmpw	syssize - _boot, %ax
+	jbe	ok1_read
+	ret
+ok1_read:
+	movw	%cs:sectors - _boot, %ax
+	subw	sread - _boot, %ax
+	movw	%ax, %cx
+	shlw	$9, %cx
+	addw	%bx, %cx
+	jnc	ok2_read
+	je	ok2_read
+	xorw	%ax, %ax
+	subw	%bx, %ax
+	shrw	$9, %ax
+ok2_read:
+	call	read_track
+	movw	%ax, %cx
+	add	sread - _boot, %ax
+	cmpw	%cs:sectors - _boot, %ax
+	jne	ok3_read
+	movw	$1, %ax
+	subw	head - _boot, %ax
+	jne	ok4_read
+	incw	track - _boot
+ok4_read:
+	movw	%ax, head - _boot
+	xorw	%ax, %ax
+ok3_read:
+	movw	%ax, sread - _boot
+	shlw	$9, %cx
+	addw	%cx, %bx
+	jnc	rp_read
+	movw	%es, %ax
+	addb	$0x10, %ah
+	movw	%ax, %es
+	xorw	%bx, %bx
+	jmp	rp_read
+
+read_track:
+	pusha
+	pusha
+	movw	$0xe2e, %ax 	# loading... message 2e = .
+	movw	$7, %bx
+	int	$0x10
+	popa
+
+	movw	track - _boot, %dx
+	movw	sread - _boot, %cx
+	incw	%cx
+	movb	%dl, %ch
+	movw	head - _boot, %dx
+	movb	%dl, %dh
+	andw	$0x0100, %dx
+	movb	$2, %ah
+
+	pushw	%dx				# save for error dump
+	pushw	%cx
+	pushw	%bx
+	pushw	%ax
+
+	int	$0x13
+	jc	bad_rt
+	addw	$8, %sp
+	popa
+	ret
+
+bad_rt:
+	pushw	%ax				# save error code
+	call	print_all			# ah = error, al = read
+
+	xorb	%ah, %ah
+	xorb	%dl, %dl
+	int	$0x13
+
+	addw	$10, %sp
+	popa
+	jmp read_track
+
+/*
+ *	print_all is for debugging purposes.
+ *	It will print out all of the registers.  The assumption is that this is
+ *	called from a routine, with a stack frame like
+ *	dx
+ *	cx
+ *	bx
+ *	ax
+ *	error
+ *	ret <- sp
+ *
+*/
+
+print_all:
+	movw	$5, %cx		# error code + 4 registers
+	movw	%sp, %bp
+
+print_loop:
+	pushw	%cx		# save count left
+	call	print_nl	# nl for readability
+
+	cmpb	$5, %cl		# see if register name is needed
+	jae	no_reg
+
+	movw	$(0xe05 + 'A' - 1), %ax
+	subb	%cl, %al
+	int	$0x10
+	movb	$'X', %al
+	int	$0x10
+	movb	$':', %al
+	int	$0x10
+
+no_reg:
+	addw	$2, %bp		# next register
+	call	print_hex	# print it
+	popw	%cx
+	loop	print_loop
+	ret
+
+print_nl:
+	movw	$0xe0d, %ax	# CR
+	int	$0x10
+	movb	$0x0a, %al	# LF
+	int	$0x10
+	ret
+
+/*
+ *	print_hex is for debugging purposes, and prints the word
+ *	pointed to by ss:bp in hexadecimal.
+ */
+
+print_hex:
+	movw	$4, %cx		# 4 hex digits
+	movw	(%bp), %dx	# load word into dx
+
+print_digit:
+	rolw	$4, %dx		# rotate so that lowest 4 bits are used
+	movb	$0xe, %ah
+	movb	%dl, %al	# mask off so we have only next nibble
+	andb	$0xf, %al
+	addb	$'0', %al	# convert to 0-based digit
+	cmpb	$'9', %al	# check for overflow
+	jbe	good_digit
+	addb	$('A' - '0' - 10), %al
+
+good_digit:
+	int	$0x10
+	loop	print_digit
+	ret
+
+
+/*
+ * This procedure turns off the floppy drive motor, so
+ * that we enter the kernel in a known state, and
+ * don't have to worry about it later.
+ */
+kill_motor:
+	pushw	%dx
+	movw	$0x3f2, %dx
+	xorb	%al, %al
+	outb	%al, %dx
+	popw	%dx
+	ret
+
+turnoffcursor:
+  movb  $0x01, %ah      # turn off the cursor
+  movb  $0x00, %bh
+  movw  $0x2000, %cx
+  int   $0x10
+	ret
+
+sectors:
+	.word 0
+
+msg1:
+	.byte 13,10
+	.ascii "Loading"
+
+.org 497
+setup_sects:
+	.byte SETUPSECS
+.org 500
+syssize:
+	.word _syssize
+.org 508
+root_dev:
+	.word ROOT_DEV
+boot_flag:
+	.word 0xAA55
+_eboot:
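
The boot sector above reads the test image with BIOS INT 13h, service 2, one track (or partial track) at a time. A minimal C sketch of the register packing that read_track performs follows; the chs_cx/chs_dx helper names are illustrative only and do not appear in the source.

/* Sketch: how read_track packs cylinder/head/sector values for BIOS
 * INT 13h, service 2 (read sectors).  On a floppy the cylinder number
 * fits in 8 bits, so the two high cylinder bits that would normally
 * occupy CL[7:6] stay zero. */
static unsigned short chs_cx(unsigned int track, unsigned int sector /* 1-based */)
{
	return (unsigned short)((track << 8) | (sector & 0x3f));	/* CH = cyl, CL = sector */
}

static unsigned short chs_dx(unsigned int head, unsigned int drive)
{
	return (unsigned short)((head << 8) | (drive & 0xff));		/* DH = head, DL = drive */
}

/* read_track computes the same values with 16-bit moves:
 *   CX = (track << 8) | (sread + 1)   next unread sector, 1-based
 *   DX = (head  << 8) | 0             drive 0, the first floppy
 *   AX = 0x0200 | nsectors            AH = 2 (read), AL = sector count
 */
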
diff --git a/config.c b/config.c
new file mode 100644
index 0000000..11e4e9e
--- /dev/null
+++ b/config.c
@@ -0,0 +1,445 @@
+/* config.c - MemTest-86  Version 3.4
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Chris Brady
+ * ----------------------------------------------------
+ * MemTest86+ V1.11 Specific code (GPL V2.0)
+ * By Samuel DEMEULEMEESTER, sdemeule@memtest.org
+ * http://www.x86-secret.com - http://www.memtest.org
+ */
+#include "test.h"
+#include "screen_buffer.h"
+
+extern int bail;
+extern struct tseq tseq[];
+extern short e820_nr;
+void performance();
+extern volatile short cpu_mode;
+extern volatile int test;
+extern void find_chunks();
+
+char save[2][POP_H][POP_W];
+char save2[2][POP2_H][POP2_W];
+
+void get_config()
+{
+	int flag = 0, sflag = 0, i, prt = 0;
+        int reprint_screen = 0;
+	ulong page;
+
+	popup();
+	wait_keyup();
+	while(!flag) {
+		cprint(POP_Y+1,  POP_X+2, "Settings:");
+		cprint(POP_Y+3,  POP_X+6, "(1) Test Selection");
+		cprint(POP_Y+4,  POP_X+6, "(2) Address Range");
+		cprint(POP_Y+5,  POP_X+6, "(3) Error Report Mode");
+		cprint(POP_Y+6,  POP_X+6, "(4) CPU Selection Mode");
+		cprint(POP_Y+7,  POP_X+6, "(5) Refresh Screen");
+		cprint(POP_Y+9,POP_X+6,"(0) Continue");
+
+		/* Wait for key release */
+		/* Fooey! This nuts'es up the serial input. */
+		sflag = 0;
+		switch(get_key()) {
+		case 2:
+			/* 1 - Test Selection */
+			popclear();
+			cprint(POP_Y+1, POP_X+2, "Test Selection:");
+			cprint(POP_Y+3, POP_X+6, "(1) Default Tests");
+			cprint(POP_Y+4, POP_X+6, "(2) Skip Current Test");
+			cprint(POP_Y+5, POP_X+6, "(3) Select Test");
+			cprint(POP_Y+7, POP_X+6, "(0) Cancel");
+			if (v->testsel < 0) {
+				cprint(POP_Y+3, POP_X+5, ">");
+			} else {
+				cprint(POP_Y+5, POP_X+5, ">");
+			}
+			wait_keyup();
+			while (!sflag) {
+				switch(get_key()) {
+				case 2:
+					/* Default */
+					if (v->testsel >= 9) {
+						bail++;
+					}
+					v->testsel = -1;
+					find_ticks_for_pass();
+					sflag++;
+					cprint(LINE_INFO, 43, "Std");
+					break;
+				case 3:
+					/* Skip test */
+					bail++;
+					sflag++;
+					break;
+				case 4:
+					/* Select test */
+					popclear();
+					cprint(POP_Y+1, POP_X+3,
+						"Test Selection:");
+					cprint(POP_Y+4, POP_X+5,
+						"Test Number [0-11]: ");
+					i = getval(POP_Y+4, POP_X+24, 0);
+					if (i <= 11) {
+						if (i != v->testsel) {
+							v->pass = -1;
+							test = -1;
+						}
+						v->testsel = i;
+						find_ticks_for_pass();
+						sflag++;
+                            			bail++;
+						cprint(LINE_INFO, 43, "  #");
+                           			dprint(LINE_INFO, 43, i, 2, 0);
+					}
+					break;
+				case 11:
+				case 57:
+					sflag++;
+					break;
+				}
+			}
+			popclear();
+			break;
+		case 3:
+			/* 2 - Address Range */
+			popclear();
+			cprint(POP_Y+1, POP_X+2, "Test Address Range:");
+			cprint(POP_Y+3, POP_X+6, "(1) Set Lower Limit");
+			cprint(POP_Y+4, POP_X+6, "(2) Set Upper Limit");
+			cprint(POP_Y+5, POP_X+6, "(3) Test All Memory");
+			cprint(POP_Y+6, POP_X+6, "(0) Cancel");
+			wait_keyup();
+			while (!sflag) {
+				switch(get_key()) {
+				case 2:
+					/* Lower Limit */
+					popclear();
+					cprint(POP_Y+2, POP_X+4,
+						"Lower Limit: ");
+					cprint(POP_Y+4, POP_X+4,
+						"Current: ");
+					aprint(POP_Y+4, POP_X+13, v->plim_lower);
+					cprint(POP_Y+6, POP_X+4,
+						"New: ");
+					page = getval(POP_Y+6, POP_X+9, 12);
+					if (page + 1 <= v->plim_upper) {
+						v->plim_lower = page;
+						test--;
+						bail++;
+					}
+					adj_mem();
+					find_chunks();
+					find_ticks_for_pass();
+					sflag++;
+					break;
+				case 3:
+					/* Upper Limit */
+					popclear();
+					cprint(POP_Y+2, POP_X+4,
+						"Upper Limit: ");
+					cprint(POP_Y+4, POP_X+4,
+						"Current: ");
+					aprint(POP_Y+4, POP_X+13, v->plim_upper);
+					cprint(POP_Y+6, POP_X+4,
+						"New: ");
+					page = getval(POP_Y+6, POP_X+9, 12);
+					if  (page - 1 >= v->plim_lower) {
+						v->plim_upper = page;
+						bail++;
+						test--;
+					}
+					adj_mem();
+					find_chunks();
+					find_ticks_for_pass();
+					sflag++;
+					break;
+				case 4:
+					/* All of memory */
+					v->plim_lower = 0;
+					v->plim_upper =
+						v->pmap[v->msegs - 1].end;
+					test--;
+					bail++;
+					adj_mem();
+					find_chunks();
+					find_ticks_for_pass();
+					sflag++;
+					break;
+				case 11:
+				case 57:
+					/* 0/CR - Continue */
+					sflag++;
+					break;
+				}
+			}
+			popclear();
+			break;
+		case 4:
+			/* Error Mode */
+			popclear();
+			cprint(POP_Y+1, POP_X+2, "Printing Mode:");
+			cprint(POP_Y+3, POP_X+6, "(1) Error Summary");
+			cprint(POP_Y+4, POP_X+6, "(2) Individual Errors");
+			cprint(POP_Y+5, POP_X+6, "(3) BadRAM Patterns");
+			cprint(POP_Y+6, POP_X+6, "(4) Error Counts Only");
+			cprint(POP_Y+7, POP_X+6, "(0) Cancel");
+			cprint(POP_Y+3+v->printmode, POP_X+5, ">");
+			wait_keyup();
+			while (!sflag) {
+				switch(get_key()) {
+				case 2:
+					/* Error Summary */
+					v->printmode=PRINTMODE_SUMMARY;
+					v->erri.eadr = 0;
+					v->erri.hdr_flag = 0;
+					sflag++;
+					break;
+				case 3:
+					/* Separate Addresses */
+					v->printmode=PRINTMODE_ADDRESSES;
+					v->erri.eadr = 0;
+					v->erri.hdr_flag = 0;
+					v->msg_line = LINE_SCROLL-1;
+					sflag++;
+					break;
+				case 4:
+					/* BadRAM Patterns */
+					v->printmode=PRINTMODE_PATTERNS;
+					v->erri.hdr_flag = 0;
+					sflag++;
+					prt++;
+					break;
+				case 5:
+					/* Error Counts Only */
+					v->printmode=PRINTMODE_NONE;
+					v->erri.hdr_flag = 0;
+					sflag++;
+					break;
+				case 11:
+				case 57:
+					/* 0/CR - Continue */
+					sflag++;
+					break;
+				}
+			}
+			popclear();
+			break;
+		case 5:
+    			/* CPU Mode */
+			popclear();
+			cprint(POP_Y+1, POP_X+2, "CPU Selection Mode:");
+			cprint(POP_Y+3, POP_X+6, "(1) Parallel (All)");
+			cprint(POP_Y+4, POP_X+6, "(2) Round Robin");
+			cprint(POP_Y+5, POP_X+6, "(3) Sequential");
+			cprint(POP_Y+6, POP_X+6, "(0) Cancel");
+			cprint(POP_Y+2+cpu_mode, POP_X+5, ">");
+			wait_keyup();
+			while(!sflag) {
+				switch(get_key()) {
+				case 2:
+					if (cpu_mode != CPM_ALL) bail++;
+					cpu_mode = CPM_ALL;
+					sflag++;
+					cprint(7,72,"All   ");
+					break;
+				case 3:
+					if (cpu_mode != CPM_RROBIN) bail++;
+					cpu_mode = CPM_RROBIN;
+					sflag++;
+					cprint(7,72,"RRobin");
+					break;
+				case 4:
+					if (cpu_mode != CPM_SEQ) bail++;
+					cpu_mode = CPM_SEQ;
+					sflag++;
+					cprint(7,72,"Seq   ");
+					break;
+				case 11:
+				case 57:
+					/* 0/CR - Continue */
+					sflag++;
+					break;
+				}
+			}
+			popclear();
+			break;
+		case 6:
+			reprint_screen = 1;
+			flag++;
+			break;
+		case 11:
+		case 57:
+		case 28:
+			/* 0/CR/SP - Continue */
+			flag++;
+			break;
+		}
+	}
+	popdown();
+	if (prt) {
+		printpatn();
+	}
+        if (reprint_screen){
+            tty_print_screen();
+        }
+}
+
+void popup()
+{
+	int i, j;
+	char *pp;
+	
+	for (i=POP_Y; i<POP_Y + POP_H; i++) { 
+		for (j=POP_X; j<POP_X + POP_W; j++) { 
+			pp = (char *)(SCREEN_ADR + (i * 160) + (j * 2));
+                        save[0][i-POP_Y][j-POP_X] = *pp;  /* Save screen */
+                        set_scrn_buf(i, j, ' ');
+			*pp = ' ';		/* Clear */                        
+			pp++;
+                        save[1][i-POP_Y][j-POP_X] = *pp;
+			*pp = 0x07;		/* Change Background to black */
+		}
+	}
+        tty_print_region(POP_Y, POP_X, POP_Y+POP_H, POP_X+POP_W);
+}
+
+void popdown()
+{
+	int i, j;
+	char *pp;
+	
+	for (i=POP_Y; i<POP_Y + POP_H; i++) { 
+		for (j=POP_X; j<POP_X + POP_W; j++) { 
+			pp = (char *)(SCREEN_ADR + (i * 160) + (j * 2));
+			*pp = save[0][i-POP_Y][j-POP_X]; /* Restore screen */
+                        set_scrn_buf(i, j, save[0][i-POP_Y][j-POP_X]);
+			pp++;
+			*pp = save[1][i-POP_Y][j-POP_X]; /* Restore color */
+		}
+	}
+        tty_print_region(POP_Y, POP_X, POP_Y+POP_H, POP_X+POP_W);
+}
+
+void popclear()
+{
+	int i, j;
+	char *pp;
+	
+	for (i=POP_Y; i<POP_Y + POP_H; i++) { 
+		for (j=POP_X; j<POP_X + POP_W; j++) { 
+			pp = (char *)(SCREEN_ADR + (i * 160) + (j * 2));
+			*pp = ' ';		/* Clear popup */
+                        set_scrn_buf(i, j, ' ');
+			pp++;
+		}
+	}
+        tty_print_region(POP_Y, POP_X, POP_Y+POP_H, POP_X+POP_W);
+}
+
+void pop2up()
+{
+	int i, j;
+	char *pp;
+
+	for (i=POP2_Y; i<POP2_Y + POP2_H; i++) { 
+		for (j=POP2_X; j<POP2_X + POP2_W; j++) { 
+			pp = (char *)(SCREEN_ADR + (i * 160) + (j * 2));
+			save2[0][i-POP2_Y][j-POP2_X] = *pp;  /* Save screen */
+			set_scrn_buf(i, j, ' ');
+			*pp = ' ';		/* Clear */
+			pp++;
+			save2[1][i-POP2_Y][j-POP2_X] = *pp;
+			*pp = 0x07;		/* Change Background to black */
+		}
+	}
+        tty_print_region(POP2_Y, POP2_X, POP2_Y+POP2_H, POP2_X+POP2_W);
+}
+
+void pop2down()
+{
+	int i, j;
+	char *pp;
+
+	for (i=POP2_Y; i<POP2_Y + POP2_H; i++) { 
+		for (j=POP2_X; j<POP2_X + POP2_W; j++) { 
+			pp = (char *)(SCREEN_ADR + (i * 160) + (j * 2));
+			*pp = save2[0][i-POP2_Y][j-POP2_X]; /* Restore screen */
+			set_scrn_buf(i, j, save2[0][i-POP2_Y][j-POP2_X]);
+			pp++;
+			*pp = save2[1][i-POP2_Y][j-POP2_X]; /* Restore color */
+		}
+	}
+        tty_print_region(POP2_Y, POP2_X, POP2_Y+POP2_H, POP2_X+POP2_W);
+}
+
+void pop2clear()
+{
+	int i, j;
+	char *pp;
+
+	for (i=POP2_Y; i<POP2_Y + POP2_H; i++) { 
+		for (j=POP2_X; j<POP2_X + POP2_W; j++) { 
+			pp = (char *)(SCREEN_ADR + (i * 160) + (j * 2));
+			*pp = ' ';		/* Clear popup */
+			set_scrn_buf(i, j, ' ');
+			pp++;
+		}
+	}
+        tty_print_region(POP2_Y, POP2_X, POP2_Y+POP2_H, POP2_X+POP2_W);
+}
+
+void clear_screen()
+{
+	int i;
+	volatile char *pp;
+
+	for(i=0, pp=(char *)(SCREEN_ADR); i<80*24; i++) {
+		*pp++ = ' ';
+		*pp++ = 0x07;
+	}
+}
+
+void adj_mem(void)
+{
+	int i;
+
+	v->selected_pages = 0;
+	for (i=0; i< v->msegs; i++) {
+		/* Segment inside limits ? */
+		if (v->pmap[i].start >= v->plim_lower &&
+				v->pmap[i].end <= v->plim_upper) {
+			v->selected_pages += (v->pmap[i].end - v->pmap[i].start);
+			continue;
+		}
+		/* Segment starts below limit? */
+		if (v->pmap[i].start < v->plim_lower) {
+			/* Also ends below limit? */
+			if (v->pmap[i].end < v->plim_lower) {
+				continue;
+			}
+			
+			/* Ends past upper limit? */
+			if (v->pmap[i].end > v->plim_upper) {
+				v->selected_pages += 
+					v->plim_upper - v->plim_lower;
+			} else {
+				/* Straddles lower limit */
+				v->selected_pages += 
+					(v->pmap[i].end - v->plim_lower);
+			}
+			continue;
+		}
+		/* Segment ends above limit? */
+		if (v->pmap[i].end > v->plim_upper) {
+			/* Also starts above limit? */
+			if (v->pmap[i].start > v->plim_upper) {
+				continue;
+			}
+			/* Straddles upper limit */
+			v->selected_pages += 
+				(v->plim_upper - v->pmap[i].start);
+		}
+	}
+}
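
adj_mem() above enumerates the ways a memory segment can overlap the selected test limits; the net effect is an interval intersection. A minimal sketch of the equivalent computation follows (the pages_in_limits helper is illustrative only and is not part of config.c).

/* Sketch: equivalent of adj_mem()'s per-segment case analysis.
 * 'start'/'end' describe one v->pmap segment in pages, 'lower'/'upper'
 * are v->plim_lower / v->plim_upper; the overlap, if any, is what gets
 * added to v->selected_pages. */
static unsigned long pages_in_limits(unsigned long start, unsigned long end,
                                     unsigned long lower, unsigned long upper)
{
	unsigned long lo = (start > lower) ? start : lower;	/* clamp to lower limit */
	unsigned long hi = (end < upper) ? end : upper;		/* clamp to upper limit */

	return (hi > lo) ? (hi - lo) : 0;			/* zero if no overlap */
}
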
diff --git a/config.h b/config.h
new file mode 100644
index 0000000..455d246
--- /dev/null
+++ b/config.h
@@ -0,0 +1,36 @@
+/* config.h - MemTest-86  Version 3.3
+ *
+ * Compile time configuration options
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Chris Brady
+ */
+
+/* PARITY_MEM - Enables support for reporting memory parity errors */
+/*	Experimental, normally enabled */
+#define PARITY_MEM
+
+/* SERIAL_CONSOLE_DEFAULT -  The default state of the serial console. */
+/*	This is normally off since it slows down testing.  Change to a 1 */
+/*	to enable. */
+#define SERIAL_CONSOLE_DEFAULT 0
+
+/* SERIAL_TTY - The default serial port to use. 0=ttyS0, 1=ttyS1 */ 
+#define SERIAL_TTY 0
+
+/* SERIAL_BAUD_RATE - Baud rate for the serial console */
+#define SERIAL_BAUD_RATE 9600
+
+/* SCRN_DEBUG - extra check for SCREEN_BUFFER
+ */ 
+/* #define SCRN_DEBUG */
+
+/* APM - Turns off APM at boot time to avoid blanking the screen */
+/*	Normally enabled */
+#define APM_OFF
+
+/* USB_WAR - Enables a workaround for errors caused by BIOS USB keyboard */
+/*	and mouse support*/
+/*	Normally enabled */
+#define USB_WAR
+
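
The three serial settings above work together. As an illustrative example only (not part of config.h), a build with the serial console enabled on the second port, leaving the baud rate at its default, would use:

/* Illustrative example: enable the serial console on ttyS1 at the
 * default rate. */
#define SERIAL_CONSOLE_DEFAULT 1
#define SERIAL_TTY 1
#define SERIAL_BAUD_RATE 9600
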
diff --git a/cpuid.c b/cpuid.c
new file mode 100644
index 0000000..33562be
--- /dev/null
+++ b/cpuid.c
@@ -0,0 +1,72 @@
+/*
+ * cpuid.c --
+ *
+ *      Implements CPUID querying functions
+ *
+ */
+#include "stdin.h"
+#include "cpuid.h"
+
+struct cpu_ident cpu_id;
+
+void get_cpuid()
+{
+	unsigned int *v, dummy[3];
+	char *p, *q;
+
+	/* Get the largest CPUID number and vendor ID */
+	cpuid(0x00000000, &cpu_id.max_cpuid, &cpu_id.vend_id.uint32_array[0],
+	    &cpu_id.vend_id.uint32_array[2], &cpu_id.vend_id.uint32_array[1]);
+	cpu_id.vend_id.char_array[11] = 0;
+
+	/* Get the largest extended CPUID number */
+	cpuid(0x80000000, &cpu_id.max_cpuid, &dummy[0], &dummy[1], &dummy[2]);
+
+	/* Get processor family information */
+	if (cpu_id.max_cpuid >= 1) {
+	    cpuid(1, &cpu_id.vers.flat, &cpu_id.info.flat,
+		&cpu_id.fid.uint32_array[1], &cpu_id.fid.uint32_array[0]);
+	}
+
+	/* Get the brand ID */
+	if (cpu_id.max_cpuid >= 4) {
+	    v = (unsigned int *)&cpu_id.brand_id;
+	    cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+	    cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+	    cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+	    cpu_id.brand_id.char_array[47] = 0;
+	}
+        /*
+         * Intel chips right-justify this string for some dumb reason;
+         * undo that brain damage:
+         */
+        p = q = &cpu_id.brand_id.char_array[0];
+        while (*p == ' ')
+                p++;
+        if (p != q) {
+                while (*p)
+                        *q++ = *p++;
+                while (q <= &cpu_id.brand_id.char_array[48])
+                        *q++ = '\0';    /* Zero-pad the rest */
+	}
+
+	/* Get cache information */
+	switch(cpu_id.vend_id.char_array[0]) {
+        case 'A':
+            /* AMD Processors */
+	    /* The cache information is only in ecx and edx so only save
+	     * those registers */
+	    if (cpu_id.max_cpuid >= 0x80000005) {
+		cpuid(0x80000005, &dummy[0], &dummy[1],
+		    &cpu_id.cache_info.uint[0], &cpu_id.cache_info.uint[1]);
+	    }
+	    if (cpu_id.max_cpuid >= 0x80000006) {
+		cpuid(0x80000006, &dummy[0], &dummy[1],
+		    &cpu_id.cache_info.uint[2], &cpu_id.cache_info.uint[3]);
+	    }
+	    break;
+	case 'G':
+                /* Intel Processors, Need to do this in init.c */
+	    break;
+	}
+}
diff --git a/cpuid.h b/cpuid.h
new file mode 100644
index 0000000..945c1b1
--- /dev/null
+++ b/cpuid.h
@@ -0,0 +1,226 @@
+/*
+ * cpuid.h --
+ *      contains the data structures required for CPUID 
+ *      implementation.
+ */
+
+#define CPUID_VENDOR_LENGTH     3               /* 3 GPRs hold vendor ID */
+#define CPUID_VENDOR_STR_LENGTH (CPUID_VENDOR_LENGTH * sizeof(uint32_t) + 1)
+#define CPUID_BRAND_LENGTH      12              /* 12 GPRs hold brand string */
+#define CPUID_BRAND_STR_LENGTH  (CPUID_BRAND_LENGTH * sizeof(uint32_t) + 1)
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+                                unsigned int *ecx, unsigned int *edx)
+{
+        /* ecx is often an input as well as an output. */
+        asm volatile("\t"
+      	    "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
+            : "=a" (*eax),
+              "=D" (*ebx),
+              "=c" (*ecx),
+              "=d" (*edx)
+            : "0" (*eax), "2" (*ecx));
+}
+
+static inline void cpuid(unsigned int op,
+                         unsigned int *eax, unsigned int *ebx,
+                         unsigned int *ecx, unsigned int *edx)
+{
+        *eax = op;
+        *ecx = 0;
+        __cpuid(eax, ebx, ecx, edx);
+}
+
+/* Some CPUID calls want 'count' to be placed in ecx */
+static inline void cpuid_count(unsigned int op, int count,
+                               unsigned int *eax, unsigned int *ebx,
+                               unsigned int *ecx, unsigned int *edx)
+{
+        *eax = op;
+        *ecx = count;
+        __cpuid(eax, ebx, ecx, edx);
+}
+
+/* Typedef for storing the Cache Information */
+typedef union {
+   unsigned char ch[48];
+   uint32_t      uint[12];
+   struct {
+      uint32_t    fill1:24;      /* Bit 0 */
+      uint32_t    l1_i_sz:8;
+      uint32_t    fill2:24; 
+      uint32_t    l1_d_sz:8;
+      uint32_t    fill3:16; 
+      uint32_t    l2_sz:16;
+      uint32_t    fill4:18; 
+      uint32_t    l3_sz:14;
+      uint32_t    fill5[8];
+   } amd;
+} cpuid_cache_info_t;
+
+/* Typedef for storing the CPUID Vendor String */
+typedef union {
+   /* Note: the extra byte in the char array is for '\0'. */
+   char           char_array[CPUID_VENDOR_STR_LENGTH];
+   uint32_t       uint32_array[CPUID_VENDOR_LENGTH];
+} cpuid_vendor_string_t;
+
+/* Typedef for storing the CPUID Brand String */
+typedef union {
+   /* Note: the extra byte in the char array is for '\0'. */
+   char           char_array[CPUID_BRAND_STR_LENGTH];
+   uint32_t       uint32_array[CPUID_BRAND_LENGTH];
+} cpuid_brand_string_t;
+
+/* Typedef for storing CPUID Version */
+typedef union {
+   uint32_t flat;
+   struct {
+      uint32_t    stepping:4;      /* Bit 0 */
+      uint32_t    model:4;
+      uint32_t    family:4;
+      uint32_t    processorType:2;
+      uint32_t    reserved1514:2;
+      uint32_t    extendedModel:4;
+      uint32_t    extendedFamily:8;
+      uint32_t    reserved3128:4;  /* Bit 31 */
+   } bits;      
+} cpuid_version_t;
+
+/* Typedef for storing CPUID Processor Information */
+typedef union {
+   uint32_t flat;
+   struct {
+      uint32_t    brandIndex:8;    /* Bit 0 */
+      uint32_t    cflushLineSize:8;
+      uint32_t    logicalProcessorCount:8;
+      uint32_t    apicID:8;        /* Bit 31 */
+   } bits;      
+} cpuid_proc_info_t;
+
+/* Typedef for storing CPUID Feature flags */
+typedef union {
+   uint64_t       flat;
+   uint32_t       uint32_array[2];
+   struct {
+      uint32_t    fpu:1;           /* Bit 0 */
+      uint32_t    vme:1;
+      uint32_t    de:1;
+      uint32_t    pse:1;
+      uint32_t    tsc:1;
+      uint32_t    msr:1;
+      uint32_t    pae:1;
+      uint32_t    mce:1;
+      uint32_t    cx8:1;
+      uint32_t    apic:1;
+      uint32_t    reserved10:1;
+      uint32_t    sep:1;
+      uint32_t    mtrr:1;
+      uint32_t    pge:1;
+      uint32_t    mca:1;
+      uint32_t    cmov:1;
+      uint32_t    pat:1;
+      uint32_t    pse36:1;
+      uint32_t    psn:1;
+      uint32_t    cflush:1;
+      uint32_t    reserved20:1;
+      uint32_t    ds:1;
+      uint32_t    acpi:1;
+      uint32_t    mmx:1;
+      uint32_t    fxsr:1;
+      uint32_t    sse:1;
+      uint32_t    sse2:1;
+      uint32_t    ss:1;
+      uint32_t    htt:1;
+      uint32_t    tm:1;
+      uint32_t    reserved30:1;
+      uint32_t    pbe:1;           /* Bit 31 */
+      uint32_t    sse3:1;          /* Bit 32 */
+      uint32_t    reserved3433:2;
+      uint32_t    monitor:1;
+      uint32_t    dscpl:1;
+      uint32_t    reserved3937:3;
+      uint32_t    tm2:1;
+      uint32_t    reserved41:1;
+      uint32_t    cnxtid:1;
+      uint32_t    reserved4443:2;
+      uint32_t    cmpxchg16b:1;
+      uint32_t    reserved6346:18; /* Bit 63 */
+   } bits;
+} cpuid_feature_flags_t;
+
+/* Feature flags returned by extended CPUID node function 8000_0001. */
+typedef union {
+   uint64_t       flat;
+   uint32_t       uint32_array[2];
+   struct {
+      uint32_t    fpu:1;           /* Bit 0 */
+      uint32_t    vme:1;
+      uint32_t    de:1;
+      uint32_t    pse:1;
+      uint32_t    tsc:1;
+      uint32_t    msr:1;
+      uint32_t    pae:1;
+      uint32_t    mce:1;
+      uint32_t    cx8:1;
+      uint32_t    apic:1;
+      uint32_t    reserved10:1;
+      uint32_t    sep:1;
+      uint32_t    mtrr:1;
+      uint32_t    pge:1;
+      uint32_t    mca:1;
+      uint32_t    cmov:1;
+      uint32_t    pat:1;
+      uint32_t    pse36:1;
+      uint32_t    reserved1918:2;
+      uint32_t    nx:1;
+      uint32_t    reserved21:1;
+      uint32_t    mmxamd:1;
+      uint32_t    mmx:1;
+      uint32_t    fxsr:1;
+      uint32_t    ffxsr:1;
+      uint32_t    reserved26:1;
+      uint32_t    rdtscp:1;
+      uint32_t    reserved28:1;
+      uint32_t    lm:1;
+      uint32_t    threedeenowext:1;
+      uint32_t    threedeenow:1;   /* Bit 31 */
+      uint32_t    lahf:1;          /* Bit 32 */
+      uint32_t    cmplegacy:1;
+      uint32_t    reserved3534:2;
+      uint32_t    cr8avail:1;
+      uint32_t    reserved6337:27; /* Bit 63 */
+   } bits;
+} cpuid_ext_feature_flags_t;
+
+/* An overall structure to cache all of the CPUID information */
+struct cpu_ident {
+	uint32_t max_cpuid;
+	cpuid_version_t vers;
+	cpuid_proc_info_t info;
+	cpuid_feature_flags_t fid;
+	cpuid_vendor_string_t vend_id;
+	cpuid_brand_string_t brand_id;
+	cpuid_cache_info_t cache_info;
+};
+
+struct cpuid4_eax {
+	uint32_t	ctype:5;
+	uint32_t	level:3;
+	uint32_t	is_self_initializing:1;
+	uint32_t	is_fully_associative:1;
+	uint32_t	reserved:4;
+	uint32_t	num_threads_sharing:12;
+	uint32_t	num_cores_on_die:6;
+};
+
+struct cpuid4_ebx {
+	uint32_t	coherency_line_size:12;
+	uint32_t	physical_line_partition:10;
+	uint32_t	ways_of_associativity:10;
+};
+
+struct cpuid4_ecx {
+	uint32_t	number_of_sets:32;
+};
+
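
A minimal usage sketch for the cpuid() helper declared above (vendor_example() is an illustrative name, not part of the source). CPUID leaf 0 returns the vendor string in EBX, EDX, ECX order, which is why get_cpuid() in cpuid.c stores EDX into uint32_array[1] and ECX into uint32_array[2].

static void vendor_example(char out[13])
{
	unsigned int eax, regs[3];
	int i;

	/* Leaf 0: EAX = highest standard leaf, vendor string in EBX, EDX, ECX */
	cpuid(0, &eax, &regs[0], &regs[2], &regs[1]);
	for (i = 0; i < 12; i++)
		out[i] = ((char *)regs)[i];
	out[12] = '\0';			/* e.g. "GenuineIntel" or "AuthenticAMD" */
}
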
diff --git a/defs.h b/defs.h
new file mode 100644
index 0000000..3891699
--- /dev/null
+++ b/defs.h
@@ -0,0 +1,26 @@
+/* defs.h - MemTest-86 Version 3.3
+ * assembler/compiler definitions
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Chris Brady
+ */ 
+
+#define SETUPSECS	4		/* Number of setup sectors */
+
+/*
+ * Caution!! There is magic in the build process.  Read
+ * README.build-process before you change anything.  
+ * Unlike earlier versions all of the settings are in defs.h
+ * so the build process should be more robust.
+ */
+#define LOW_TEST_ADR	0x00010000		/* Final adrs for test code */
+
+#define BOOTSEG		0x07c0			/* Segment adrs for initial boot */
+#define INITSEG		0x9000			/* Segment adrs for relocated boot */
+#define SETUPSEG	(INITSEG+0x20)		/* Segment adrs for relocated setup */
+#define TSTLOAD		0x1000			/* Segment adrs for load of test */
+
+#define KERNEL_CS	0x10			/* 32 bit segment adrs for code */
+#define KERNEL_DS	0x18			/* 32 bit segment adrs for data */
+#define REAL_CS		0x20			/* 16 bit segment adrs for code */
+#define REAL_DS		0x28			/* 16 bit segment adrs for data */
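
The segment constants above are real-mode paragraph addresses; the physical address is the segment value shifted left by four. A one-line sketch (SEG_TO_PHYS is an illustrative macro, not defined in the source):

#define SEG_TO_PHYS(seg)	((unsigned long)(seg) << 4)

/* SEG_TO_PHYS(BOOTSEG) == 0x07c00  where the BIOS loads the boot sector
 * SEG_TO_PHYS(INITSEG) == 0x90000  where the boot code relocates itself
 * SEG_TO_PHYS(TSTLOAD) == 0x10000  == LOW_TEST_ADR, where the test is loaded
 */
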
diff --git a/elf.h b/elf.h
new file mode 100644
index 0000000..72efaac
--- /dev/null
+++ b/elf.h
@@ -0,0 +1,590 @@
+#ifndef ELF_H
+#define ELF_H
+
+#define EI_NIDENT	16	/* Size of e_ident array. */
+
+/* Values for e_type. */
+#define ET_NONE		0	/* No file type */
+#define ET_REL		1	/* Relocatable file */
+#define ET_EXEC		2	/* Executable file */
+#define ET_DYN		3	/* Shared object file */
+#define ET_CORE		4	/* Core file */
+
+/* Values for e_machine (architecture). */
+#define EM_NONE		 0		/* No machine */
+#define EM_M32		 1		/* AT&T WE 32100 */
+#define EM_SPARC	 2		/* SUN SPARC */
+#define EM_386		 3		/* Intel 80386+ */
+#define EM_68K		 4		/* Motorola m68k family */
+#define EM_88K		 5		/* Motorola m88k family */
+#define EM_486		 6		/* Perhaps disused */
+#define EM_860		 7		/* Intel 80860 */
+#define EM_MIPS		 8		/* MIPS R3000 big-endian */
+#define EM_S370		 9		/* IBM System/370 */
+#define EM_MIPS_RS3_LE	10		/* MIPS R3000 little-endian */
+
+#define EM_PARISC	15		/* HPPA */
+#define EM_VPP500	17		/* Fujitsu VPP500 */
+#define EM_SPARC32PLUS	18		/* Sun's "v8plus" */
+#define EM_960		19		/* Intel 80960 */
+#define EM_PPC		20		/* PowerPC */
+#define EM_PPC64	21		/* PowerPC 64-bit */
+#define EM_S390		22		/* IBM S390 */
+
+#define EM_V800		36		/* NEC V800 series */
+#define EM_FR20		37		/* Fujitsu FR20 */
+#define EM_RH32		38		/* TRW RH-32 */
+#define EM_RCE		39		/* Motorola RCE */
+#define EM_ARM		40		/* ARM */
+#define EM_FAKE_ALPHA	41		/* Digital Alpha */
+#define EM_SH		42		/* Hitachi SH */
+#define EM_SPARCV9	43		/* SPARC v9 64-bit */
+#define EM_TRICORE	44		/* Siemens Tricore */
+#define EM_ARC		45		/* Argonaut RISC Core */
+#define EM_H8_300	46		/* Hitachi H8/300 */
+#define EM_H8_300H	47		/* Hitachi H8/300H */
+#define EM_H8S		48		/* Hitachi H8S */
+#define EM_H8_500	49		/* Hitachi H8/500 */
+#define EM_IA_64	50		/* Intel Merced */
+#define EM_MIPS_X	51		/* Stanford MIPS-X */
+#define EM_COLDFIRE	52		/* Motorola Coldfire */
+#define EM_68HC12	53		/* Motorola M68HC12 */
+#define EM_MMA		54		/* Fujitsu MMA Multimedia Accelerator*/
+#define EM_PCP		55		/* Siemens PCP */
+#define EM_NCPU		56		/* Sony nCPU embedded RISC */
+#define EM_NDR1		57		/* Denso NDR1 microprocessor */
+#define EM_STARCORE	58		/* Motorola Start*Core processor */
+#define EM_ME16		59		/* Toyota ME16 processor */
+#define EM_ST100	60		/* STMicroelectronic ST100 processor */
+#define EM_TINYJ	61		/* Advanced Logic Corp. Tinyj emb.fam*/
+#define EM_X86_64	62		/* AMD x86-64 architecture */
+#define EM_PDSP		63		/* Sony DSP Processor */
+
+#define EM_FX66		66		/* Siemens FX66 microcontroller */
+#define EM_ST9PLUS	67		/* STMicroelectronics ST9+ 8/16 mc */
+#define EM_ST7		68		/* STmicroelectronics ST7 8 bit mc */
+#define EM_68HC16	69		/* Motorola MC68HC16 microcontroller */
+#define EM_68HC11	70		/* Motorola MC68HC11 microcontroller */
+#define EM_68HC08	71		/* Motorola MC68HC08 microcontroller */
+#define EM_68HC05	72		/* Motorola MC68HC05 microcontroller */
+#define EM_SVX		73		/* Silicon Graphics SVx */
+#define EM_AT19		74		/* STMicroelectronics ST19 8 bit mc */
+#define EM_VAX		75		/* Digital VAX */
+#define EM_CRIS		76		/* Axis Communications 32-bit embedded processor */
+#define EM_JAVELIN	77		/* Infineon Technologies 32-bit embedded processor */
+#define EM_FIREPATH	78		/* Element 14 64-bit DSP Processor */
+#define EM_ZSP		79		/* LSI Logic 16-bit DSP Processor */
+#define EM_MMIX		80		/* Donald Knuth's educational 64-bit processor */
+#define EM_HUANY	81		/* Harvard University machine-independent object files */
+#define EM_PRISM	82		/* SiTera Prism */
+#define EM_AVR		83		/* Atmel AVR 8-bit microcontroller */
+#define EM_FR30		84		/* Fujitsu FR30 */
+#define EM_D10V		85		/* Mitsubishi D10V */
+#define EM_D30V		86		/* Mitsubishi D30V */
+#define EM_V850		87		/* NEC v850 */
+#define EM_M32R		88		/* Mitsubishi M32R */
+#define EM_MN10300	89		/* Matsushita MN10300 */
+#define EM_MN10200	90		/* Matsushita MN10200 */
+#define EM_PJ		91		/* picoJava */
+#define EM_OPENRISC	92		/* OpenRISC 32-bit embedded processor */
+#define EM_ARC_A5	93		/* ARC Cores Tangent-A5 */
+#define EM_XTENSA	94		/* Tensilica Xtensa Architecture */
+#define EM_NUM		95
+
+/* Values for p_type. */
+#define PT_NULL		0	/* Unused entry. */
+#define PT_LOAD		1	/* Loadable segment. */
+#define PT_DYNAMIC	2	/* Dynamic linking information segment. */
+#define PT_INTERP	3	/* Pathname of interpreter. */
+#define PT_NOTE		4	/* Auxiliary information. */
+#define PT_SHLIB	5	/* Reserved (not used). */
+#define PT_PHDR		6	/* Location of program header itself. */
+
+/* Values for p_flags. */
+#define PF_X		0x1	/* Executable. */
+#define PF_W		0x2	/* Writable. */
+#define PF_R		0x4	/* Readable. */
+
+
+#define	ELF_PROGRAM_RETURNS_BIT	0x8000000	/* e_flags bit 31 */
+
+#define EI_MAG0		0
+#define ELFMAG0		0x7f
+
+#define EI_MAG1		1
+#define ELFMAG1		'E'
+
+#define EI_MAG2		2
+#define ELFMAG2		'L'
+
+#define EI_MAG3		3
+#define ELFMAG3		'F'
+
+#define ELFMAG		"\177ELF"
+
+#define EI_CLASS	4	/* File class byte index */
+#define ELFCLASSNONE	0	/* Invalid class */
+#define ELFCLASS32	1	/* 32-bit objects */
+#define ELFCLASS64	2	/* 64-bit objects */
+
+#define EI_DATA		5	/* Data encoding byte index */
+#define ELFDATANONE	0	/* Invalid data encoding */
+#define ELFDATA2LSB	1	/* 2's complement little endian */
+#define ELFDATA2MSB	2	/* 2's complement big endian */
+
+#define EI_VERSION	6	/* File version byte index */
+				/* Value must be EV_CURRENT */
+
+#define EV_NONE		0	/* Invalid ELF Version */
+#define EV_CURRENT	1	/* Current version */
+
+#define ELF32_PHDR_SIZE (8*4)	/* Size of an elf program header */
+
+#ifndef ASSEMBLY
+
+#include "stdint.h"
+
+/*
+ * ELF definitions common to all 32-bit architectures.
+ */
+
+typedef uint32_t	Elf32_Addr;
+typedef uint16_t	Elf32_Half;
+typedef uint32_t	Elf32_Off;
+typedef uint16_t	Elf32_Section;
+typedef int32_t		Elf32_Sword;
+typedef uint32_t	Elf32_Word;
+typedef uint32_t	Elf32_Size;
+
+typedef uint64_t	Elf64_Addr;
+typedef uint16_t	Elf64_Half;
+typedef uint64_t	Elf64_Off;
+typedef uint16_t	Elf64_Section;
+typedef int32_t		Elf64_Sword;
+typedef uint32_t	Elf64_Word;
+typedef uint64_t	Elf64_Size;
+typedef uint64_t	Elf64_Xword;
+typedef	int64_t		Elf64_Sxword;
+
+/*
+ * ELF header.
+ */
+typedef struct {
+	unsigned char	e_ident[EI_NIDENT];	/* File identification. */
+	Elf32_Half	e_type;		/* File type. */
+	Elf32_Half	e_machine;	/* Machine architecture. */
+	Elf32_Word	e_version;	/* ELF format version. */
+	Elf32_Addr	e_entry;	/* Entry point. */
+	Elf32_Off	e_phoff;	/* Program header file offset. */
+	Elf32_Off	e_shoff;	/* Section header file offset. */
+	Elf32_Word	e_flags;	/* Architecture-specific flags. */
+	Elf32_Half	e_ehsize;	/* Size of ELF header in bytes. */
+	Elf32_Half	e_phentsize;	/* Size of program header entry. */
+	Elf32_Half	e_phnum;	/* Number of program header entries. */
+	Elf32_Half	e_shentsize;	/* Size of section header entry. */
+	Elf32_Half	e_shnum;	/* Number of section header entries. */
+	Elf32_Half	e_shstrndx;	/* Section name strings section. */
+} Elf32_Ehdr;
+
+typedef struct {
+	unsigned char	e_ident[EI_NIDENT];	/* File identification. */
+	Elf64_Half	e_type;		/* File type. */
+	Elf64_Half	e_machine;	/* Machine architecture. */
+	Elf64_Word	e_version;	/* ELF format version. */
+	Elf64_Addr	e_entry;	/* Entry point. */
+	Elf64_Off	e_phoff;	/* Program header file offset. */
+	Elf64_Off	e_shoff;	/* Section header file offset. */
+	Elf64_Word	e_flags;	/* Architecture-specific flags. */
+	Elf64_Half	e_ehsize;	/* Size of ELF header in bytes. */
+	Elf64_Half	e_phentsize;	/* Size of program header entry. */
+	Elf64_Half	e_phnum;	/* Number of program header entries. */
+	Elf64_Half	e_shentsize;	/* Size of section header entry. */
+	Elf64_Half	e_shnum;	/* Number of section header entries. */
+	Elf64_Half	e_shstrndx;	/* Section name strings section. */
+} Elf64_Ehdr;
+
+/*
+ * Program header.
+ */
+typedef struct {
+	Elf32_Word	p_type;		/* Entry type. */
+	Elf32_Off	p_offset;	/* File offset of contents. */
+	Elf32_Addr	p_vaddr;	/* Virtual address (not used). */
+	Elf32_Addr	p_paddr;	/* Physical address. */
+	Elf32_Size	p_filesz;	/* Size of contents in file. */
+	Elf32_Size	p_memsz;	/* Size of contents in memory. */
+	Elf32_Word	p_flags;	/* Access permission flags. */
+	Elf32_Size	p_align;	/* Alignment in memory and file. */
+} Elf32_Phdr;
+
+typedef struct {
+	Elf64_Word	p_type;		/* Entry type. */
+	Elf64_Word	p_flags;	/* Access permission flags. */
+	Elf64_Off	p_offset;	/* File offset of contents. */
+	Elf64_Addr	p_vaddr;	/* Virtual address (not used). */
+	Elf64_Addr	p_paddr;	/* Physical address. */
+	Elf64_Size	p_filesz;	/* Size of contents in file. */
+	Elf64_Size	p_memsz;	/* Size of contents in memory. */
+	Elf64_Size	p_align;	/* Alignment in memory and file. */
+} Elf64_Phdr;
+
+
+/* Dynamic section entry.  */
+
+typedef struct
+{
+	Elf32_Sword	d_tag;			/* Dynamic entry type */
+	union
+	{
+		Elf32_Word d_val;			/* Integer value */
+		Elf32_Addr d_ptr;			/* Address value */
+	} d_un;
+} Elf32_Dyn;
+
+typedef struct
+{
+	Elf64_Sxword	d_tag;			/* Dynamic entry type */
+	union
+	{
+		Elf64_Xword d_val;		/* Integer value */
+		Elf64_Addr d_ptr;		/* Address value */
+	} d_un;
+} Elf64_Dyn;
+
+/* Legal values for d_tag (dynamic entry type).  */
+
+#define DT_NULL		0		/* Marks end of dynamic section */
+#define DT_NEEDED	1		/* Name of needed library */
+#define DT_PLTRELSZ	2		/* Size in bytes of PLT relocs */
+#define DT_PLTGOT	3		/* Processor defined value */
+#define DT_HASH		4		/* Address of symbol hash table */
+#define DT_STRTAB	5		/* Address of string table */
+#define DT_SYMTAB	6		/* Address of symbol table */
+#define DT_RELA		7		/* Address of Rela relocs */
+#define DT_RELASZ	8		/* Total size of Rela relocs */
+#define DT_RELAENT	9		/* Size of one Rela reloc */
+#define DT_STRSZ	10		/* Size of string table */
+#define DT_SYMENT	11		/* Size of one symbol table entry */
+#define DT_INIT		12		/* Address of init function */
+#define DT_FINI		13		/* Address of termination function */
+#define DT_SONAME	14		/* Name of shared object */
+#define DT_RPATH	15		/* Library search path (deprecated) */
+#define DT_SYMBOLIC	16		/* Start symbol search here */
+#define DT_REL		17		/* Address of Rel relocs */
+#define DT_RELSZ	18		/* Total size of Rel relocs */
+#define DT_RELENT	19		/* Size of one Rel reloc */
+#define DT_PLTREL	20		/* Type of reloc in PLT */
+#define DT_DEBUG	21		/* For debugging; unspecified */
+#define DT_TEXTREL	22		/* Reloc might modify .text */
+#define DT_JMPREL	23		/* Address of PLT relocs */
+#define	DT_BIND_NOW	24		/* Process relocations of object */
+#define	DT_INIT_ARRAY	25		/* Array with addresses of init fct */
+#define	DT_FINI_ARRAY	26		/* Array with addresses of fini fct */
+#define	DT_INIT_ARRAYSZ	27		/* Size in bytes of DT_INIT_ARRAY */
+#define	DT_FINI_ARRAYSZ	28		/* Size in bytes of DT_FINI_ARRAY */
+#define DT_RUNPATH	29		/* Library search path */
+#define DT_FLAGS	30		/* Flags for the object being loaded */
+#define DT_ENCODING	32		/* Start of encoded range */
+#define DT_PREINIT_ARRAY 32		/* Array with addresses of preinit fct*/
+#define DT_PREINIT_ARRAYSZ 33		/* size in bytes of DT_PREINIT_ARRAY */
+#define	DT_NUM		34		/* Number used */
+#define DT_LOOS		0x6000000d	/* Start of OS-specific */
+#define DT_HIOS		0x6ffff000	/* End of OS-specific */
+#define DT_LOPROC	0x70000000	/* Start of processor-specific */
+#define DT_HIPROC	0x7fffffff	/* End of processor-specific */
+#define	DT_PROCNUM	0x32		/* Most used by any processor */
+
+/* DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the
+   Dyn.d_un.d_val field of the Elf*_Dyn structure.  This follows Sun's
+   approach.  */
+#define DT_VALRNGLO	0x6ffffd00
+#define DT_GNU_PRELINKED 0x6ffffdf5	/* Prelinking timestamp */
+#define DT_GNU_CONFLICTSZ 0x6ffffdf6	/* Size of conflict section */
+#define DT_GNU_LIBLISTSZ 0x6ffffdf7	/* Size of library list */
+#define DT_CHECKSUM	0x6ffffdf8
+#define DT_PLTPADSZ	0x6ffffdf9
+#define DT_MOVEENT	0x6ffffdfa
+#define DT_MOVESZ	0x6ffffdfb
+#define DT_FEATURE_1	0x6ffffdfc	/* Feature selection (DTF_*).  */
+#define DT_POSFLAG_1	0x6ffffdfd	/* Flags for DT_* entries, affecting
+					   the following DT_* entry.  */
+#define DT_SYMINSZ	0x6ffffdfe	/* Size of syminfo table (in bytes) */
+#define DT_SYMINENT	0x6ffffdff	/* Entry size of syminfo */
+#define DT_VALRNGHI	0x6ffffdff
+#define DT_VALTAGIDX(tag)	(DT_VALRNGHI - (tag))	/* Reverse order! */
+#define DT_VALNUM 12
+
+/* DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the
+   Dyn.d_un.d_ptr field of the Elf*_Dyn structure.
+
+   If any adjustment is made to the ELF object after it has been
+   built these entries will need to be adjusted.  */
+#define DT_ADDRRNGLO	0x6ffffe00
+#define DT_GNU_CONFLICT	0x6ffffef8	/* Start of conflict section */
+#define DT_GNU_LIBLIST	0x6ffffef9	/* Library list */
+#define DT_CONFIG	0x6ffffefa	/* Configuration information.  */
+#define DT_DEPAUDIT	0x6ffffefb	/* Dependency auditing.  */
+#define DT_AUDIT	0x6ffffefc	/* Object auditing.  */
+#define	DT_PLTPAD	0x6ffffefd	/* PLT padding.  */
+#define	DT_MOVETAB	0x6ffffefe	/* Move table.  */
+#define DT_SYMINFO	0x6ffffeff	/* Syminfo table.  */
+#define DT_ADDRRNGHI	0x6ffffeff
+#define DT_ADDRTAGIDX(tag)	(DT_ADDRRNGHI - (tag))	/* Reverse order! */
+#define DT_ADDRNUM 10
+
+/* The versioning entry types.  The next are defined as part of the
+   GNU extension.  */
+#define DT_VERSYM	0x6ffffff0
+
+#define DT_RELACOUNT	0x6ffffff9
+#define DT_RELCOUNT	0x6ffffffa
+
+/* These were chosen by Sun.  */
+#define DT_FLAGS_1	0x6ffffffb	/* State flags, see DF_1_* below.  */
+#define	DT_VERDEF	0x6ffffffc	/* Address of version definition
+					   table */
+#define	DT_VERDEFNUM	0x6ffffffd	/* Number of version definitions */
+#define	DT_VERNEED	0x6ffffffe	/* Address of table with needed
+					   versions */
+#define	DT_VERNEEDNUM	0x6fffffff	/* Number of needed versions */
+#define DT_VERSIONTAGIDX(tag)	(DT_VERNEEDNUM - (tag))	/* Reverse order! */
+#define DT_VERSIONTAGNUM 16
+
+/* Sun added these machine-independent extensions in the "processor-specific"
+   range.  Be compatible.  */
+#define DT_AUXILIARY    0x7ffffffd      /* Shared object to load before self */
+#define DT_FILTER       0x7fffffff      /* Shared object to get values from */
+#define DT_EXTRATAGIDX(tag)	((Elf32_Word)-((Elf32_Sword) (tag) <<1>>1)-1)
+#define DT_EXTRANUM	3
+
+/* Values of `d_un.d_val' in the DT_FLAGS entry.  */
+#define DF_ORIGIN	0x00000001	/* Object may use DF_ORIGIN */
+#define DF_SYMBOLIC	0x00000002	/* Symbol resolutions starts here */
+#define DF_TEXTREL	0x00000004	/* Object contains text relocations */
+#define DF_BIND_NOW	0x00000008	/* No lazy binding for this object */
+#define DF_STATIC_TLS	0x00000010	/* Module uses the static TLS model */
+
+/* State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1
+   entry in the dynamic section.  */
+#define DF_1_NOW	0x00000001	/* Set RTLD_NOW for this object.  */
+#define DF_1_GLOBAL	0x00000002	/* Set RTLD_GLOBAL for this object.  */
+#define DF_1_GROUP	0x00000004	/* Set RTLD_GROUP for this object.  */
+#define DF_1_NODELETE	0x00000008	/* Set RTLD_NODELETE for this object.*/
+#define DF_1_LOADFLTR	0x00000010	/* Trigger filtee loading at runtime.*/
+#define DF_1_INITFIRST	0x00000020	/* Set RTLD_INITFIRST for this object*/
+#define DF_1_NOOPEN	0x00000040	/* Set RTLD_NOOPEN for this object.  */
+#define DF_1_ORIGIN	0x00000080	/* $ORIGIN must be handled.  */
+#define DF_1_DIRECT	0x00000100	/* Direct binding enabled.  */
+#define DF_1_TRANS	0x00000200
+#define DF_1_INTERPOSE	0x00000400	/* Object is used to interpose.  */
+#define DF_1_NODEFLIB	0x00000800	/* Ignore default lib search path.  */
+#define DF_1_NODUMP	0x00001000	/* Object can't be dldump'ed.  */
+#define DF_1_CONFALT	0x00002000	/* Configuration alternative created.*/
+#define DF_1_ENDFILTEE	0x00004000	/* Filtee terminates filters search. */
+#define	DF_1_DISPRELDNE	0x00008000	/* Disp reloc applied at build time. */
+#define	DF_1_DISPRELPND	0x00010000	/* Disp reloc applied at run-time.  */
+
+/* Flags for the feature selection in DT_FEATURE_1.  */
+#define DTF_1_PARINIT	0x00000001
+#define DTF_1_CONFEXP	0x00000002
+
+/* Flags in the DT_POSFLAG_1 entry affecting only the next DT_* entry.  */
+#define DF_P1_LAZYLOAD	0x00000001	/* Lazyload following object.  */
+#define DF_P1_GROUPPERM	0x00000002	/* Symbols from next object are not
+					   generally available.  */
+
+/* Special section indices.  */
+
+#define SHN_UNDEF	0		/* Undefined section */
+#define SHN_LORESERVE	0xff00		/* Start of reserved indices */
+#define SHN_LOPROC	0xff00		/* Start of processor-specific */
+#define SHN_HIPROC	0xff1f		/* End of processor-specific */
+#define SHN_LOOS	0xff20		/* Start of OS-specific */
+#define SHN_HIOS	0xff3f		/* End of OS-specific */
+#define SHN_ABS		0xfff1		/* Associated symbol is absolute */
+#define SHN_COMMON	0xfff2		/* Associated symbol is common */
+#define SHN_XINDEX	0xffff		/* Index is in extra table.  */
+#define SHN_HIRESERVE	0xffff		/* End of reserved indices */
+
+/* Legal values for sh_type (section type).  */
+
+#define SHT_NULL	  0		/* Section header table entry unused */
+#define SHT_PROGBITS	  1		/* Program data */
+#define SHT_SYMTAB	  2		/* Symbol table */
+#define SHT_STRTAB	  3		/* String table */
+#define SHT_RELA	  4		/* Relocation entries with addends */
+#define SHT_HASH	  5		/* Symbol hash table */
+#define SHT_DYNAMIC	  6		/* Dynamic linking information */
+#define SHT_NOTE	  7		/* Notes */
+#define SHT_NOBITS	  8		/* Program space with no data (bss) */
+#define SHT_REL		  9		/* Relocation entries, no addends */
+#define SHT_SHLIB	  10		/* Reserved */
+#define SHT_DYNSYM	  11		/* Dynamic linker symbol table */
+#define SHT_INIT_ARRAY	  14		/* Array of constructors */
+#define SHT_FINI_ARRAY	  15		/* Array of destructors */
+#define SHT_PREINIT_ARRAY 16		/* Array of pre-constructors */
+#define SHT_GROUP	  17		/* Section group */
+#define SHT_SYMTAB_SHNDX  18		/* Extended section indices */
+#define	SHT_NUM		  19		/* Number of defined types.  */
+#define SHT_LOOS	  0x60000000	/* Start OS-specific */
+#define SHT_GNU_LIBLIST	  0x6ffffff7	/* Prelink library list */
+#define SHT_CHECKSUM	  0x6ffffff8	/* Checksum for DSO content.  */
+#define SHT_LOSUNW	  0x6ffffffa	/* Sun-specific low bound.  */
+#define SHT_SUNW_move	  0x6ffffffa
+#define SHT_SUNW_COMDAT   0x6ffffffb
+#define SHT_SUNW_syminfo  0x6ffffffc
+#define SHT_GNU_verdef	  0x6ffffffd	/* Version definition section.  */
+#define SHT_GNU_verneed	  0x6ffffffe	/* Version needs section.  */
+#define SHT_GNU_versym	  0x6fffffff	/* Version symbol table.  */
+#define SHT_HISUNW	  0x6fffffff	/* Sun-specific high bound.  */
+#define SHT_HIOS	  0x6fffffff	/* End OS-specific type */
+#define SHT_LOPROC	  0x70000000	/* Start of processor-specific */
+#define SHT_HIPROC	  0x7fffffff	/* End of processor-specific */
+#define SHT_LOUSER	  0x80000000	/* Start of application-specific */
+#define SHT_HIUSER	  0x8fffffff	/* End of application-specific */
+
+/* Legal values for sh_flags (section flags).  */
+
+#define SHF_WRITE	     (1 << 0)	/* Writable */
+#define SHF_ALLOC	     (1 << 1)	/* Occupies memory during execution */
+#define SHF_EXECINSTR	     (1 << 2)	/* Executable */
+#define SHF_MERGE	     (1 << 4)	/* Might be merged */
+#define SHF_STRINGS	     (1 << 5)	/* Contains nul-terminated strings */
+#define SHF_INFO_LINK	     (1 << 6)	/* `sh_info' contains SHT index */
+#define SHF_LINK_ORDER	     (1 << 7)	/* Preserve order after combining */
+#define SHF_OS_NONCONFORMING (1 << 8)	/* Non-standard OS specific handling
+					   required */
+#define SHF_GROUP	     (1 << 9)	/* Section is member of a group.  */
+#define SHF_TLS		     (1 << 10)	/* Section holds thread-local data.  */
+#define SHF_MASKOS	     0x0ff00000	/* OS-specific.  */
+#define SHF_MASKPROC	     0xf0000000	/* Processor-specific */
+
+/* Section group handling.  */
+#define GRP_COMDAT	0x1		/* Mark group as COMDAT.  */
+
+/* Symbol table entry.  */
+
+typedef struct
+{
+	Elf32_Word	st_name;	/* Symbol name (string tbl index) */
+	Elf32_Addr	st_value;	/* Symbol value */
+	Elf32_Word	st_size;	/* Symbol size */
+	unsigned char	st_info;	/* Symbol type and binding */
+	unsigned char	st_other;	/* Symbol visibility */
+	Elf32_Section	st_shndx;	/* Section index */
+} Elf32_Sym;
+
+typedef struct
+{
+	Elf64_Word	st_name;	/* Symbol name (string tbl index) */
+	unsigned char	st_info;	/* Symbol type and binding */
+	unsigned char st_other;		/* Symbol visibility */
+	Elf64_Section	st_shndx;	/* Section index */
+	Elf64_Addr	st_value;	/* Symbol value */
+	Elf64_Xword	st_size;	/* Symbol size */
+} Elf64_Sym;
+
+/* Relocation table entry without addend (in section of type SHT_REL).  */
+
+typedef struct
+{
+	Elf32_Addr	r_offset;	/* Address */
+	Elf32_Word	r_info;		/* Relocation type and symbol index */
+} Elf32_Rel;
+
+/* I have seen two different definitions of the Elf64_Rel and
+   Elf64_Rela structures, so we'll leave them out until Novell (or
+   whoever) gets their act together.  */
+/* The following, at least, is used on Sparc v9, MIPS, and Alpha.  */
+
+typedef struct
+{
+	Elf64_Addr	r_offset;	/* Address */
+	Elf64_Xword	r_info;		/* Relocation type and symbol index */
+} Elf64_Rel;
+
+/* Relocation table entry with addend (in section of type SHT_RELA).  */
+
+typedef struct
+{
+	Elf32_Addr	r_offset;	/* Address */
+	Elf32_Word	r_info;		/* Relocation type and symbol index */
+	Elf32_Sword	r_addend;	/* Addend */
+} Elf32_Rela;
+
+typedef struct
+{
+	Elf64_Addr	r_offset;	/* Address */
+	Elf64_Xword	r_info;		/* Relocation type and symbol index */
+	Elf64_Sxword	r_addend;	/* Addend */
+} Elf64_Rela;
+
+/* How to extract and insert information held in the r_info field.  */
+
+#define ELF32_R_SYM(val)		((val) >> 8)
+#define ELF32_R_TYPE(val)		((val) & 0xff)
+#define ELF32_R_INFO(sym, type)		(((sym) << 8) + ((type) & 0xff))
+
+#define ELF64_R_SYM(i)			((i) >> 32)
+#define ELF64_R_TYPE(i)			((i) & 0xffffffff)
+#define ELF64_R_INFO(sym,type)		((((Elf64_Xword) (sym)) << 32) + (type))
+
+
+/* Intel 80386 specific definitions.  */
+
+/* i386 relocs.  */
+
+#define R_386_NONE	   0		/* No reloc */
+#define R_386_32	   1		/* Direct 32 bit  */
+#define R_386_PC32	   2		/* PC relative 32 bit */
+#define R_386_GOT32	   3		/* 32 bit GOT entry */
+#define R_386_PLT32	   4		/* 32 bit PLT address */
+#define R_386_COPY	   5		/* Copy symbol at runtime */
+#define R_386_GLOB_DAT	   6		/* Create GOT entry */
+#define R_386_JMP_SLOT	   7		/* Create PLT entry */
+#define R_386_RELATIVE	   8		/* Adjust by program base */
+#define R_386_GOTOFF	   9		/* 32 bit offset to GOT */
+#define R_386_GOTPC	   10		/* 32 bit PC relative offset to GOT */
+#define R_386_32PLT	   11
+#define R_386_TLS_TPOFF	   14		/* Offset in static TLS block */
+#define R_386_TLS_IE	   15		/* Address of GOT entry for static TLS
+					   block offset */
+#define R_386_TLS_GOTIE	   16		/* GOT entry for static TLS block
+					   offset */
+#define R_386_TLS_LE	   17		/* Offset relative to static TLS
+					   block */
+#define R_386_TLS_GD	   18		/* Direct 32 bit for GNU version of
+					   general dynamic thread local data */
+#define R_386_TLS_LDM	   19		/* Direct 32 bit for GNU version of
+					   local dynamic thread local data
+					   in LE code */
+#define R_386_16	   20
+#define R_386_PC16	   21
+#define R_386_8		   22
+#define R_386_PC8	   23
+#define R_386_TLS_GD_32	   24		/* Direct 32 bit for general dynamic
+					   thread local data */
+#define R_386_TLS_GD_PUSH  25		/* Tag for pushl in GD TLS code */
+#define R_386_TLS_GD_CALL  26		/* Relocation for call to
+					   __tls_get_addr() */
+#define R_386_TLS_GD_POP   27		/* Tag for popl in GD TLS code */
+#define R_386_TLS_LDM_32   28		/* Direct 32 bit for local dynamic
+					   thread local data in LE code */
+#define R_386_TLS_LDM_PUSH 29		/* Tag for pushl in LDM TLS code */
+#define R_386_TLS_LDM_CALL 30		/* Relocation for call to
+					   __tls_get_addr() in LDM code */
+#define R_386_TLS_LDM_POP  31		/* Tag for popl in LDM TLS code */
+#define R_386_TLS_LDO_32   32		/* Offset relative to TLS block */
+#define R_386_TLS_IE_32	   33		/* GOT entry for negated static TLS
+					   block offset */
+#define R_386_TLS_LE_32	   34		/* Negated offset relative to static
+					   TLS block */
+#define R_386_TLS_DTPMOD32 35		/* ID of module containing symbol */
+#define R_386_TLS_DTPOFF32 36		/* Offset in TLS block */
+#define R_386_TLS_TPOFF32  37		/* Negated offset in static TLS block */
+/* Keep this the last entry.  */
+#define R_386_NUM	   38
+
+#endif /* ASSEMBLY */
+
+#endif /* ELF_H */
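
These definitions support the code that handles the relocatable memtest_shared image. A minimal sketch of walking the program headers with the types above (first_load_segment() is illustrative only and assumes elf.h is included):

/* Sketch: locate the first loadable segment in a 32-bit ELF image
 * that is already in memory. */
static const Elf32_Phdr *first_load_segment(const unsigned char *image)
{
	const Elf32_Ehdr *eh = (const Elf32_Ehdr *)image;
	const Elf32_Phdr *ph;
	int i;

	if (image[EI_MAG0] != ELFMAG0 || image[EI_MAG1] != ELFMAG1 ||
	    image[EI_MAG2] != ELFMAG2 || image[EI_MAG3] != ELFMAG3)
		return 0;				/* not an ELF image */

	ph = (const Elf32_Phdr *)(image + eh->e_phoff);
	for (i = 0; i < eh->e_phnum; i++)
		if (ph[i].p_type == PT_LOAD)		/* loadable segment */
			return &ph[i];
	return 0;
}
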
diff --git a/error.c b/error.c
new file mode 100644
index 0000000..d4ed505
--- /dev/null
+++ b/error.c
@@ -0,0 +1,553 @@
+/* error.c - MemTest-86  Version 3.4
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Chris Brady
+ */
+#include "stddef.h"
+#include "test.h"
+#include "config.h"
+#include "smp.h"
+
+extern struct barrier_s *barr;
+extern int test_ticks, nticks;
+extern struct tseq tseq[];
+extern volatile int test;
+void poll_errors();
+
+static void update_err_counts(void);
+static void print_err_counts(void);
+static void common_err();
+static int syn, chan, len=1;
+
+/*
+ * Display data error message. Don't display duplicate errors.
+ */
+void error(ulong *adr, ulong good, ulong bad)
+{
+	ulong xor;
+
+	spin_lock(&barr->mutex);
+	xor = good ^ bad;
+#ifdef USB_WAR
+	/* Skip any errors that appear to be due to the BIOS using location
+	 * 0x4e0 for USB keyboard support.  This often happens with Intel
+         * 810, 815 and 820 chipsets.  It is possible that we will skip
+	 * a real error but the odds are very low.
+	 */
+	if ((ulong)adr == 0x4e0 || (ulong)adr == 0x410) {
+		spin_unlock(&barr->mutex);	/* don't return with the mutex held */
+		return;
+	}
+#endif
+	common_err(adr, good, bad, xor, 0);
+	spin_unlock(&barr->mutex);
+}
+
+/*
+ * Display address error message.
+ * Since this is strictly an address test, trying to create BadRAM
+ * patterns does not make sense.  Just report the error.
+ */
+void ad_err1(ulong *adr1, ulong *mask, ulong bad, ulong good)
+{
+	spin_lock(&barr->mutex);
+	common_err(adr1, good, bad, (ulong)mask, 1);
+	spin_unlock(&barr->mutex);
+}
+
+/*
+ * Display address error message.
+ * Since this type of address error can also report data errors go
+ * ahead and generate BadRAM patterns.
+ */
+void ad_err2(ulong *adr, ulong bad)
+{
+	spin_lock(&barr->mutex);
+	common_err(adr, (ulong)adr, bad, ((ulong)adr) ^ bad, 0);
+	spin_unlock(&barr->mutex);
+}
+
+static void update_err_counts(void)
+{
+	if (v->pass && v->ecount == 0) {
+		cprint(LINE_MSG, COL_MSG,
+			"                                            ");
+	}
+	++(v->ecount);
+	tseq[test].errors++;
+		
+}
+
+static void print_err_counts(void)
+{
+	int i;
+	char *pp;
+
+	if ((v->ecount > 4096) && (v->ecount % 256 != 0)) return;
+
+	dprint(LINE_INFO, 70, v->ecount, 6, 0);
+/*
+	dprint(LINE_INFO, 56, v->ecc_ecount, 6, 0);
+*/
+
+	/* Paint the error messages on the screen red to provide a vivid */
+	/* indicator that an error has occurred */
+	if ((v->printmode == PRINTMODE_ADDRESSES ||
+			v->printmode == PRINTMODE_PATTERNS) &&
+			v->msg_line < 24) {
+		for(i=0, pp=(char *)((SCREEN_ADR+v->msg_line*160+1));
+				 i<76; i++, pp+=2) {
+			*pp = 0x47;
+		}
+	}
+}
+
+/*
+ * Print an individual error
+ */
+void common_err( ulong *adr, ulong good, ulong bad, ulong xor, int type) 
+{
+	int i, n, x, flag=0;
+	ulong page, offset;
+	int patnchg;
+	ulong mb;
+
+	update_err_counts();
+
+	switch(v->printmode) {
+	case PRINTMODE_SUMMARY:
+		/* Don't do anything for a parity error. */
+		if (type == 3) {
+			return;
+		}
+
+		/* Address error */
+		if (type == 1) {
+			xor = good ^ bad;
+		}
+
+		/* Ecc correctable errors */
+		if (type == 2) {
+			/* the bad value is the corrected flag */
+			if (bad) {
+				v->erri.cor_err++;
+			}
+			page = (ulong)adr;
+			offset = good;
+		} else {
+			page = page_of(adr);
+			offset = (ulong)adr & 0xFFF;
+		}
+			
+		/* Calc upper and lower error addresses */
+		if (v->erri.low_addr.page > page) {
+			v->erri.low_addr.page = page;
+			v->erri.low_addr.offset = offset;
+			flag++;
+		} else if (v->erri.low_addr.page == page &&
+				v->erri.low_addr.offset > offset) {
+			v->erri.low_addr.offset = offset;
+			v->erri.high_addr.offset = offset;
+			flag++;
+		} else if (v->erri.high_addr.page < page) {
+			v->erri.high_addr.page = page;
+			flag++;
+		}
+		if (v->erri.high_addr.page == page &&
+				v->erri.high_addr.offset < offset) {
+			v->erri.high_addr.offset = offset;
+			flag++;
+		}
+
+		/* Calc bits in error */
+		for (i=0, n=0; i<32; i++) {
+			if (xor>>i & 1) {
+				n++;
+			}
+		}
+		v->erri.tbits += n;
+		if (n > v->erri.max_bits) {
+			v->erri.max_bits = n;
+			flag++;
+		}
+		if (n < v->erri.min_bits) {
+			v->erri.min_bits = n;
+			flag++;
+		}
+		if (v->erri.ebits ^ xor) {
+			flag++;
+		}
+		v->erri.ebits |= xor;
+
+	 	/* Calc max contig errors */
+		len = 1;
+		if ((ulong)adr == (ulong)v->erri.eadr+4 ||
+				(ulong)adr == (ulong)v->erri.eadr-4 ) {
+			len++;
+		}
+		if (len > v->erri.maxl) {
+			v->erri.maxl = len;
+			flag++;
+		}
+		v->erri.eadr = (ulong)adr;
+
+		if (v->erri.hdr_flag == 0) {
+			clear_scroll();
+			cprint(LINE_HEADER+0, 1,  "Error Confidence Value:");
+			cprint(LINE_HEADER+1, 1,  "  Lowest Error Address:");
+			cprint(LINE_HEADER+2, 1,  " Highest Error Address:");
+			cprint(LINE_HEADER+3, 1,  "    Bits in Error Mask:");
+			cprint(LINE_HEADER+4, 1,  " Bits in Error - Total:");
+			cprint(LINE_HEADER+4, 29,  "Min:    Max:    Avg:");
+			cprint(LINE_HEADER+5, 1,  " Max Contiguous Errors:");
+			x = 24;
+
+			cprint(LINE_HEADER+0, 64,   "Test  Errors");
+			v->erri.hdr_flag++;
+		}
+		if (flag) {
+		  /* Calc bits in error */
+		  for (i=0, n=0; i<32; i++) {
+			if (v->erri.ebits>>i & 1) {
+				n++;
+			}
+		  }
+		  page = v->erri.low_addr.page;
+		  offset = v->erri.low_addr.offset;
+		  mb = page >> 8;
+		  hprint(LINE_HEADER+1, 25, page);
+		  hprint2(LINE_HEADER+1, 33, offset, 3);
+		  cprint(LINE_HEADER+1, 36, " -      . MB");
+		  dprint(LINE_HEADER+1, 39, mb, 5, 0);
+		  dprint(LINE_HEADER+1, 45, ((page & 0xF)*10)/16, 1, 0);
+		  page = v->erri.high_addr.page;
+		  offset = v->erri.high_addr.offset;
+		  mb = page >> 8;
+		  hprint(LINE_HEADER+2, 25, page);
+		  hprint2(LINE_HEADER+2, 33, offset, 3);
+		  cprint(LINE_HEADER+2, 36, " -      . MB");
+		  dprint(LINE_HEADER+2, 39, mb, 5, 0);
+		  dprint(LINE_HEADER+2, 45, ((page & 0xF)*10)/16, 1, 0);
+		  hprint(LINE_HEADER+3, 25, v->erri.ebits);
+		  dprint(LINE_HEADER+4, 25, n, 2, 1);
+		  dprint(LINE_HEADER+4, 34, v->erri.min_bits, 2, 1);
+		  dprint(LINE_HEADER+4, 42, v->erri.max_bits, 2, 1);
+		  dprint(LINE_HEADER+4, 50, v->erri.tbits/v->ecount, 2, 1);
+		  dprint(LINE_HEADER+5, 25, v->erri.maxl, 7, 1);
+		  x = 28;
+			
+		  for (i=0; tseq[i].msg != NULL; i++) {
+			dprint(LINE_HEADER+1+i, 66, i, 2, 0);
+			dprint(LINE_HEADER+1+i, 68, tseq[i].errors, 8, 0);
+	  	  }
+		}
+		if (v->erri.cor_err) {
+		  dprint(LINE_HEADER+6, 25, v->erri.cor_err, 8, 1);
+		}
+		break;
+
+	case PRINTMODE_ADDRESSES:
+		/* Don't display duplicate errors */
+		if ((ulong)adr == (ulong)v->erri.eadr &&
+				 xor == v->erri.exor) {
+			return;
+		}
+		if (v->erri.hdr_flag == 0) {
+			clear_scroll();
+			cprint(LINE_HEADER, 0,
+"Tst  Pass   Failing Address          Good       Bad     Err-Bits  Count CPU");
+			cprint(LINE_HEADER+1, 0,
+"---  ----  -----------------------  --------  --------  --------  ----- ----");
+			v->erri.hdr_flag++;
+		}
+		/* Check for keyboard input */
+		check_input();
+		scroll();
+	
+		if ( type == 2 || type == 3) {
+			page = (ulong)adr;
+			offset = good;
+		} else {
+			page = page_of(adr);
+			offset = ((unsigned long)adr) & 0xFFF;
+		}
+		mb = page >> 8;
+		dprint(v->msg_line, 0, test, 3, 0);
+		dprint(v->msg_line, 4, v->pass, 5, 0);
+		hprint(v->msg_line, 11, page);
+		hprint2(v->msg_line, 19, offset, 3);
+		cprint(v->msg_line, 22, " -      . MB");
+		dprint(v->msg_line, 25, mb, 5, 0);
+		dprint(v->msg_line, 31, ((page & 0xF)*10)/16, 1, 0);
+
+		if (type == 3) {
+			/* ECC error */
+			cprint(v->msg_line, 36, 
+			  bad?"corrected           ": "uncorrected         ");
+			hprint2(v->msg_line, 60, syn, 4);
+			cprint(v->msg_line, 68, "ECC"); 
+			dprint(v->msg_line, 74, chan, 2, 0);
+		} else if (type == 2) {
+			cprint(v->msg_line, 36, "Parity error detected                ");
+		} else {
+			hprint(v->msg_line, 36, good);
+			hprint(v->msg_line, 46, bad);
+			hprint(v->msg_line, 56, xor);
+			dprint(v->msg_line, 66, v->ecount, 5, 0);
+			dprint(v->msg_line, 74, smp_my_cpu_num(), 2,1);
+			v->erri.exor = xor;
+		}
+		v->erri.eadr = (ulong)adr;
+		print_err_counts();
+		break;
+
+	case PRINTMODE_PATTERNS:
+		if (v->erri.hdr_flag == 0) {
+			clear_scroll();
+			v->erri.hdr_flag++;
+		}
+		/* Do not do badram patterns from test 0 or 5 */
+		if (test == 0 || test == 5) {
+			return;
+		}
+		/* Only do patterns for data errors */
+		if ( type != 0) {
+			return;
+		}
+		/* Process the address in the pattern administration */
+		patnchg=insertaddress ((ulong) adr);
+		if (patnchg) { 
+			printpatn();
+		}
+		break;
+
+	case PRINTMODE_NONE:
+		if (v->erri.hdr_flag == 0) {
+			clear_scroll();
+			v->erri.hdr_flag++;
+		}
+		break;
+	}
+}
+
+/*
+ * Print an ecc error
+ */
+void print_ecc_err(unsigned long page, unsigned long offset, 
+	int corrected, unsigned short syndrome, int channel)
+{
+	++(v->ecc_ecount);
+	syn = syndrome;
+	chan = channel;
+	common_err((ulong *)page, offset, corrected, 0, 2);
+}
+
+#ifdef PARITY_MEM
+/*
+ * Print a parity error message
+ */
+void parity_err( unsigned long edi, unsigned long esi) 
+{
+	unsigned long addr;
+
+	if (test == 5) {
+		addr = esi;
+	} else {
+		addr = edi;
+	}
+	common_err((ulong *)addr, addr & 0xFFF, 0, 0, 3);
+}
+#endif
+
+/*
+ * Print the pattern array as a LILO boot option addressing BadRAM support.
+ */
+void printpatn (void)
+{
+       int idx=0;
+       int x;
+
+	/* Check for keyboard input */
+	check_input();
+
+       if (v->numpatn == 0)
+               return;
+
+       scroll();
+
+       cprint (v->msg_line, 0, "badram=");
+       x=7;
+
+       for (idx = 0; idx < v->numpatn; idx++) {
+
+               if (x > 80-22) {
+                       scroll();
+                       x=7;
+               }
+               cprint (v->msg_line, x, "0x");
+               hprint (v->msg_line, x+2,  v->patn[idx].adr );
+               cprint (v->msg_line, x+10, ",0x");
+               hprint (v->msg_line, x+13, v->patn[idx].mask);
+               if (idx+1 < v->numpatn)
+                       cprint (v->msg_line, x+21, ",");
+               x+=22;
+       }
+}
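+/*
+ * Example output for a single recorded pattern (hypothetical values):
+ *   badram=0x01234000,0xfffff000
+ * i.e. an address/mask pair that a BadRAM-patched kernel can use to
+ * exclude the affected pages.
+ */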
+	
+/*
+ * Show progress by displaying elapsed time and update bar graphs
+ */
+short spin_idx[MAX_CPUS];
+char spin[4] = {'|','/','-','\\'};
+
+void do_tick(int me)
+{
+	int i, pct;
+	ulong h, l, n, t;
+	extern int mstr_cpu;
+
+	if (++spin_idx[me] > 3) {
+		spin_idx[me] = 0;
+	}
+	cplace(8, 2*me+7, spin[spin_idx[me]]);
+	
+	/* Check for keyboard input */
+	if (me == mstr_cpu) {
+		check_input();
+	}
+	/* A barrier here holds the other CPUs until the configuration
+	 * changes are done */
+	s_barrier();
+
+	/* Only the first selected CPU does the update */
+	if (me !=  mstr_cpu) {
+		return;
+	}
+
+	/* FIXME only print serial error messages from the tick handler */
+	if (v->ecount) {
+		print_err_counts();
+	}
+	
+	nticks++;
+	v->total_ticks++;
+
+	if (test_ticks) {
+		pct = 100*nticks/test_ticks;
+		if (pct > 100) {
+			pct = 100;
+		}
+	} else {
+		pct = 0;
+	}
+	dprint(2, COL_MID+4, pct, 3, 0);
+	i = (BAR_SIZE * pct) / 100;
+	while (i > v->tptr) {
+		if (v->tptr >= BAR_SIZE) {
+			break;
+		}
+		cprint(2, COL_MID+9+v->tptr, "#");
+		v->tptr++;
+	}
+	
+	if (v->pass_ticks) {
+		pct = 100*v->total_ticks/v->pass_ticks;
+		if (pct > 100) {
+			pct = 100;
+		}
+	} else {
+		pct = 0;
+        }
+	dprint(1, COL_MID+4, pct, 3, 0);
+	i = (BAR_SIZE * pct) / 100;
+	while (i > v->pptr) {
+		if (v->pptr >= BAR_SIZE) {
+			break;
+		}
+		cprint(1, COL_MID+9+v->pptr, "#");
+		v->pptr++;
+	}
+
+	if (v->ecount && v->printmode == PRINTMODE_SUMMARY) {
+		/* Compute confidence score */
+		pct = 0;
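+		/* The score is built from three observations and then scaled
+		 * against a nominal maximum of 22 points:
+		 *  - no errors within 1MB of the start or end of memory (+8)
+		 *  - tests that completed with zero errors (+3 each once a
+		 *    full pass is done, +2 each during the first pass)
+		 *  - error bits confined to few nibbles of the error mask
+		 *    (+2 per clean nibble) */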
+
+		/* If there are no errors within 1MB of the start or end addresses */
+		h = v->pmap[v->msegs - 1].end - 0x100;
+		if (v->erri.low_addr.page >  0x100 &&
+				 v->erri.high_addr.page < h) {
+			pct += 8;
+		}
+
+		/* Errors for only some tests */
+		if (v->pass) {
+			for (i=0, n=0; tseq[i].msg != NULL; i++) {
+				if (tseq[i].errors == 0) {
+					n++;
+				}
+			}
+			pct += n*3;
+		} else {
+			for (i=0, n=0; i<test; i++) {
+				if (tseq[i].errors == 0) {
+					n++;
+				}
+			}
+			pct += n*2;
+			
+		}
+
+		/* Only some bits in error */
+		n = 0;
+		if (v->erri.ebits & 0xf) n++;
+		if (v->erri.ebits & 0xf0) n++;
+		if (v->erri.ebits & 0xf00) n++;
+		if (v->erri.ebits & 0xf000) n++;
+		if (v->erri.ebits & 0xf0000) n++;
+		if (v->erri.ebits & 0xf00000) n++;
+		if (v->erri.ebits & 0xf000000) n++;
+		if (v->erri.ebits & 0xf0000000) n++;
+		pct += (8-n)*2;
+
+		/* Adjust the score */
+		pct = pct*100/22;
+/*
+		if (pct > 100) {
+			pct = 100;
+		}
+*/
+		dprint(LINE_HEADER+0, 25, pct, 3, 1);
+	}
+		
+
+	/* We can't do the elapsed time unless the rdtsc instruction
+	 * is supported
+	 */
+	if (v->rdtsc) {
+		asm __volatile__(
+			"rdtsc":"=a" (l),"=d" (h));
+		asm __volatile__ (
+			"subl %2,%0\n\t"
+			"sbbl %3,%1"
+			:"=a" (l), "=d" (h)
+			:"g" (v->startl), "g" (v->starth),
+			"0" (l), "1" (h));
+		t = h * ((unsigned)0xffffffff / v->clks_msec) / 1000;
+		t += (l / v->clks_msec) / 1000;
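+		/* t now holds elapsed seconds: the 64-bit TSC delta is
+		 * (h * 2^32 + l), divided by clocks-per-millisecond and then
+		 * by 1000.  The remainders below peel off seconds and minutes
+		 * for display; what is left is hours. */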
+		i = t % 60;
+		dprint(LINE_INFO, COL_INF1-2, i%10, 1, 0);
+		dprint(LINE_INFO, COL_INF1-3, i/10, 1, 0);
+		t /= 60;
+		i = t % 60;
+		dprint(LINE_INFO, COL_INF1-5, i % 10, 1, 0);
+		dprint(LINE_INFO, COL_INF1-6, i / 10, 1, 0);
+		t /= 60;
+		dprint(LINE_INFO, COL_INF1-11, t, 4, 0);
+	}
+
+
+	/* Poll for ECC errors */
+/*
+	poll_errors();
+*/
+}
+
diff --git a/extra.c b/extra.c
new file mode 100644
index 0000000..ff580f2
--- /dev/null
+++ b/extra.c
@@ -0,0 +1,981 @@
+// This is the extra stuff added to memtest86+ from memtest.org
+// Code from Eric Nelson and Wee
+// (Checked without vendor-specific optimization before adding)
+/* extra.c -
+ *
+ * Released under version 2 of the Gnu Public License.
+ *
+ */
+
+#include "test.h"
+#include "screen_buffer.h"
+#include "pci.h"
+#include "extra.h"
+
+static int ctrl = -1;
+
+struct memory_controller {
+	unsigned vendor;
+	unsigned device;
+	int worked;
+	void (*change_timing)(int cas, int rcd, int rp, int ras);
+};
+
+static struct memory_controller mem_ctr[] = {
+
+	/* AMD 64*/
+	{ 0x1022, 0x1100,  1, change_timing_amd64}, //AMD64 hypertransport link
+
+	/* nVidia */
+	{ 0x10de, 0x01E0,  0, change_timing_nf2},  // nforce2
+
+	/* Intel */
+	{ 0x8086, 0x2570,  0, change_timing_i875}, //Intel i848/i865
+	{ 0x8086, 0x2578,  0, change_timing_i875}, //Intel i875P
+	{ 0x8086, 0x2580,  0, change_timing_i925}, //Intel i915P/G
+	{ 0x8086, 0x2584,  0, change_timing_i925}, //Intel i925X
+	{ 0x8086, 0x2770,  0, change_timing_i925}, //Intel Lakeport
+	{ 0x8086, 0x3580,  0, change_timing_i852}, //Intel i852GM - i855GM/GME (But not i855PM)
+};
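+/* The 'worked' flag appears to mark controllers whose timing-change path
+ * has been verified; amd64_option() below only offers its extra menu when
+ * it is set for the detected controller. */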
+
+struct drc {
+	unsigned t_rwt;
+	unsigned t_wrt;
+	unsigned t_ref;
+	unsigned t_en2t;
+	unsigned t_rwqb;
+	unsigned t_rct;
+	unsigned t_rrd;
+	unsigned t_wr;
+};
+
+static struct drc a64;
+
+void find_memctr(void)  // Basically copied from the find_controller function
+{
+	unsigned long vendor;
+	unsigned long device;
+	unsigned long a64;
+	int i= 0;
+	int result;
+
+	result = pci_conf_read(0, 0, 0, PCI_VENDOR_ID, 2, &vendor);
+	result = pci_conf_read(0, 0, 0, PCI_DEVICE_ID, 2, &device);
+
+	pci_conf_read(0, 24, 0, 0x00, 4, &a64);
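+	/* Bus 0, device 24 (0x18), function 0 is where the AMD64 (K8) on-die
+	 * northbridge appears; config dword 0 reads back as device<<16|vendor,
+	 * so 0x11001022 matches the 0x1022/0x1100 entry of mem_ctr[] above.
+	 * Note: this local 'a64' shadows the file-scope 'struct drc a64'. */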
+
+	if( a64 == 0x11001022) 	{
+		ctrl = 0;
+		return;
+	}
+
+	if (result == 0) {
+		for(i = 1; i < sizeof(mem_ctr)/sizeof(mem_ctr[0]); i++) {
+			if ((mem_ctr[i].vendor == vendor) &&
+				(mem_ctr[i].device == device))
+			{
+				ctrl = i;
+				return;
+			}
+		}
+	}
+	ctrl = -1;
+}
+
+void a64_parameter(void)
+{
+
+	ulong dramtlr;
+
+	if ( 0 == pci_conf_read(0, 24, 2, 0x88, 4, &dramtlr) )
+	{
+		a64.t_rct = 7 + ((dramtlr>>4) & 0x0F);
+		a64.t_rrd = 0 + ((dramtlr>>16) & 0x7);
+		a64.t_wr  = 2 + ((dramtlr>>28) & 0x1);
+	}
+
+	if ( 0 == pci_conf_read(0, 24, 2, 0x8C, 4, &dramtlr) )
+	{
+		a64.t_rwt = 1 + ((dramtlr>>4) & 0x07);
+		a64.t_wrt = 1 +  (dramtlr      & 0x1);
+		a64.t_ref = 1 + ((dramtlr>>11) & 0x3);
+	}
+
+	if ( 0 == pci_conf_read(0, 24, 2, 0x90, 4, &dramtlr) )
+	{
+		a64.t_en2t = 1 + ((dramtlr>>28) & 0x1);
+		a64.t_rwqb = 2 << ((dramtlr>>14) & 0x3);
+	}
+}
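+/* The three reads above decode fields of the AMD64 DRAM timing/configuration
+ * registers at function 2, offsets 0x88, 0x8C and 0x90; the bit positions
+ * mirror the ones rewritten by change_timing_amd64() and amd64_tweak() below. */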
+
+
+
+void change_timing(int cas, int rcd, int rp, int ras)
+{
+	find_memctr();
+	if ((ctrl == -1) || ( ctrl >= sizeof(mem_ctr)/sizeof(mem_ctr[0])))
+	{
+		return;
+	}
+
+	mem_ctr[ctrl].change_timing(cas, rcd, rp, ras);
+	restart();
+}
+
+void amd64_option()
+{
+	int rwt=0, wrt=0, ref=0, en2t=0, rct=0, rrd=0, rwqb=0, wr = 0, flag=0;
+
+	if ((ctrl == -1) || ( ctrl >= sizeof(mem_ctr)/sizeof(mem_ctr[0])))
+	{
+		return;
+	}
+
+	if (mem_ctr[ctrl].worked)
+	{
+		a64_parameter();
+		cprint(POP_Y+1, POP_X+4, "AMD64 options");
+
+		cprint(POP_Y+3, POP_X+4, "(1) Rd-Wr Delay   : ");
+		dprint(POP_Y+3, POP_X+24, a64.t_rwt, 2, 0);
+
+		cprint(POP_Y+4, POP_X+4, "(2) Wr-Rd Delay   : ");
+		dprint(POP_Y+4, POP_X+24, a64.t_wrt, 2, 0);
+
+		cprint(POP_Y+5, POP_X+4, "(3) Rd/Wr Bypass  : ");
+		dprint(POP_Y+5, POP_X+24, a64.t_rwqb, 2, 0);
+
+		cprint(POP_Y+6, POP_X+4, "(4) Refresh Rate  : ");
+		switch ( a64.t_ref)
+		{
+		case 1 : cprint(POP_Y+6, POP_X+23, "15.6us"); break;
+		case 2 : cprint(POP_Y+6, POP_X+23, " 7.8us"); break;
+		case 3 : cprint(POP_Y+6, POP_X+23, " 3.9us"); break;
+		}
+		cprint(POP_Y+7, POP_X+4,  "(5) Command Rate  :");
+		dprint(POP_Y+7, POP_X+24, a64.t_en2t, 2, 0);
+		cprint(POP_Y+7, POP_X+26, "T ");
+
+		cprint(POP_Y+8, POP_X+4,  "(6) Row Cycle Time: ");
+		dprint(POP_Y+8, POP_X+24, a64.t_rct, 2, 0);
+
+		cprint(POP_Y+9, POP_X+4, "(7) RAS-RAS Delay : ");
+		dprint(POP_Y+9, POP_X+24, a64.t_rrd, 2, 0);
+
+		cprint(POP_Y+10, POP_X+4, "(8) Write Recovery: ");
+		dprint(POP_Y+10, POP_X+24, a64.t_wr, 2, 0);
+
+		cprint(POP_Y+11, POP_X+4,"(0) Cancel   ");
+
+		while(!flag)
+		{
+			switch(get_key())
+			{
+			case 2:
+				popclear();
+				// read-to-write delay
+				cprint(POP_Y+3, POP_X+4, "Rd-Wr delay ");
+				cprint(POP_Y+4, POP_X+4, " (2 - 6 cycles)");
+				cprint(POP_Y+5, POP_X+4, "Current: ");
+				dprint(POP_Y+5, POP_X+14, a64.t_rwt, 4, 0);
+				cprint(POP_Y+7, POP_X+4, "New: ");
+				rwt = getval(POP_Y+7, POP_X+12, 0);
+				amd64_tweak(rwt, wrt, ref,en2t, rct, rrd, rwqb, wr);
+				break;
+
+			case 3:
+				popclear();
+				// write-to-read delay
+				cprint(POP_Y+3, POP_X+4, "Wr-Rd delay ");
+				cprint(POP_Y+4, POP_X+4, " (1 - 2 cycles)");
+				cprint(POP_Y+5, POP_X+4, "Current: ");
+				dprint(POP_Y+5, POP_X+14, a64.t_wrt, 4, 0);
+				cprint(POP_Y+7, POP_X+4, "New: ");
+				wrt = getval(POP_Y+7, POP_X+12, 0);
+				amd64_tweak(rwt, wrt, ref,en2t, rct, rrd, rwqb, wr);
+				break;
+
+			case 4:
+				popclear();
+				// Read write queue bypass count
+				cprint(POP_Y+3, POP_X+4, "Rd/Wr bypass ");
+				cprint(POP_Y+4, POP_X+4, " (2, 4 or 8 )");
+				cprint(POP_Y+5, POP_X+4, "Current: ");
+				dprint(POP_Y+5, POP_X+14, a64.t_rwqb, 2, 0);
+				cprint(POP_Y+7, POP_X+4, "New: ");
+				rwqb = getval(POP_Y+7, POP_X+11, 0);
+				amd64_tweak(rwt, wrt, ref,en2t, rct, rrd, rwqb, wr);
+				break;
+
+			case 5:
+				popclear();
+				// refresh rate
+				cprint(POP_Y+3, POP_X+4, "Refresh rate ");
+				cprint(POP_Y+4, POP_X+4, "Current: ");
+				switch ( a64.t_ref){
+				case 1 : cprint(POP_Y+4, POP_X+14, "15.6us"); break;
+				case 2 : cprint(POP_Y+4, POP_X+14, "7.8us "); break;
+				case 3 : cprint(POP_Y+4, POP_X+14, "3.9us "); break;
+				}
+				cprint(POP_Y+6, POP_X+4, "New: ");
+				cprint(POP_Y+7, POP_X+4, "(1) 15.6us");
+				cprint(POP_Y+8, POP_X+4, "(2) 7.8us ");
+				cprint(POP_Y+9, POP_X+4, "(3) 3.9us ");
+				ref = getval(POP_Y+6, POP_X+11, 0);
+				amd64_tweak(rwt, wrt, ref,en2t, rct, rrd, rwqb, wr);
+				break;
+
+			case 6:
+				popclear();
+				//Enable 2T command and addressing
+				cprint(POP_Y+3, POP_X+4, "Command rate:");
+				cprint(POP_Y+5, POP_X+4, "(1) 1T "); //only supported by CG revision and later
+				cprint(POP_Y+6, POP_X+4, "(2) 2T ");
+				en2t = getval(POP_Y+3, POP_X+22, 0);
+				amd64_tweak(rwt, wrt, ref,en2t, rct, rrd, rwqb, wr);
+				break;
+
+			case 7:
+				popclear();
+				//Row cycle time
+				cprint(POP_Y+3, POP_X+4, "Row cycle time: ");
+				cprint(POP_Y+4, POP_X+4, " (7 - 20 cycles)");
+				cprint(POP_Y+5, POP_X+4, "Current: ");
+				dprint(POP_Y+5, POP_X+14, a64.t_rct, 4, 0);
+				cprint(POP_Y+7, POP_X+4, "New: ");
+				rct = getval(POP_Y+7, POP_X+12, 0);
+				amd64_tweak(rwt, wrt, ref,en2t, rct, rrd, rwqb, wr);
+				break;
+
+			case 8:
+				popclear();
+				//Active-to-Active RAS Delay
+				cprint(POP_Y+3, POP_X+4, "RAS-RAS Delay: ");
+				cprint(POP_Y+4, POP_X+4, " (2 - 4 cycles)");
+				cprint(POP_Y+5, POP_X+4, "Current: ");
+				dprint(POP_Y+5, POP_X+14, a64.t_rrd, 2, 0);
+				cprint(POP_Y+7, POP_X+4, "New: ");
+				rrd = getval(POP_Y+7, POP_X+12, 0);
+				amd64_tweak(rwt, wrt, ref,en2t, rct, rrd, rwqb, wr);
+				break;
+
+			case 9:
+				popclear();
+				//Write recovery time
+				cprint(POP_Y+3, POP_X+4, "Write Recovery: ");
+				cprint(POP_Y+4, POP_X+4, " (2 - 3 cycles)");
+				cprint(POP_Y+5, POP_X+4, "Current: ");
+				dprint(POP_Y+5, POP_X+14, a64.t_wr, 2, 0);
+				cprint(POP_Y+7, POP_X+4, "New: ");
+				wr = getval(POP_Y+7, POP_X+12, 0);
+				amd64_tweak(rwt, wrt, ref,en2t, rct, rrd, rwqb, wr);
+				break;
+
+			case 11:
+			case 57:
+				flag++;
+				/* 0/CR - Cancel */
+				break;
+			}
+		}
+	}
+}
+
+void get_option()
+{
+	int cas =0, rp=0, rcd=0, ras=0, sflag = 0 ;
+
+	while(!sflag)
+	{
+		switch(get_key())
+		{
+		case 2:
+			popclear();
+			cas = get_cas();
+			popclear();
+
+			cprint(POP_Y+3, POP_X+8, "tRCD: ");
+			rcd = getval(POP_Y+3, POP_X+15, 0);
+			popclear();
+
+			cprint(POP_Y+3, POP_X+8, "tRP: ");
+			rp = getval(POP_Y+3, POP_X+15, 0);
+			popclear();
+
+			cprint(POP_Y+3, POP_X+8, "tRAS: ");
+			ras = getval(POP_Y+3, POP_X+15, 0);
+			popclear();
+			change_timing(cas, rcd, rp, ras);
+			break;
+
+		case 3:
+			popclear();
+			cas = get_cas();
+			change_timing(cas, 0, 0, 0);
+			sflag++;
+			break;
+
+		case 4:
+			popclear();
+			cprint(POP_Y+3, POP_X+8, "tRCD: ");
+			rcd =getval(POP_Y+3, POP_X+15, 0);
+			change_timing(0, rcd, 0, 0);
+			sflag++;
+			break;
+
+		case 5:
+			popclear();
+			cprint(POP_Y+3, POP_X+8, "tRP: ");
+			rp =getval(POP_Y+3, POP_X+15, 0);
+			change_timing(0, 0, rp, 0);
+			sflag++;
+			break;
+
+		case 6:
+			popclear();
+			cprint(POP_Y+3, POP_X+8, "tRAS: ");
+			ras =getval(POP_Y+3, POP_X+15, 0);
+			change_timing(0, 0, 0, ras);
+			sflag++;
+			break;
+
+		case 7:
+			popclear();
+			amd64_option();
+			sflag++;
+			popclear();
+			break;
+
+		case 8:
+			break;
+
+		case 11:
+		case 57:
+			sflag++;
+			/* 0/CR - Cancel */
+			break;
+		}
+	}
+}
+
+void get_option_1()
+{
+	int rp=0, rcd=0, ras=0, sflag = 0 ;
+
+	while(!sflag)
+	{
+		switch(get_key())
+		{
+		case 2:
+			popclear();
+			cprint(POP_Y+3, POP_X+8, "tRCD: ");
+			rcd = getval(POP_Y+3, POP_X+15, 0);
+			popclear();
+
+			cprint(POP_Y+3, POP_X+8, "tRP: ");
+			rp = getval(POP_Y+3, POP_X+15, 0);
+			popclear();
+
+			cprint(POP_Y+3, POP_X+8, "tRAS: ");
+			ras = getval(POP_Y+3, POP_X+15, 0);
+			popclear();
+			change_timing(0, rcd, rp, ras);
+			break;
+
+		case 3:
+			popclear();
+			cprint(POP_Y+3, POP_X+8, "tRCD: ");
+			rcd =getval(POP_Y+3, POP_X+15, 0);
+			change_timing(0, rcd, 0, 0);
+			break;
+
+		case 4:
+			popclear();
+			cprint(POP_Y+3, POP_X+8, "tRP: ");
+			rp =getval(POP_Y+3, POP_X+15, 0);
+			change_timing(0, 0, rp, 0);
+			break;
+
+		case 5:
+			popclear();
+			cprint(POP_Y+3, POP_X+8, "tRAS: ");
+			ras =getval(POP_Y+3, POP_X+15, 0);
+			change_timing(0, 0, 0, ras);
+			break;
+
+		case 6:
+			popclear();
+			amd64_option();
+			sflag++;
+			popclear();
+			break;
+
+		case 7:
+			break;
+
+		case 11:
+		case 57:
+			sflag++;
+			/* 0/CR - Cancel */
+			break;
+		}
+	}
+}
+
+
+void get_menu(void)
+{
+	int menu ;
+
+	find_memctr();
+
+	switch(ctrl)
+	{
+	case 0: menu = 2;	break;
+	case 1:
+	case 2:
+	case 3:
+	case 4:	menu = 0;	break;
+	case 5: menu = 1;	break;
+	case 6: menu = 0;	break;
+	default: menu = -1;	break;
+	}
+
+	if (menu == -1)
+	{
+		popclear();
+	}
+	else if (menu == 0)
+	{
+		cprint(POP_Y+1, POP_X+2, "Modify Timing:");
+		cprint(POP_Y+3, POP_X+5, "(1) Modify All   ");
+		cprint(POP_Y+4, POP_X+5, "(2) Modify tCAS  ");
+		cprint(POP_Y+5, POP_X+5, "(3) Modify tRCD  ");
+		cprint(POP_Y+6, POP_X+5, "(4) Modify tRP   ");
+		cprint(POP_Y+7, POP_X+5, "(5) Modify tRAS  ");
+		cprint(POP_Y+8, POP_X+5, "(0) Cancel");
+		wait_keyup();
+	 	get_option();
+	}
+	else if (menu == 1)
+	{
+		cprint(POP_Y+1, POP_X+2, "Modify Timing:");
+		cprint(POP_Y+3, POP_X+5, "(1) Modify All   ");
+		cprint(POP_Y+4, POP_X+5, "(2) Modify tRCD  ");
+		cprint(POP_Y+5, POP_X+5, "(3) Modify tRP   ");
+		cprint(POP_Y+6, POP_X+5, "(4) Modify tRAS  ");
+		cprint(POP_Y+7, POP_X+5, "(0) Cancel");
+		wait_keyup();
+	 	get_option();
+	}
+	else  // AMD64 special menu
+	{
+		cprint(POP_Y+1, POP_X+2, "Modify Timing:");
+		cprint(POP_Y+3, POP_X+5, "(1) Modify All   ");
+		cprint(POP_Y+4, POP_X+5, "(2) Modify tRCD  ");
+		cprint(POP_Y+5, POP_X+5, "(3) Modify tRP   ");
+		cprint(POP_Y+6, POP_X+5, "(4) Modify tRAS  ");
+		cprint(POP_Y+7, POP_X+5, "(5) AMD64 Options");
+		cprint(POP_Y+8, POP_X+5, "(0) Cancel");
+		wait_keyup();
+	 	get_option_1();
+	}
+}
+
+int get_cas(void)
+{
+	int i852=0, cas=0;
+	ulong drc, ddr;
+	long *ptr;
+
+	switch(ctrl)
+	{
+	case 0: ddr = 1; break;
+	case 1:
+	case 2:
+	case 3:	ddr = 1; break;
+	case 4:
+		pci_conf_read( 0, 0, 0, 0x44, 4, &ddr);
+		ddr &= 0xFFFFC000;
+		ptr=(long*)(ddr+0x120);
+		drc = *ptr;
+
+		if ((drc & 3) == 2) ddr = 2;
+		else ddr = 1;
+		break;
+	case 5: ddr = 2; break;
+	case 6: ddr = 1; i852 = 1; break;
+	default: ddr = 1;
+	}
+
+	if (ddr == 1)
+	{
+		cprint(POP_Y+3, POP_X+8, "tCAS:  ");
+		cprint(POP_Y+5, POP_X+8, "(1) CAS 2.5 ");
+		cprint(POP_Y+6, POP_X+8, "(2) CAS 2   ");
+		if(!i852) {
+			cprint(POP_Y+7, POP_X+8, "(3) CAS 3   ");
+		}
+		cas = getval(POP_Y+3, POP_X+15, 0);
+	}
+	else if (ddr == 2)
+	{
+		cprint(POP_Y+3, POP_X+8, "tCAS:  ");
+		cprint(POP_Y+5, POP_X+8, "(1) CAS 4 ");
+		cprint(POP_Y+6, POP_X+8, "(2) CAS 3 ");
+		cprint(POP_Y+7, POP_X+8, "(3) CAS 5 ");
+		cas = getval(POP_Y+3, POP_X+15, 0);
+	}
+	else
+	{
+		cas = -1;
+	}
+
+	popclear();
+	return (cas);
+}
+
+/////////////////////////////////////////////////////////
+// here we go for the exciting timing change part...   //
+/////////////////////////////////////////////////////////
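+//
+// All of the change_timing_* routines below use the same idiom to rewrite
+// a bit-field inside a chipset register without touching the other bits:
+//     int1 = reg | MASK;        // force every bit of the field to 1
+//     reg  = int1 ^ PATTERN;    // clear selected bits, leaving the field
+//                               // equal to MASK & ~PATTERN
+// e.g. with MASK=0x70 and PATTERN=0x50 the field ends up as 0x20.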
+
+void change_timing_i852(int cas, int rcd, int rp, int ras) {
+
+	ulong dramtlr;
+	ulong int1, int2;
+
+	pci_conf_read(0, 0, 1, 0x60, 4, &dramtlr);
+
+	// CAS Latency (tCAS)
+	int1 = dramtlr & 0xFF9F;
+	if      (cas == 2) { int2 = int1 ^ 0x20; }
+	else if (cas == 1) { int2 = int1; }
+	else		   { int2 = dramtlr; }
+
+
+	// RAS-To-CAS (tRCD)
+	int1 = int2 & 0xFFF3;
+	if      (rcd == 2) { int2 = int1 ^ 0x8; }
+	else if (rcd == 3) { int2 = int1 ^ 0x4; }
+	else if (rcd == 4) { int2 = int1; }
+	// else		   { int2 = int2; }
+
+
+	// RAS Precharge (tRP)
+	int1 = int2 & 0xFFFC;
+	if      (rp == 2) { int2 = int1 ^ 0x2; }
+	else if (rp == 3) { int2 = int1 ^ 0x1; }
+	else if (rp == 4) { int2 = int1; }
+	// else		  { int2 = int2; }
+
+
+	// RAS Active to precharge (tRAS)
+	int1 = int2 & 0xF9FF;
+	if      (ras == 5)  { int2 = int1 ^ 0x0600; }
+	else if (ras == 6)  { int2 = int1 ^ 0x0400; }
+	else if (ras == 7)  { int2 = int1 ^ 0x0200; }
+	else if (ras == 8)  { int2 = int1; }
+	// else		    { int2 = int2; }
+
+	pci_conf_write(0, 0, 1, 0x60, 4, int2);
+	__delay(500);
+}
+
+void change_timing_i925(int cas, int rcd, int rp, int ras)
+{
+	ulong int1, dev0, temp;
+	long *ptr;
+
+	//read MMRBAR
+	pci_conf_read( 0, 0, 0, 0x44, 4, &dev0);
+	dev0 &= 0xFFFFC000;
+
+	ptr=(long*)(dev0+0x114);
+	temp = *ptr;
+
+	// RAS-To-CAS (tRCD)
+	int1 = temp | 0x70;
+	if      (rcd == 2) { temp = int1 ^ 0x70; }
+	else if (rcd == 3) { temp = int1 ^ 0x60; }
+	else if (rcd == 4) { temp = int1 ^ 0x50; }
+	else if (rcd == 5) { temp = int1 ^ 0x40; }
+	// else		   { temp = temp;}
+
+	//RAS precharge (tRP)
+	int1 = temp | 0x7;
+	if      (rp == 2) { temp = int1 ^ 0x7; }
+	else if (rp == 3) { temp = int1 ^ 0x6; }
+	else if (rp == 4) { temp = int1 ^ 0x5; }
+	else if (rp == 5) { temp = int1 ^ 0x4; }
+	// else		  { temp = temp;}
+
+	if (mem_ctr[ctrl].device == 0x2770 )	// Lakeport?
+	{
+		// RAS Active to precharge (tRAS)
+		int1 = temp | 0xF80000;	// bits 23:19
+		if      (ras == 4)  { temp = int1 ^ 0xD80000; }
+		else if (ras == 5)  { temp = int1 ^ 0xD00000; }
+		else if (ras == 6)  { temp = int1 ^ 0xC80000; }
+		else if (ras == 7)  { temp = int1 ^ 0xC00000; }
+		else if (ras == 8)  { temp = int1 ^ 0xB80000; }
+		else if (ras == 9)  { temp = int1 ^ 0xB00000; }
+		else if (ras == 10) { temp = int1 ^ 0xA80000; }
+		else if (ras == 11) { temp = int1 ^ 0xA00000; }
+		else if (ras == 12) { temp = int1 ^ 0x980000; }
+		else if (ras == 13) { temp = int1 ^ 0x900000; }
+		else if (ras == 14) { temp = int1 ^ 0x880000; }
+		else if (ras == 15) { temp = int1 ^ 0x800000; }
+		// else		    { temp = temp;}
+	}
+	else
+	{
+		// RAS Active to precharge (tRAS)
+		int1 = temp | 0xF00000;	// bits 23:20
+		if      (ras == 4)  { temp = int1 ^ 0xB00000; }
+		else if (ras == 5)  { temp = int1 ^ 0xA00000; }
+		else if (ras == 6)  { temp = int1 ^ 0x900000; }
+		else if (ras == 7)  { temp = int1 ^ 0x800000; }
+		else if (ras == 8)  { temp = int1 ^ 0x700000; }
+		else if (ras == 9)  { temp = int1 ^ 0x600000; }
+		else if (ras == 10) { temp = int1 ^ 0x500000; }
+		else if (ras == 11) { temp = int1 ^ 0x400000; }
+		else if (ras == 12) { temp = int1 ^ 0x300000; }
+		else if (ras == 13) { temp = int1 ^ 0x200000; }
+		else if (ras == 14) { temp = int1 ^ 0x100000; }
+		else if (ras == 15) { temp = int1 ^ 0x000000; }
+		// else		    { temp = temp;}
+	}
+
+	// CAS Latency (tCAS)
+	int1 = temp | 0x0300;
+	if      (cas == 1) { temp = int1 ^ 0x200; }   // cas 2.5
+	else if (cas == 2) { temp = int1 ^ 0x100; }
+	else if (cas == 3) { temp = int1 ^ 0x300; }
+	// else		   { temp = temp;}
+
+	*ptr = temp;
+	__delay(500);
+	return;
+}
+
+void change_timing_Lakeport(int cas, int rcd, int rp, int ras)
+{
+	ulong int1, dev0, temp;
+	long *ptr;
+
+	//read MMRBAR
+	pci_conf_read( 0, 0, 0, 0x44, 4, &dev0);
+	dev0 &= 0xFFFFC000;
+
+	ptr=(long*)(dev0+0x114);
+	temp = *ptr;
+
+	// RAS-To-CAS (tRCD)
+	int1 = temp | 0x70;
+	if      (rcd == 2) { temp = int1 ^ 0x70; }
+	else if (rcd == 3) { temp = int1 ^ 0x60; }
+	else if (rcd == 4) { temp = int1 ^ 0x50; }
+	else if (rcd == 5) { temp = int1 ^ 0x40; }
+	// else		   { temp = temp;}
+
+	//RAS precharge (tRP)
+	int1 = temp | 0x7;
+	if      (rp == 2) { temp = int1 ^ 0x7; }
+	else if (rp == 3) { temp = int1 ^ 0x6; }
+	else if (rp == 4) { temp = int1 ^ 0x5; }
+	else if (rp == 5) { temp = int1 ^ 0x4; }
+	// else		  { temp = temp;}
+
+
+	// CAS Latency (tCAS)
+	int1 = temp | 0x0300;
+	if      (cas == 1) { temp = int1 ^ 0x200; }   // cas 2.5
+	else if (cas == 2) { temp = int1 ^ 0x100; }
+	else if (cas == 3) { temp = int1 ^ 0x300; }
+	// else		   { temp = temp;}
+
+	*ptr = temp;
+	__delay(500);
+	return;
+}
+
+void change_timing_i875(int cas, int rcd, int rp, int ras){
+
+	ulong int1, dev6, temp;
+	long *ptr;
+
+	/* Read the MMR Base Address & Define the pointer from the BAR6 overflow register */
+	pci_conf_read( 0, 6, 0, 0x10, 4, &dev6);
+
+	ptr=(long*)(dev6+0x60);
+
+	temp = *ptr;
+
+	// RAS-To-CAS (tRCD)
+	int1 = temp | 0xC;
+	if      (rcd == 2) { temp = int1 ^ 0x4; }
+	else if (rcd == 3) { temp = int1 ^ 0x8; }
+	else if (rcd == 4) { temp = int1 ^ 0xC; }
+	else if (rcd == 5) { temp = int1 ^ 0xC; }
+	// else		   { temp = temp;}
+
+
+	//RAS precharge (tRP)
+	int1 = temp | 0x3;
+	if      (rp == 2) { temp = int1 ^ 0x1; }
+	else if (rp == 3) { temp = int1 ^ 0x2; }
+	else if (rp == 4) { temp = int1 ^ 0x3; }
+	else if (rp == 5) { temp = int1 ^ 0x3; }
+	// else		  { temp = temp;}
+
+
+	// RAS Active to precharge (tRAS)
+	int1 = temp | 0x380;
+	if      (ras == 5)  { temp = int1 ^ 0x100; }
+	else if (ras == 6)  { temp = int1 ^ 0x180; }
+	else if (ras == 7)  { temp = int1 ^ 0x200; }
+	else if (ras == 8)  { temp = int1 ^ 0x280; }
+	else if (ras == 9)  { temp = int1 ^ 0x300; }
+	else if (ras == 10) { temp = int1 ^ 0x380; }
+	// else		    { temp = temp;}
+
+	// CAS Latency (tCAS)
+	int1 = temp | 0x60;
+	if      (cas == 1) { temp = int1 ^ 0x60; }   // cas 2.5
+	else if (cas == 2) { temp = int1 ^ 0x40; }
+	else if (cas == 3) { temp = int1 ^ 0x20; }
+	// else		   { temp = temp; }
+
+	*ptr = temp;
+	__delay(500);
+	return;
+}
+
+
+void change_timing_nf2(int cas, int rcd, int rp, int ras) {
+
+	ulong dramtlr, dramtlr2;
+	ulong int1, int2;
+
+	pci_conf_read(0, 0, 1, 0x90, 4, &dramtlr);
+	pci_conf_read(0, 0, 1, 0xA0, 4, &dramtlr2);
+
+
+	// CAS Latency (tCAS)
+	int1 = dramtlr2 | 0x0070;
+	if      (cas == 1) { int2 = int1 ^ 0x10; }  // cas = 2.5
+	else if (cas == 2) { int2 = int1 ^ 0x50; }
+	else if (cas == 3) { int2 = int1 ^ 0x40; }
+	else		   { int2 = dramtlr2; }
+
+	pci_conf_write(0, 0, 1, 0xA0, 4, int2);
+
+	// RAS-To-CAS (tRCD)
+
+	int1 = dramtlr | 0x700000;
+	if      (rcd == 2) { int2 = int1 ^ 0x500000; }
+	else if (rcd == 3) { int2 = int1 ^ 0x400000; }
+	else if (rcd == 4) { int2 = int1 ^ 0x300000; }
+	else if (rcd == 5) { int2 = int1 ^ 0x200000; }
+	else if (rcd == 6) { int2 = int1 ^ 0x100000; }
+	else		   { int2 = dramtlr;}
+
+
+	// RAS Precharge (tRP)
+	int1 = int2 | 0x70000000;
+	if      (rp == 2) { int2 = int1 ^ 0x50000000; }
+	else if (rp == 3) { int2 = int1 ^ 0x40000000; }
+	else if (rp == 4) { int2 = int1 ^ 0x30000000; }
+	else if (rp == 5) { int2 = int1 ^ 0x20000000; }
+	else if (rp == 6) { int2 = int1 ^ 0x10000000; }
+	// else		  { int2 = int2;}
+
+
+	// RAS Active to precharge (tRAS)
+
+	int1 = int2 | 0x78000;
+	if      (ras == 4)  { int2 = int1 ^ 0x58000; }
+	else if (ras == 5)  { int2 = int1 ^ 0x50000; }
+	else if (ras == 6)  { int2 = int1 ^ 0x48000; }
+	else if (ras == 7)  { int2 = int1 ^ 0x40000; }
+	else if (ras == 8)  { int2 = int1 ^ 0x38000; }
+	else if (ras == 9)  { int2 = int1 ^ 0x30000; }
+	else if (ras == 10) { int2 = int1 ^ 0x28000; }
+	else if (ras == 11) { int2 = int1 ^ 0x20000; }
+	else if (ras == 12) { int2 = int1 ^ 0x18000; }
+	else if (ras == 13) { int2 = int1 ^ 0x10000; }
+	else if (ras == 14) { int2 = int1 ^ 0x08000; }
+	// else		    { int2 = int2;}
+
+
+	pci_conf_write(0, 0, 1, 0x90, 4, int2);
+	__delay(500);
+}
+
+
+void change_timing_amd64(int cas, int rcd, int rp, int ras) {
+
+	ulong dramtlr;
+	ulong int1= 0x0;
+
+	pci_conf_read(0, 24, 2, 0x88, 4, &dramtlr);
+
+	// RAS-To-CAS (tRCD)
+	int1 = dramtlr | 0x7000;
+	if      (rcd == 2) { dramtlr = int1 ^ 0x5000; }
+	else if (rcd == 3) { dramtlr = int1 ^ 0x4000; }
+	else if (rcd == 4) { dramtlr = int1 ^ 0x3000; }
+	else if (rcd == 5) { dramtlr = int1 ^ 0x2000; }
+	else if (rcd == 6) { dramtlr = int1 ^ 0x1000; }
+	else if (rcd == 1) { dramtlr = int1 ^ 0x6000; }
+	// else		   { dramtlr = dramtlr;}
+
+
+	//RAS precharge (tRP)
+	int1 = dramtlr | 0x7000000;
+	if      (rp == 2) { dramtlr = int1 ^ 0x5000000; }
+	else if (rp == 3) { dramtlr = int1 ^ 0x4000000; }
+	else if (rp == 1) { dramtlr = int1 ^ 0x6000000; }
+	else if (rp == 4) { dramtlr = int1 ^ 0x3000000; }
+	else if (rp == 5) { dramtlr = int1 ^ 0x2000000; }
+	else if (rp == 6) { dramtlr = int1 ^ 0x1000000; }
+	// else		  { dramtlr = dramtlr;}
+
+
+	// RAS Active to precharge (tRAS)
+	int1 = dramtlr | 0xF00000;
+	if      (ras == 5)  { dramtlr = int1 ^ 0xA00000; }
+	else if (ras == 6)  { dramtlr = int1 ^ 0x900000; }
+	else if (ras == 7)  { dramtlr = int1 ^ 0x800000; }
+	else if (ras == 8)  { dramtlr = int1 ^ 0x700000; }
+	else if (ras == 9)  { dramtlr = int1 ^ 0x600000; }
+	else if (ras == 10) { dramtlr = int1 ^ 0x500000; }
+	else if (ras == 11) { dramtlr = int1 ^ 0x400000; }
+	else if (ras == 12) { dramtlr = int1 ^ 0x300000; }
+	else if (ras == 13) { dramtlr = int1 ^ 0x200000; }
+	else if (ras == 14) { dramtlr = int1 ^ 0x100000; }
+	// else		    { dramtlr = dramtlr;}
+
+
+	// CAS Latency (tCAS)
+	int1 = dramtlr | 0x7;	// some changes will cause the system to hang; tried Draminit to no avail
+	if      (cas == 1) { dramtlr = int1 ^ 0x2; }   // cas 2.5
+	else if (cas == 2) { dramtlr = int1 ^ 0x6; }
+	else if (cas == 3) { dramtlr = int1 ^ 0x5; }
+	else if (cas == 4) { dramtlr = int1 ^ 0x7; } //cas 1.5 on a64
+	// else		   { dramtlr = dramtlr; }
+
+//	pci_conf_read(0, 24, 2, 0x90, 4, &dramcr);// use dram init
+	pci_conf_write(0, 24, 2, 0x88, 4, dramtlr);
+	__delay(500);
+
+////////////////////////////////////////////////////////////////
+// tried using draminit as well, but it did not work
+}
+
+// copy from lib.c code to add delay to chipset timing modification
+void __delay(ulong loops)
+{
+	int d0;
+	__asm__ __volatile__(
+		"\tjmp 1f\n"
+		".align 16\n"
+		"1:\tjmp 2f\n"
+		".align 16\n"
+		"2:\tdecl %0\n\tjns 2b"
+		:"=&a" (d0)
+		:"0" (loops));
+}
+
+void amd64_tweak(int rwt, int wrt, int ref, int en2t, int rct, int rrd, int rwqb, int wr)
+{
+	ulong dramtlr;
+	ulong int1= 0x0;
+
+	pci_conf_read(0, 24, 2, 0x88, 4, &dramtlr);
+
+	// Row Cycle time
+	int1 = dramtlr | 0xF0;
+	if      (rct == 7 ) { dramtlr = int1 ^ 0xF0; }
+	else if (rct == 8 ) { dramtlr = int1 ^ 0xE0; }
+	else if (rct == 9 ) { dramtlr = int1 ^ 0xD0; }
+	else if (rct == 10) { dramtlr = int1 ^ 0xC0; }
+	else if (rct == 11) { dramtlr = int1 ^ 0xB0; }
+	else if (rct == 12) { dramtlr = int1 ^ 0xA0; }
+	else if (rct == 13) { dramtlr = int1 ^ 0x90; }
+	else if (rct == 14) { dramtlr = int1 ^ 0x80; }
+	else if (rct == 15) { dramtlr = int1 ^ 0x70; }
+	else if (rct == 16) { dramtlr = int1 ^ 0x60; }
+	else if (rct == 17) { dramtlr = int1 ^ 0x50; }
+	else if (rct == 18) { dramtlr = int1 ^ 0x40; }
+	else if (rct == 19) { dramtlr = int1 ^ 0x30; }
+	else if (rct == 20) { dramtlr = int1 ^ 0x20; }
+	// else		    { dramtlr = dramtlr;}
+
+	//Active-to-active RAS-RAS delay
+	int1 = dramtlr | 0x70000;
+	if      (rrd == 2) { dramtlr = int1 ^ 0x50000; } // 2 bus clocks
+	else if (rrd == 3) { dramtlr = int1 ^ 0x40000; } // 3 bus clocks
+	else if (rrd == 4) { dramtlr = int1 ^ 0x30000; } // 4 bus clocks
+	// else		   { dramtlr = dramtlr;}
+
+	//Write recovery time
+	int1 = dramtlr | 0x10000000;
+	if      (wr == 2) { dramtlr = int1 ^ 0x10000000; } // 2 bus clocks
+	else if (wr == 3) { dramtlr = int1 ^ 0x00000000; } // 3 bus clocks
+	// else		  { dramtlr = dramtlr;}
+
+	pci_conf_write(0, 24, 2, 0x88, 4, dramtlr);
+	__delay(500);
+	//////////////////////////////////////////////
+
+	pci_conf_read(0, 24, 2, 0x8C, 4, &dramtlr);
+
+	// Write-to read delay
+	int1 = dramtlr | 0x1;
+	if      (wrt == 2) { dramtlr = int1 ^ 0x0; }
+	else if (wrt == 1) { dramtlr = int1 ^ 0x1; }
+	// else		   { dramtlr = dramtlr;}
+
+	// Read-to Write delay
+	int1 = dramtlr | 0x70;
+	if      (rwt == 1) { dramtlr = int1 ^ 0x70; }
+	else if (rwt == 2) { dramtlr = int1 ^ 0x60; }
+	else if (rwt == 3) { dramtlr = int1 ^ 0x50; }
+	else if (rwt == 4) { dramtlr = int1 ^ 0x40; }
+	else if (rwt == 5) { dramtlr = int1 ^ 0x30; }
+	else if (rwt == 6) { dramtlr = int1 ^ 0x20; }
+	// else		   { dramtlr = dramtlr;}
+
+	//Refresh Rate
+	int1 = dramtlr | 0x1800;
+	if      (ref == 1) { dramtlr = int1 ^ 0x1800; } // 15.6us
+	else if (ref == 2) { dramtlr = int1 ^ 0x1000; } // 7.8us
+	else if (ref == 3) { dramtlr = int1 ^ 0x0800; } // 3.9us
+	// else		   { dramtlr = dramtlr;}
+
+	pci_conf_write(0, 24, 2, 0x8c, 4, dramtlr);
+	__delay(500);
+	/////////////////////////////////////
+
+	pci_conf_read(0, 24, 2, 0x90, 4, &dramtlr);
+
+	// Enable 2t command
+	int1 = dramtlr | 0x10000000;
+	if      (en2t == 2) { dramtlr = int1 ^ 0x00000000; } // 2T
+	else if (en2t == 1) { dramtlr = int1 ^ 0x10000000; } // 1T
+	// else		    { dramtlr = dramtlr;}
+
+	// Read Write queue bypass count
+	int1 = dramtlr | 0xC000;
+	if      (rwqb == 2)  { dramtlr = int1 ^ 0xC000; }
+	else if (rwqb == 4)  { dramtlr = int1 ^ 0x8000; }
+	else if (rwqb == 8)  { dramtlr = int1 ^ 0x4000; }
+	else if (rwqb == 16) { dramtlr = int1 ^ 0x0000; }
+	// else		     { dramtlr = dramtlr;}
+
+	pci_conf_write(0, 24, 2, 0x90, 4, dramtlr);
+	__delay(500);
+	restart();
+}
+
diff --git a/extra.h b/extra.h
new file mode 100644
index 0000000..9bd7045
--- /dev/null
+++ b/extra.h
@@ -0,0 +1,27 @@
+// This is the extra stuff added to memtest86+ from memtest.org
+// Code from Eric Nelson and Wee
+/* extra.h
+ *
+ * Released under version 2 of the Gnu Public License.
+ *
+ */
+
+#ifndef MEMTEST_EXTRA_H
+#define MEMTEST_EXTRA_H
+
+void change_timing(int cas, int rcd, int rp, int ras);
+void find_memctr(void);
+void disclaimer(void); 
+void get_option(void);
+void get_menu(void);
+void a64_parameter(void);
+int get_cas(void);
+void change_timing_i852(int cas, int rcd, int rp, int ras);
+void change_timing_i925(int cas, int rcd, int rp, int ras);
+void change_timing_i875(int cas, int rcd, int rp, int ras);
+void change_timing_nf2(int cas, int rcd, int rp, int ras);
+void change_timing_amd64(int cas, int rcd, int rp, int ras);
+void amd64_tweak(int rwt, int wrt, int ref, int en2t, int rct, int rrd, int rwqb, int wr);
+void __delay(ulong loops);
+
+#endif /* MEMTEST_EXTRA_H */
diff --git a/head.S b/head.S
new file mode 100644
index 0000000..5cc7a06
--- /dev/null
+++ b/head.S
@@ -0,0 +1,819 @@
+/*
+ *  linux/boot/head.S
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ *  head.S contains the 32-bit startup code.
+ *
+ *  1-Jan-96 Modified by Chris Brady for use as a boot/loader for MemTest-86.
+ *  Setup the memory management for flat non-paged linear addressing.
+ *  17 May 2004 : Added X86_PWRCAP for AMD64 (Memtest86+ - Samuel D.)
+ */
+
+.text
+#define __ASSEMBLY__
+#include "defs.h"
+#include "config.h"
+#include "test.h"
+
+	.code32
+	.globl startup_32
+startup_32:
+	cld
+	cli
+
+	/* Ensure I have a boot_stack pointer */
+	testl	%esp, %esp
+	jnz 0f
+	movl	$(LOW_TEST_ADR + _GLOBAL_OFFSET_TABLE_), %esp
+	leal	boot_stack_top@GOTOFF(%esp), %esp
+0:
+
+	/* Load the GOT pointer */
+	call	0f
+0:	popl	%ebx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-0b], %ebx
+
+	/* Pick the appropriate boot_stack address */
+	leal	boot_stack_top@GOTOFF(%ebx), %esp
+
+	/* Reload all of the segment registers */
+	leal	gdt@GOTOFF(%ebx), %eax
+	movl	%eax, 2 + gdt_descr@GOTOFF(%ebx)
+	lgdt	gdt_descr@GOTOFF(%ebx)
+	leal	flush@GOTOFF(%ebx), %eax
+	pushl	$KERNEL_CS
+	pushl	%eax
+	lret
+flush:	movl	$KERNEL_DS, %eax
+	movw	%ax, %ds
+	movw	%ax, %es
+	movw	%ax, %fs
+	movw	%ax, %gs
+	movw	%ax, %ss
+
+/*
+ *  Zero BSS
+ */
+	cmpl	$1, zerobss@GOTOFF(%ebx)
+	jnz	zerobss_done
+	xorl	%eax, %eax
+	leal	_bss@GOTOFF(%ebx), %edi
+	leal	_end@GOTOFF(%ebx), %ecx
+	subl	%edi, %ecx
+1:	movl	%eax, (%edi)
+	addl	$4, %edi
+	subl	$4, %ecx
+	jnz	1b
+	movl	$0, zerobss@GOTOFF(%ebx)
+zerobss_done:
+
+/*
+ * Setup an exception handler
+ */
+	leal	idt@GOTOFF(%ebx), %edi
+
+	leal	vec0@GOTOFF(%ebx), %edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx, %ax	/* selector = 0x0010 = cs */
+	movw	$0x8E00, %dx	/* interrupt gate - dpl=0, present */
+	movl	%eax, (%edi)
+	movl	%edx, 4(%edi)
+	addl	$8, %edi
+
+	leal	vec1@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec2@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec3@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec4@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec5@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec6@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec7@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec8@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec9@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec10@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec11@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec12@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec13@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec14@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec15@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec16@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec17@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec18@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	leal	vec19@GOTOFF(%ebx),%edx
+	movl	$(KERNEL_CS << 16),%eax
+	movw	%dx,%ax		   /* selector = 0x0010 = cs */
+	movw	$0x8E00,%dx	   /* interrupt gate - dpl=0, present */
+	movl	%eax,(%edi)
+	movl	%edx,4(%edi)
+	addl	$8,%edi
+
+	/* Now that it is initialized load the interrupt descriptor table */
+	leal	idt@GOTOFF(%ebx), %eax
+	movl	%eax, 2 + idt_descr@GOTOFF(%ebx)
+	lidt	idt_descr@GOTOFF(%ebx)
+
+	leal	_dl_start@GOTOFF(%ebx), %eax
+	call	*%eax
+
+	call	test_start
+	/* In case we return simulate an exception */
+	pushfl
+	pushl	%cs
+	call	0f
+0:	pushl	$0 /* error code */
+	pushl	$257 /* vector */
+	jmp	int_hand
+
+vec0:
+	pushl	$0 /* error code */
+	pushl	$0 /* vector */
+	jmp int_hand
+vec1:
+	pushl	$0 /* error code */
+	pushl	$1 /* vector */
+	jmp int_hand
+
+vec2:
+	pushl	$0 /* error code */
+	pushl	$2 /* vector */
+	jmp int_hand
+
+vec3:
+	pushl	$0 /* error code */
+	pushl	$3 /* vector */
+	jmp	int_hand
+
+vec4:
+	pushl	$0 /* error code */
+	pushl	$4 /* vector */
+	jmp	int_hand
+
+vec5:
+	pushl	$0 /* error code */
+	pushl	$5 /* vector */
+	jmp	int_hand
+
+vec6:
+	pushl	$0 /* error code */
+	pushl	$6 /* vector */
+	jmp	int_hand
+
+vec7:
+	pushl	$0 /* error code */
+	pushl	$7 /* vector */
+	jmp	int_hand
+
+vec8:
+	/* error code */
+	pushl	$8 /* vector */
+	jmp	int_hand
+
+vec9:
+	pushl	$0 /* error code */
+	pushl	$9 /* vector */
+	jmp int_hand
+
+vec10:
+	/* error code */
+	pushl	$10 /* vector */
+	jmp	int_hand
+
+vec11:
+	/* error code */
+	pushl	$11 /* vector */
+	jmp	int_hand
+
+vec12:
+	/* error code */
+	pushl	$12 /* vector */
+	jmp	int_hand
+
+vec13:
+	/* error code */
+	pushl	$13 /* vector */
+	jmp	int_hand
+
+vec14:
+	/* error code */
+	pushl	$14 /* vector */
+	jmp	int_hand
+
+vec15:
+	pushl	$0 /* error code */
+	pushl	$15 /* vector */
+	jmp	int_hand
+
+vec16:
+	pushl	$0 /* error code */
+	pushl	$16 /* vector */
+	jmp	int_hand
+
+vec17:
+	/* error code */
+	pushl	$17 /* vector */
+	jmp	int_hand
+
+vec18:
+	pushl	$0 /* error code */
+	pushl	$18 /* vector */
+	jmp	int_hand
+
+vec19:
+	pushl	$0 /* error code */
+	pushl	$19 /* vector */
+	jmp	int_hand
+
+int_hand:
+	pushl	%eax
+	pushl	%ebx
+	pushl	%ecx
+	pushl	%edx
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebp
+
+	/* original boot_stack pointer */
+	leal	20(%esp), %eax
+	pushl	%eax
+
+	pushl	%esp /* pointer to structure on the boot_stack */
+	pushl	%ds  
+	pushl	%ss 
+	call	inter
+	addl	$8, %esp
+
+	popl	%ebp
+	popl	%esi
+	popl	%edi
+	popl	%edx
+	popl	%ecx
+	popl	%ebx
+	popl	%eax
+	iret
+
+/*
+ * The interrupt descriptor table has room for 20 entries
+ */
+.align 4
+.word 0
+idt_descr:
+	.word 20*8-1	       # idt contains 20 entries
+	.long 0
+
+idt:
+	.fill 20,8,0	       # idt is uninitialized
+
+gdt_descr:
+	.word gdt_end - gdt - 1
+	.long 0
+
+.align 4
+.globl gdt, gdt_end
+gdt:
+	.quad 0x0000000000000000	/* NULL descriptor */
+	.quad 0x0000000000000000	/* not used */
+	.quad 0x00cf9a000000ffff	/* 0x10 main 4gb code at 0x000000 */
+	.quad 0x00cf92000000ffff	/* 0x18 main 4gb data at 0x000000 */
+
+	.word	0xFFFF				# 16bit 64KB - (0x10000*1 = 64KB)
+	.word	0				# base address = SETUPSEG
+	.byte	0x00, 0x9b			# code read/exec/accessed
+	.byte	0x00, 0x00			# granularity = bytes
+
+
+	.word	0xFFFF				# 16bit 64KB - (0x10000*1 = 64KB)
+	.word	0				# base address = SETUPSEG
+	.byte	0x00, 0x93			# data read/write/accessed
+	.byte	0x00, 0x00			# granularity = bytes
+
+gdt_end:
+
+.data
+
+.macro ptes64 start, count=64
+.quad \start + 0x0000000 + 0xE3
+.quad \start + 0x0200000 + 0xE3
+.quad \start + 0x0400000 + 0xE3
+.quad \start + 0x0600000 + 0xE3
+.quad \start + 0x0800000 + 0xE3
+.quad \start + 0x0A00000 + 0xE3
+.quad \start + 0x0C00000 + 0xE3
+.quad \start + 0x0E00000 + 0xE3
+.if \count-1
+ptes64 "(\start+0x01000000)",\count-1
+.endif
+.endm
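+/*
+ * Each ptes64 invocation emits eight 2MB page-directory entries with flags
+ * 0xE3 (present, writable, accessed, dirty, page-size) and then recurses,
+ * so the default count of 64 yields 512 entries = 1GB per table.  The four
+ * tables pd0-pd3 below therefore map the first 4GB with 2MB pages.
+ */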
+
+.macro maxdepth depth=1
+.if \depth-1
+maxdepth \depth-1
+.endif
+.endm
+
+maxdepth
+
+.balign 4096
+.globl pd0
+pd0:
+	ptes64 0x0000000000000000
+
+.balign 4096
+.globl pd1
+pd1:
+	ptes64 0x0000000040000000
+
+.balign 4096
+.globl pd2
+pd2:
+	ptes64 0x0000000080000000
+
+.balign 4096
+.globl pd3
+pd3:
+	ptes64 0x00000000C0000000
+
+.balign 4096
+.globl pdp
+pdp:
+	.long pd0 + 1
+	.long 0
+	.long pd1 + 1
+	.long 0
+
+	.long pd2 + 1
+	.long 0
+
+	.long pd3 + 1
+	.long 0
+.previous
+
+#define RSTART startup_32
+
+	.globl query_pcbios
+query_pcbios:
+	/* Save the caller save registers */
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebp
+	call	1f
+1:	popl	%ebx
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+
+	/* Compute the reloc address */
+	leal	RSTART@GOTOFF(%ebx), %esi
+
+	/* Fixup real code pointer */
+	movl	%esi, %eax
+	shrl	$4, %eax
+	movw	%ax, 2 + realptr@GOTOFF(%ebx)
+
+	/* Fixup protected code pointer */
+	leal	prot@GOTOFF(%ebx), %eax
+	movl	%eax, protptr@GOTOFF(%ebx)
+
+	/* Compute the gdt fixup */
+	movl	%esi, %eax
+	shll	$16, %eax	# Base low
+
+	movl	%esi, %ecx
+	shrl	$16, %ecx
+	andl	$0xff, %ecx
+
+	movl	%esi, %edx
+	andl	$0xff000000, %edx
+	orl	%edx, %ecx
+
+	/* Fixup the gdt */
+	andl	$0x0000ffff, REAL_CS + 0 + gdt@GOTOFF(%ebx)
+	orl	%eax,        REAL_CS + 0 + gdt@GOTOFF(%ebx)
+	andl	$0x00ffff00, REAL_CS + 4 + gdt@GOTOFF(%ebx)
+	orl	%ecx,        REAL_CS + 4 + gdt@GOTOFF(%ebx)
+	andl	$0x0000ffff, REAL_DS + 0 + gdt@GOTOFF(%ebx)
+	orl	%eax,        REAL_DS + 0 + gdt@GOTOFF(%ebx)
+	andl	$0x00ffff00, REAL_DS + 4 + gdt@GOTOFF(%ebx)
+	orl	%ecx,        REAL_DS + 4 + gdt@GOTOFF(%ebx)
+
+	/* Fixup the gdt_descr */
+	leal	gdt@GOTOFF(%ebx), %eax
+	movl	%eax, 2 + gdt_descr@GOTOFF(%ebx)
+
+	lidt	idt_real@GOTOFF(%ebx)
+
+	/* Don't disable the a20 line */
+
+	/* Load 16bit data segments, to ensure the segment limits are set */
+	movl	$REAL_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %ss
+	movl	%eax, %fs
+	movl	%eax, %gs
+
+	/* Compute the boot_stack base */
+	leal	boot_stack@GOTOFF(%ebx), %ecx
+	/* Compute the address of meminfo */
+	leal	mem_info@GOTOFF(%ebx), %edi
+
+	/* switch to 16bit mode */
+	ljmp	$REAL_CS, $1f - RSTART
+1:
+	.code16
+	/* Disable Paging and protected mode */
+	/* clear the PG & PE bits of CR0 */
+	movl	%cr0,%eax
+	andl	$~((1 << 31)|(1<<0)),%eax
+	movl	%eax,%cr0
+
+	/* make intersegment jmp to flush the processor pipeline
+	 * and reload %cs:%eip (to clear upper 16 bits of %eip).
+	 */
+	ljmp	*(realptr - RSTART)
+real:
+	/* we are in real mode now
+	 * set up the real mode segment registers : %ds, %ss, %es, %gs, %fs
+	 */
+	movw	%cs, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+	movw	%ax, %fs
+	movw	%ax, %gs
+	movw	%ax, %ss
+
+	/* Adjust the boot_stack pointer */
+	movl	%ecx, %eax
+	shrl	$4, %eax
+	movw	%ax, %ss
+	subl	%ecx, %esp
+
+	/* Save my base pointer */
+	pushl	%ebx
+
+	/* Setup %ds to point to my data area */
+	shrl	$4, %edi
+	movl	%edi, %ds
+
+	/* Enable interrupts or BIOSes go crazy */
+	sti
+
+# Get memory size (extended mem, kB)
+
+#define SMAP	0x534d4150
+
+	xorl	%eax, %eax
+	movl	%eax, (E88)
+	movl	%eax, (E801)
+	movl	%eax, (E820NR)
+
+# Try three different memory detection schemes.  First, try
+# e820h, which lets us assemble a memory map, then try e801h,
+# which returns a 32-bit memory size, and finally 88h, which
+# returns 0-64m
+
+# method E820H:
+# the memory map from hell.  e820h returns memory classified into
+# a whole bunch of different types, and allows memory holes and
+# everything.  We scan through this memory map and build a list
+# of the first 32 memory areas, which we return at [E820MAP].
+# This is documented at http://www.teleport.com/~acpi/acpihtml/topic245.htm
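+# Each e820 record copied into the buffer is 20 bytes: a 64-bit base
+# address, a 64-bit length and a 32-bit type (1 = usable RAM), which is
+# why %ecx is loaded with 20 below and %di advances by E820ENTRY_SIZE
+# for every accepted entry.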
+
+meme820:
+	xorl	%ebx, %ebx			# continuation counter
+	movw	$E820MAP, %di			# point into the whitelist
+						# so we can have the bios
+						# directly write into it.
+
+jmpe820:
+	movl	$0x0000e820, %eax		# e820, upper word zeroed
+	movl	$SMAP, %edx			# ascii 'SMAP'
+	movl	$20, %ecx			# size of the e820rec
+	pushw	%ds				# data record.
+	popw	%es
+	int	$0x15				# make the call
+	jc	bail820				# fall to e801 if it fails
+
+	cmpl	$SMAP, %eax			# check the return is `SMAP'
+	jne	bail820				# fall to e801 if it fails
+
+#	cmpl	$1, 16(%di)			# is this usable memory?
+#	jne	again820
+
+	# If this is usable memory, we save it by simply advancing %di by
+	# sizeof(e820rec).
+	#
+good820:
+	movb	(E820NR), %al			# up to 32 entries
+	cmpb	$E820MAX, %al
+	jnl	bail820
+
+	incb	(E820NR)
+	movw	%di, %ax
+	addw	$E820ENTRY_SIZE, %ax
+	movw	%ax, %di
+again820:
+	cmpl	$0, %ebx			# check to see if
+	jne	jmpe820				# %ebx is set to EOF
+bail820:
+
+
+# method E801H:
+# memory size is in 1k chunksizes, to avoid confusing loadlin.
+# we store the 0xe801 memory size in a completely different place,
+# because it will most likely be longer than 16 bits.
+
+meme801:
+	stc					# fix to work around buggy
+	xorw	%cx,%cx				# BIOSes which don't clear/set
+	xorw	%dx,%dx				# carry on pass/error of
+						# e801h memory size call
+						# or merely pass cx,dx though
+						# without changing them.
+	movw	$0xe801, %ax
+	int	$0x15
+	jc	mem88
+
+	cmpw	$0x0, %cx			# Kludge to handle BIOSes
+	jne	e801usecxdx			# which report their extended
+	cmpw	$0x0, %dx			# memory in AX/BX rather than
+	jne	e801usecxdx			# CX/DX.  The spec I have read
+	movw	%ax, %cx			# seems to indicate AX/BX
+	movw	%bx, %dx			# are more reasonable anyway...
+
+e801usecxdx:
+	andl	$0xffff, %edx			# clear sign extend
+	shll	$6, %edx			# and go from 64k to 1k chunks
+	movl	%edx, (E801)			# store extended memory size
+	andl	$0xffff, %ecx			# clear sign extend
+ 	addl	%ecx, (E801)			# and add lower memory into
+						# total size.
+
+# Ye Olde Traditional Methode.  Returns the memory size (up to 16mb or
+# 64mb, depending on the bios) in ax.
+mem88:
+
+	movb	$0x88, %ah
+	int	$0x15
+	movw	%ax, (E88)
+
+#ifdef APM_OFF
+# check for APM BIOS
+	movw	$0x5300, %ax    # APM BIOS installation check
+	xorw	%bx, %bx
+	int	$0x15
+	jc	done_apm_bios   # error -> no APM BIOS
+
+	cmpw	$0x504d, %bx    # check for "PM" signature
+	jne	done_apm_bios   # no signature -> no APM BIOS
+
+	movw	$0x5304, %ax    # Disconnect first just in case
+	xorw	%bx, %bx
+	int	$0x15           # ignore return code
+
+	movw	$0x5301, %ax    # Real Mode connect
+	xorw	%bx, %bx
+	int	$0x15
+	jc	done_apm_bios   # error
+
+	movw	$0x5308, %ax    # Disable APM
+	mov	$0xffff, %bx
+	xorw	%cx, %cx
+	int	$0x15
+
+done_apm_bios:
+#endif
+
+	/* O.k. the BIOS query is done; switch back to protected mode */
+	cli
+
+	/* Restore my saved variables */
+	popl	%ebx
+
+	/* Get a convenient %ds */
+	movw	%cs, %ax
+	movw	%ax, %ds
+
+	/* Load the global descriptor table */
+	addr32 lgdt	gdt_descr - RSTART
+
+	/* Turn on protected mode */
+	/* Set the PE bit in CR0 */
+	movl	%cr0,%eax
+	orl	$(1<<0),%eax
+	movl	%eax,%cr0
+
+	/* flush the prefetch queue, and reload %cs:%eip */
+	data32 ljmp	*(protptr - RSTART)
+prot:
+	.code32
+	/* Reload other segment registers */
+	movl	$KERNEL_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %fs
+	movl	%eax, %gs
+	movl	%eax, %ss
+
+	/* Adjust the boot_stack pointer */
+	leal	boot_stack@GOTOFF(%ebx), %eax
+	addl	%eax, %esp
+
+	/* Restore the caller saved registers */
+	popl	%ebp
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	movl	$1, %eax
+	ret
+
+realptr:
+	.word	real - RSTART
+	.word	0x0000
+protptr:
+	.long	0
+	.long	KERNEL_CS
+
+idt_real:
+	.word	0x400 - 1			# idt limit ( 256 entries)
+	.word	0, 0				# idt base = 0L
+
+/* _ap_trampoline_start is the entry point for cpus other than the
+ * bootstrap cpu. The code between _ap_trampoline_start to
+ * _ap_trampoline_protmode is copied to BootCodeStart(0x9000).
+ * The ljmp after turning on CR0.PE will jump to the
+ * relocatable code which usually resides at 0x10000 + _ap_trampoline_protmode.
+ *
+ * The trampoline code uses a temporary GDT. The entries of this temporary
+ * GDT must match the first few entries of the GDT used by the relocatable
+ * memtest code (see 'gdt' symbol in this file).
+ *
+ */
+	.globl _ap_trampoline_start
+	.globl _ap_trampoline_protmode
+	.code16
+_ap_trampoline_start:
+	lgdt    0x0 /* will be fixed up later, see smp.c:BootAP()*/
+	movl	%cr0, %eax
+	orl	$1, %eax
+	movl	%eax, %cr0
+	data32 ljmp    $KERNEL_CS, $_ap_trampoline_protmode
+_ap_trampoline_protmode:
+	.code32
+	movw	$KERNEL_DS, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+	movw	%ax, %fs
+	movw	%ax, %gs
+	movw	%ax, %ss
+	movl	$(LOW_TEST_ADR + _GLOBAL_OFFSET_TABLE_), %esp
+	leal	boot_stack_top@GOTOFF(%esp), %esp
+	pushl   $0
+	popf
+	call    startup_32
+	/* if we ever return, we'll just loop forever */
+	cli
+2:	hlt
+	jmp 2b	
+.data
+zerobss:	.long	1
+.previous
+.data
+.balign 16
+	.globl mem_info
+mem_info:
+	. = . + MEMINFO_SIZE
+.previous
+.bss
+.balign 16
+boot_stack:
+	.globl boot_stack
+	. = . + 4096
+boot_stack_top:
+	.globl boot_stack_top
+.previous
diff --git a/init.c b/init.c
new file mode 100644
index 0000000..84926e8
--- /dev/null
+++ b/init.c
@@ -0,0 +1,1035 @@
+/* init.c - MemTest-86  Version 3.6
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Chris Brady
+ * ----------------------------------------------------
+ * MemTest86+ V1.11 Specific code (GPL V2.0)
+ * By Samuel DEMEULEMEESTER, sdemeule@memtest.org
+ * http://www.x86-secret.com - http://www.memtest.org
+ */
+
+#include "stddef.h"
+#include "stdin.h"
+#include "cpuid.h"
+#include "test.h"
+#include "defs.h"
+#include "config.h"
+#include "smp.h"
+#include "io.h"
+
+extern struct tseq tseq[];
+extern short memsz_mode;
+extern int num_cpus;
+extern int found_cpus;
+
+/* Here we store all of the cpuid data */
+extern struct cpu_ident cpu_id;
+
+int l1_cache=0, l2_cache=0, l3_cache=0;
+int tsc_invariable = 0;
+ulong extclock;
+
+ulong memspeed(ulong src, ulong len, int iter);
+static void cpu_type(void);
+static int cpuspeed(void);
+static void get_cache_size();
+static void cpu_cache_speed();
+void get_cpuid();
+
+static void display_init(void)
+{
+	int i;
+	volatile char *pp;
+
+	serial_echo_init();
+        serial_echo_print("INE_SCROLL;24r"); /* Set scroll area row 7-23 */
+        serial_echo_print("");   /* Clear Screen */
+        serial_echo_print("");
+        serial_echo_print("");
+        serial_echo_print("");
+
+	/* Clear screen & set background to blue */
+	for(i=0, pp=(char *)(SCREEN_ADR); i<80*24; i++) {
+		*pp++ = ' ';
+		*pp++ = 0x17;
+	}
+
+	/* Make the name background red */
+	for(i=0, pp=(char *)(SCREEN_ADR+1); i<TITLE_WIDTH; i++, pp+=2) {
+		*pp = 0x47;
+	}
+
+	cprint(0, 0, "       Memtest-86 v4.0a      ");
+
+	/* Do reverse video for the bottom display line */
+	for(i=0, pp=(char *)(SCREEN_ADR+1+(24 * 160)); i<80; i++, pp+=2) {
+		*pp = 0x71;
+	}
+
+        serial_echo_print("");
+}
+
+/*
+ * Initialize test, setup screen and find out how much memory there is.
+ */
+void init(void)
+{
+	int i;
+
+	outb(0x8, 0x3f2);  /* Kill Floppy Motor */
+
+	/* Turn on cache */
+	set_cache(1);
+
+	/* Setup the display */
+	display_init();
+	cprint(1, COL_MID,"Pass   %");
+	cprint(2, COL_MID,"Test   %");
+	cprint(3, COL_MID,"Test #");
+	cprint(4, COL_MID,"Testing: ");
+	cprint(5, COL_MID,"Pattern: ");
+	cprint(1, 0, "CPU Clk :         ");
+	cprint(2, 0, "L1 Cache: Unknown ");
+	cprint(3, 0, "L2 Cache: Unknown ");
+     	cprint(4, 0, "L3 Cache:  None    ");
+     	cprint(5, 0, "Memory  :         ");
+     	cprint(6, 0, "------------------------------------------------------------------------------");
+	cprint(7, 0, "CPU:");
+	cprint(8, 0, "State:");
+	cprint(7, 39, "| CPUs_Started:     CPU_Select:   All");
+	cprint(8, 39, "| CPUs_Active:      CPUs_Found:   ");
+	for (i = 0; i <num_cpus; i++) {
+		dprint(7, 2*i+7, i, 1, 0);
+		cprint(8, 2*i+7, "S");
+	}
+	dprint(7, 54, num_cpus, 2, 0);
+	dprint(8, 72, found_cpus, 2, 0);
+     	cprint(9, 0, "------------------------------------------------------------------------------");
+	for(i=1; i < 6; i++) {
+		cprint(i, COL_MID-2, "| ");
+	}
+	cprint(LINE_INFO, 0,
+"Time:  0:00:00   Iterations:     Test_Sel: Std   Pass:     0   Errors:     0");
+	footer();
+
+     	aprint(5, 10, v->test_pages);
+
+        v->pass = 0;
+        v->msg_line = 0;
+        v->ecount = 0;
+        v->ecc_ecount = 0;
+	v->testsel = -1;
+	v->msg_line = LINE_SCROLL-1;
+	v->scroll_start = v->msg_line * 160;
+	v->erri.low_addr.page = 0x7fffffff;
+	v->erri.low_addr.offset = 0xfff;
+	v->erri.high_addr.page = 0;
+	v->erri.high_addr.offset = 0;
+	v->erri.min_bits = 32;
+	v->erri.max_bits = 0;
+	v->erri.min_bits = 32;
+	v->erri.max_bits = 0;
+	v->erri.maxl = 0;
+	v->erri.cor_err = 0;
+	v->erri.ebits = 0;
+	v->erri.hdr_flag = 0;
+	v->erri.tbits = 0;
+	for (i=0; tseq[i].msg != NULL; i++) {
+		tseq[i].errors = 0;
+	}
+
+	/* Get the cpu and cache information */
+	get_cpuid();
+
+	get_cache_size();
+
+	cpu_type();
+
+	cpu_cache_speed();
+
+	/* Record the start time */
+        asm __volatile__ ("rdtsc":"=a" (v->startl),"=d" (v->starth));
+        v->snapl = v->startl;
+        v->snaph = v->starth;
+	if (l1_cache == 0) { l1_cache = 66; }
+	if (l2_cache == 0) { l2_cache = 666; }
+	v->printmode=PRINTMODE_ADDRESSES;
+	v->numpatn=0;
+}
+
+/* Get cache sizes for most AMD and Intel CPUs, exceptions for old CPUs are
+ * handled in CPU detection */
+void get_cache_size()
+{
+	int i, j, n, size;
+	unsigned int v[4];
+	unsigned char *dp = (unsigned char *)v;
+	struct cpuid4_eax *eax = (struct cpuid4_eax *)&v[0];
+	struct cpuid4_ebx *ebx = (struct cpuid4_ebx *)&v[1];
+	struct cpuid4_ecx *ecx = (struct cpuid4_ecx *)&v[2];
+
+	switch(cpu_id.vend_id.char_array[0]) {
+	/* AMD Processors */
+	case 'A':
+		l1_cache = cpu_id.cache_info.amd.l1_i_sz;
+		l1_cache += cpu_id.cache_info.amd.l1_d_sz;
+		l2_cache = cpu_id.cache_info.amd.l2_sz;
+		l3_cache = cpu_id.cache_info.amd.l3_sz;
+     		l3_cache *= 512;
+		break;
+	case 'G':
+		/* Intel Processors */
+		l1_cache = 0;
+		l2_cache = 0;
+		l3_cache = 0;
+
+		/* Use CPUID(4) if it is available */
+		if (cpu_id.max_cpuid > 3) {
+
+		    /* figure out how many cache leaves */
+		    n = -1;
+		    do {
+			++n;
+			/* Do cpuid(4) loop to find out num_cache_leaves */
+			cpuid_count(4, n, &v[0], &v[1], &v[2], &v[3]);
+		    } while ((eax->ctype) != 0);
+
+		    /* loop through all of the leaves */
+		    for (i=0; i<n; i++) {
+			cpuid_count(4, i, &v[0], &v[1], &v[2], &v[3]);
+
+			/* Check for a valid cache type */
+			if (eax->ctype > 0 && eax->ctype < 4) {
+
+			    /* Compute the cache size */
+			    size = (ecx->number_of_sets + 1) *
+                          	  (ebx->coherency_line_size + 1) *
+                          	  (ebx->physical_line_partition + 1) *
+                          	  (ebx->ways_of_associativity + 1);
+			    size /= 1024;
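+			    /* Example: a hypothetical leaf reporting 64 sets,
+			     * a 64 byte line, 1 partition and 8 ways gives
+			     * 64 * 64 * 1 * 8 = 32768 bytes, i.e. 32K. */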
+
+			    switch (eax->level) {
+			    case 1:
+				l1_cache += size;
+				break;
+			    case 2:
+				l2_cache += size;
+				break;
+			    case 3:
+				l3_cache += size;
+				break;
+			    }
+			}
+		    }
+		    return;
+		}
+
+		/* No CPUID(4) so we use the older CPUID(2) method */
+		/* Get number of times to iterate */
+		cpuid(2, &v[0], &v[1], &v[2], &v[3]);
+		n = v[0] & 0xff;
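+		/* Each remaining byte of EAX/EBX/ECX/EDX is a one byte cache
+		 * descriptor; the switch below maps the descriptors it knows
+		 * about to the size (in KB) of the cache they describe. */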
+                for (i=0 ; i<n ; i++) {
+                    cpuid(2, &v[0], &v[1], &v[2], &v[3]);
+
+                    /* If bit 31 is set, this is an unknown format */
+                    for (j=0 ; j<3 ; j++) {
+                            if (v[j] & (1 << 31)) {
+                                    v[j] = 0;
+			    }
+		    }
+
+                    /* Byte 0 is level count, not a descriptor */
+                    for (j = 1 ; j < 16 ; j++) {
+			switch(dp[j]) {
+			case 0x6:
+			case 0xa:
+			case 0x66:
+				l1_cache += 8;
+				break;
+			case 0x8:
+			case 0xc:
+			case 0xd:
+			case 0x60:
+			case 0x67:
+				l1_cache += 16;
+				break;
+			case 0xe:
+				l1_cache += 24;
+				break;
+			case 0x9:
+			case 0x2c:
+			case 0x30:
+			case 0x68:
+				l1_cache += 32;
+				break;
+			case 0x39:
+			case 0x3b:
+			case 0x41:
+			case 0x79:
+				l2_cache += 128;
+				break;
+			case 0x3a:
+				l2_cache += 192;
+				break;
+			case 0x21:
+			case 0x3c:
+			case 0x3f:
+			case 0x42:
+			case 0x7a:
+			case 0x82:
+				l2_cache += 256;
+				break;
+			case 0x3d:
+				l2_cache += 384;
+				break;
+			case 0x3e:
+			case 0x43:
+			case 0x7b:
+			case 0x7f:
+			case 0x80:
+			case 0x83:
+			case 0x86:
+				l2_cache += 512;
+				break;
+			case 0x44:
+			case 0x78:
+			case 0x7c:
+			case 0x84:
+			case 0x87:
+				l2_cache += 1024;
+				break;
+			case 0x45:
+			case 0x7d:
+			case 0x85:
+				l2_cache += 2048;
+				break;
+			case 0x48:
+				l2_cache += 3072;
+				break;
+			case 0x4e:
+				l2_cache += 6144;
+				break;
+			case 0x23:
+			case 0xd0:
+				l3_cache += 512;
+				break;
+			case 0xd1:
+			case 0xd6:
+				l3_cache += 1024;
+				break;
+			case 0x25:
+			case 0xd2:
+			case 0xd7:
+			case 0xdc:
+			case 0xe2:
+				l3_cache += 2048;
+				break;
+			case 0x29:
+			case 0x46:
+			case 0x49:
+			case 0xd8:
+			case 0xdd:
+			case 0xe3:
+				l3_cache += 4096;
+				break;
+			case 0x4a:
+				l3_cache += 6144;
+				break;
+			case 0x47:
+			case 0x4b:
+			case 0xde:
+			case 0xe4:
+				l3_cache += 8192;
+				break;	
+			case 0x4c:
+			case 0xea:
+				l3_cache += 12288;
+				break;	
+			case 0x4d:
+				l3_cache += 16384;
+				break;	
+			case 0xeb:
+				l3_cache += 18432;
+				break;	
+			case 0xec:
+				l3_cache += 24576;
+				break;	
+			} /* end switch */
+		    } /* end for 1-16 */
+		} /* end for 0 - n */
+	}
+}
+
+/*
+ * Find CPU type
+ */
+void cpu_type(void)
+{
+	v->rdtsc = 0;
+	v->pae = 0;
+
+	/* See if we have pae support */
+	if (cpu_id.fid.bits.pae) {
+		v->pae = 1;
+	}
+
+	/* See if we have rdtsc instruction support */
+	if (cpu_id.fid.bits.tsc) {
+		v->rdtsc = 1;
+	}
+
+
+	/* If we can get a brand string use it, and we are done */
+	if (cpu_id.max_cpuid >= 4) {
+		cprint(0, COL_MID, cpu_id.brand_id.char_array);
+		return;
+	}
+
+	/* The brand string is not available so we need to figure out
+	 * what CPU we have */
+	switch(cpu_id.vend_id.char_array[0]) {
+	/* AMD Processors */
+	case 'A':
+		switch(cpu_id.vers.bits.family) {
+		case 4:
+			switch(cpu_id.vers.bits.model) {
+			case 3:
+				cprint(0, COL_MID, "AMD 486DX2");
+				break;
+			case 7:
+				cprint(0, COL_MID, "AMD 486DX2-WB");
+				break;
+			case 8:
+				cprint(0, COL_MID, "AMD 486DX4");
+				break;
+			case 9:
+				cprint(0, COL_MID, "AMD 486DX4-WB");
+				break;
+			case 14:
+				cprint(0, COL_MID, "AMD 5x86-WT");
+				break;
+			case 15:
+				cprint(0, COL_MID, "AMD 5x86-WB");
+				break;
+			}
+			/* Since we can't get CPU speed or cache info return */
+			return;
+		case 5:
+			switch(cpu_id.vers.bits.model) {
+			case 0:
+			case 1:
+			case 2:
+			case 3:
+				cprint(0, COL_MID, "AMD K5");
+				l1_cache = 8;
+				break;
+			case 6:
+			case 7:
+				cprint(0, COL_MID, "AMD K6");
+				break;
+			case 8:
+				cprint(0, COL_MID, "AMD K6-2");
+				break;
+			case 9:
+				cprint(0, COL_MID, "AMD K6-III");
+				break;
+			case 13: 
+				cprint(0, COL_MID, "AMD K6-III+"); 
+				break;
+			}
+			break;
+		case 6:
+
+			switch(cpu_id.vers.bits.model) {
+			case 1:
+				cprint(0, COL_MID, "AMD Athlon (0.25)");
+				break;
+			case 2:
+			case 4:
+				cprint(0, COL_MID, "AMD Athlon (0.18)");
+				break;
+			case 6:
+				if (l2_cache == 64) {
+					cprint(0, COL_MID, "AMD Duron (0.18)");
+				} else {
+					cprint(0, COL_MID, "Athlon XP (0.18)");
+				}
+				break;
+			case 8:
+			case 10:
+				if (l2_cache == 64) {
+					cprint(0, COL_MID, "AMD Duron (0.13)");
+				} else {
+					cprint(0, COL_MID, "Athlon XP (0.13)");
+				}
+				break;
+			case 3:
+			case 7:
+				cprint(0, COL_MID, "AMD Duron");
+				/* Duron stepping 0 CPUID for L2 is broken */
+				/* (AMD errata T13)*/
+				if (cpu_id.vers.bits.stepping == 0) { /* stepping 0 */
+					/* Hard code the right L2 size */
+					l2_cache = 64;
+				}
+				break;
+			}
+			break;
+
+			/* All AMD family values >= 10 have the Brand ID
+			 * feature so we don't need to find the CPU type */
+		}
+		break;
+
+	/* Intel or Transmeta Processors */
+	case 'G':
+		if ( cpu_id.vend_id.char_array[7] == 'T' ) { /* GenuineTMx86 */
+			if (cpu_id.vers.bits.family == 5) {
+				cprint(0, COL_MID, "TM 5x00");
+			} else if (cpu_id.vers.bits.family == 15) {
+				cprint(0, COL_MID, "TM 8x00");
+			}
+			l1_cache = cpu_id.cache_info.ch[3] + cpu_id.cache_info.ch[7];
+			l2_cache = (cpu_id.cache_info.ch[11]*256) + cpu_id.cache_info.ch[10];
+		} else {				/* GenuineIntel */
+			if (cpu_id.vers.bits.family == 4) {
+			switch(cpu_id.vers.bits.model) {
+			case 0:
+			case 1:
+				cprint(0, COL_MID, "Intel 486DX");
+				break;
+			case 2:
+				cprint(0, COL_MID, "Intel 486SX");
+				break;
+			case 3:
+				cprint(0, COL_MID, "Intel 486DX2");
+				break;
+			case 4:
+				cprint(0, COL_MID, "Intel 486SL");
+				break;
+			case 5:
+				cprint(0, COL_MID, "Intel 486SX2");
+				break;
+			case 7:
+				cprint(0, COL_MID, "Intel 486DX2-WB");
+				break;
+			case 8:
+				cprint(0, COL_MID, "Intel 486DX4");
+				break;
+			case 9:
+				cprint(0, COL_MID, "Intel 486DX4-WB");
+				break;
+			}
+			/* Since we can't get CPU speed or cache info return */
+			return;
+		}
+
+
+		switch(cpu_id.vers.bits.family) {
+		case 5:
+			switch(cpu_id.vers.bits.model) {
+			case 0:
+			case 1:
+			case 2:
+			case 3:
+			case 7:
+				cprint(0, COL_MID, "Pentium");
+				if (l1_cache == 0) {
+					l1_cache = 8;
+				}
+				break;
+			case 4:
+			case 8:
+				cprint(0, COL_MID, "Pentium-MMX");
+				if (l1_cache == 0) {
+					l1_cache = 16;
+				}
+				break;
+			}
+			break;
+		case 6:
+			switch(cpu_id.vers.bits.model) {
+			case 0:
+			case 1:
+				cprint(0, COL_MID, "Pentium Pro");
+				break;
+			case 3:
+			case 4:
+				cprint(0, COL_MID, "Pentium II");
+				break;
+			case 5:
+				if (l2_cache == 0) {
+					cprint(0, COL_MID, "Celeron");
+				} else {
+					cprint(0, COL_MID, "Pentium II");
+				}
+				break;
+			case 6:
+				  if (l2_cache == 128) {
+					cprint(0, COL_MID, "Celeron");
+				  } else {
+					cprint(0, COL_MID, "Pentium II");
+				  }
+				}
+				break;
+			case 7:
+			case 8:
+			case 11:
+				if (l2_cache == 128) {
+					cprint(0, COL_MID, "Celeron");
+				} else {
+					cprint(0, COL_MID, "Pentium III");
+				}
+				break;
+			case 9:
+				if (l2_cache == 512) {
+					cprint(0, COL_MID, "Celeron M (0.13)");
+				} else {
+					cprint(0, COL_MID, "Pentium M (0.13)");
+				}
+				break;
+     			case 10:
+				cprint(0, COL_MID, "Pentium III Xeon");
+				break;
+			case 12:
+				l1_cache = 24;
+				cprint(0, COL_MID, "Atom (0.045)");
+				break;					
+			case 13:
+				if (l2_cache == 1024) {
+					cprint(0, COL_MID, "Celeron M (0.09)");
+				} else {
+					cprint(0, COL_MID, "Pentium M (0.09)");
+				}
+				break;
+			case 14:
+				cprint(0, COL_MID, "Intel Core");
+				break;				
+			case 15:
+				if (l2_cache == 1024) {
+					cprint(0, COL_MID, "Pentium E");
+				} else {
+					cprint(0, COL_MID, "Intel Core 2");
+				}
+				break;
+			}
+			break;
+		case 15:
+			switch(cpu_id.vers.bits.model) {
+			case 0:
+			case 1:			
+			case 2:
+				if (l2_cache == 128) {
+					cprint(0, COL_MID, "Celeron");
+				} else {
+					cprint(0, COL_MID, "Pentium 4");
+				}
+				break;
+			case 3:
+			case 4:
+				if (l2_cache == 256) {
+					cprint(0, COL_MID, "Celeron (0.09)");
+				} else {
+					cprint(0, COL_MID, "Pentium 4 (0.09)");
+				}
+				break;
+			case 6:
+				cprint(0, COL_MID, "Pentium D (65nm)");
+				break;
+			default:
+				cprint(0, COL_MID, "Unknown Intel");
+ 				break;
+			break;
+		    }
+
+		}
+		break;
+
+	/* VIA/Cyrix/Centaur Processors with CPUID */
+	case 'C':
+		if ( cpu_id.vend_id.char_array[1] == 'e' ) { /* CentaurHauls */
+			l1_cache = cpu_id.cache_info.ch[3] + cpu_id.cache_info.ch[7];
+			l2_cache = cpu_id.cache_info.ch[11];
+			switch(cpu_id.vers.bits.family){
+			case 5:
+				cprint(0, COL_MID, "Centaur 5x86");
+				break;
+			case 6: // VIA C3
+				switch(cpu_id.vers.bits.model){
+				default:
+				    if (cpu_id.vers.bits.stepping < 8) {
+					cprint(0, COL_MID, "VIA C3 Samuel2");
+				    } else {
+					cprint(0, COL_MID, "VIA C3 Eden");
+				    }
+				break;
+				case 10:
+					cprint(0, COL_MID, "VIA C7 (C5J)");
+					l1_cache = 64;
+					l2_cache = 128;
+					break;
+				case 13:
+					cprint(0, COL_MID, "VIA C7 (C5R)");
+					l1_cache = 64;
+					l2_cache = 128;
+					break;
+				case 15:
+					cprint(0, COL_MID, "VIA Isaiah (CN)");
+					l1_cache = 64;
+					l2_cache = 128;
+					break;
+				}
+			}
+		} else {				/* CyrixInstead */
+			switch(cpu_id.vers.bits.family) {
+			case 5:
+				switch(cpu_id.vers.bits.model) {
+				case 0:
+					cprint(0, COL_MID, "Cyrix 6x86MX/MII");
+					break;
+				case 4:
+					cprint(0, COL_MID, "Cyrix GXm");
+					break;
+				}
+				return;
+
+			case 6: // VIA C3
+				switch(cpu_id.vers.bits.model) {
+				case 6:
+					cprint(0, COL_MID, "Cyrix III");
+					break;
+				case 7:
+					if (cpu_id.vers.bits.stepping < 8) {
+						cprint(0, COL_MID, "VIA C3 Samuel2");
+					} else {
+						cprint(0, COL_MID, "VIA C3 Ezra-T");
+					}
+					break;
+				case 8:
+					cprint(0, COL_MID, "VIA C3 Ezra-T");
+					break;
+				case 9:
+					cprint(0, COL_MID, "VIA C3 Nehemiah");
+					break;
+				}
+				// L1 = L2 = 64 KB from Cyrix III to Nehemiah
+				l1_cache = 64;
+				l2_cache = 64;
+				break;
+			}
+		}
+		break;
+	/* Unknown processor */
+	default:
+		/* Make a guess at the family */
+		switch(cpu_id.vers.bits.family) {
+		case 5:
+			cprint(0, COL_MID, "586");
+			break;
+		case 6:
+			cprint(0, COL_MID, "686");
+			break;
+		default:
+			cprint(0, COL_MID, "Unidentified Processor");
+		}
+	}
+}
+
+#define STEST_ADDR 0x100000	/* Measure memory speed starting at 1MB */
+
+/* Measure and display CPU and cache sizes and speeds */
+void cpu_cache_speed()
+{
+	int i, off = 10;
+	ulong speed;
+
+
+	/* Print CPU speed */
+	if ((speed = cpuspeed()) > 0) {
+		if (speed < 999499) {
+			speed += 50; /* for rounding */
+			cprint(1, off, "    . MHz");
+			dprint(1, off+1, speed/1000, 3, 1);
+			dprint(1, off+5, (speed/100)%10, 1, 0);
+		} else {
+			speed += 500; /* for rounding */
+			cprint(1, off, "      MHz");
+			dprint(1, off, speed/1000, 5, 0);
+		}
+		extclock = speed;
+	}
+
+	/* Print out L1 cache info */
+	/* To measure L1 cache speed we use a block size that is 1/4th */
+	/* of the total L1 cache size since half of it is for instructions */
+	if (l1_cache) {
+		cprint(2, 0, "L1 Cache:     K  ");
+		dprint(2, 11, l1_cache, 3, 0);
+		if ((speed=memspeed(STEST_ADDR, (l1_cache/4)*1024, 200))) {
+			cprint(2, 16, "       MB/s");
+			dprint(2, 16, speed, 6, 0);
+		}
+	}
+
+	/* Print out L2 cache info */
+	/* We measure the L2 cache speed by using a block size that is */
+	/* the size of the L1 cache.  We have to fudge if the L1 */
+	/* cache is bigger than the L2 */
+	if (l2_cache) {
+		cprint(3, 0, "L2 Cache:     K  ");
+		dprint(3, 10, l2_cache, 4, 0);
+
+		if (l2_cache < l1_cache) {
+			i = l1_cache / 4 + l2_cache / 4;
+		} else {
+			i = l1_cache;
+		}
+		if ((speed=memspeed(STEST_ADDR, i*1024, 200))) {
+			cprint(3, 16, "       MB/s");
+			dprint(3, 16, speed, 6, 0);
+		}
+	}
+	/* Print out L3 cache info */
+	/* We measure the L3 cache speed by using a block size that is */
+	/* 2X the size of the L2 cache. */
+
+	if (l3_cache) {
+		cprint(4, 0, "L3 Cache:     K  ");
+    		dprint(4, 10, l3_cache, 4, 0);
+    
+    		i = l2_cache*2;
+    
+    		if ((speed=memspeed(STEST_ADDR, i*1024, 150))) {
+    			cprint(4, 16, "       MB/s");
+    			dprint(4, 16, speed, 6, 0);
+    		}
+    	}
+}
+
+/* Measure and display memory speed, multitasked using all CPUs */
+ulong spd[MAX_CPUS];
+void get_mem_speed(int me, int ncpus)
+{
+	int i;
+	ulong speed=0;
+	ulong start, len;
+
+    	/* Determine memory speed.  To find the memory speed we use
+    	 * a block size that is the sum of all the L1, L2 & L3 caches
+	 * in all cpus * 6 */
+    	i = (l3_cache + l2_cache*ncpus + l1_cache*ncpus) * 6;
+
+	/* Make sure that we have enough memory to do the test */
+	/* If not use all we have */
+	if ((1 + (i * 2)) > (v->plim_upper << 2)) {
+		i = ((v->plim_upper <<2) - 1) / 2;
+	}
+	/* Divide up the memory block among the CPUs */
+	len = i * 1024 / ncpus;
+	start = STEST_ADDR + (len * me);
+	
+	barrier();
+	spd[me] = memspeed(start, len, 50);
+	barrier();
+	if (me == 0) {
+		for (i=0; i<ncpus; i++) {
+			speed += spd[i];
+		}
+		cprint(5, 16, "       MB/s");
+		dprint(5, 16, speed, 6, 0);
+	}
+}
+
+/* #define TICKS 5 * 11832 (count = 6376)*/
+/* #define TICKS (65536 - 12752) */
+#define TICKS 59659	/* 50 ms */
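+/* The 8254 timer runs at 1193182 Hz, so 50 ms is about 59659 counts. */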
+
+/* Returns CPU clock in khz */
+ulong stlow, sthigh;
+static int cpuspeed(void)
+{
+	int loops;
+	ulong end_low, end_high;
+
+	if (v->rdtsc == 0 ) {
+		return(-1);
+	}
+
+	/* Setup timer */
+	outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+	outb(0xb0, 0x43); 
+	outb(TICKS & 0xff, 0x42);
+	outb(TICKS >> 8, 0x42);
+
+	asm __volatile__ ("rdtsc":"=a" (stlow),"=d" (sthigh));
+
+	loops = 0;
+	do {
+		loops++;
+	} while ((inb(0x61) & 0x20) == 0);
+
+	asm __volatile__ (
+		"rdtsc\n\t" \
+		"subl stlow,%%eax\n\t" \
+		"sbbl sthigh,%%edx\n\t" \
+		:"=a" (end_low), "=d" (end_high)
+	);
+
+	/* Make sure we have a credible result */
+	if (loops < 4 || end_low < 50000) {
+		return(-1);
+	}
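+	/* end_low is the number of TSC cycles counted during the 50 ms
+	 * timer gate, so dividing by 50 gives cycles per millisecond,
+	 * which is numerically the clock rate in kHz. */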
+	v->clks_msec = end_low/50;
+/*
+	if (tsc_invariable) end_low = correct_tsc(end_low);
+*/
+	return(v->clks_msec);
+}
+
+/* Measure cache speed by copying a block of memory. */
+/* Returned value is KB per millisecond (displayed as MB/s) */
+ulong memspeed(ulong src, ulong len, int iter)
+{
+	int i;
+	ulong dst, wlen;
+	ulong st_low, st_high;
+	ulong end_low, end_high;
+	ulong cal_low, cal_high;
+
+	if (v->rdtsc == 0 ) {
+		return(-1);
+	}
+	if (len == 0) return(-2);
+
+	dst = src + len;
+	wlen = len / 4;  /* Length is bytes */
+
+	/* Calibrate the overhead with a zero word copy */
+	asm __volatile__ ("rdtsc":"=a" (st_low),"=d" (st_high));
+	for (i=0; i<iter; i++) {
+		asm __volatile__ (
+			"movl %0,%%esi\n\t" \
+       		 	"movl %1,%%edi\n\t" \
+       		 	"movl %2,%%ecx\n\t" \
+       		 	"cld\n\t" \
+       		 	"rep\n\t" \
+       		 	"movsl\n\t" \
+				:: "g" (src), "g" (dst), "g" (0)
+			: "esi", "edi", "ecx"
+		);
+	}
+	asm __volatile__ ("rdtsc":"=a" (cal_low),"=d" (cal_high));
+
+	/* Compute the overhead time */
+	asm __volatile__ (
+		"subl %2,%0\n\t"
+		"sbbl %3,%1"
+		:"=a" (cal_low), "=d" (cal_high)
+		:"g" (st_low), "g" (st_high),
+		"0" (cal_low), "1" (cal_high)
+	);
+
+
+	/* Now measure the speed */
+	/* Do the first copy to prime the cache */
+	asm __volatile__ (
+		"movl %0,%%esi\n\t" \
+		"movl %1,%%edi\n\t" \
+       	 	"movl %2,%%ecx\n\t" \
+       	 	"cld\n\t" \
+       	 	"rep\n\t" \
+       	 	"movsl\n\t" \
+		:: "g" (src), "g" (dst), "g" (wlen)
+		: "esi", "edi", "ecx"
+	);
+	asm __volatile__ ("rdtsc":"=a" (st_low),"=d" (st_high));
+	for (i=0; i<iter; i++) {
+	        asm __volatile__ (
+			"movl %0,%%esi\n\t" \
+			"movl %1,%%edi\n\t" \
+       		 	"movl %2,%%ecx\n\t" \
+       		 	"cld\n\t" \
+       		 	"rep\n\t" \
+       		 	"movsl\n\t" \
+			:: "g" (src), "g" (dst), "g" (wlen)
+			: "esi", "edi", "ecx"
+		);
+	}
+	asm __volatile__ ("rdtsc":"=a" (end_low),"=d" (end_high));
+
+	/* Compute the elapsed time */
+	asm __volatile__ (
+		"subl %2,%0\n\t"
+		"sbbl %3,%1"
+		:"=a" (end_low), "=d" (end_high)
+		:"g" (st_low), "g" (st_high),
+		"0" (end_low), "1" (end_high)
+	);
+	/* Subtract the overhead time */
+	asm __volatile__ (
+		"subl %2,%0\n\t"
+		"sbbl %3,%1"
+		:"=a" (end_low), "=d" (end_high)
+		:"g" (cal_low), "g" (cal_high),
+		"0" (end_low), "1" (end_high)
+	);
+
+	/* Make sure that the result fits in 32 bits */
+	if (end_high) {
+		return(-3);
+	}
+	end_low /= 2;
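+	/* Halved presumably because each rep movsl pass both reads and
+	 * writes the block, i.e. about 2*len bytes of traffic per copy. */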
+
+	/* Convert to clocks/KB */
+	end_low /= len;
+	end_low *= 1024;
+	end_low /= iter;
+	if (end_low == 0) {
+		return(-4);
+	}
+
+	/* clks_msec/(clocks per KB) gives KB per millisecond (about MB/s) */
+/*
+	if (tsc_invariable) end_low = correct_tsc(end_low);
+*/
+	return((v->clks_msec)/end_low);
+}
+
+#define rdmsr(msr,val1,val2) \
+	__asm__ __volatile__("rdmsr" \
+		  : "=a" (val1), "=d" (val2) \
+		  : "c" (msr))
+
+/*
+ulong correct_tsc(ulong el_org)
+{
+	float coef_now, coef_max;
+	int msr_lo, msr_hi, is_xe;
+	
+	rdmsr(0x198, msr_lo, msr_hi);
+	is_xe = (msr_lo >> 31) & 0x1;		
+	
+	if(is_xe){
+		rdmsr(0x198, msr_lo, msr_hi);
+		coef_max = ((msr_hi >> 8) & 0x1F);	
+		if ((msr_hi >> 14) & 0x1) { coef_max = coef_max + 0.5f; }
+	} else {
+		rdmsr(0x17, msr_lo, msr_hi);
+		coef_max = ((msr_lo >> 8) & 0x1F);
+		if ((msr_lo >> 14) & 0x1) { coef_max = coef_max + 0.5f; }
+	}
+	
+	if((cpu_id.feature_flag >> 7) & 1) {
+		rdmsr(0x198, msr_lo, msr_hi);
+		coef_now = ((msr_lo >> 8) & 0x1F);
+		if ((msr_lo >> 14) & 0x1) { coef_now = coef_now + 0.5f; }
+	} else {
+		rdmsr(0x2A, msr_lo, msr_hi);
+		coef_now = (msr_lo >> 22) & 0x1F;
+	}
+	if(coef_max && coef_now) {
+		el_org = (ulong)(el_org * coef_now / coef_max);
+	}
+	return el_org;
+}
+*/
diff --git a/io.h b/io.h
new file mode 100644
index 0000000..4fda2de
--- /dev/null
+++ b/io.h
@@ -0,0 +1,118 @@
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+/*
+ * This file contains the definitions for the x86 IO instructions
+ * inb/inw/inl/outb/outw/outl and the "string versions" of the same
+ * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
+ * versions of the single-IO instructions (inb_p/inw_p/..).
+ *
+ * This file is not meant to be obfuscating: it's just complicated
+ * to (a) handle it all in a way that makes gcc able to optimize it
+ * as well as possible and (b) to avoid writing the same thing
+ * over and over again with slight variations and possibly making a
+ * mistake somewhere.
+ */
+
+#ifdef SLOW_IO_BY_JUMPING
+#define __SLOW_DOWN_IO __asm__ __volatile__("jmp 1f\n1:\tjmp 1f\n1:")
+#else
+#define __SLOW_DOWN_IO __asm__ __volatile__("outb %al,$0x80")
+#endif
+
+#ifdef REALLY_SLOW_IO
+#define SLOW_DOWN_IO { __SLOW_DOWN_IO; __SLOW_DOWN_IO; __SLOW_DOWN_IO; __SLOW_DOWN_IO; }
+#else
+#define SLOW_DOWN_IO __SLOW_DOWN_IO
+#endif
+
+/*
+ * Talk about misusing macros..
+ */
+
+#define __OUT1(s,x) \
+extern inline void __out##s(unsigned x value, unsigned short port) {
+
+#define __OUT2(s,s1,s2) \
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+
+#define __OUT(s,s1,x) \
+__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "d" (port)); } \
+__OUT1(s##c,x) __OUT2(s,s1,"") : : "a" (value), "id" (port)); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") : : "a" (value), "d" (port)); SLOW_DOWN_IO; } \
+__OUT1(s##c_p,x) __OUT2(s,s1,"") : : "a" (value), "id" (port)); SLOW_DOWN_IO; }
+
+#define __IN1(s) \
+extern inline RETURN_TYPE __in##s(unsigned short port) { RETURN_TYPE _v;
+
+#define __IN2(s,s1,s2) \
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+
+#define __IN(s,s1,i...) \
+__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "d" (port) ,##i ); return _v; } \
+__IN1(s##c) __IN2(s,s1,"") : "=a" (_v) : "id" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") : "=a" (_v) : "d" (port) ,##i ); SLOW_DOWN_IO; return _v; } \
+__IN1(s##c_p) __IN2(s,s1,"") : "=a" (_v) : "id" (port) ,##i ); SLOW_DOWN_IO; return _v; }
+
+#define __OUTS(s) \
+extern inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("cld ; rep ; outs" #s \
+: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define RETURN_TYPE unsigned char
+/* __IN(b,"b","0" (0)) */
+__IN(b,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned short
+/* __IN(w,"w","0" (0)) */
+__IN(w,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned int
+__IN(l,"")
+#undef RETURN_TYPE
+
+__OUT(b,"b",char)
+__OUT(w,"w",short)
+__OUT(l,,int)
+
+__OUTS(b)
+__OUTS(w)
+__OUTS(l)
+
+/*
+ * Note that due to the way __builtin_constant_p() works, you
+ *  - can't use it inside an inline function (it will never be true)
+ *  - you don't have to worry about side effects within the __builtin..
+ */
+#define outb(val,port) \
+((__builtin_constant_p((port)) && (port) < 256) ? \
+	__outbc((val),(port)) : \
+	__outb((val),(port)))
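+/* When the port is a compile-time constant below 256, e.g.
+ * outb(0xb0, 0x43) for the PIT, the __outbc variant is chosen and the
+ * port can be encoded as an immediate; otherwise it is passed in %dx. */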
+
+#define inb(port) \
+((__builtin_constant_p((port)) && (port) < 256) ? \
+	__inbc(port) : \
+	__inb(port))
+
+
+#define outw(val,port) \
+((__builtin_constant_p((port)) && (port) < 256) ? \
+	__outwc((val),(port)) : \
+	__outw((val),(port)))
+
+#define inw(port) \
+((__builtin_constant_p((port)) && (port) < 256) ? \
+	__inwc(port) : \
+	__inw(port))
+
+
+#define outl(val,port) \
+((__builtin_constant_p((port)) && (port) < 256) ? \
+	__outlc((val),(port)) : \
+	__outl((val),(port)))
+
+#define inl(port) \
+((__builtin_constant_p((port)) && (port) < 256) ? \
+	__inlc(port) : \
+	__inl(port))
+#endif
diff --git a/lib.c b/lib.c
new file mode 100644
index 0000000..851db76
--- /dev/null
+++ b/lib.c
@@ -0,0 +1,1095 @@
+/* lib.c - MemTest-86  Version 3.4
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Chris Brady
+ */
+#include "io.h"
+#include "serial.h"
+#include "test.h"
+#include "config.h"
+#include "screen_buffer.h"
+#include "smp.h"
+
+int slock = 0, lsr = 0;
+short serial_cons = SERIAL_CONSOLE_DEFAULT;
+#if SERIAL_TTY != 0 && SERIAL_TTY != 1
+#error Bad SERIAL_TTY. Only ttyS0 and ttyS1 are supported.
+#endif
+short serial_tty = SERIAL_TTY;
+const short serial_base_ports[] = {0x3f8, 0x2f8};
+
+#if ((115200%SERIAL_BAUD_RATE) != 0)
+#error Bad default baud rate
+#endif
+int serial_baud_rate = SERIAL_BAUD_RATE;
+unsigned char serial_parity = 0;
+unsigned char serial_bits = 8;
+
+struct ascii_map_str {
+        int ascii;
+        int keycode;
+};
+
+char *codes[] = {
+	"  Divide",
+	"   Debug",
+	"     NMI",
+	"  Brkpnt",
+	"Overflow",
+	"   Bound",
+	"  Inv_Op",
+	" No_Math",
+	"Double_Fault",
+	"Seg_Over",
+	" Inv_TSS",
+	"  Seg_NP",
+	"Stack_Fault",
+	"Gen_Prot",
+	"Page_Fault",
+	"   Resvd",
+	"     FPE",
+	"Alignment",
+	" Mch_Chk",
+	"SIMD FPE"
+};
+inline void reboot(void)
+{
+	/* tell the BIOS to do a warm start */
+	*((unsigned short *)0x472) = 0x1234;
+	outb(0xfe,0x64);
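+	/* Writing 0xFE to the keyboard controller command port (0x64)
+	 * pulses the CPU reset line. */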
+}
+
+struct eregs {
+	ulong ss;
+	ulong ds;
+	ulong esp;
+	ulong ebp;
+	ulong esi;
+	ulong edi;
+	ulong edx;
+	ulong ecx;
+	ulong ebx;
+	ulong eax;
+	ulong vect;
+	ulong code;
+	ulong eip;
+	ulong cs;
+	ulong eflag;
+};
+
+int memcmp(const void *s1, const void *s2, ulong count)
+{
+	const unsigned char *src1 = s1, *src2 = s2;
+	int i;
+	for(i = 0; i < count; i++) {
+		if (src1[i] != src2[i]) {
+			return (int)src1[i] - (int)src2[i];
+		}
+	}
+	return 0;
+}
+
+int strncmp(const char *s1, const char *s2, ulong n) {
+	signed char res = 0;
+	while (n) {
+		res = *s1 - *s2;
+		if (res != 0)
+			return res;
+		if (*s1 == '\0')
+			return 0;
+		++s1, ++s2;
+		--n;
+	}
+	return res;
+}
+
+void *memmove(void *dest, const void *src, ulong n)
+{
+	long i;
+	char *d = (char *)dest, *s = (char *)src;
+
+	/* If src == dest do nothing */
+	if (dest < src) {
+		for(i = 0; i < n; i++) {
+			d[i] = s[i];
+		}
+	}
+	else if (dest > src) {
+		for(i = n -1; i >= 0; i--) {
+			d[i] = s[i];
+		}
+	}
+	return dest;
+}
+
+char toupper(char c)
+{
+	if (c >= 'a' && c <= 'z')
+		return c + 'A' -'a';
+	else
+		return c;
+}
+
+int isdigit(char c)
+{
+	return c >= '0' && c <= '9';
+}
+
+int isxdigit(char c)
+{
+	return isdigit(c) || (toupper(c) >= 'A' && toupper(c) <= 'F');
+}
+
+unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) {
+	unsigned long result = 0, value;
+
+	if (!base) {
+		base = 10;
+		if (*cp == '0') {
+			base = 8;
+			cp++;
+			if (toupper(*cp) == 'X' && isxdigit(cp[1])) {
+				cp++;
+				base = 16;
+			}
+		}
+	} else if (base == 16) {
+		if (cp[0] == '0' && toupper(cp[1]) == 'X')
+			cp += 2;
+	}
+	while (isxdigit(*cp) &&
+		(value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
+		result = result*base + value;
+		cp++;
+	}
+	if (endp)
+		*endp = (char *)cp;
+	return result;
+}
+
+/*
+ * Scroll the error message area of the screen as needed
+ * Starts at line LINE_SCROLL and ends at line 23
+ */
+void scroll(void) 
+{
+	int i, j;
+	char *s, tmp;
+
+	/* Only scroll if at the bottom of the screen */
+	if (v->msg_line < 23) {
+		v->msg_line++;
+	} else {
+		/* If scroll lock is on, loop till it is cleared */
+		while (slock) {
+			check_input();
+		}
+	        for (i=LINE_SCROLL; i<23; i++) {
+			s = (char *)(SCREEN_ADR + ((i+1) * 160));
+			for (j=0; j<160; j+=2, s+=2) {
+				*(s-160) = *s;
+                                tmp = get_scrn_buf(i+1, j/2);
+                                set_scrn_buf(i, j/2, tmp);
+			}
+		}
+		/* Clear the newly opened line */
+		s = (char *)(SCREEN_ADR + (23 * 160));
+		for (j=0; j<80; j++) {
+			*s = ' ';
+                        set_scrn_buf(23, j, ' ');
+			s += 2;
+		}
+                tty_print_region(LINE_SCROLL, 0, 23, 79);
+        }
+}
+
+/*
+ * Clear scroll region
+ */
+void clear_scroll(void)
+{
+	int i;
+	char *s;
+
+	s = (char*)(SCREEN_ADR+LINE_HEADER*160);
+        for(i=0; i<80*(24-LINE_HEADER); i++) {
+                *s++ = ' ';
+                *s++ = 0x17;
+        }
+}
+
+/*
+ * Place a single character on screen
+ */
+void cplace(int y, int x, const char c)
+{
+	char *dptr;
+
+	dptr = (char *)(SCREEN_ADR + (160*y) + (2*x));
+	*dptr = c;
+}
+
+/*
+ * Print characters on screen
+ */
+void cprint(int y, int x, const char *text)
+{
+	register int i;
+	char *dptr;
+
+	dptr = (char *)(SCREEN_ADR + (160*y) + (2*x));
+	for (i=0; text[i]; i++) {
+		*dptr = text[i];
+		dptr += 2;
+        }
+        tty_print_line(y, x, text);
+}
+
+void itoa(char s[], int n) 
+{
+  int i, sign;
+
+  if((sign = n) < 0)
+    n = -n;
+  i=0;
+  do {
+    s[i++] = n % 10 + '0';
+  } while ((n /= 10) > 0);
+  if(sign < 0)
+    s[i++] = '-';
+  s[i] = '\0';
+  reverse(s);
+}
+
+void reverse(char s[])
+{
+  int c, i, j;
+  for(j = 0; s[j] != 0; j++)
+	  ;
+
+  for(i=0, j = j - 1; i < j; i++, j--) {
+    c = s[i];
+    s[i] = s[j];
+    s[j] = c;
+  }
+}
+void memcpy (void *dst, void *src, int len)
+{
+	char *s = (char*)src;
+	char *d = (char*)dst;
+	int i;
+
+	if (len <= 0) {
+		return;
+	}
+	for (i = 0 ; i < len; i++) {
+		*d++ = *s++;
+	} 
+}
+
+/*
+ * Print a people friendly address
+ */
+void aprint(int y, int x, ulong page)
+{
+	/* page is in multiples of 4K */
+	if ((page << 2) < 9999) {
+		dprint(y, x, page << 2, 4, 0);
+		cprint(y, x+4, "K");
+	}
+	else if ((page >>8) < 9999) {
+		dprint(y, x, (page  + (1 << 7)) >> 8, 4, 0);
+		cprint(y, x+4, "M");
+	}
+	else if ((page >>18) < 9999) {
+		dprint(y, x, (page + (1 << 17)) >> 18, 4, 0);
+		cprint(y, x+4, "G");
+	}
+	else {
+		dprint(y, x, (page + (1 << 27)) >> 28, 4, 0);
+		cprint(y, x+4, "T");
+	}
+}
+
+/*
+ * Print a decimal number on screen
+ */
+void dprint(int y, int x, ulong val, int len, int right)
+{
+	ulong j, k;
+	int i, flag=0;
+	char buf[18];
+
+	if (val > 999999999 || len > 9) {
+		return;
+	}
+	for(i=0, j=1; i<len-1; i++) {
+		j *= 10;
+	}
+	if (!right) {
+		for (i=0; j>0; j/=10) {
+			k = val/j;
+			if (k > 9) {
+				j *= 100;
+				continue;
+			}
+			if (flag || k || j == 1) {
+				buf[i++] = k + '0';
+				flag++;
+			} else {
+				buf[i++] = ' ';
+			}
+			val -= k * j;
+		}
+	} else {
+		for(i=0; i<len; j/=10) {
+			if (j) {
+				k = val/j;
+					if (k > 9) {
+					j *= 100;
+					len++;
+					continue;
+				}
+				if (k == 0 && flag == 0) {
+					continue;				
+				}
+				buf[i++] = k + '0';
+				val -= k * j;
+			} else {
+                                if (flag == 0 &&  i < len-1) {
+                                        buf[i++] = '0';
+                                } else {
+                                        buf[i++] = ' ';
+                                }
+			}
+			flag++;
+		}
+	}
+	buf[i] = 0;
+	cprint(y,x,buf);
+}
+
+/*
+ * Print a hex number on screen at least digits long
+ */
+void hprint2(int y,int x, unsigned long val, int digits)
+{
+	unsigned long j;
+	int i, idx, flag = 0;
+	char buf[18];
+
+        for (i=0, idx=0; i<8; i++) {
+                j = val >> (28 - (4 * i));
+		j &= 0xf;
+		if (j < 10) {
+			if (flag || j || i == 7) {
+		                buf[idx++] = j + '0';
+				flag++;
+			} else {
+				buf[idx++] = '0';
+			}
+		} else {
+			buf[idx++] = j + 'a' - 10;
+			flag++;
+		}
+        }
+	if (digits > 8) {
+		digits = 8;
+	}
+	if (flag > digits) {
+		digits = flag;
+	}
+        buf[idx] = 0;
+	cprint(y,x,buf + (idx - digits));
+}
+
+/*
+ * Print a hex number on screen exactly digits long
+ */
+void hprint3(int y,int x, unsigned long val, int digits)
+{
+	unsigned long j;
+	int i, idx, flag = 0;
+	char buf[18];
+
+	for (i=0, idx=0; i<digits; i++) {
+		j = 0xf & val;
+		val /= 16;
+
+		if (j < 10) {
+			if (flag || j || i == 7) {
+				buf[digits - ++idx] = j + '0';
+				flag++;
+			} else {
+				buf[digits - ++idx] = '0';
+			}
+		} else {
+			buf[digits - ++idx] = j + 'a' - 10;
+			flag++;
+		}
+	}
+	buf[idx] = 0;
+	cprint(y,x,buf);
+}
+
+/*
+ * Print a hex number on screen
+ */
+void hprint(int y, int x, unsigned long val)
+{
+	return hprint2(y, x, val, 8);
+}
+
+/*
+ * Print an address in 0000m0000k0000 notation
+ */
+void xprint(int y,int x, ulong val)
+{
+        ulong j;
+
+	j = (val & 0xffc00000) >> 20;
+	dprint(y, x, j, 4, 0);
+	cprint(y, x+4, "m");
+	j = (val & 0xffc00) >> 10;
+	dprint(y, x+5, j, 4, 0);
+	cprint(y, x+9, "k");
+	j = val & 0x3ff;
+	dprint(y, x+10, j, 4, 0);
+}
+	
+/* Handle an interrupt */
+void inter(struct eregs *trap_regs)
+{
+	int i, line;
+	unsigned char *pp;
+	ulong address = 0;
+	int my_cpu_num = smp_my_cpu_num();
+
+	/* Get the page fault address */
+	if (trap_regs->vect == 14) {
+		__asm__("movl %%cr2,%0":"=r" (address));
+	}
+#ifdef PARITY_MEM
+
+	/* Check for a parity error */
+	if (trap_regs->vect == 2) {
+		parity_err(trap_regs->edi, trap_regs->esi);
+		return;
+	}
+#endif
+
+	/* clear scrolling region */
+        pp=(unsigned char *)(SCREEN_ADR+(2*80*(LINE_SCROLL-2)));
+        for(i=0; i<2*80*(24-LINE_SCROLL-2); i++, pp+=2) {
+                *pp = ' ';
+        }
+	line = LINE_SCROLL-2;
+
+	cprint(line, 0, "Unexpected Interrupt - Halting CPU");
+	dprint(line, COL_MID + 4, my_cpu_num, 2, 1);
+	cprint(line+2, 0, " Type: ");
+	if (trap_regs->vect <= 19) {
+		cprint(line+2, 7, codes[trap_regs->vect]);
+	} else {
+		hprint(line+2, 7, trap_regs->vect);
+	}
+	cprint(line+3, 0, "   PC: ");
+	hprint(line+3, 7, trap_regs->eip);
+	cprint(line+4, 0, "   CS: ");
+	hprint(line+4, 7, trap_regs->cs);
+	cprint(line+5, 0, "Eflag: ");
+	hprint(line+5, 7, trap_regs->eflag);
+	cprint(line+6, 0, " Code: ");
+	hprint(line+6, 7, trap_regs->code);
+	cprint(line+7, 0, "   DS: ");
+	hprint(line+7, 7, trap_regs->ds);
+	cprint(line+8, 0, "   SS: ");
+	hprint(line+8, 7, trap_regs->ss);
+	if (trap_regs->vect == 14) {
+		/* Page fault address */
+		cprint(line+7, 0, " Addr: ");
+		hprint(line+7, 7, address);
+	}
+
+	cprint(line+2, 20, "eax: ");
+	hprint(line+2, 25, trap_regs->eax);
+	cprint(line+3, 20, "ebx: ");
+	hprint(line+3, 25, trap_regs->ebx);
+	cprint(line+4, 20, "ecx: ");
+	hprint(line+4, 25, trap_regs->ecx);
+	cprint(line+5, 20, "edx: ");
+	hprint(line+5, 25, trap_regs->edx);
+	cprint(line+6, 20, "edi: ");
+	hprint(line+6, 25, trap_regs->edi);
+	cprint(line+7, 20, "esi: ");
+	hprint(line+7, 25, trap_regs->esi);
+	cprint(line+8, 20, "ebp: ");
+	hprint(line+8, 25, trap_regs->ebp);
+	cprint(line+9, 20, "esp: ");
+	hprint(line+9, 25, trap_regs->esp);
+
+	cprint(line+1, 38, "Stack:");
+	for (i=0; i<12; i++) {
+		hprint(line+2+i, 38, trap_regs->esp+(4*i));
+		hprint(line+2+i, 47, *(ulong*)(trap_regs->esp+(4*i)));
+		hprint(line+2+i, 57, trap_regs->esp+(4*(i+12)));
+		hprint(line+2+i, 66, *(ulong*)(trap_regs->esp+(4*(i+12))));
+	}
+
+	cprint(line+11, 0, "CS:EIP:                          ");
+	pp = (unsigned char *)trap_regs->eip;
+	for(i = 0; i < 10; i++) {
+		hprint2(line+11, 8+(3*i), pp[i], 2);
+	}
+
+	while(1) {
+		check_input();
+	}
+}
+
+void set_cache(int val) 
+{
+	switch(val) {
+	case 0:
+		cache_off();	
+		break;
+	case 1:
+		cache_on();
+		break;
+	}
+}
+
+int get_key() {
+	int c;
+	
+	c = inb(0x64);
+	if ((c & 1) == 0) {
+		if (serial_cons) {
+			int comstat;
+			comstat = serial_echo_inb(UART_LSR);
+			if (comstat & UART_LSR_DR) {
+				c = serial_echo_inb(UART_RX);
+				/* Pressing '.' has same effect as 'c'
+				   on a keyboard.
+				   Oct 056   Dec 46   Hex 2E   Ascii .
+				*/
+				return (ascii_to_keycode(c));
+			}
+		}
+		return(0);
+	}
+	c = inb(0x60);
+	return((c));
+}
+
+void check_input(void)
+{
+	unsigned char c;
+
+	if ((c = get_key())) {
+		switch(c & 0x7f) {
+		case 1:	
+			/* "ESC" key was pressed, bail out.  */
+			cprint(LINE_RANGE, COL_MID+23, "Halting... ");
+			reboot();
+			break;
+		case 46:
+			/* c - Configure */
+			get_config();
+			break;
+		case 28:
+			/* CR - clear scroll lock */
+			slock = 0;
+			footer();
+			break;
+		case 57:
+			/* SP - set scroll lock */
+			slock = 1;
+			footer();
+			break;
+		case 0x26:
+			/* ^L/L - redraw the display */
+			tty_print_screen();
+			break;
+		}
+	}
+}
+
+void footer()
+{
+	cprint(24, 0, "(ESC)exit  (c)configuration  (SP)scroll_lock  (CR)scroll_unlock");
+	if (slock) {
+		cprint(24, 74, "Locked");
+	} else {
+		cprint(24, 74, "      ");
+	}
+}
+
+ulong getval(int x, int y, int result_shift)
+{
+	unsigned long val;
+	int done;
+	int c;
+	int i, n;
+	int base;
+	int shift;
+	char buf[16];
+
+	for(i = 0; i < sizeof(buf)/sizeof(buf[0]); i++ ) {
+		buf[i] = ' ';
+	}
+	buf[sizeof(buf)/sizeof(buf[0]) -1] = '\0';
+	
+	wait_keyup();
+	done = 0;
+	n = 0;
+	base = 10;
+	while(!done) {
+		/* Read a new character and process it */
+		c = get_key();
+		switch(c) {
+		case 0x26: /* ^L/L - redraw the display */
+			tty_print_screen();
+			break;
+		case 0x1c: /* CR */
+			/* If something has been entered we are done */
+			if(n) done = 1;
+			break;
+		case 0x19: /* p */ buf[n] = 'p'; break;
+		case 0x22: /* g */ buf[n] = 'g'; break;
+		case 0x32: /* m */ buf[n] = 'm'; break;
+		case 0x25: /* k */ buf[n] = 'k'; break;
+		case 0x2d: /* x */
+			/* Only allow 'x' after an initial 0 */
+			if (n == 1 && (buf[0] == '0')) {
+				buf[n] = 'x';
+			}
+			break;
+		case 0x0e: /* BS */
+			if (n > 0) {
+				n -= 1;
+				buf[n] = ' ';
+			}
+			break;
+		/* Don't allow entering a number not in our current base */
+		case 0x0B: if (base >= 1) buf[n] = '0'; break;
+		case 0x02: if (base >= 2) buf[n] = '1'; break;
+		case 0x03: if (base >= 3) buf[n] = '2'; break;
+		case 0x04: if (base >= 4) buf[n] = '3'; break;
+		case 0x05: if (base >= 5) buf[n] = '4'; break;
+		case 0x06: if (base >= 6) buf[n] = '5'; break;
+		case 0x07: if (base >= 7) buf[n] = '6'; break;
+		case 0x08: if (base >= 8) buf[n] = '7'; break;
+		case 0x09: if (base >= 9) buf[n] = '8'; break;
+		case 0x0A: if (base >= 10) buf[n] = '9'; break;
+		case 0x1e: if (base >= 11) buf[n] = 'a'; break;
+		case 0x30: if (base >= 12) buf[n] = 'b'; break;
+		case 0x2e: if (base >= 13) buf[n] = 'c'; break;
+		case 0x20: if (base >= 14) buf[n] = 'd'; break;
+		case 0x12: if (base >= 15) buf[n] = 'e'; break;
+		case 0x21: if (base >= 16) buf[n] = 'f'; break;
+		default:
+			break;
+		}
+		/* Don't allow anything to be entered after a suffix */
+		if (n > 0 && (
+			(buf[n-1] == 'p') || (buf[n-1] == 'g') || 
+			(buf[n-1] == 'm') || (buf[n-1] == 'k'))) {
+			buf[n] = ' ';
+		}
+		/* If we have entered a character increment n */
+		if (buf[n] != ' ') {
+			n++;
+		}
+		buf[n] = ' ';
+		/* Print the current number */
+		cprint(x, y, buf);
+
+		/* Find the base we are entering numbers in */
+		base = 10;
+		if ((buf[0] == '0') && (buf[1] == 'x')) {
+			base = 16;
+		}
+		else if (buf[0] == '0') {
+			base = 8;
+		}
+	}
+	/* Compute our current shift */
+	shift = 0;
+	switch(buf[n-1]) {
+	case 'g': /* gig */  shift = 30; break;
+	case 'm': /* meg */  shift = 20; break;
+	case 'p': /* page */ shift = 12; break;
+	case 'k': /* kilo */ shift = 10; break;
+	}
+	shift -= result_shift;
+
+	/* Compute our current value */
+	val = simple_strtoul(buf, 0, base);
+	if (shift > 0) {
+		if (shift >= 32) {
+			val = 0xffffffff;
+		} else {
+			val <<= shift;
+		}
+	} else {
+		if (-shift >= 32) {
+			val = 0;
+		}
+		else {
+			val >>= -shift;
+		}
+	}
+	return val;
+}
+
+void ttyprint(int y, int x, const char *p)
+{
+	static char sx[3];
+	static char sy[3];
+	
+	sx[0]='\0';
+	sy[0]='\0';
+	x++; y++;
+	itoa(sx, x);
+	itoa(sy, y);
+	serial_echo_print("[");
+	serial_echo_print(sy);
+	serial_echo_print(";");
+	serial_echo_print(sx);
+	serial_echo_print("H");
+	serial_echo_print(p);
+}
+
+void serial_echo_init(void)
+{
+	int comstat, hi, lo, serial_div;
+	unsigned char lcr;	
+
+	/* read the Divisor Latch */
+	comstat = serial_echo_inb(UART_LCR);
+	serial_echo_outb(comstat | UART_LCR_DLAB, UART_LCR);
+	hi = serial_echo_inb(UART_DLM);
+	lo = serial_echo_inb(UART_DLL);
+	serial_echo_outb(comstat, UART_LCR);
+
+	/* now do hardwired init */
+	lcr = serial_parity | (serial_bits - 5);
+	serial_echo_outb(lcr, UART_LCR); /* No parity, 8 data bits, 1 stop */
+	serial_div = 115200 / serial_baud_rate;
+	serial_echo_outb(0x80|lcr, UART_LCR); /* Access divisor latch */
+	serial_echo_outb(serial_div & 0xff, UART_DLL);  /* baud rate divisor */
+	serial_echo_outb((serial_div >> 8) & 0xff, UART_DLM);
+	serial_echo_outb(lcr, UART_LCR); /* Done with divisor */
+
+	/* Prior to disabling interrupts, read the LSR and RBR
+	 * registers */
+	comstat = serial_echo_inb(UART_LSR); /* COM? LSR */
+	comstat = serial_echo_inb(UART_RX);	/* COM? RBR */
+	serial_echo_outb(0x00, UART_IER); /* Disable all interrupts */
+
+        clear_screen_buf();
+
+	return;
+}
+
+void serial_echo_print(const char *p)
+{
+	if (!serial_cons) {
+		return;
+	}
+	/* Now, do each character */
+	while (*p) {
+		WAIT_FOR_XMITR;
+
+		/* Send the character out. */
+		serial_echo_outb(*p, UART_TX);
+		if(*p==10) {
+			WAIT_FOR_XMITR;
+			serial_echo_outb(13, UART_TX);
+		}
+		p++;
+	}
+}
+
+/* Except for multi-character key sequences this mapping
+ * table is complete.  So it should not need to be updated
+ * when new keys are searched for.  However the key handling
+ * should really be turned around and only in get_key should
+ * we worry about the exact keycode that was pressed.  Everywhere
+ * else we should switch on the character...
+ */
+struct ascii_map_str ser_map[] =
+/*ascii keycode     ascii  keycode*/
+{ 
+  /* Special cases come first so I can leave
+   * their ``normal'' mapping in the table,
+   * without it being activated.
+   */
+  {  27,   0x01}, /* ^[/ESC -> ESC  */
+  { 127,   0x0e}, /*    DEL -> BS   */
+  {   8,   0x0e}, /* ^H/BS  -> BS   */
+  {  10,   0x1c}, /* ^L/NL  -> CR   */
+  {  13,   0x1c}, /* ^M/CR  -> CR   */
+  {   9,   0x0f}, /* ^I/TAB -> TAB  */
+  {  19,   0x39}, /* ^S     -> SP   */
+  {  17,     28}, /* ^Q     -> CR   */
+
+  { ' ',   0x39}, /*     SP -> SP   */
+  { 'a',   0x1e},
+  { 'A',   0x1e},
+  {   1,   0x1e}, /* ^A      -> A */
+  { 'b',   0x30},
+  { 'B',   0x30},
+  {   2,   0x30}, /* ^B      -> B */
+  { 'c',   0x2e},
+  { 'C',   0x2e},
+  {   3,   0x2e}, /* ^C      -> C */
+  { 'd',   0x20},
+  { 'D',   0x20},
+  {   4,   0x20}, /* ^D      -> D */
+  { 'e',   0x12},
+  { 'E',   0x12},
+  {   5,   0x12}, /* ^E      -> E */
+  { 'f',   0x21},
+  { 'F',   0x21},
+  {   6,   0x21}, /* ^F      -> F */
+  { 'g',   0x22},
+  { 'G',   0x22},
+  {   7,   0x22}, /* ^G      -> G */
+  { 'h',   0x23},
+  { 'H',   0x23},
+  {   8,   0x23}, /* ^H      -> H */
+  { 'i',   0x17},
+  { 'I',   0x17},
+  {   9,   0x17}, /* ^I      -> I */
+  { 'j',   0x24},
+  { 'J',   0x24},
+  {  10,   0x24}, /* ^J      -> J */
+  { 'k',   0x25},
+  { 'K',   0x25},
+  {  11,   0x25}, /* ^K      -> K */
+  { 'l',   0x26},
+  { 'L',   0x26},
+  {  12,   0x26}, /* ^L      -> L */
+  { 'm',   0x32},
+  { 'M',   0x32},
+  {  13,   0x32}, /* ^M      -> M */
+  { 'n',   0x31},
+  { 'N',   0x31},
+  {  14,   0x31}, /* ^N      -> N */
+  { 'o',   0x18},
+  { 'O',   0x18},
+  {  15,   0x18}, /* ^O      -> O */
+  { 'p',   0x19},
+  { 'P',   0x19},
+  {  16,   0x19}, /* ^P      -> P */
+  { 'q',   0x10},
+  { 'Q',   0x10},
+  {  17,   0x10}, /* ^Q      -> Q */
+  { 'r',   0x13},
+  { 'R',   0x13},
+  {  18,   0x13}, /* ^R      -> R */
+  { 's',   0x1f},
+  { 'S',   0x1f},
+  {  19,   0x1f}, /* ^S      -> S */
+  { 't',   0x14},
+  { 'T',   0x14},
+  {  20,   0x14}, /* ^T      -> T */
+  { 'u',   0x16},
+  { 'U',   0x16},
+  {  21,   0x16}, /* ^U      -> U */
+  { 'v',   0x2f},
+  { 'V',   0x2f},
+  {  22,   0x2f}, /* ^V      -> V */
+  { 'w',   0x11},
+  { 'W',   0x11},
+  {  23,   0x11}, /* ^W      -> W */
+  { 'x',   0x2d},
+  { 'X',   0x2d},
+  {  24,   0x2d}, /* ^X      -> X */
+  { 'y',   0x15},
+  { 'Y',   0x15},
+  {  25,   0x15}, /* ^Y      -> Y */
+  { 'z',   0x2c},
+  { 'Z',   0x2c},
+  {  26,   0x2c}, /* ^Z      -> Z */
+  { '-',   0x0c},
+  { '_',   0x0c},
+  {  31,   0x0c}, /* ^_      -> _ */
+  { '=',   0x0c},
+  { '+',   0x0c},
+  { '[',   0x1a},
+  { '{',   0x1a},
+  {  27,   0x1a}, /* ^[      -> [ */
+  { ']',   0x1b},
+  { '}',   0x1b},
+  {  29,   0x1b}, /* ^]      -> ] */
+  { ';',   0x27},
+  { ':',   0x27},
+  { '\'',  0x28},
+  { '"',   0x28},
+  { '`',   0x29},
+  { '~',   0x29},
+  { '\\',  0x2b},
+  { '|',   0x2b},
+  {  28,   0x2b}, /* ^\      -> \ */
+  { ',',   0x33},
+  { '<',   0x33},
+  { '.',   0x34},
+  { '>',   0x34},
+  { '/',   0x35},
+  { '?',   0x35},
+  { '1',   0x02},
+  { '!',   0x02},
+  { '2',   0x03},
+  { '@',   0x03},
+  { '3',   0x04},
+  { '#',   0x04},
+  { '4',   0x05},
+  { '$',   0x05},
+  { '5',   0x06},
+  { '%',   0x06},
+  { '6',   0x07},
+  { '^',   0x07},
+  {  30,   0x07}, /* ^^      -> 6 */
+  { '7',   0x08},
+  { '&',   0x08},
+  { '8',   0x09},
+  { '*',   0x09},
+  { '9',   0x0a},
+  { '(',   0x0a},
+  { '0',   0x0b},
+  { ')',   0x0b},
+  {   0,      0}
+};
+
+/*
+ * Given an ascii character, return the keycode
+ *
+ * Uses ser_map definition above.
+ *
+ * It would be more efficient to use an array of 255 characters
+ * and directly index into it.
+ */
+int ascii_to_keycode (int in)
+{
+	struct ascii_map_str *p;
+	for (p = ser_map; p->ascii; p++) {
+		if (in ==p->ascii)
+			return p->keycode;
+	}
+	return 0;
+}
+
+/*
+ * Call this when you want to wait for the user to lift the
+ * finger off of a key.  It is a noop if you are using a
+ * serial console.
+ */
+void wait_keyup( void ) {
+	/* Check to see if someone lifted the keyboard key */
+	while (1) {
+		if ((get_key() & 0x80) != 0) {
+			return;
+		}
+		/* Trying to simulate waiting for a key release with
+		 * the serial port is too nasty to let live.
+		 * In particular some menus don't even display until
+		 * you release the key that caused you to get there.
+		 * With the serial port this results in double pressing
+		 * or something worse for just about every key.
+		 */
+		if (serial_cons) {
+			return;
+		}
+	}
+}
+
+/*
+ * Handles "console=<param>" command line option
+ *
+ * Examples of accepted params:
+ *   ttyS0
+ *   ttyS1
+ *   ttyS0,115200
+ *   ttyS0,9600e8
+ */
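+/* For example, "ttyS0,9600e8" selects ttyS0 at 9600 baud with even
+ * parity and 8 data bits; a bare "ttyS0" keeps the compiled-in
+ * defaults for baud rate, parity and bits. */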
+void serial_console_setup(char *param)
+{
+	char *option, *end;
+	unsigned long tty;
+	unsigned long baud_rate;
+	unsigned char parity, bits;
+
+	if (strncmp(param, "ttyS", 4))
+		return;   /* not a serial port */
+
+	param += 4;
+
+	tty = simple_strtoul(param, &option, 10);
+
+	if (option == param)
+		return;   /* there were no digits */
+
+	if (tty > 1)
+		return;   /* only ttyS0 and ttyS1 supported */
+
+	if (*option == '\0' || *option == ' ')
+		goto save_tty; /* no options given, just ttyS? */
+
+	if (*option != ',')
+		return;  /* missing the comma separator */
+
+	/* baud rate must follow */
+	option++;
+	baud_rate = simple_strtoul(option, &end, 10);
+
+	if (end == option)
+		return;  /* no baudrate after comma */
+
+	if (baud_rate == 0 || (115200 % baud_rate) != 0)
+		return;  /* wrong baud rate */
+
+	if (*end == '\0' || *end == ' ')
+		goto save_baud_rate;  /* no more options given */
+
+	switch (toupper(*end)) {
+		case 'N':
+			parity = 0;
+			break;
+		case 'O':
+			parity = UART_LCR_PARITY;
+			break;
+		case 'E':
+			parity = UART_LCR_PARITY | UART_LCR_EPAR;
+			break;
+		default:
+			/* Unknown parity */
+			return;
+	}
+
+	end++;
+	if (*end == '\0' || *end == ' ')
+		goto save_parity;
+
+	/* word length (bits) */
+	if (*end < '7' || *end > '8')
+		return;  /* invalid number of bits */
+
+	bits = *end - '0';
+
+	end++;
+
+	if (*end != '\0' && *end != ' ')
+		return;  /* garbage at the end */
+
+	serial_bits = bits;
+	save_parity:
+	serial_parity = parity;
+	save_baud_rate:
+	serial_baud_rate = (int) baud_rate;
+  save_tty:
+	serial_tty = (short) tty;
+	serial_cons = 1;
+}
+
diff --git a/linuxbios.c b/linuxbios.c
new file mode 100644
index 0000000..db0b0f8
--- /dev/null
+++ b/linuxbios.c
@@ -0,0 +1,160 @@
+/* linuxbios.c - MemTest-86  Version 3.3
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Eric Biederman
+ */
+
+#include "linuxbios_tables.h"
+#include "test.h"
+
+static unsigned long ip_compute_csum(void *addr, unsigned long length)
+{
+	uint16_t *ptr;
+	unsigned long sum;
+	unsigned long len;
+	unsigned long laddr;
+	/* compute an ip style checksum */
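+	/* i.e. a 16 bit ones-complement sum over the buffer with the
+	 * carries folded back in and the final result inverted. */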
+	laddr = (unsigned long )addr;
+	sum = 0;
+	if (laddr & 1) {
+		uint16_t buffer;
+		unsigned char *ptr;
+		/* copy the first byte into a 2 byte buffer.
+		 * This way automatically handles the endian question
+		 * of which byte (low or high) the last byte goes in.
+		 */
+		buffer = 0;
+		ptr = addr;
+		memmove(&buffer, ptr, 1);
+		sum += buffer;
+		if (sum > 0xFFFF)
+			sum -= 0xFFFF;
+		length -= 1;
+		addr = ptr +1;
+		
+	}
+	len = length >> 1;
+	ptr = addr;
+	while (len--) {
+		sum += *(ptr++);
+		if (sum > 0xFFFF)
+			sum -= 0xFFFF;
+	}
+	addr = ptr;
+	if (length & 1) {
+		uint16_t buffer;
+		unsigned char *ptr;
+		/* copy the last byte into a 2 byte buffer.
+		 * This way automatically handles the endian question
+		 * of which byte (low or high) the last byte goes in.
+		 */
+		buffer = 0;
+		ptr = addr;
+		memmove(&buffer, ptr, 1);
+		sum += buffer;
+		if (sum > 0xFFFF)
+			sum -= 0xFFFF;
+	}
+	return (~sum) & 0xFFFF;
+	
+}
+
+#define for_each_lbrec(head, rec) \
+	for(rec = (struct lb_record *)(((char *)head) + sizeof(*head)); \
+		(((char *)rec) < (((char *)head) + sizeof(*head) + head->table_bytes))  && \
+		(rec->size >= 1) && \
+		((((char *)rec) + rec->size) <= (((char *)head) + sizeof(*head) + head->table_bytes)); \
+		rec = (struct lb_record *)(((char *)rec) + rec->size)) 
+		
+
+static int count_lb_records(struct lb_header *head)
+{
+	struct lb_record *rec;
+	int count;
+	count = 0;
+	for_each_lbrec(head, rec) {
+		count++;
+	}
+	return count;
+}
+
+static struct lb_header * __find_lb_table(unsigned long start, unsigned long end)
+{
+	unsigned long addr;
+	/* For now be stupid.... */
+	for(addr = start; addr < end; addr += 16) {
+		struct lb_header *head = (struct lb_header *)addr;
+		struct lb_record *recs = (struct lb_record *)(addr + sizeof(*head));
+		if (memcmp(head->signature, "LBIO", 4) != 0)
+			continue;
+		if (head->header_bytes != sizeof(*head))
+			continue;
+		if (ip_compute_csum((unsigned char *)head, sizeof(*head)) != 0)
+			continue;
+		if (ip_compute_csum((unsigned char *)recs, head->table_bytes) 
+			!= head->table_checksum)
+			continue;
+		if (count_lb_records(head) != head->table_entries)
+			continue;
+		return head;
+	};
+	return 0;
+}
+
+static struct lb_header * find_lb_table(void)
+{
+	struct lb_header *head;
+	head = 0;
+	if (!head) {
+		/* First try at address 0 */
+		head = __find_lb_table(0x00000, 0x1000);
+	}
+	if (!head) {
+		/* Then try at address 0xf0000 */
+		head = __find_lb_table(0xf0000, 0x100000);
+	}
+	return head;
+}
+
+int query_linuxbios(void)
+{
+	struct lb_header *head;
+	struct lb_record *rec;
+	struct lb_memory *mem;
+	int i, entries;
+	head = find_lb_table();
+	if (!head) {
+		return 0;
+	}
+	mem = 0;
+	for_each_lbrec(head, rec) {
+		if (rec->tag == LB_TAG_MEMORY) {
+			mem = (struct lb_memory *)rec;
+			break;
+		}
+	}
+	if (!mem) {
+		return 1;
+	}
+	entries = (mem->size - sizeof(*mem))/sizeof(mem->map[0]);
+	if (entries == 0)
+		return 1;
+	mem_info.e820_nr = 0;
+	for(i = 0; i < entries; i++) {
+		unsigned long long start;
+		unsigned long long size;
+		unsigned long type;
+		if (i >= E820MAX) {
+			break;
+		}
+		start = mem->map[i].start;
+		size = mem->map[i].size;
+		type = (mem->map[i].type == LB_MEM_RAM)?E820_RAM: E820_RESERVED;
+		mem_info.e820[mem_info.e820_nr].addr = start;
+		mem_info.e820[mem_info.e820_nr].size = size;
+		mem_info.e820[mem_info.e820_nr].type = type;
+		mem_info.e820_nr++;
+	}
+	return 1;
+}
+
diff --git a/linuxbios_tables.h b/linuxbios_tables.h
new file mode 100644
index 0000000..d312173
--- /dev/null
+++ b/linuxbios_tables.h
@@ -0,0 +1,82 @@
+#ifndef LINUXBIOS_TABLES_H
+#define LINUXBIOS_TABLES_H
+
+#include "stdint.h"
+
+/* The linuxbios table information is for conveying information
+ * from the firmware to the loaded OS image.  Primarily this
+ * is expected to be information that cannot be discovered by
+ * other means, such as querying the hardware directly.
+ *
+ * All of the information should be Position Independent Data.
+ * That is, it should be safe to relocate any of the information
+ * without its meaning/correctness changing.   For tables that
+ * can reasonably be used on multiple architectures the data
+ * size should be fixed.  This should ease the transition between
+ * 32 bit and 64 bit architectures etc.
+ *
+ * The completeness test for the information in this table is:
+ * - Can all of the hardware be detected?
+ * - Are the per motherboard constants available?
+ * - Is there enough to allow a kernel to run that was written before
+ *   a particular motherboard is constructed? (Assuming the kernel
+ *   has drivers for all of the hardware but it does not have
+ *   assumptions on how the hardware is connected together).
+ *
+ * With this test it should be straightforward to determine if a
+ * table entry is required or not.  This should remove much of the
+ * long term compatibility burden as table entries which are
+ * irrelevant or have been replaced by better alternatives may be
+ * dropped.  Of course it is polite and expedient to include extra
+ * table entries and be backwards compatible, but it is not required.
+ */
+
+
+struct lb_header
+{
+	uint8_t  signature[4]; /* LBIO */
+	uint32_t header_bytes;
+	uint32_t header_checksum;
+	uint32_t table_bytes;
+	uint32_t table_checksum;
+	uint32_t table_entries;
+};
+
+/* Every entry in the boot environment list will correspond to a boot
+ * info record, encoding both type and size.  The type is there so you
+ * can tell what it is.  The size allows you to skip that boot
+ * environment record if you don't know what it is.  This allows
+ * forward compatibility with records not yet defined.
+ */
+struct lb_record {
+	uint32_t tag;		/* tag ID */
+	uint32_t size;		/* size of record (in bytes) */
+};
+
+#define LB_TAG_UNUSED	0x0000
+
+#define LB_TAG_MEMORY	0x0001
+
+struct lb_memory_range {
+	uint64_t start;
+	uint64_t size;
+	uint32_t type;
+#define LB_MEM_RAM      1
+#define LB_MEM_RESERVED 2
+	
+};
+
+struct lb_memory {
+	uint32_t tag;
+	uint32_t size;
+	struct lb_memory_range map[0];
+};
+
+#define LB_TAG_HWRPB	0x0002
+struct lb_hwrpb {
+	uint32_t tag;
+	uint32_t size;
+	uint64_t hwrpb;
+};
+
+#endif /* LINUXBIOS_TABLES_H */
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..e1c8470
--- /dev/null
+++ b/main.c
@@ -0,0 +1,973 @@
+/* main.c - MemTest-86  Version 3.5
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Chris Brady
+ */
+#include "stdint.h"
+#include "stddef.h"
+#include "test.h"
+#include "defs.h"
+#include "smp.h"
+#undef TEST_TIMES
+#define DEFTESTS 12
+
+/* The main stack is allocated during boot time. The stack size should
+ * preferably be a multiple of the page size (4 KB).
+ */
+#define STACKSIZE (6*1024)
+#define MAX_MEM   0x1000000	/* 64 GB */
+#define TWO_GB    0x80000	/* 2 GB */
+
+extern void bzero();
+extern ulong rand(int cpu);
+extern void get_mem_speed(int cpu, int ncpus);
+extern void rand_seed(unsigned int seed1, unsigned int seed2, int cpu);
+
+const struct tseq tseq[] = {
+	{-1,  0,   6, 0, "[Address test, walking ones, no cache] "},
+	{-1,  1,   6, 0, "[Address test, own address Sequential] "},
+	{16,  2,   6, 0, "[Address test, own address Parallel]   "},
+	{-1,  3,   6, 0, "[Moving inversions, 1s & 0s Sequential]"},
+	{16,  4,   6, 0, "[Moving inversions, 1s & 0s Parallel]  "},
+	{16,  5,   3, 0, "[Moving inversions, 8 bit pattern]     "},
+	{16,  6,  30, 0, "[Moving inversions, random pattern]    "},
+	{16,  7,  81, 0, "[Block move]                           "},
+	{16,  8,   3, 0, "[Moving inversions, 32 bit pattern]    "},
+	{16,  9,  24, 0, "[Random number sequence]               "},
+        {16, 10,   6, 0, "[Modulo 20, Random pattern]            "},
+	{1,  11, 300, 0, "[Bit fade test, 2 patterns]            "},
+	{0,   0,   0, 0, NULL}
+};
+
+extern struct barrier_s *barr;
+extern unsigned num_cpus;
+volatile int    mstr_cpu;
+volatile int	run_cpus;
+volatile int	test;
+volatile short  cpu_sel = 0;
+bool            smp_mode = TRUE;
+bool	        restart_pending = FALSE;
+uint8_t         volatile stacks[MAX_CPUS][STACKSIZE];
+int bitf_seq = 0;
+
+char cmdline_parsed = 0;
+struct vars variables = {};
+struct vars * const v = &variables;
+
+volatile int bail = 0;
+int test_ticks;
+volatile int segs;
+int nticks;
+ulong high_test_adr;
+
+volatile short start_seq = 0;
+volatile short cpu_mode = CPM_ALL;
+static int c_iter;
+volatile static int window;
+volatile static unsigned long win_next;
+volatile static ulong win0_start;	/* Start test address for window 0 */
+volatile static ulong win1_end;		/* End address for relocation */
+volatile static struct pmap winx;  	/* Window struct for mapping windows */
+
+#if (LOW_TEST_ADR > (400*1024))
+#error LOW_TEST_ADR must be below 400K
+#endif
+
+static int find_ticks_for_test(int test);
+void find_ticks_for_pass(void);
+int find_chunks(int test);
+static void test_setup(void);
+static int compute_segments(struct pmap map);
+int do_test(int cpu);
+
+/* Relocate the test to a new address. Be careful to not overlap! */
+static void run_at(unsigned long addr, int cpu)
+{
+	ulong *ja = (ulong *)(addr + startup_32 - _start);
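+	/* ja points at the copy of the startup_32 entry point inside the
+	 * relocated image at addr. */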
+
+	/* CPU 0, Copy memtest86 code */
+	if (cpu == 0) {
+		memmove((void *)addr, &_start, _end - _start);
+	}
+
+	/* Wait for the copy */
+	barrier();
+
+	/* We use a lock to ensure that only one CPU at a time jumps to
+	 * the new code. Some of the startup stuff is not thread safe! */
+        spin_lock(&barr->mutex);   
+
+	/* Jump to the start address */
+	goto *ja;
+}
+
+/* Switch from the boot stack to the main stack. First the main stack
+ * is allocated, then the contents of the boot stack are copied, then
+ * ESP is adjusted to point to the new stack.  
+ */
+static void
+switch_to_main_stack(unsigned cpu_num)
+{
+	extern uintptr_t boot_stack;
+	extern uintptr_t boot_stack_top; 
+	uintptr_t *src, *dst;
+	int offs;
+	uint8_t * stackAddr, *stackTop;
+   
+	stackAddr = (uint8_t *) &stacks[cpu_num][0];
+
+	stackTop  = stackAddr + STACKSIZE;
+   
+	src = (uintptr_t*)&boot_stack_top;
+	dst = (uintptr_t*)stackTop;
+	do {
+		src--; dst--;
+		*dst = *src;
+	} while ((uintptr_t *)src > (uintptr_t *)&boot_stack);
+
+	offs = (uint8_t *)&boot_stack_top - stackTop;
+	__asm__ __volatile__ (
+	"subl %%eax, %%esp" 
+		: /*no output*/
+		: "a" (offs) : "memory" 
+	);
+}
+
+void restart_internal(int cpu)
+{
+	/* clear variables */
+	smp_mode        = TRUE;
+        restart_pending = FALSE;
+
+	run_at(LOW_TEST_ADR, cpu);
+}
+
+void restart(void)
+{
+	bail++;
+        restart_pending = TRUE;
+}
+
+void initialise_cpus(void)
+{
+        int cpu_num;
+
+        smp_init_bsp();
+
+	/* Initialize the barrier before starting AP's */
+	barrier_init(num_cpus);
+
+        /* let the BSP initialise the APs. */
+        for(cpu_num = 1; cpu_num < num_cpus; cpu_num++) {
+             smp_boot_ap(cpu_num);
+        }
+}
+
+/* command line parsing using the 'old' boot protocol */
+#define MK_PTR(seg,off) ((void*)(((unsigned long)(seg) << 4) + (off)))
+#define OLD_CL_MAGIC_ADDR ((unsigned short*) MK_PTR(INITSEG,0x20))
+#define OLD_CL_MAGIC 0xA33F 
+#define OLD_CL_OFFSET_ADDR ((unsigned short*) MK_PTR(INITSEG,0x22))
+
+static void parse_command_line(void)
+{
+	char *cmdline;
+
+	if (cmdline_parsed)
+		return;
+
+	if (*OLD_CL_MAGIC_ADDR != OLD_CL_MAGIC)
+		return;
+
+	unsigned short offset = *OLD_CL_OFFSET_ADDR;
+	cmdline = MK_PTR(INITSEG, offset);
+
+	/* skip leading spaces */
+	while (*cmdline == ' ')
+		cmdline++;
+
+	while (*cmdline) {
+		if (!strncmp(cmdline, "console=", 8)) {
+			cmdline += 8;
+			serial_console_setup(cmdline);
+		}
+
+		/* go to the next parameter */
+		while (*cmdline && *cmdline != ' ')
+			cmdline++;
+		while (*cmdline == ' ')
+			cmdline++;
+	}
+
+	cmdline_parsed = 1;
+}
+
+/* This is the test entry point. We get here on startup and also whenever
+ * we relocate. */
+void test_start(void)
+{
+	int my_cpu_num, run;
+
+
+	/* First thing, switch to main stack */
+	my_cpu_num = smp_my_cpu_num();
+	switch_to_main_stack(my_cpu_num);
+
+	/* First time initialization */
+	if (start_seq == 0) {
+
+	    /* These steps are only done by the boot cpu */
+	    if (my_cpu_num == 0) {
+		parse_command_line();
+		mem_size();	/* must be called before initialise_cpus(); */
+		initialise_cpus();
+	   	init();
+
+		test = 0;
+	        win_next = 0;
+	        window = 0;
+	        bail = 0;
+	
+		/* Setup base address for testing */
+		win0_start = (LOW_TEST_ADR+(_end - _start)+8191) >> 12;
+
+		/* Set the relocation address to 32 MB if there is enough
+		 * memory. Otherwise set it to 3 MB. */
+		/* A larger reloc address allows for more testing overlap */
+	        if ((ulong)v->pmap[v->msegs-1].end > 0x2f00) {
+			high_test_adr = 0x2000000;
+	        } else {
+			high_test_adr = 0x300000;
+		} 
+		win1_end = (high_test_adr >> 12);
+
+		/* Adjust the map to not test the page at 939k,
+		 *  reserved for locks */
+		v->pmap[0].end--;
+
+		find_ticks_for_pass();
+       	    } else {
+		/* AP only, Register the APs */
+		smp_ap_booted(my_cpu_num);
+	    }
+	} else {
+	    /* Unlock after a relocation restart */
+	    spin_unlock(&barr->mutex);
+	}
+
+	/* A barrier to ensure that all of the CPUs are done with startup */
+	barrier();
+
+	/* Measure memory speed; we do it here because we need all of the
+	 * available CPUs */
+	if (start_seq == 0) {
+		get_mem_speed(my_cpu_num, num_cpus);
+	}
+
+	/* Set the initialized flag only after all of the CPUs have
+	 * reached the barrier. This ensures that relocation has
+	 * been completed for each CPU. */
+	start_seq = 1;
+
+	/* Loop through all tests */
+	while (1) {
+	    /* Skip tests 2 and 4 if we are using only one CPU */
+	    if (tseq[test].pat == 2 || tseq[test].pat == 4) {
+		if (num_cpus == 1 || cpu_mode != CPM_ALL) {
+			test++;
+			continue;
+		}
+	    }
+
+	    test_setup();
+
+	    /* Loop through all possible windows */
+	    while (win_next <= ((ulong)v->pmap[v->msegs-1].end + TWO_GB)) {
+
+		/* Main scheduling barrier */
+		cprint(8, 2*my_cpu_num+7, "W");
+		barrier();
+
+		/* Don't go over the 64 GB PAE limit */
+		if (win_next > MAX_MEM) {
+			break;
+		}
+
+		/* For the bit fade test, #11, we cannot relocate so bump the
+		 * window to 1 */
+		if (tseq[test].pat == 11 && window == 0) {
+			window = 1;
+		}
+
+		/* Relocate if required */
+		if (window != 0 && (ulong)&_start != LOW_TEST_ADR) {
+			run_at(LOW_TEST_ADR, my_cpu_num);
+	        }
+		if (window == 0 && (ulong)&_start == LOW_TEST_ADR) {
+			run_at(high_test_adr, my_cpu_num);
+		}
+
+		/* Decide which CPU(s) to use */
+		run = 1;
+		switch(cpu_mode) {
+		case CPM_RROBIN:
+		case CPM_SEQ:
+			/* Select a single CPU */
+			if (my_cpu_num == cpu_sel) {
+				mstr_cpu = cpu_sel;
+				run_cpus = 1;
+	    		} else {
+				run = 0;
+			}
+			break;
+		case CPM_ALL:
+		    /* Use all CPUs */
+		    if (tseq[test].cpu_sel == -1) {
+			/* Round robin through all of the CPUs */
+			if (my_cpu_num == cpu_sel) {
+				mstr_cpu = cpu_sel;
+				run_cpus = 1;
+	    		} else {
+				run = 0;
+			}
+		    } else {
+			/* Use the number of CPUs specified by the test,
+			 * starting with zero */
+			if (my_cpu_num >= tseq[test].cpu_sel) {
+				run = 0;
+			}
+			/* Set the master CPU to the highest CPU number 
+			 * that has been selected */
+			if (num_cpus < tseq[test].cpu_sel) {
+				mstr_cpu = num_cpus-1;
+				run_cpus = num_cpus;
+			} else {
+				mstr_cpu = tseq[test].cpu_sel-1;
+				run_cpus = tseq[test].cpu_sel;
+			}
+		    }
+		}
+		barrier();
+		dprint(8, 54, run_cpus, 2, 0);
+
+		/* Setup a sub barrier for only the selected CPUs */
+		if (my_cpu_num == mstr_cpu) {
+			s_barrier_init(run_cpus);
+		}
+
+		/* Make sure that the sub barrier is ready before proceeding */
+		barrier();
+
+		/* CPUs that were not selected go back to the scheduling barrier */
+		if (run == 0) {
+			continue;
+		}
+		cprint(8, 2*my_cpu_num+7, "-");
+
+		/* Do we need to exit */
+		if(restart_pending) {
+		    restart_internal(my_cpu_num);
+	 	}
+
+		if (my_cpu_num == mstr_cpu) {
+		    switch (window) {
+		    /* Special case for relocation */
+		    case 0:
+			winx.start = 0;
+			winx.end = win1_end;
+			window++;
+			break;
+		    /* Special case for first 2 GB */
+		    case 1:
+			winx.start = win0_start;
+			winx.end = TWO_GB;
+			win_next += TWO_GB;
+			window++;
+			break;
+		    /* For all other windows */
+		    default:
+			winx.start = win_next;
+			win_next += TWO_GB;
+			winx.end = win_next;
+		    }
+
+	            /* Find the memory areas to test */
+	            segs = compute_segments(winx);
+		}
+		s_barrier();
+
+	        if (segs == 0) {
+		/* No memory in this window so skip it */
+		    continue;
+	        }
+
+		/* map in the window... */
+		if (map_page(v->map[0].pbase_addr) < 0) {
+		    continue;
+		}
+
+		do_test(my_cpu_num);
+		if (bail) {
+		    break;
+		}
+
+            	paging_off();
+
+	    } /* End of window loop */
+
+	    s_barrier();
+
+	    /* Setup for the next set of windows */
+	    win_next = 0;
+	    window = 0;
+	    bail = 0;
+
+	    /* Only the master CPU does the end of test housekeeping */
+	    if (my_cpu_num != mstr_cpu) {
+		continue;
+	    }
+
+	    /* Special handling for the bit fade test #11 */
+	    if (tseq[test].pat == 11 && bitf_seq != 6) {
+		/* Keep going until the sequence is complete. */
+		bitf_seq++;
+		continue;
+	    } else {
+		bitf_seq = 0;
+	    }
+
+	    /* Select advancement of CPUs and next test */
+	    switch(cpu_mode) {
+	    case CPM_RROBIN:
+		if (++cpu_sel >= num_cpus) {
+		    cpu_sel = 0;
+		}
+		test++;
+		break;
+	    case CPM_SEQ:
+		if (++cpu_sel >= num_cpus) {
+		    cpu_sel = 0;
+		    test++;
+		}
+		break;
+	    case CPM_ALL:
+	        if (tseq[test].cpu_sel == -1) {
+		    /* Do the same test for each CPU */
+		    if (++cpu_sel >= num_cpus) {
+		        cpu_sel = 0;
+		        test++;
+		    } else {
+		        continue;
+		    }
+	        } else {
+		    test++;
+		}
+	    }
+
+	    /* If this was the last test then we finished a pass */
+	    if (test >= DEFTESTS ||
+			(v->testsel >= 0 && cpu_sel == (num_cpus-1))) {
+		v->pass++;
+		dprint(LINE_INFO, 55, v->pass, 5, 0);
+		v->total_ticks = 0;
+		find_ticks_for_pass();
+		cprint(1, COL_MID+8,
+			"                                         ");
+		if (v->ecount == 0 && v->testsel < 0) {
+		    cprint(LINE_MSG, COL_MSG,
+			"Pass complete, no errors, press Esc to exit");
+		}
+	    }
+	    if (test >= DEFTESTS) {
+		test = 0;
+	    }
+
+	    bail=0;
+	} /* End test loop */
+}
+
+void test_setup()
+{
+	static int ltest = -1;
+
+	/* Only do the setup if this is a new test */
+	if (test == ltest) {
+		return;
+	}
+	ltest = test;
+
+	/* Now setup the test parameters based on the current test number */
+	if (v->pass == 0) {
+		/* Reduce iterations for first pass */
+		c_iter = tseq[test].iter/3;
+	} else {
+		c_iter = tseq[test].iter;
+	}
+
+	/* Display the iteration count. We only do one third of the */
+	/* iterations on the first pass */
+	dprint(LINE_INFO, 28, c_iter, 3, 0);
+	test_ticks = find_ticks_for_test(test);
+	nticks = 0;
+	v->tptr = 0;
+
+	cprint(LINE_PAT, COL_PAT, "            ");
+	cprint(LINE_PAT, COL_PAT-3, "   ");
+	dprint(LINE_TST, COL_MID+6, test, 2, 1);
+	cprint(LINE_TST, COL_MID+9, tseq[test].msg);
+	cprint(2, COL_MID+8, "                                         ");
+}
+
+/* A couple of static variables used when all CPUs share the same pattern */
+static ulong sp1, sp2;
+
+int do_test(int my_cpu)
+{
+	int i=0, j=0;
+	static int bitf_sleep;
+	unsigned long p0=0, p1=0, p2=0;
+
+	if (my_cpu == mstr_cpu) {
+	    if ((ulong)&_start > LOW_TEST_ADR) {
+		/* Relocated so we need to test all selected lower memory */
+		v->map[0].start = mapping(v->plim_lower);
+		cprint(LINE_PAT, COL_MID+28, " Relocated");
+	    } else {
+		cprint(LINE_PAT, COL_MID+28, "          ");
+	    }
+
+	    /* Update display of memory segments being tested */
+	    p0 = page_of(v->map[0].start);
+	    p1 = page_of(v->map[segs-1].end);
+	    aprint(LINE_RANGE, COL_MID+9, p0);
+	    cprint(LINE_RANGE, COL_MID+14, " - ");
+	    aprint(LINE_RANGE, COL_MID+17, p1);
+	    aprint(LINE_RANGE, COL_MID+25, p1-p0);
+	    cprint(LINE_RANGE, COL_MID+30, " of ");
+	    aprint(LINE_RANGE, COL_MID+34, v->selected_pages);
+	}
+	
+	switch(tseq[test].pat) {
+
+	/* Do the testing according to the selected pattern */
+
+	case 0: /* Address test, walking ones (test #0) */
+		/* Run with cache turned off */
+		set_cache(0);
+		addr_tst1(my_cpu);
+		set_cache(1);
+		BAILOUT;
+		break;
+
+	case 1:
+	case 2: /* Address test, own address (test #1, 2) */
+		addr_tst2(my_cpu);
+		BAILOUT;
+		break;
+
+	case 3:
+	case 4:	/* Moving inversions, all ones and zeros (tests #3, 4) */
+		p1 = 0;
+		p2 = ~p1;
+		s_barrier();
+		movinv1(c_iter,p1,p2,my_cpu);
+		BAILOUT;
+	
+		/* Switch patterns */
+		s_barrier();
+		movinv1(c_iter,p2,p1,my_cpu);
+		BAILOUT;
+		break;
+		
+	case 5: /* Moving inversions, 8 bit walking ones and zeros (test #5) */
+		p0 = 0x80;
+		for (i=0; i<8; i++, p0=p0>>1) {
+			p1 = p0 | (p0<<8) | (p0<<16) | (p0<<24);
+			p2 = ~p1;
+			s_barrier();
+			movinv1(c_iter,p1,p2, my_cpu);
+			BAILOUT;
+	
+			/* Switch patterns */
+			s_barrier();
+			movinv1(c_iter,p2,p1, my_cpu);
+			BAILOUT
+		}
+		break;
+
+	case 6: /* Random Data (test #6) */
+		/* Seed the random number generator */
+		if (my_cpu == mstr_cpu) {
+		    if (v->rdtsc) {
+                	asm __volatile__ ("rdtsc":"=a" (sp1),"=d" (sp2));
+        	    } else {
+                	sp1 = 521288629 + v->pass;
+                	sp2 = 362436069 - v->pass;
+        	    }
+		    rand_seed(sp1, sp2, 0);
+		}
+
+		s_barrier();
+		for (i=0; i < c_iter; i++) {
+			if (my_cpu == mstr_cpu) {
+				sp1 = rand(0);
+				sp2 = ~sp1;	/* complement of the random pattern */
+			}
+			s_barrier();
+			movinv1(2,sp1,sp2, my_cpu);
+			BAILOUT;
+		}
+		break;
+
+	case 7: /* Block move (test #7) */
+		block_move(c_iter, my_cpu);
+		BAILOUT;
+		break;
+
+	case 8: /* Moving inversions, 32 bit shifting pattern (test #8) */
+		for (i=0, p1=1; p1; p1=p1<<1, i++) {
+			s_barrier();
+			movinv32(c_iter,p1, 1, 0x80000000, 0, i, my_cpu);
+			BAILOUT
+			s_barrier();
+			movinv32(c_iter,~p1, 0xfffffffe,
+				0x7fffffff, 1, i, my_cpu);
+			BAILOUT
+		}
+		break;
+
+	case 9: /* Random Data Sequence (test #9) */
+		for (i=0; i < c_iter; i++) {
+			s_barrier();
+			movinvr(my_cpu);
+			BAILOUT;
+		}
+		break;
+
+	case 10: /* Modulo 20 check, Random pattern (test #10) */
+		for (j=0; j<c_iter; j++) {
+			p1 = rand(0);
+			for (i=0; i<MOD_SZ; i++) {
+				p2 = ~p1;
+				s_barrier();
+				modtst(i, 2, p1, p2, my_cpu);
+				BAILOUT
+
+				/* Switch patterns */
+				s_barrier();
+				modtst(i, 2, p2, p1, my_cpu);
+				BAILOUT
+			}
+		}
+		break;
+
+	case 11: /* Bit fade test, fill (test #11) */
+		/* Use a sequence to process all windows for each stage */
+		switch(bitf_seq) {
+		case 0:	/* Fill all of memory 0's */
+			bit_fade_fill(0, my_cpu);
+			bitf_sleep = 1;
+			break;
+		case 1: /* Sleep for the specified time */
+			/* Only sleep once */
+			if (bitf_sleep) {
+				sleep(c_iter, 1, my_cpu);
+				bitf_sleep = 0;
+			}
+			break;
+		case 2: /* Now check all of memory for changes */
+			bit_fade_chk(0, my_cpu);
+			break;
+		case 3:	/* Fill all of memory 1's */
+			bit_fade_fill(-1, my_cpu);
+			bitf_sleep = 1;
+			break;
+		case 4: /* Sleep for the specified time */
+			/* Only sleep once */
+			if (bitf_sleep) {
+				sleep(c_iter, 1, my_cpu);
+				bitf_sleep = 0;
+			}
+			break;
+		case 5: /* Now check all of memory for changes */
+			bit_fade_chk(-1, my_cpu);
+			break;
+		}
+		BAILOUT;
+		break;
+
+	case 90: /* Modulo 20 check, all ones and zeros (unused) */
+		p1=0;
+		for (i=0; i<MOD_SZ; i++) {
+			p2 = ~p1;
+			modtst(i, c_iter, p1, p2, my_cpu);
+			BAILOUT
+
+			/* Switch patterns */
+			p2 = p1;
+			p1 = ~p2;
+			modtst(i, c_iter, p1,p2, my_cpu);
+			BAILOUT
+		}
+		break;
+
+	case 91: /* Modulo 20 check, 8 bit pattern (unused) */
+		p0 = 0x80;
+		for (j=0; j<8; j++, p0=p0>>1) {
+			p1 = p0 | (p0<<8) | (p0<<16) | (p0<<24);
+			for (i=0; i<MOD_SZ; i++) {
+				p2 = ~p1;
+				modtst(i, c_iter, p1, p2, my_cpu);
+				BAILOUT
+
+				/* Switch patterns */
+				p2 = p1;
+				p1 = ~p2;
+				modtst(i, c_iter, p1, p2, my_cpu);
+				BAILOUT
+			}
+		}
+		break;
+	}
+	return(0);
+}
+
+/* Compute number of SPINSZ chunks being tested */
+int find_chunks(int tst) 
+{
+	int i, j, sg, wmax, ch;
+	struct pmap twin={0,0};
+	unsigned long wnxt = TWO_GB;
+	unsigned long len;
+
+	wmax = MAX_MEM/TWO_GB+2;  /* The number of 2 GB segments +2 */
+	/* Compute the number of SPINSZ memory segments */
+	ch = 0;
+	for(j = 0; j < wmax; j++) {
+		/* special case for relocation */
+		if (j == 0) {
+			twin.start = 0;
+			twin.end = win1_end;
+		}
+
+		/* special case for first 2 GB */
+		if (j == 1) {
+			twin.start = win0_start;
+			twin.end = TWO_GB;
+		}
+
+		/* For all other windows */
+		if (j > 1) {
+			twin.start = wnxt;
+			wnxt += TWO_GB;
+			twin.end = wnxt;
+		}
+
+	        /* Find the memory areas I am going to test */
+		sg = compute_segments(twin);
+		for(i = 0; i < sg; i++) {
+			len = v->map[i].end - v->map[i].start;
+
+			if (cpu_mode == CPM_ALL && num_cpus > 1) {
+				switch(tseq[tst].pat) {
+				case 2:
+				case 4:
+				case 5:
+				case 6:
+				case 9:
+				case 10:
+				    len /= num_cpus;
+				    break;
+				case 7:
+				case 8:
+				    len /= (num_cpus & 0xe);
+				    break;
+				}
+			}
+			ch += (len + SPINSZ -1)/SPINSZ;
+		}
+	}
+	return(ch);
+}
+
+/* Compute the total number of ticks per pass */
+void find_ticks_for_pass(void)
+{
+	int i;
+
+	v->pptr = 0;
+	v->pass_ticks=0;
+	if (v->testsel >= 0) {
+		v->pass_ticks = find_ticks_for_test(v->testsel);
+	} else {
+		for (i=0; i<DEFTESTS; i++) {
+			/* Skip tests 2 and 4 if we are using 1 cpu */
+			if (num_cpus == 1 && (i == 2 || i == 4)) { 
+			    continue;
+			}
+			v->pass_ticks += find_ticks_for_test(i);
+		}
+	}
+}
+
+static int find_ticks_for_test(int tst)
+{
+	int ticks=0, c, ch;
+
+	/* Determine the number of chunks for this test */
+	ch = find_chunks(tst);
+
+	/* Set the number of iterations. We only do 1/3 of the iterations */
+        /* on the first pass */
+	if (v->pass == 0) {
+		c = tseq[tst].iter/3;
+	} else {
+		c = tseq[tst].iter;
+	}
+
+	switch(tseq[tst].pat) {
+	case 0: /* Address test, walking ones */
+		ticks = 2;
+		break;
+	case 1: /* Address test, own address */
+	case 2:
+		ticks = 2;
+		break;
+	case 3: /* Moving inversions, all ones and zeros */
+	case 4:
+		ticks = 2 + 4 * c;
+		break;
+	case 5: /* Moving inversions, 8 bit walking ones and zeros */
+		ticks = 24 + 24 * c;
+		break;
+	case 6: /* Random Data */
+		ticks = c + 4 * c;
+		break;
+	case 7: /* Block move */
+		ticks = (ch + ch/num_cpus + c*ch);
+		break;
+	case 8: /* Moving inversions, 32 bit shifting pattern */
+		ticks = (1 + c * 2) * 64;
+		break;
+	case 9: /* Random Data Sequence */
+		ticks = 3 * c;
+		break;
+	case 10: /* Modulo 20 check, Random pattern */
+		ticks = 4 * 40 * c;
+		break;
+	case 11: /* Bit fade test */
+		ticks = c * 2 + 4 * ch;
+		break;
+	case 90: /* Modulo 20 check, all ones and zeros (unused) */
+		ticks = (2 + c) * 40;
+		break;
+	case 91: /* Modulo 20 check, 8 bit pattern (unused) */
+		ticks = (2 + c) * 40 * 8;
+		break;
+	}
+	if (cpu_mode == CPM_SEQ || tseq[tst].cpu_sel == -1) {
+		ticks *= num_cpus;
+	}
+	if (tseq[tst].pat == 7 || tseq[tst].pat == 11) {
+		return ticks;
+	}
+	return ticks*ch;
+}
+
+static int compute_segments(struct pmap win)
+{
+	unsigned long wstart, wend;
+	int i, sg;
+
+	/* Compute the window I am testing memory in */
+	wstart = win.start;
+	wend = win.end;
+	sg = 0;
+
+	/* Now reduce my window to the area of memory I want to test */
+	if (wstart < v->plim_lower) {
+		wstart = v->plim_lower;
+	}
+	if (wend > v->plim_upper) {
+		wend = v->plim_upper;
+	}
+	if (wstart >= wend) {
+		return(0);
+	}
+	/* List the segments being tested */
+	for (i=0; i< v->msegs; i++) {
+		unsigned long start, end;
+		start = v->pmap[i].start;
+		end = v->pmap[i].end;
+		if (start <= wstart) {
+			start = wstart;
+		}
+		if (end >= wend) {
+			end = wend;
+		}
+#if 0
+		cprint(LINE_SCROLL+(2*i), 0, " (");
+		hprint(LINE_SCROLL+(2*i), 2, start);
+		cprint(LINE_SCROLL+(2*i), 10, ", ");
+		hprint(LINE_SCROLL+(2*i), 12, end);
+		cprint(LINE_SCROLL+(2*i), 20, ") ");
+
+		cprint(LINE_SCROLL+(2*i), 22, "r(");
+		hprint(LINE_SCROLL+(2*i), 24, wstart);
+		cprint(LINE_SCROLL+(2*i), 32, ", ");
+		hprint(LINE_SCROLL+(2*i), 34, wend);
+		cprint(LINE_SCROLL+(2*i), 42, ") ");
+
+		cprint(LINE_SCROLL+(2*i), 44, "p(");
+		hprint(LINE_SCROLL+(2*i), 46, v->plim_lower);
+		cprint(LINE_SCROLL+(2*i), 54, ", ");
+		hprint(LINE_SCROLL+(2*i), 56, v->plim_upper);
+		cprint(LINE_SCROLL+(2*i), 64, ") ");
+
+		cprint(LINE_SCROLL+(2*i+1),  0, "w(");
+		hprint(LINE_SCROLL+(2*i+1),  2, win.start);
+		cprint(LINE_SCROLL+(2*i+1), 10, ", ");
+		hprint(LINE_SCROLL+(2*i+1), 12, win.end);
+		cprint(LINE_SCROLL+(2*i+1), 20, ") ");
+
+		cprint(LINE_SCROLL+(2*i+1), 22, "m(");
+		hprint(LINE_SCROLL+(2*i+1), 24, v->pmap[i].start);
+		cprint(LINE_SCROLL+(2*i+1), 32, ", ");
+		hprint(LINE_SCROLL+(2*i+1), 34, v->pmap[i].end);
+		cprint(LINE_SCROLL+(2*i+1), 42, ") ");
+
+		cprint(LINE_SCROLL+(2*i+1), 44, "i=");
+		hprint(LINE_SCROLL+(2*i+1), 46, i);
+		
+		cprint(LINE_SCROLL+(2*i+2), 0, 
+			"                                        "
+			"                                        ");
+		cprint(LINE_SCROLL+(2*i+3), 0, 
+			"                                        "
+			"                                        ");
+#endif
+		if ((start < end) && (start < wend) && (end > wstart)) {
+			v->map[sg].pbase_addr = start;
+			v->map[sg].start = mapping(start);
+			v->map[sg].end = emapping(end);
+#if 0
+		hprint(LINE_SCROLL+(sg+1), 0, sg);
+		hprint(LINE_SCROLL+(sg+1), 12, v->map[sg].pbase_addr);
+		hprint(LINE_SCROLL+(sg+1), 22, start);
+		hprint(LINE_SCROLL+(sg+1), 32, end);
+		hprint(LINE_SCROLL+(sg+1), 42, mapping(start));
+		hprint(LINE_SCROLL+(sg+1), 52, emapping(end));
+		cprint(LINE_SCROLL+(sg+2), 0, 
+			"                                        "
+			"                                        ");
+#endif
+#if 0
+		cprint(LINE_SCROLL+(2*i+1), 54, ", sg=");
+		hprint(LINE_SCROLL+(2*i+1), 59, sg);
+#endif
+			sg++;
+		}
+	}
+	return (sg);
+}
+
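
For reference, a minimal standalone sketch of the window arithmetic used by test_start() and find_chunks() above: window 0 covers the low relocation region, window 1 covers the rest of the first 2 GB, and every further window is a successive 2 GB range up to the 64 GB PAE limit. The MAX_MEM and TWO_GB constants mirror the defines in main.c (values are in 4 KB pages); win0_start and win1_end are illustrative example values, not the ones computed at run time.

#include <stdio.h>

#define MAX_MEM  0x1000000UL	/* 64 GB in 4 KB pages */
#define TWO_GB   0x80000UL	/* 2 GB in 4 KB pages */

int main(void)
{
	unsigned long win0_start = 0x30;	/* example: first page after the relocated code */
	unsigned long win1_end   = 0x2000;	/* example: 32 MB relocation address, in pages */
	unsigned long start, end, next = TWO_GB;
	int window;

	for (window = 0; ; window++) {
		if (window == 0) {		/* special case for relocation */
			start = 0;
			end = win1_end;
		} else if (window == 1) {	/* special case for the first 2 GB */
			start = win0_start;
			end = TWO_GB;
		} else {			/* all other windows: successive 2 GB ranges */
			start = next;
			next += TWO_GB;
			end = next;
		}
		if (start >= MAX_MEM)		/* stay below the 64 GB PAE limit */
			break;
		printf("window %2d: pages 0x%08lx - 0x%08lx\n", window, start, end);
	}
	return 0;
}
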
diff --git a/makeiso.sh b/makeiso.sh
new file mode 100755
index 0000000..a660c47
--- /dev/null
+++ b/makeiso.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+
+# check to see if the correct tools are installed
+for X in wc mkisofs
+do
+	if [ "$(which $X)" = "" ]; then
+		echo "makeiso.sh error: $X is not in your path." >&2
+		exit 1
+	elif [ ! -x $(which $X) ]; then
+		echo "makeiso.sh error: $X is not executable." >&2
+		exit 1
+	fi 
+done
+
+# check that memtest.bin is present
+if [ ! -f memtest.bin ]; then
+	echo "makeiso.sh error: cannot find memtest.bin, did you compile it?" >&2 
+	exit 1
+fi
+
+# pad memtest.bin up to a full 1.44 MB floppy image (1474560 bytes)
+SIZE=$(wc -c memtest.bin | awk '{print $1}')
+FILL=$((1474560 - $SIZE))
+dd if=/dev/zero of=fill.tmp bs=$FILL count=1
+cat memtest.bin fill.tmp >memtest.img
+rm -f fill.tmp
+
+echo "Generating iso image ..."
+
+mkdir "cd"
+mkdir "cd/boot"
+mv memtest.img cd/boot
+cd cd
+
+# Create the cd.README
+echo -e "Memtest86 is located on the bootsector of this CD\r\r\n" > README.TXT
+echo -e "Just boot from this CD and Memtest86 will start" >> README.TXT
+
+mkisofs -p "Memtest86 4.0" -publisher "Chris S. Brady - BradyTech Inc." -b boot/memtest.img -c boot/boot.catalog -V "MT350" -o memtest.iso .
+mv memtest.iso ..
+cd ..
+rm -rf cd
+
+echo "Done"
diff --git a/memsize.c b/memsize.c
new file mode 100644
index 0000000..9833ba3
--- /dev/null
+++ b/memsize.c
@@ -0,0 +1,338 @@
+/* memsize.c - MemTest-86  Version 3.3
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Chris Brady
+ */
+
+#include "test.h"
+#include "defs.h"
+#include "config.h"
+
+short e820_nr;
+short memsz_mode = SZ_MODE_BIOS;
+
+static ulong alt_mem_k;
+static ulong ext_mem_k;
+static struct e820entry e820[E820MAX];
+
+ulong p1, p2;
+ulong *p;
+
+static void sort_pmap(void);
+static void memsize_bios(void);
+static void memsize_820(void);
+static void memsize_801(void);
+static int sanitize_e820_map(struct e820entry *orig_map,
+	struct e820entry *new_bios, short old_nr);
+static void memsize_linuxbios();
+
+/*
+ * Find out how much memory there is.
+ */
+void mem_size(void)
+{
+	int i, flag=0;
+	v->test_pages = 0;
+
+	/* Get the memory size from the BIOS */
+        /* Determine the memory map */
+	if (query_linuxbios()) {
+		flag = 1;
+	} else if (query_pcbios()) {
+		flag = 2;
+	}
+
+	/* On the first time through only */
+	/* Make a copy of the memory info table so that we can re-evaluate */
+	/* the memory map later */
+	if (e820_nr == 0 && alt_mem_k == 0 && ext_mem_k == 0) {
+		ext_mem_k = mem_info.e88_mem_k;
+		alt_mem_k = mem_info.e801_mem_k;
+		e820_nr   = mem_info.e820_nr;
+		for (i=0; i< mem_info.e820_nr; i++) {
+			e820[i].addr = mem_info.e820[i].addr;
+			e820[i].size = mem_info.e820[i].size;
+			e820[i].type = mem_info.e820[i].type;
+		}
+	}
+	if (flag == 1) {
+		memsize_linuxbios();
+	} else if (flag == 2) {
+		memsize_820();
+	}
+
+	/* Guarantee that pmap entries are in ascending order */
+	sort_pmap();
+	v->plim_lower = 0;
+	v->plim_upper = v->pmap[v->msegs-1].end;
+
+	adj_mem();
+}
+
+static void sort_pmap(void)
+{
+	int i, j;
+	/* Do an insertion sort on the pmap; on an already sorted
+	 * list this is an O(n) algorithm.
+	 */
+	for(i = 0; i < v->msegs; i++) {
+		/* Find where to insert the current element */
+		for(j = i -1; j >= 0; j--) {
+			if (v->pmap[i].start > v->pmap[j].start) {
+				j++;
+				break;
+			}
+		}
+		/* If pmap[i] is the smallest so far it goes to the front */
+		if (j < 0) {
+			j = 0;
+		}
+		/* Insert the current element */
+		if (i != j) {
+			struct pmap temp;
+			temp = v->pmap[i];
+			memmove(&v->pmap[j+1], &v->pmap[j],
+				(i - j) * sizeof(temp));
+			v->pmap[j] = temp;
+		}
+	}
+}
+static void memsize_linuxbios(void)
+{
+	int i, n;
+	/* Build the memory map for testing */
+	n = 0;
+	for (i=0; i < e820_nr; i++) {
+		unsigned long long end;
+
+		if (e820[i].type != E820_RAM) {
+			continue;
+		}
+		end = e820[i].addr;
+		end += e820[i].size;
+		v->pmap[n].start = (e820[i].addr + 4095) >> 12;
+		v->pmap[n].end = end >> 12;
+		v->test_pages += v->pmap[n].end - v->pmap[n].start;
+		n++;
+	}
+	v->msegs = n;
+}
+static void memsize_820()
+{
+	int i, n, nr;
+	struct e820entry nm[E820MAX];
+	unsigned long long start;
+	unsigned long long end;
+
+	/* Clean up, adjust and copy the BIOS-supplied E820-map. */
+	nr = sanitize_e820_map(e820, nm, e820_nr);
+
+	/* If there is not a good 820 map use the BIOS 801/88 info */
+	if (nr < 1 || nr > E820MAX) {
+		memsize_801();
+		return;
+	}
+
+	/* Build the memory map for testing */
+	n = 0;
+	for (i=0; i<nr; i++) {
+		if (nm[i].type == E820_RAM || nm[i].type == E820_ACPI) {
+			start = nm[i].addr;
+			end = start + nm[i].size;
+
+			/* Don't ever use memory between 640 and 1024k */
+			if (start > RES_START && start < RES_END) {
+				if (end < RES_END) {
+					continue;
+				}
+				start = RES_END;
+			}
+			if (end > RES_START && end < RES_END) {
+				end = RES_START;
+			}
+			v->pmap[n].start = (start + 4095) >> 12;
+			v->pmap[n].end = end >> 12;
+			v->test_pages += v->pmap[n].end - v->pmap[n].start;
+			n++;
+		}
+	}
+	v->msegs = n;
+}
+	
+static void memsize_801(void)
+{
+	ulong mem_size;
+
+	/* compare results from 88 and 801 methods and take the greater */
+	/* These sizes are for extended memory in 1k units. */
+
+	if (alt_mem_k < ext_mem_k) {
+		mem_size = ext_mem_k;
+	} else {
+		mem_size = alt_mem_k;
+	}
+	/* First we map in the first 640k */
+	v->pmap[0].start = 0;
+	v->pmap[0].end = RES_START >> 12;
+	v->test_pages = RES_START >> 12;
+
+	/* Now the extended memory */
+	v->pmap[1].start = (RES_END + 4095) >> 12;
+	v->pmap[1].end = (mem_size + 1024) >> 2;
+	v->test_pages += mem_size >> 2;
+	v->msegs = 2;
+}
+
+/*
+ * Sanitize the BIOS e820 map.
+ *
+ * Some e820 responses include overlapping entries.  The following 
+ * replaces the original e820 map with a new one, removing overlaps.
+ *
+ */
+static int sanitize_e820_map(struct e820entry *orig_map, struct e820entry *new_bios,
+	short old_nr)
+{
+	struct change_member {
+		struct e820entry *pbios; /* pointer to original bios entry */
+		unsigned long long addr; /* address for this change point */
+	};
+	struct change_member change_point_list[2*E820MAX];
+	struct change_member *change_point[2*E820MAX];
+	struct e820entry *overlap_list[E820MAX];
+	struct e820entry biosmap[E820MAX];
+	struct change_member *change_tmp;
+	ulong current_type, last_type;
+	unsigned long long last_addr;
+	int chgidx, still_changing;
+	int overlap_entries;
+	int new_bios_entry;
+	int i;
+
+	/*
+		Visually we're performing the following (1,2,3,4 = memory types)...
+		Sample memory map (w/overlaps):
+		   ____22__________________
+		   ______________________4_
+		   ____1111________________
+		   _44_____________________
+		   11111111________________
+		   ____________________33__
+		   ___________44___________
+		   __________33333_________
+		   ______________22________
+		   ___________________2222_
+		   _________111111111______
+		   _____________________11_
+		   _________________4______
+
+		Sanitized equivalent (no overlap):
+		   1_______________________
+		   _44_____________________
+		   ___1____________________
+		   ____22__________________
+		   ______11________________
+		   _________1______________
+		   __________3_____________
+		   ___________44___________
+		   _____________33_________
+		   _______________2________
+		   ________________1_______
+		   _________________4______
+		   ___________________2____
+		   ____________________33__
+		   ______________________4_
+	*/
+	/* First make a copy of the map */
+	for (i=0; i<old_nr; i++) {
+		biosmap[i].addr = orig_map[i].addr;
+		biosmap[i].size = orig_map[i].size;
+		biosmap[i].type = orig_map[i].type;
+	}
+
+	/* bail out if we find any unreasonable addresses in bios map */
+	for (i=0; i<old_nr; i++) {
+		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
+			return 0;
+	}
+
+	/* create pointers for initial change-point information (for sorting) */
+	for (i=0; i < 2*old_nr; i++)
+		change_point[i] = &change_point_list[i];
+
+	/* record all known change-points (starting and ending addresses) */
+	chgidx = 0;
+	for (i=0; i < old_nr; i++)	{
+		change_point[chgidx]->addr = biosmap[i].addr;
+		change_point[chgidx++]->pbios = &biosmap[i];
+		change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
+		change_point[chgidx++]->pbios = &biosmap[i];
+	}
+
+	/* sort change-point list by memory addresses (low -> high) */
+	still_changing = 1;
+	while (still_changing)	{
+		still_changing = 0;
+		for (i=1; i < 2*old_nr; i++)  {
+			/* if <current_addr> > <last_addr>, swap */
+			/* or, if current=<start_addr> & last=<end_addr>, swap */
+			if ((change_point[i]->addr < change_point[i-1]->addr) ||
+				((change_point[i]->addr == change_point[i-1]->addr) &&
+				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
+				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
+			   )
+			{
+				change_tmp = change_point[i];
+				change_point[i] = change_point[i-1];
+				change_point[i-1] = change_tmp;
+				still_changing=1;
+			}
+		}
+	}
+
+	/* create a new bios memory map, removing overlaps */
+	overlap_entries=0;	 /* number of entries in the overlap table */
+	new_bios_entry=0;	 /* index for creating new bios map entries */
+	last_type = 0;		 /* start with undefined memory type */
+	last_addr = 0;		 /* start with 0 as last starting address */
+	/* loop through change-points, determining effect on the new bios map */
+	for (chgidx=0; chgidx < 2*old_nr; chgidx++)
+	{
+		/* keep track of all overlapping bios entries */
+		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
+		{
+			/* add map entry to overlap list (> 1 entry implies an overlap) */
+			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
+		}
+		else
+		{
+			/* remove entry from list (order independent, so swap with last) */
+			for (i=0; i<overlap_entries; i++)
+			{
+				if (overlap_list[i] == change_point[chgidx]->pbios)
+					overlap_list[i] = overlap_list[overlap_entries-1];
+			}
+			overlap_entries--;
+		}
+		/* if there are overlapping entries, decide which "type" to use */
+		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
+		current_type = 0;
+		for (i=0; i<overlap_entries; i++)
+			if (overlap_list[i]->type > current_type)
+				current_type = overlap_list[i]->type;
+		/* continue building up new bios map based on this information */
+		if (current_type != last_type)	{
+			if (last_type != 0)	 {
+				new_bios[new_bios_entry].size =
+					change_point[chgidx]->addr - last_addr;
+				/* move forward only if the new size was non-zero */
+				if (new_bios[new_bios_entry].size != 0)
+					if (++new_bios_entry >= E820MAX)
+						break; 	/* no more space left for new bios entries */
+			}
+			if (current_type != 0)	{
+				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
+				new_bios[new_bios_entry].type = current_type;
+				last_addr=change_point[chgidx]->addr;
+			}
+			last_type = current_type;
+		}
+	}
+	return(new_bios_entry);
+}
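
For reference, a small standalone sketch of the address-to-page conversion that memsize_820() and memsize_linuxbios() above apply to each usable e820 range: the start is rounded up to a page boundary and the end is rounded down, so partial pages at either edge are never tested. The byte range used here is an invented example.

#include <stdio.h>

int main(void)
{
	unsigned long long addr = 0x100000ULL;		/* example range: starts at 1 MB */
	unsigned long long size = 0x3fe00800ULL;	/* example: ~1 GB, not page aligned */
	unsigned long long end  = addr + size;

	unsigned long start_page = (unsigned long)((addr + 4095) >> 12);	/* round up */
	unsigned long end_page   = (unsigned long)(end >> 12);			/* round down */

	printf("pmap entry: pages 0x%lx - 0x%lx (%lu pages tested)\n",
	       start_page, end_page, end_page - start_page);
	return 0;
}
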
diff --git a/memtest.bin.lds b/memtest.bin.lds
new file mode 100644
index 0000000..702cdb1
--- /dev/null
+++ b/memtest.bin.lds
@@ -0,0 +1,15 @@
+OUTPUT_FORMAT("binary")
+OUTPUT_ARCH("i386")
+
+ENTRY(_main);
+SECTIONS {
+	. = 0;
+	.bootsect : { *(.bootsect) }
+	.setup : { *(.setup) }
+	.memtest : { 
+		_start = . ;
+		*(.data) 
+		_end = . ;
+	}
+	_syssize = (_end - _start + 15) >> 4;
+}
diff --git a/memtest.lds b/memtest.lds
new file mode 100644
index 0000000..bbb190a
--- /dev/null
+++ b/memtest.lds
@@ -0,0 +1,11 @@
+OUTPUT_FORMAT("elf32-i386");
+OUTPUT_ARCH(i386);
+
+ENTRY(_start); 
+SECTIONS {
+	. = 0x10000;
+	_start = . ;
+	.data : {
+		*(.data)
+	}
+}
diff --git a/memtest_shared.lds b/memtest_shared.lds
new file mode 100644
index 0000000..603f012
--- /dev/null
+++ b/memtest_shared.lds
@@ -0,0 +1,53 @@
+OUTPUT_FORMAT("elf32-i386");
+OUTPUT_ARCH(i386);
+
+ENTRY(startup_32); 
+SECTIONS {
+	. = 0;
+	.text : {
+		_start = .;
+		*(.text)
+		*(.text.*)
+		*(.plt)
+		_etext = . ;
+	} = 0x9090
+	.rodata : {
+		*(.rodata)
+		*(.rodata.*)
+	}
+	.dynsym     : { *(.dynsym) }
+	.dynstr     : { *(.dynstr) }
+	.hash       : { *(.hash) }
+	.gnu.hash   : { *(.gnu.hash) }
+	.dynamic    : { *(.dynamic) }
+
+	.rel.text    : { *(.rel.text   .rel.text.*) }
+	.rel.rodata  : { *(.rel.rodata .rel.rodata.*) }
+	.rel.data    : { *(.rel.data   .rel.data.*) }
+	.rel.got     : { *(.rel.got    .rel.got.*) }
+	.rel.plt     : { *(.rel.plt    .rel.plt.*) }
+
+	. = ALIGN(4);
+	.data : {
+		 _data = .; 
+		*(.data) 
+		*(.data.*) 
+	}
+	.got : {
+		*(.got.plt)
+		*(.got)
+		_edata = . ;
+	}
+	. = ALIGN(4);
+	.bss : { 
+		_bss = .;
+		*(.dynbss)
+		*(.bss) 
+		*(.bss.*) 
+		*(COMMON) 
+		/* _end must be at least 256 byte aligned */
+		. = ALIGN(256); 
+		_end = .;
+	}
+	/DISCARD/ : { *(*) }	
+}
diff --git a/mkusb_img.sh b/mkusb_img.sh
new file mode 100755
index 0000000..d7c3a07
--- /dev/null
+++ b/mkusb_img.sh
@@ -0,0 +1,74 @@
+#!/bin/ksh
+
+DEV="/dev/sdd"
+IDIR=/ptmp/usb
+ISOF=/bradytech/src/boot_files
+BINDIR=/bradytech/src/bin
+NSECT=64	# Number of sectors
+NHDS=32		# Number of heads
+NCYL=1		# Number of cylinders
+NBLKS=2050	# Number of blocks (NSECT*NHDS*NCYL+2)
+
+dir=/bradytech/src/memtest86-3.6
+
+print "Insert USB Key"
+print "Creating image on device $DEV"
+print "Is this correct?"
+read ans
+if [ "$ans" != "y" ]
+then
+	exit
+fi
+
+$BINDIR/mkdiskimage -4 $DEV $NCYL $NSECT $NHDS
+sync
+sleep 2
+
+mkfs.msdos ${DEV}4 
+$BINDIR/syslinux ${DEV}4 
+mkdir $IDIR
+mount ${DEV}4 $IDIR
+
+# Copy boot files
+cd $ISOF
+cp boot.txt $IDIR
+cp boot.cfg $IDIR/syslinux.cfg
+cd -
+
+# Create std boot images
+cd $dir
+make
+cp memtest.bin $IDIR/memtest
+make smp
+cp memtest.bin $IDIR/memtest.smp
+cd -
+
+umount $IDIR
+
+# Create image
+dd if=$DEV count=$NBLKS of=memtest86-3.6.usb
+cat <<EOF>README
+Installation instructions for the Memtest86 USB key image (Linux Only).
+=======================================================================
+
+1) Insert a USB key into a USB slot.
+2) Determine which device the USB key is assigned as (e.g. /dev/sdc).
+3) As root type: dd if=memtest86-3.6.usb of=dev where dev is the device
+the key is assigned to. Use the base device (e.g. /dev/sdc) not a partition
+designation (e.g. /dev/sdc1).
+
+Warning: all data on the USB key will be lost. 
+Warning Warning: Make sure that the device used in the dd command above
+is correct!!!
+EOF
+tar cvzf memtest86-3.6.usb.tgz memtest86-3.6.usb README
+chown cbrady memtest86-3.6.usb.tgz
+rm memtest86-3.6.usb README
+
+cd -
+# Clean up
+rm -rf $IDIR
+cd $dir
+make clean
+print "## Done!"
+
diff --git a/msr.h b/msr.h
new file mode 100644
index 0000000..c904d50
--- /dev/null
+++ b/msr.h
@@ -0,0 +1,124 @@
+#ifndef __ASM_MSR_H
+#define __ASM_MSR_H
+
+/*
+ * Access to machine-specific registers (available on 586 and better only)
+ * Note: the rd* operations modify the parameters directly (without using
+ * pointer indirection), this allows gcc to optimize better
+ */
+
+#define rdmsr(msr,val1,val2) \
+     __asm__ __volatile__("rdmsr" \
+			  : "=a" (val1), "=d" (val2) \
+			  : "c" (msr))
+
+#define wrmsr(msr,val1,val2) \
+     __asm__ __volatile__("wrmsr" \
+			  : /* no outputs */ \
+			  : "c" (msr), "a" (val1), "d" (val2))
+
+#define rdtsc(low,high) \
+     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+
+#define rdtscl(low) \
+     __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
+
+#define rdtscll(val) \
+     __asm__ __volatile__("rdtsc" : "=A" (val))
+
+#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+
+#define rdpmc(counter,low,high) \
+     __asm__ __volatile__("rdpmc" \
+			  : "=a" (low), "=d" (high) \
+			  : "c" (counter))
+
+/* symbolic names for some interesting MSRs */
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR		0
+#define MSR_IA32_P5_MC_TYPE		1
+#define MSR_IA32_PLATFORM_ID		0x17
+#define MSR_IA32_EBL_CR_POWERON		0x2a
+
+#define MSR_IA32_APICBASE		0x1b
+#define MSR_IA32_APICBASE_BSP		(1<<8)
+#define MSR_IA32_APICBASE_ENABLE	(1<<11)
+#define MSR_IA32_APICBASE_BASE		(0xfffff<<12)
+
+#define MSR_IA32_UCODE_WRITE		0x79
+#define MSR_IA32_UCODE_REV		0x8b
+
+#define MSR_IA32_BBL_CR_CTL		0x119
+
+#define MSR_IA32_MCG_CAP		0x179
+#define MSR_IA32_MCG_STATUS		0x17a
+#define MSR_IA32_MCG_CTL		0x17b
+
+#define MSR_IA32_THERM_CONTROL		0x19a
+#define MSR_IA32_THERM_INTERRUPT	0x19b
+#define MSR_IA32_THERM_STATUS		0x19c
+#define MSR_IA32_MISC_ENABLE		0x1a0
+
+#define MSR_IA32_DEBUGCTLMSR		0x1d9
+#define MSR_IA32_LASTBRANCHFROMIP	0x1db
+#define MSR_IA32_LASTBRANCHTOIP		0x1dc
+#define MSR_IA32_LASTINTFROMIP		0x1dd
+#define MSR_IA32_LASTINTTOIP		0x1de
+
+#define MSR_IA32_MC0_CTL		0x400
+#define MSR_IA32_MC0_STATUS		0x401
+#define MSR_IA32_MC0_ADDR		0x402
+#define MSR_IA32_MC0_MISC		0x403
+
+#define MSR_P6_PERFCTR0			0xc1
+#define MSR_P6_PERFCTR1			0xc2
+#define MSR_P6_EVNTSEL0			0x186
+#define MSR_P6_EVNTSEL1			0x187
+
+#define MSR_IA32_PERF_STATUS		0x198
+#define MSR_IA32_PERF_CTL		0x199
+
+/* AMD Defined MSRs */
+#define MSR_K6_EFER			0xC0000080
+#define MSR_K6_STAR			0xC0000081
+#define MSR_K6_WHCR			0xC0000082
+#define MSR_K6_UWCCR			0xC0000085
+#define MSR_K6_EPMR			0xC0000086
+#define MSR_K6_PSOR			0xC0000087
+#define MSR_K6_PFIR			0xC0000088
+
+#define MSR_K7_EVNTSEL0			0xC0010000
+#define MSR_K7_PERFCTR0			0xC0010004
+#define MSR_K7_HWCR			0xC0010015
+#define MSR_K7_CLK_CTL			0xC001001b
+#define MSR_K7_FID_VID_CTL		0xC0010041
+#define MSR_K7_VID_STATUS		0xC0010042
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1			0x107
+#define MSR_IDT_FCR2			0x108
+#define MSR_IDT_FCR3			0x109
+#define MSR_IDT_FCR4			0x10a
+
+#define MSR_IDT_MCR0			0x110
+#define MSR_IDT_MCR1			0x111
+#define MSR_IDT_MCR2			0x112
+#define MSR_IDT_MCR3			0x113
+#define MSR_IDT_MCR4			0x114
+#define MSR_IDT_MCR5			0x115
+#define MSR_IDT_MCR6			0x116
+#define MSR_IDT_MCR7			0x117
+#define MSR_IDT_MCR_CTRL		0x120
+
+/* VIA Cyrix defined MSRs*/
+#define MSR_VIA_FCR			0x1107
+#define MSR_VIA_LONGHAUL		0x110a
+#define MSR_VIA_BCR2			0x1147
+
+/* Transmeta defined MSRs */
+#define MSR_TMTA_LONGRUN_CTRL		0x80868010
+#define MSR_TMTA_LONGRUN_FLAGS		0x80868011
+#define MSR_TMTA_LRTI_READOUT		0x80868018
+#define MSR_TMTA_LRTI_VOLT_MHZ		0x8086801a
+
+#endif /* __ASM_MSR_H */
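
For reference, a short sketch of how the rdmsr() macro above is meant to be used: the low and high 32-bit halves of the register are written straight into the named variables, with no pointer indirection. The MSR chosen (MSR_IA32_APICBASE) is just one of the constants defined above; like the rest of memtest this can only execute at ring 0.

#include "msr.h"

unsigned long read_apicbase(void)
{
	unsigned long low, high;

	rdmsr(MSR_IA32_APICBASE, low, high);	/* edx:eax <- MSR 0x1b */
	return low & MSR_IA32_APICBASE_BASE;	/* keep the physical base bits (12-31) */
}
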
diff --git a/patn.c b/patn.c
new file mode 100644
index 0000000..0c5b490
--- /dev/null
+++ b/patn.c
@@ -0,0 +1,144 @@
+/* Pattern extension for memtest86
+ *
+ * Generates patterns for the Linux kernel's BadRAM extension that avoids
+ * allocation of faulty pages.
+ *
+ * Released under version 2 of the Gnu Public License.
+ *
+ * By Rick van Rein, vanrein@zonnet.nl
+ * ----------------------------------------------------
+ * MemTest86+ V1.60 Specific code (GPL V2.0)
+ * By Samuel DEMEULEMEESTER, sdemeule@memtest.org
+ * http://www.x86-secret.com - http://www.memtest.org 
+ */
+
+
+#include "test.h"
+
+
+/*
+ * DEFAULT_MASK covers a longword, since that is the testing granularity.
+ */
+#define DEFAULT_MASK ((~0L) << 2)
+
+
+/* extern struct vars *v; */
+
+
+/* What it does:
+ *  - Keep track of a number of BadRAM patterns in an array;
+ *  - Combine new faulty addresses with it whenever possible;
+ *  - Keep masks as selective as possible by minimising resulting faults;
+ *  - Print a new pattern only when the pattern array is changed.
+ */
+
+#define COMBINE_MASK(a,b,c,d) ((a & b & c & d) | (~a & b & ~c & d))
+
+/* Combine two adr/mask pairs to one adr/mask pair.
+ */
+void combine (ulong adr1, ulong mask1, ulong adr2, ulong mask2,
+		ulong *adr, ulong *mask) {
+
+	*mask = COMBINE_MASK (adr1, mask1, adr2, mask2);
+
+	*adr  = adr1 | adr2;
+	*adr &= *mask;	// Normalise, no fundamental need for this
+}
+
+/* Count the number of addresses covered with a mask.
+ */
+ulong addresses (ulong mask) {
+	ulong ctr=1;
+	int i=32;
+	while (i-- > 0) {
+		if (! (mask & 1)) {
+			ctr += ctr;
+		}
+		mask >>= 1;
+	}
+	return ctr;
+}
+
+/* Count how many more addresses would be covered by adr1/mask1 when combined
+ * with adr2/mask2.
+ */
+ulong combicost (ulong adr1, ulong mask1, ulong adr2, ulong mask2) {
+	ulong cost1=addresses (mask1);
+	ulong tmp, mask;
+	combine (adr1, mask1, adr2, mask2, &tmp, &mask);
+	return addresses (mask) - cost1;
+}
+
+/* Find the cheapest array index to extend with the given adr/mask pair.
+ * Return -1 if nothing below the given minimum cost can be found.
+ */
+int cheapindex (ulong adr1, ulong mask1, ulong mincost) {
+	int i=v->numpatn;
+	int idx=-1;
+	while (i-- > 0) {
+		ulong tmpcost=combicost(v->patn[i].adr, v->patn[i].mask, adr1, mask1);
+		if (tmpcost < mincost) {
+			mincost=tmpcost;
+			idx=i;
+		}
+	}
+	return idx;
+}
+
+/* Try to find a relocation index for idx if it costs nothing.
+ * Return -1 if no such index exists.
+ */
+int relocateidx (int idx) {
+	ulong adr =v->patn[idx].adr;
+	ulong mask=v->patn[idx].mask;
+	int new;
+	v->patn[idx].adr ^= ~0L;	// Never select idx
+	new=cheapindex (adr, mask, 1+addresses (mask));
+	v->patn[idx].adr = adr;
+	return new;
+}
+
+/* Relocate the given index idx only if it is free of charge.
+ * This is useful for merging `neighbouring' sections into one entry.
+ * Inspired by the buddy allocator principle in the Linux kernel.
+ */
+void relocateiffree (int idx) {
+	int newidx=relocateidx (idx);
+	if (newidx>=0) {
+		ulong cadr, cmask;
+		combine (v->patn [newidx].adr, v->patn[newidx].mask,
+		         v->patn [   idx].adr, v->patn[   idx].mask,
+			 &cadr, &cmask);
+		v->patn[newidx].adr =cadr;
+		v->patn[newidx].mask=cmask;
+		if (idx < --v->numpatn) {
+			v->patn[idx].adr =v->patn[v->numpatn].adr;
+			v->patn[idx].mask=v->patn[v->numpatn].mask;
+		}
+		relocateiffree (newidx);
+	}
+}
+
+/* Insert a single faulty address in the pattern array.
+ * Return 1 only if the array was changed.
+ */
+int insertaddress (ulong adr) {
+	if (cheapindex (adr, DEFAULT_MASK, 1L) != -1)
+		return 0;
+
+	if (v->numpatn < BADRAM_MAXPATNS) {
+		v->patn[v->numpatn].adr =adr;
+		v->patn[v->numpatn].mask=DEFAULT_MASK;
+		v->numpatn++;
+		relocateiffree (v->numpatn-1);
+	} else {
+		int idx=cheapindex (adr, DEFAULT_MASK, ~0L);
+		ulong cadr, cmask;
+		combine (v->patn [idx].adr, v->patn[idx].mask,
+		         adr, DEFAULT_MASK, &cadr, &cmask);
+		v->patn[idx].adr =cadr;
+		v->patn[idx].mask=cmask;
+		relocateiffree (idx);
+	}
+	return 1;
+}
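
For reference, a worked example of the combine()/COMBINE_MASK logic above as a standalone sketch: merging two faulty longword addresses that differ in a single bit yields a BadRAM addr/mask pair whose mask clears that bit (in addition to the two low bits cleared by DEFAULT_MASK), so one pattern covers both faults. The two addresses are invented for illustration.

#include <stdio.h>

typedef unsigned long ulong;

#define DEFAULT_MASK ((~0UL) << 2)
#define COMBINE_MASK(a,b,c,d) ((a & b & c & d) | (~a & b & ~c & d))

int main(void)
{
	ulong adr1 = 0x00100000UL, mask1 = DEFAULT_MASK;	/* example fault #1 */
	ulong adr2 = 0x00100010UL, mask2 = DEFAULT_MASK;	/* example fault #2 */
	ulong adr, mask;

	mask = COMBINE_MASK(adr1, mask1, adr2, mask2);
	adr  = (adr1 | adr2) & mask;		/* normalise, as combine() does */

	/* With 32-bit longs (as in the memtest build) this prints
	 * badram=0x00100000,0xffffffec: bit 4 and the two longword bits
	 * are wildcards, so the single pattern covers 8 addresses. */
	printf("badram=0x%08lx,0x%08lx\n", adr, mask);
	return 0;
}
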
diff --git a/precomp.bin b/precomp.bin
new file mode 100755
index 0000000..a070d1b
--- /dev/null
+++ b/precomp.bin
Binary files differ
diff --git a/random.c b/random.c
new file mode 100644
index 0000000..8e7b13e
--- /dev/null
+++ b/random.c
@@ -0,0 +1,36 @@
+/******************************************************************/
+/* Random number generator */
+/* concatenation of the following two 16-bit multiply-with-carry generators, */
+/* x(n)=a*x(n-1)+carry mod 2^16 and y(n)=b*y(n-1)+carry mod 2^16, */
+/* number and carry packed within the same 32 bit integer.        */
+/******************************************************************/
+#include "smp.h"
+
+/* Keep a separate seed for each CPU */
+/* Space the seeds by at least a cache line or performance suffers big time! */
+static unsigned int SEED_X[MAX_CPUS*16];
+static unsigned int SEED_Y[MAX_CPUS*16];
+
+unsigned long rand (int cpu)
+{
+   static unsigned int a = 18000, b = 30903;
+   int me;
+
+   me = cpu*16;
+
+   SEED_X[me] = a*(SEED_X[me]&65535) + (SEED_X[me]>>16);
+   SEED_Y[me] = b*(SEED_Y[me]&65535) + (SEED_Y[me]>>16);
+
+   return ((SEED_X[me]<<16) + (SEED_Y[me]&65535));
+}
+
+
+void rand_seed( unsigned int seed1, unsigned int seed2, int cpu)
+{
+   int me;
+
+   me = cpu*16;
+   SEED_X[me] = seed1;   
+   SEED_Y[me] = seed2;
+}
+
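
For reference, a standalone sketch of the generator above: each 32-bit state word packs a 16-bit value in the low half and the carry in the high half, one step is x = a*(x & 0xffff) + (x >> 16), and two such streams are concatenated into a 32-bit result exactly as rand() does. The seed constants are the same ones main.c falls back to when no TSC is available.

#include <stdio.h>

int main(void)
{
	unsigned int a = 18000, b = 30903;		/* multipliers from rand() */
	unsigned int x = 521288629, y = 362436069;	/* example seeds, as in main.c */
	int i;

	for (i = 0; i < 4; i++) {
		x = a * (x & 65535) + (x >> 16);	/* one multiply-with-carry step */
		y = b * (y & 65535) + (y >> 16);
		printf("0x%08x\n", (x << 16) + (y & 65535));
	}
	return 0;
}
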
diff --git a/reloc.c b/reloc.c
new file mode 100644
index 0000000..1b80731
--- /dev/null
+++ b/reloc.c
@@ -0,0 +1,267 @@
+/* reloc.c - MemTest-86  Version 3.3
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Eric Biederman
+ */
+
+#include "stddef.h"
+#include "stdint.h"
+#include "elf.h"
+
+#define __ELF_NATIVE_CLASS 32
+#define ELF_MACHINE_NO_RELA 1
+
+/* We use this macro to refer to ELF types independent of the native wordsize.
+   `ElfW(TYPE)' is used in place of `Elf32_TYPE' or `Elf64_TYPE'.  */
+
+#define ElfW(type)	_ElfW (Elf, __ELF_NATIVE_CLASS, type)
+#define _ElfW(e,w,t)	_ElfW_1 (e, w, _##t)
+#define _ElfW_1(e,w,t)	e##w##t
+/* We use this macro to refer to ELF macro constants independent of the native
+   wordsize.  `ELFW(R_SYM)' is used in place of `ELF32_R_SYM' or `ELF64_R_SYM'.  */
+#define ELFW(type)	_ElfW (ELF, __ELF_NATIVE_CLASS, type)
+
+#define assert(expr) ((void) 0)
+
+  /* This #define produces dynamic linking inline functions for
+     bootstrap relocation instead of general-purpose relocation.  */
+#define RTLD_BOOTSTRAP
+
+struct link_map 
+{
+	ElfW(Addr) l_addr;  /* Current load address */
+	ElfW(Addr) ll_addr; /* Last load address */
+	ElfW(Dyn)  *l_ld;
+    /* Indexed pointers to dynamic section.
+       [0,DT_NUM) are indexed by the processor-independent tags.
+       [DT_NUM,DT_NUM+DT_PROCNUM) are indexed by the tag minus DT_LOPROC.
+       [DT_NUM+DT_PROCNUM,DT_NUM+DT_PROCNUM+DT_EXTRANUM) are indexed
+       by DT_EXTRATAGIDX(tagvalue) and
+       [DT_NUM+DT_PROCNUM,
+        DT_NUM+DT_PROCNUM+DT_EXTRANUM)
+       are indexed by DT_EXTRATAGIDX(tagvalue) (see <elf.h>).  */
+
+	ElfW(Dyn)  *l_info[DT_NUM + DT_PROCNUM + DT_EXTRANUM];
+};
+
+
+/* Return the link-time address of _DYNAMIC.  Conveniently, this is the
+   first element of the GOT.  This must be inlined in a function which
+   uses global data.  */
+static inline Elf32_Addr __attribute__ ((unused))
+elf_machine_dynamic (void)
+{
+	register Elf32_Addr *got asm ("%ebx");
+	return *got;
+}
+
+/* Return the run-time load address of the shared object.  */
+static inline Elf32_Addr __attribute__ ((unused))
+elf_machine_load_address (void)
+{
+	Elf32_Addr addr;
+	asm volatile ("leal _start@GOTOFF(%%ebx), %0\n"
+		: "=r" (addr) : : "cc");
+	return addr;
+}
+
+/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
+   MAP is the object containing the reloc.  */
+static inline void
+elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
+		 const Elf32_Sym *sym, Elf32_Addr *const reloc_addr)
+{
+	Elf32_Addr ls_addr, s_addr;
+	Elf32_Addr value;
+	if (ELF32_R_TYPE (reloc->r_info) == R_386_RELATIVE)
+	{
+		*reloc_addr += map->l_addr - map->ll_addr;
+		return;
+	}
+	if (ELF32_R_TYPE(reloc->r_info) == R_386_NONE) {
+		return;
+	}
+	value = sym->st_value;
+	/* Every section except the undefined section has a base of map->l_addr */
+	ls_addr = sym->st_shndx == SHN_UNDEF ? 0 : map->ll_addr;
+	s_addr = sym->st_shndx == SHN_UNDEF ? 0 : map->l_addr;
+
+	switch (ELF32_R_TYPE (reloc->r_info))
+	{
+	case R_386_COPY:
+	{
+		/* Roll memcpy by hand as we don't have function calls yet. */
+		unsigned char *dest, *src;
+		long i;
+		dest = (unsigned char *)reloc_addr;
+		src = (unsigned char *)(value + s_addr);
+		for(i = 0; i < sym->st_size; i++) {
+			dest[i] = src[i];
+		}
+	}
+	break;
+	case R_386_GLOB_DAT:
+		*reloc_addr = s_addr + value;
+		break;
+	case R_386_JMP_SLOT:
+		*reloc_addr = s_addr + value;
+		break;
+	case R_386_32:
+		if (map->ll_addr == 0) {
+			*reloc_addr += value;
+		}
+		*reloc_addr += s_addr - ls_addr;
+		break;
+	case R_386_PC32:
+		if (map->ll_addr == 0) {
+			*reloc_addr += value - reloc->r_offset;
+		}
+		*reloc_addr += (s_addr - map->l_addr) - (ls_addr - map->ll_addr);
+		break;
+	default:
+		assert (! "unexpected dynamic reloc type");
+		break;
+	}
+}
+
+/* Read the dynamic section at DYN and fill in INFO with indices DT_*.  */
+
+static inline void __attribute__ ((unused))
+elf_get_dynamic_info(ElfW(Dyn) *dyn, ElfW(Addr) l_addr,
+	ElfW(Dyn) *info[DT_NUM + DT_PROCNUM + DT_EXTRANUM])
+{
+	if (! dyn)
+		return;
+	
+	while (dyn->d_tag != DT_NULL)
+	{
+		if (dyn->d_tag < DT_NUM)
+			info[dyn->d_tag] = dyn;
+		else if (dyn->d_tag >= DT_LOPROC &&
+			dyn->d_tag < DT_LOPROC + DT_PROCNUM)
+			info[dyn->d_tag - DT_LOPROC + DT_NUM] = dyn;
+		else if ((Elf32_Word) DT_EXTRATAGIDX (dyn->d_tag) < DT_EXTRANUM)
+			info[DT_EXTRATAGIDX (dyn->d_tag) + DT_NUM + DT_PROCNUM
+				] = dyn;
+		else
+			assert (! "bad dynamic tag");
+		++dyn;
+	}
+	
+	if (info[DT_PLTGOT] != NULL) 
+		info[DT_PLTGOT]->d_un.d_ptr += l_addr;
+	if (info[DT_STRTAB] != NULL)
+		info[DT_STRTAB]->d_un.d_ptr += l_addr;
+	if (info[DT_SYMTAB] != NULL)
+		info[DT_SYMTAB]->d_un.d_ptr += l_addr;
+#if ! ELF_MACHINE_NO_RELA
+	if (info[DT_RELA] != NULL)
+	{
+		assert (info[DT_RELAENT]->d_un.d_val == sizeof (ElfW(Rela)));
+		info[DT_RELA]->d_un.d_ptr += l_addr;
+	}
+#endif
+#if ! ELF_MACHINE_NO_REL
+	if (info[DT_REL] != NULL)
+	{
+		assert (info[DT_RELENT]->d_un.d_val == sizeof (ElfW(Rel)));
+		info[DT_REL]->d_un.d_ptr += l_addr;
+	}
+#endif
+	if (info[DT_PLTREL] != NULL)
+	{
+#if ELF_MACHINE_NO_RELA
+		assert (info[DT_PLTREL]->d_un.d_val == DT_REL);
+#elif ELF_MACHINE_NO_REL
+		assert (info[DT_PLTREL]->d_un.d_val == DT_RELA);
+#else
+		assert (info[DT_PLTREL]->d_un.d_val == DT_REL
+			|| info[DT_PLTREL]->d_un.d_val == DT_RELA);
+#endif
+	}
+	if (info[DT_JMPREL] != NULL)
+		info[DT_JMPREL]->d_un.d_ptr += l_addr;
+}
+
+
+
+/* Perform the relocations in MAP on the running program image as specified
+   by RELTAG, SZTAG.  If LAZY is nonzero, this is the first pass on PLT
+   relocations; they should be set up to call _dl_runtime_resolve, rather
+   than fully resolved now.  */
+
+static inline void
+elf_dynamic_do_rel (struct link_map *map,
+		    ElfW(Addr) reladdr, ElfW(Addr) relsize)
+{
+	const ElfW(Rel) *r = (const void *) reladdr;
+	const ElfW(Rel) *end = (const void *) (reladdr + relsize);
+
+	const ElfW(Sym) *const symtab =
+		(const void *) map->l_info[DT_SYMTAB]->d_un.d_ptr;
+	
+	for (; r < end; ++r) {
+		elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)],
+			(void *) (map->l_addr + r->r_offset));
+	}
+}
+
+
+void _dl_start(void)
+{
+	static Elf32_Addr last_load_address = 0;
+	struct link_map map;
+	size_t cnt;
+
+
+	/* Partly clean the `map' structure up.  Don't use `memset'
+	   since it might not be built in or inlined and we cannot make function
+	   calls at this point.  */
+	for (cnt = 0; cnt < sizeof(map.l_info) / sizeof(map.l_info[0]); ++cnt) {
+		map.l_info[cnt] = 0;
+	}
+
+	/* Get the last load address */
+	map.ll_addr = last_load_address;
+
+	/* Figure out the run-time load address of the dynamic linker itself.  */
+	last_load_address = map.l_addr = elf_machine_load_address();
+	
+	/* Read our own dynamic section and fill in the info array.  */
+	map.l_ld = (void *)map.l_addr + elf_machine_dynamic();
+
+	elf_get_dynamic_info (map.l_ld, map.l_addr - map.ll_addr, map.l_info);
+
+	/* Relocate ourselves so we can do normal function calls and
+	 * data access using the global offset table.  
+	 */
+#if !ELF_MACHINE_NO_REL
+	elf_dynamic_do_rel(&map, 
+		map.l_info[DT_REL]->d_un.d_ptr,
+		map.l_info[DT_RELSZ]->d_un.d_val);
+	if (map.l_info[DT_PLTREL]->d_un.d_val == DT_REL) {
+		elf_dynamic_do_rel(&map, 
+			map.l_info[DT_JMPREL]->d_un.d_ptr,
+			map.l_info[DT_PLTRELSZ]->d_un.d_val);
+	}
+#endif
+
+#if !ELF_MACHINE_NO_RELA
+	elf_dynamic_do_rela(&map, 
+		map.l_info[DT_RELA]->d_un.d_ptr,
+		map.l_info[DT_RELASZ]->d_un.d_val);
+	if (map.l_info[DT_PLTREL]->d_un.d_val == DT_RELA) {
+		elf_dynamic_do_rela(&map, 
+			map.l_info[DT_JMPREL]->d_un.d_ptr,
+			map.l_info[DT_PLTRELSZ]->d_un.d_val);
+	}
+#endif
+
+	/* Now life is sane; we can call functions and access global data.
+	   Set up to use the operating system facilities, and find out from
+	   the operating system's program loader where to find the program
+	   header table in core.  Put the rest of _dl_start into a separate
+	   function, that way the compiler cannot put accesses to the GOT
+	   before ELF_DYNAMIC_RELOCATE.  */
+	return;
+}
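
For reference, a tiny standalone sketch of what the R_386_RELATIVE case in elf_machine_rel() above boils down to: a relative relocation slides a stored pointer by the difference between the current and the previous load address, which is what lets the shared image be copied between the low and high test addresses. The addresses below are invented for illustration.

#include <stdio.h>

int main(void)
{
	unsigned int l_addr  = 0x02000000;	/* example: current load address (32 MB) */
	unsigned int ll_addr = 0x00010000;	/* example: previous load address (64 KB) */
	unsigned int slot    = 0x00012345;	/* pointer written while loaded at ll_addr */

	slot += l_addr - ll_addr;		/* the R_386_RELATIVE adjustment */
	printf("relocated pointer: 0x%08x\n", slot);
	return 0;
}
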
diff --git a/screen_buffer.c b/screen_buffer.c
new file mode 100644
index 0000000..f9e01be
--- /dev/null
+++ b/screen_buffer.c
@@ -0,0 +1,127 @@
+/* screen_buffer.c - MemTest-86  Version 3.3
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Jani Averbach, Jaa@iki.fi, 2001
+ */
+
+#include "test.h"
+#include "screen_buffer.h"
+
+#define SCREEN_X 80
+#define SCREEN_Y 25
+#define Y_SIZE SCREEN_Y
+/*
+ * X_SIZE is one larger than the screen width,
+ * so that there is room for the terminating '\0'
+ */
+#define X_SIZE (SCREEN_X+1)
+
+static char screen_buf[Y_SIZE][X_SIZE];
+
+#ifdef SCRN_DEBUG
+
+char *padding = "12345678901234567890123456789012345678901234567890123456789012345678901234567890";
+
+#define CHECK_BOUNDS(y,x) do {if (y < 0 || Y_SIZE <= y || x < 0 || X_SIZE <= x) print_error("out of index");}while(0)
+
+#else /* ! SCRN_DEBUG */
+
+#define CHECK_BOUNDS(y,x)
+
+#endif /* SCRN_DEBUG */
+
+char
+get_scrn_buf(const int y,
+             const int x)
+{
+    CHECK_BOUNDS(y,x);
+    return screen_buf[y][x];
+}
+
+
+void
+set_scrn_buf(const int y,
+             const int x,
+             const char val)
+{
+    CHECK_BOUNDS(y,x);
+    screen_buf[y][x] = val;
+}
+
+void clear_screen_buf()
+{
+    int y, x;
+
+    for (y=0; y < SCREEN_Y; ++y){
+        for (x=0; x < SCREEN_X; ++x){
+            CHECK_BOUNDS(y,x);
+            screen_buf[y][x] = ' ';
+        }
+        CHECK_BOUNDS(y,SCREEN_X);
+        screen_buf[y][SCREEN_X] = '\0';
+    }
+}
+
+void tty_print_region(const int pi_top, 
+                      const int pi_left,
+                      const int pi_bottom,
+                      const int pi_right)
+{
+    int y;
+    char tmp;
+
+    for (y=pi_top; y < pi_bottom; ++y){
+        CHECK_BOUNDS(y, pi_right);
+        
+        tmp = screen_buf[y][pi_right];
+        screen_buf[y][pi_right] = '\0';
+
+        CHECK_BOUNDS(y, pi_left);
+        ttyprint(y, pi_left, &(screen_buf[y][pi_left]));                
+
+        screen_buf[y][pi_right] = tmp;
+    }
+}
+
+void tty_print_line(
+	int y, int x, const char *text)
+{
+	for(; *text && (x < SCREEN_X); x++, text++) {
+		if (*text != screen_buf[y][x]) {
+			break;
+		}
+	}
+	/* If there is nothing to do return */
+	if (*text == '\0') {
+		return;
+	}
+	ttyprint(y, x, text);
+	for(; *text && (x < SCREEN_X); x++, text++) {
+		screen_buf[y][x] = *text;
+	}
+}
+
+
+void tty_print_screen(void)
+{
+#ifdef SCRN_DEBUG
+    int i; 
+
+    for (i=0; i < SCREEN_Y; ++i)
+        ttyprint(i,0, padding);
+#endif /* SCRN_DEBUG */
+
+    tty_print_region(0, 0, SCREEN_Y, SCREEN_X);
+}
+
+void print_error(char *pstr)
+{
+
+#ifdef SCRN_DEBUG
+    ttyprint(0,0, padding);
+#endif /* SCRN_DEBUG */
+
+    ttyprint(0,35, pstr);        
+    
+    while(1);
+}
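
For reference, a standalone sketch of the idea behind tty_print_line() above: compare against a shadow copy of the screen, emit only from the first character that differs, and record what was written, so repeated serial-console updates stay cheap. ttyprint() is stubbed with printf here purely for illustration.

#include <stdio.h>
#include <string.h>

#define SCREEN_X 80

static char shadow[SCREEN_X + 1];	/* shadow copy of one screen row */

static void ttyprint(int y, int x, const char *text)
{
	printf("row %d col %2d: \"%s\"\n", y, x, text);	/* stand-in for the real ttyprint() */
}

static void print_line(int y, int x, const char *text)
{
	for (; *text && x < SCREEN_X; x++, text++)
		if (*text != shadow[x])
			break;			/* skip the prefix that already matches */
	if (*text == '\0')
		return;				/* nothing changed, no output at all */
	ttyprint(y, x, text);
	for (; *text && x < SCREEN_X; x++, text++)
		shadow[x] = *text;		/* remember what is now on screen */
}

int main(void)
{
	memset(shadow, ' ', SCREEN_X);
	print_line(0, 0, "Pass  1");	/* whole string is new */
	print_line(0, 0, "Pass  2");	/* only the final character is emitted */
	return 0;
}
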
diff --git a/screen_buffer.h b/screen_buffer.h
new file mode 100644
index 0000000..8ceba03
--- /dev/null
+++ b/screen_buffer.h
@@ -0,0 +1,20 @@
+/* --*- C -*-- 
+ * 
+ * By Jani Averbach, Jaa@iki.fi, 2001
+ * 
+ * Released under version 2 of the Gnu Public License.
+ *
+ */
+#ifndef SCREEN_BUFFER_H_1D10F83B_INCLUDED
+#define SCREEN_BUFFER_H_1D10F83B_INCLUDED
+
+#include "config.h"
+
+char get_scrn_buf(const int y, const int x);
+void set_scrn_buf(const int y, const int x, const char val);
+void clear_screen_buf(void);
+void tty_print_region(const int pi_top,const int pi_left, const int pi_bottom,const int pi_right);
+void tty_print_line(int y, int x, const char *text);
+void tty_print_screen(void);
+void print_error(char *pstr);
+#endif /* SCREEN_BUFFER_H_1D10F83B_INCLUDED */
diff --git a/serial.h b/serial.h
new file mode 100644
index 0000000..0261073
--- /dev/null
+++ b/serial.h
@@ -0,0 +1,163 @@
+/*
+ * include/linux/serial.h
+ *
+ * Copyright (C) 1992, 1994 by Theodore Ts'o.
+ * 
+ * Redistribution of this file is permitted under the terms of the GNU 
+ * Public License (GPL)
+ * 
+ * These are the UART port assignments, expressed as offsets from the base
+ * register.  These assignments should hold for any serial port based on
+ * a 8250, 16450, or 16550(A).
+ */
+
+#ifndef _LINUX_SERIAL_REG_H
+#define _LINUX_SERIAL_REG_H
+
+#define UART_RX		0	/* In:  Receive buffer (DLAB=0) */
+#define UART_TX		0	/* Out: Transmit buffer (DLAB=0) */
+#define UART_DLL	0	/* Out: Divisor Latch Low (DLAB=1) */
+#define UART_DLM	1	/* Out: Divisor Latch High (DLAB=1) */
+#define UART_IER	1	/* Out: Interrupt Enable Register */
+#define UART_IIR	2	/* In:  Interrupt ID Register */
+#define UART_FCR	2	/* Out: FIFO Control Register */
+#define UART_EFR	2	/* I/O: Extended Features Register */
+				/* (DLAB=1, 16C660 only) */
+#define UART_LCR	3	/* Out: Line Control Register */
+#define UART_MCR	4	/* Out: Modem Control Register */
+#define UART_LSR	5	/* In:  Line Status Register */
+#define UART_MSR	6	/* In:  Modem Status Register */
+#define UART_SCR	7	/* I/O: Scratch Register */
+
+
+
+/*
+ * These are the definitions for the FIFO Control Register
+ * (16650 only)
+ */
+#define UART_FCR_ENABLE_FIFO	0x01 /* Enable the FIFO */
+#define UART_FCR_CLEAR_RCVR	0x02 /* Clear the RCVR FIFO */
+#define UART_FCR_CLEAR_XMIT	0x04 /* Clear the XMIT FIFO */
+#define UART_FCR_DMA_SELECT	0x08 /* For DMA applications */
+#define UART_FCR_TRIGGER_MASK	0xC0 /* Mask for the FIFO trigger range */
+#define UART_FCR_TRIGGER_1	0x00 /* Mask for trigger set at 1 */
+#define UART_FCR_TRIGGER_4	0x40 /* Mask for trigger set at 4 */
+#define UART_FCR_TRIGGER_8	0x80 /* Mask for trigger set at 8 */
+#define UART_FCR_TRIGGER_14	0xC0 /* Mask for trigger set at 14 */
+/* 16650 redefinitions */
+#define UART_FCR6_R_TRIGGER_8	0x00 /* Mask for receive trigger set at 1 */
+#define UART_FCR6_R_TRIGGER_16	0x40 /* Mask for receive trigger set at 4 */
+#define UART_FCR6_R_TRIGGER_24  0x80 /* Mask for receive trigger set at 8 */
+#define UART_FCR6_R_TRIGGER_28	0xC0 /* Mask for receive trigger set at 14 */
+#define UART_FCR6_T_TRIGGER_16	0x00 /* Mask for transmit trigger set at 16 */
+#define UART_FCR6_T_TRIGGER_8	0x10 /* Mask for transmit trigger set at 8 */
+#define UART_FCR6_T_TRIGGER_24  0x20 /* Mask for transmit trigger set at 24 */
+#define UART_FCR6_T_TRIGGER_30	0x30 /* Mask for transmit trigger set at 30 */
+
+/*
+ * These are the definitions for the Line Control Register
+ * 
+ * Note: if the word length is 5 bits (UART_LCR_WLEN5), then setting 
+ * UART_LCR_STOP will select 1.5 stop bits, not 2 stop bits.
+ */
+#define UART_LCR_DLAB	0x80	/* Divisor latch access bit */
+#define UART_LCR_SBC	0x40	/* Set break control */
+#define UART_LCR_SPAR	0x20	/* Stick parity (?) */
+#define UART_LCR_EPAR	0x10	/* Even parity select */
+#define UART_LCR_PARITY	0x08	/* Parity Enable */
+#define UART_LCR_STOP	0x04	/* Stop bits: 0=1 stop bit, 1= 2 stop bits */
+#define UART_LCR_WLEN5  0x00	/* Wordlength: 5 bits */
+#define UART_LCR_WLEN6  0x01	/* Wordlength: 6 bits */
+#define UART_LCR_WLEN7  0x02	/* Wordlength: 7 bits */
+#define UART_LCR_WLEN8  0x03	/* Wordlength: 8 bits */
+
+/*
+ * These are the definitions for the Line Status Register
+ */
+#define UART_LSR_TEMT	0x40	/* Transmitter empty */
+#define UART_LSR_THRE	0x20	/* Transmit-hold-register empty */
+#define UART_LSR_BI	0x10	/* Break interrupt indicator */
+#define UART_LSR_FE	0x08	/* Frame error indicator */
+#define UART_LSR_PE	0x04	/* Parity error indicator */
+#define UART_LSR_OE	0x02	/* Overrun error indicator */
+#define UART_LSR_DR	0x01	/* Receiver data ready */
+
+/*
+ * These are the definitions for the Interrupt Identification Register
+ */
+#define UART_IIR_NO_INT	0x01	/* No interrupts pending */
+#define UART_IIR_ID	0x06	/* Mask for the interrupt ID */
+
+#define UART_IIR_MSI	0x00	/* Modem status interrupt */
+#define UART_IIR_THRI	0x02	/* Transmitter holding register empty */
+#define UART_IIR_RDI	0x04	/* Receiver data interrupt */
+#define UART_IIR_RLSI	0x06	/* Receiver line status interrupt */
+
+/*
+ * These are the definitions for the Interrupt Enable Register
+ */
+#define UART_IER_MSI	0x08	/* Enable Modem status interrupt */
+#define UART_IER_RLSI	0x04	/* Enable receiver line status interrupt */
+#define UART_IER_THRI	0x02	/* Enable Transmitter holding register int. */
+#define UART_IER_RDI	0x01	/* Enable receiver data interrupt */
+
+/*
+ * These are the definitions for the Modem Control Register
+ */
+#define UART_MCR_LOOP	0x10	/* Enable loopback test mode */
+#define UART_MCR_OUT2	0x08	/* Out2 complement */
+#define UART_MCR_OUT1	0x04	/* Out1 complement */
+#define UART_MCR_RTS	0x02	/* RTS complement */
+#define UART_MCR_DTR	0x01	/* DTR complement */
+
+/*
+ * These are the definitions for the Modem Status Register
+ */
+#define UART_MSR_DCD	0x80	/* Data Carrier Detect */
+#define UART_MSR_RI	0x40	/* Ring Indicator */
+#define UART_MSR_DSR	0x20	/* Data Set Ready */
+#define UART_MSR_CTS	0x10	/* Clear to Send */
+#define UART_MSR_DDCD	0x08	/* Delta DCD */
+#define UART_MSR_TERI	0x04	/* Trailing edge ring indicator */
+#define UART_MSR_DDSR	0x02	/* Delta DSR */
+#define UART_MSR_DCTS	0x01	/* Delta CTS */
+#define UART_MSR_ANY_DELTA 0x0F	/* Any of the delta bits! */
+
+/*
+ * These are the definitions for the Extended Features Register
+ * (StarTech 16C660 only, when DLAB=1)
+ */
+#define UART_EFR_CTS	0x80	/* CTS flow control */
+#define UART_EFR_RTS	0x40	/* RTS flow control */
+#define UART_EFR_SCD	0x20	/* Special character detect */
+#define UART_EFR_ENI	0x10	/* Enhanced Interrupt */
+/*
+ * the low four bits control software flow control
+ */
+
+#include "io.h"
+#define serial_echo_outb(v,a) outb((v),(a)+serial_base_ports[serial_tty])
+#define serial_echo_inb(a)    inb((a)+serial_base_ports[serial_tty])
+#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
+/* Wait for transmitter & holding register to empty */
+#define WAIT_FOR_XMITR \
+ do { \
+       lsr = serial_echo_inb(UART_LSR); \
+ } while ((lsr & BOTH_EMPTY) != BOTH_EMPTY)
+
+#if 0
+static inline void serial_echo(int ch)
+{
+	int lsr;
+	WAIT_FOR_XMITR;
+	serial_echo_outb(ch, UART_TX);
+}
+static inline void serial_debug(int ch)
+{
+	serial_echo(ch);
+	serial_echo('\r');
+	serial_echo('\n');
+}
+#endif
+#endif /* _LINUX_SERIAL_REG_H */
+
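serial.h also carries the polled-output helpers used by the console code: WAIT_FOR_XMITR spins until both TEMT and THRE are set in the line status register, after which a byte may be written to UART_TX. A sketch of a transmit loop built from those macros, assuming the same serial_base_ports[]/serial_tty globals that the disabled serial_echo() above relies on:

    static void uart_puts(const char *str)
    {
        int lsr;

        for (; *str; str++) {
            if (*str == '\n') {               /* serial terminals expect CR-LF   */
                WAIT_FOR_XMITR;
                serial_echo_outb('\r', UART_TX);
            }
            WAIT_FOR_XMITR;                   /* wait for TEMT|THRE in UART_LSR  */
            serial_echo_outb(*str, UART_TX);
        }
    }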
diff --git a/setup.S b/setup.S
new file mode 100644
index 0000000..f80875b
--- /dev/null
+++ b/setup.S
@@ -0,0 +1,154 @@
+/*
+ * setup.s is responsible for getting the system data from the BIOS,
+ * and putting them into the appropriate places in system memory.
+ * both setup.s and system have been loaded by the bootblock.
+ *
+ * 1-Jan-96 Modified by Chris Brady for use as a boot/loader for memtest-86.
+ */
+
+#define __ASSEMBLY__
+#include "defs.h"
+
+.code16
+.section ".setup", "ax", @progbits
+.globl start
+start:
+# ok, the read went well 
+# now we want to move to protected mode ...
+
+
+	cli			# no interrupts allowed #
+	movb	$0x80, %al	# disable NMI for the bootup sequence
+	outb	%al, $0x70
+
+# The system will move itself to its rightful place.
+# reload the segment registers and the stack since the 
+# APs also execute this code
+#ljmp	$INITSEG, $(reload - start + 0x200)
+reload:
+	movw	$INITSEG, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+	movw	%ax, %fs
+	movw	%ax, %ss	# reset the stack to INITSEG:0x4000-12.
+	movw	%dx, %sp
+	push	%cs
+	pop	%ds
+	lidt	idt_48 - start	# load idt with 0,0
+	lgdt	gdt_48 - start	# load gdt with whatever appropriate
+
+# that was painless, now we enable A20
+# start from grub-a20.patch
+     	/*
+     	* try to switch gateA20 using PORT92, the "Fast A20 and Init"
+     	* register
+     	*/
+     	mov	$0x92, %dx
+     	inb	%dx, %al
+     	/* skip the port92 code if it's unimplemented (read returns 0xff) */
+     	cmpb	$0xff, %al
+     	jz	alt_a20_done
+     	
+     	/* set or clear bit1, the ALT_A20_GATE bit */
+     	movb	4(%esp), %ah
+     	testb	%ah, %ah
+     	jz	alt_a20_cont1
+     	orb	$2, %al
+     	jmp	alt_a20_cont2
+alt_a20_cont1:
+	and	$0xfd, %al
+
+	/* clear the INIT_NOW bit; don't accidentally reset the machine */
+alt_a20_cont2:
+	and	$0xfe, %al
+	outb	%al, %dx
+
+alt_a20_done:
+# end from grub-a20.patch
+
+	call    empty_8042
+	
+	movb	$0xD1, %al	# command write
+	outb	%al, $0x64
+	call    empty_8042	
+
+	movb	$0xDF, %al	# A20 on
+	outb	%al, $0x60
+	call	empty_8042
+
+/*
+ * Note that the short jump isn't strictly needed, although there are
+ * reasons why it might be a good idea. It won't hurt in any case.
+ */
+	movw	$0x0001, %ax	# protected mode (PE) bit
+	lmsw	%ax		# This is it#
+	jmp	flush_instr
+flush_instr:
+	movw	$KERNEL_DS, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+	movw	%ax, %ss
+	movw	%ax, %fs
+	movw	%ax, %gs
+
+data32	ljmp	$KERNEL_CS, $(TSTLOAD <<4)	# jmp offset 2000 of segment 0x10 (cs)
+
+/*
+ * This routine checks that the keyboard command queue is empty
+ * (after emptying the output buffers)
+ *
+ * No timeout is used - if this hangs there is something wrong with
+ * the machine, and we probably couldn't proceed anyway.
+ */
+empty_8042:
+	call	delay
+	inb	$0x64, %al	# 8042 status port
+	cmpb	$0xff, %al	# from grub-a20-patch, skip if not impl
+	jz	empty_8042_ret
+	testb	$1, %al		# output buffer?
+	jz	no_output
+	call	delay
+	inb	$0x60, %al	# read it
+	jmp	empty_8042
+	
+no_output:
+	testb	$2, %al		# is input buffer full?
+	jnz	empty_8042	# yes - loop
+empty_8042_ret:
+	ret
+#
+# Delay is needed after doing i/o
+#
+delay:
+	.word	0x00eb			# jmp $+2
+	ret
+
+gdt:
+	.word	0,0,0,0		# dummy
+
+	.word	0,0,0,0		# unused
+
+	.word	0x7FFF		# limit 128mb
+	.word	0x0000		# base address=0
+	.word	0x9A00		# code read/exec
+	.word	0x00C0		# granularity=4096, 386
+
+	.word	0x7FFF		# limit 128mb
+	.word	0x0000		# base address=0
+	.word	0x9200		# data read/write
+	.word	0x00C0		# granularity=4096, 386
+
+idt_48:
+	.word	0			# idt limit=0
+	.long	0			# idt base=0L
+
+gdt_48:
+	.word	0x800		# gdt limit=2048, 256 GDT entries
+	.word	512+gdt - start,0x9	# gdt base = 0X9xxxx
+
+msg1:
+	.asciz "Setup.S\r\n"
+
+	/* Pad setup to the proper size */
+	.org	(SETUPSECS*512)
+
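The A20 block above (taken from grub-a20.patch) first tries the "Fast A20" gate at I/O port 0x92 and falls back to the 8042 keyboard controller only if that port reads back 0xff. A rough C rendering of the port 0x92 path, with inb8()/outb8() standing in for whatever port I/O helpers are at hand:

    static int fast_a20_enable(void)
    {
        unsigned char v = inb8(0x92);
        if (v == 0xff)        /* port unimplemented: caller falls back to the 8042 */
            return 0;
        v |= 0x02;            /* set ALT_A20_GATE                                  */
        v &= ~0x01;           /* never set INIT_NOW, that would reset the machine  */
        outb8(v, 0x92);
        return 1;
    }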
diff --git a/smp.c b/smp.c
new file mode 100644
index 0000000..55c7bdb
--- /dev/null
+++ b/smp.c
@@ -0,0 +1,627 @@
+/*
+ * smp.c --
+ */
+
+#include "stddef.h"
+#include "smp.h"
+#include "cpuid.h"
+#include "test.h"
+#define DELAY_FACTOR 1
+unsigned num_cpus = 1; // There is at least one cpu, the BSP
+unsigned found_cpus = 0;
+
+extern void memcpy(void *dst, void *src , int len);
+extern void test_start(void);
+extern int run_cpus;
+
+struct barrier_s *barr;
+
+void barrier_init(int max)
+{
+	/* Set the address of the barrier structure */
+	barr = (struct barrier_s *)0x9ff00;
+        barr->lck.slock = 1;
+        barr->mutex.slock = 1;
+        barr->maxproc = max;
+        barr->count = max;
+        barr->st1.slock = 1;
+        barr->st2.slock = 0;
+}
+
+void s_barrier_init(int max)
+{
+        barr->s_lck.slock = 1;
+        barr->s_maxproc = max;
+        barr->s_count = max;
+        barr->s_st1.slock = 1;
+        barr->s_st2.slock = 0;
+}
+
+/* Spin until var = 1 */
+void spin_wait(spinlock_t *lck)
+{
+        int inc = 0x400;
+
+        asm volatile( "1:\t"
+                      "cmpb $0,%1\n\t"
+                      "jne 2f\n\t"
+                      "rep ; nop\n\t"
+                      "jmp 1b\n"
+                      "2:"
+                      : : "c" (inc), "m" (lck->slock): "memory" );
+}
+
+void barrier()
+{
+	if (num_cpus == 1) {
+		return;
+	}
+	spin_wait(&barr->st1);     /* Wait if the barrier is active */
+        spin_lock(&barr->lck);	   /* Get lock for barr struct */
+        if (--barr->count == 0) {  /* Last process? */
+                barr->st1.slock = 0;   /* Hold up any processes re-entering */
+                barr->st2.slock = 1;   /* Release the other processes */
+                barr->count++;
+                spin_unlock(&barr->lck); 
+        } else {
+                spin_unlock(&barr->lck); 
+                spin_wait(&barr->st2);	/* wait for peers to arrive */
+                spin_lock(&barr->lck);   
+                if (++barr->count == barr->maxproc) { 
+                        barr->st1.slock = 1; 
+                        barr->st2.slock = 0; 
+                }
+                spin_unlock(&barr->lck); 
+        }
+}
+
+void s_barrier()
+{
+	if (run_cpus == 1) {
+		return;
+	}
+	spin_wait(&barr->s_st1);     /* Wait if the barrier is active */
+        spin_lock(&barr->s_lck);     /* Get lock for barr struct */
+        if (--barr->s_count == 0) {  /* Last process? */
+                barr->s_st1.slock = 0;   /* Hold up any processes re-entering */
+                barr->s_st2.slock = 1;   /* Release the other processes */
+                barr->s_count++;
+                spin_unlock(&barr->s_lck); 
+        } else {
+                spin_unlock(&barr->s_lck); 
+                spin_wait(&barr->s_st2);	/* wait for peers to arrive */
+                spin_lock(&barr->s_lck);   
+                if (++barr->s_count == barr->s_maxproc) { 
+                        barr->s_st1.slock = 1; 
+                        barr->s_st2.slock = 0; 
+                }
+                spin_unlock(&barr->s_lck); 
+        }
+}
+
+typedef struct {
+   bool started;
+} ap_info_t;
+
+volatile apic_register_t *APIC = NULL;
+/* CPU number to APIC ID mapping table. CPU 0 is the BSP. */
+static unsigned cpu_num_to_apic_id[MAX_CPUS];
+volatile ap_info_t AP[MAX_CPUS];
+
+void PUT_MEM16(uintptr_t addr, uint16_t val)
+{
+   *((volatile uint16_t *)addr) = val;
+}
+
+void PUT_MEM32(uintptr_t addr, uint32_t val)
+{
+   *((volatile uint32_t *)addr) = val;
+}
+
+static void inline 
+APIC_WRITE(unsigned reg, uint32_t val)
+{
+   APIC[reg][0] = val;
+}
+
+static inline uint32_t 
+APIC_READ(unsigned reg)
+{
+   return APIC[reg][0];
+}
+
+
+static void 
+SEND_IPI(unsigned apic_id, unsigned trigger, unsigned level, unsigned mode,
+	    uint8_t vector)
+{
+   uint32_t v;
+
+   v = APIC_READ(APICR_ICRHI) & 0x00ffffff;
+   APIC_WRITE(APICR_ICRHI, v | (apic_id << 24));
+
+   v = APIC_READ(APICR_ICRLO) & ~0xcdfff;
+   v |= (APIC_DEST_DEST << APIC_ICRLO_DEST_OFFSET) 
+      | (trigger << APIC_ICRLO_TRIGGER_OFFSET)
+      | (level << APIC_ICRLO_LEVEL_OFFSET)
+      | (mode << APIC_ICRLO_DELMODE_OFFSET)
+      | (vector);
+   APIC_WRITE(APICR_ICRLO, v);
+}
+
+
+// Silly way of busywaiting, but we don't have a timer
+void delay(unsigned us) 
+{
+   unsigned freq = 1000; // in MHz, assume 1GHz CPU speed
+   uint64_t cycles = us * freq;
+   uint64_t t0 = RDTSC();
+   uint64_t t1;
+   volatile unsigned k;
+
+   do {
+      for (k = 0; k < 1000; k++) continue;
+      t1 = RDTSC();
+   } while (t1 - t0 < cycles);
+}
+
+static inline void
+memset (void *dst,
+        char  value,
+        int   len)
+{
+   int i;
+   for (i = 0 ; i < len ; i++ ) { 
+      *((char *) dst + i) = value;
+   }
+}
+
+void kick_cpu(unsigned cpu_num)
+{
+   unsigned num_sipi, apic_id;
+   apic_id = cpu_num_to_apic_id[cpu_num];
+
+   // clear the APIC ESR register
+   APIC_WRITE(APICR_ESR, 0);
+   APIC_READ(APICR_ESR);
+
+   // asserting the INIT IPI
+   SEND_IPI(apic_id, APIC_TRIGGER_LEVEL, 1, APIC_DELMODE_INIT, 0);
+   delay(100000 / DELAY_FACTOR);
+
+   // de-assert the INIT IPI
+   SEND_IPI(apic_id, APIC_TRIGGER_LEVEL, 0, APIC_DELMODE_INIT, 0);
+
+   for (num_sipi = 0; num_sipi < 2; num_sipi++) {
+      unsigned timeout;
+      bool send_pending;
+      unsigned err;
+
+      APIC_WRITE(APICR_ESR, 0);
+
+      SEND_IPI(apic_id, 0, 0, APIC_DELMODE_STARTUP, (unsigned)startup_32 >> 12);
+
+      timeout = 0;
+      do {
+	 delay(10);
+	 timeout++;
+	 send_pending = (APIC_READ(APICR_ICRLO) & APIC_ICRLO_STATUS_MASK) != 0;
+      } while (send_pending && timeout < 1000);
+
+      if (send_pending) {
+	 cprint(LINE_STATUS+1, 0, "SMP: STARTUP IPI was never sent");
+      }
+      
+      delay(100000 / DELAY_FACTOR);
+
+      err = APIC_READ(APICR_ESR) & 0xef;
+      if (err) {
+	 cprint(LINE_STATUS+1, 0, "SMP: After STARTUP IPI: err = 0x");
+         hprint(LINE_STATUS+1, COL_MID, err);
+      }
+   }
+}
+
+// These memory locations are used for the trampoline code and data.
+
+#define BOOTCODESTART 0x9000
+#define GDTPOINTERADDR 0x9100
+#define GDTADDR 0x9110
+
+void boot_ap(unsigned cpu_num)
+{
+   unsigned num_sipi, apic_id;
+   extern uint8_t gdt; 
+   extern uint8_t _ap_trampoline_start;
+   extern uint8_t _ap_trampoline_protmode;
+   unsigned len = &_ap_trampoline_protmode - &_ap_trampoline_start;
+   apic_id = cpu_num_to_apic_id[cpu_num];
+
+
+   memcpy((uint8_t*)BOOTCODESTART, &_ap_trampoline_start, len);
+
+   // Fixup the LGDT instruction to point to GDT pointer.
+   PUT_MEM16(BOOTCODESTART + 3, GDTPOINTERADDR);
+
+   // Copy a pointer to the temporary GDT to addr GDTPOINTERADDR.
+   // The temporary gdt is at addr GDTADDR
+   PUT_MEM16(GDTPOINTERADDR, 4 * 8);
+   PUT_MEM32(GDTPOINTERADDR + 2, GDTADDR);
+
+   // Copy the first 4 gdt entries from the currently used GDT to the
+   // temporary GDT.
+   memcpy((uint8_t *)GDTADDR, &gdt, 32);
+
+   // clear the APIC ESR register
+   APIC_WRITE(APICR_ESR, 0);
+   APIC_READ(APICR_ESR);
+
+   // asserting the INIT IPI
+   SEND_IPI(apic_id, APIC_TRIGGER_LEVEL, 1, APIC_DELMODE_INIT, 0);
+   delay(100000 / DELAY_FACTOR);
+
+   // de-assert the INIT IPI
+   SEND_IPI(apic_id, APIC_TRIGGER_LEVEL, 0, APIC_DELMODE_INIT, 0);
+
+   for (num_sipi = 0; num_sipi < 2; num_sipi++) {
+      unsigned timeout;
+      bool send_pending;
+      unsigned err;
+
+      APIC_WRITE(APICR_ESR, 0);
+
+      SEND_IPI(apic_id, 0, 0, APIC_DELMODE_STARTUP, BOOTCODESTART >> 12);
+
+      timeout = 0;
+      do {
+	 delay(10);
+	 timeout++;
+	 send_pending = (APIC_READ(APICR_ICRLO) & APIC_ICRLO_STATUS_MASK) != 0;
+      } while (send_pending && timeout < 1000);
+
+      if (send_pending) {
+	 cprint(LINE_STATUS+1, 0, "SMP: STARTUP IPI was never sent");
+      }
+      
+      delay(100000 / DELAY_FACTOR);
+
+      err = APIC_READ(APICR_ESR) & 0xef;
+      if (err) {
+	 cprint(LINE_STATUS+1, 0, "SMP: After STARTUP IPI: err = 0x");
+         hprint(LINE_STATUS+1, COL_MID, err);
+      }
+   }
+}
+
+static int checksum(unsigned char *mp, int len)
+{
+   int sum = 0;
+
+   while (len--) {
+       sum += *mp++;
+   }
+   return (sum & 0xFF);
+}
+
+/* Parse an MP config table for CPU information */
+bool read_mp_config_table(uintptr_t addr)
+{
+   mp_config_table_header_t *mpc = (mp_config_table_header_t*)addr;
+   uint8_t *tab_entry_ptr;
+   uint8_t *mpc_table_end;
+
+   if (mpc->signature != MPCSignature) {
+      return FALSE;
+   }
+   if (checksum((unsigned char*)mpc, mpc->length) != 0) {
+      return FALSE;
+   }
+
+   /* FIXME: the uintptr_t cast here works around a compilation problem on
+    * AMD64, but it ignores the real problem, which is that lapic_addr
+    * is only 32 bits.  Maybe that's OK, but it should be investigated.
+    */
+   APIC = (volatile apic_register_t*)(uintptr_t)mpc->lapic_addr;
+
+   tab_entry_ptr = ((uint8_t*)mpc) + sizeof(mp_config_table_header_t);
+   mpc_table_end = ((uint8_t*)mpc) + mpc->length;
+   while (tab_entry_ptr < mpc_table_end) {
+      switch (*tab_entry_ptr) {
+      case MP_PROCESSOR: {
+	 mp_processor_entry_t *pe = (mp_processor_entry_t*)tab_entry_ptr;
+
+	 if (pe->cpu_flag & CPU_BOOTPROCESSOR) {
+	    // BSP is CPU 0
+	    cpu_num_to_apic_id[0] = pe->apic_id;
+	 } else if (num_cpus < MAX_CPUS) {
+	    cpu_num_to_apic_id[num_cpus] = pe->apic_id;
+	    num_cpus++;
+	 }
+	 found_cpus++;
+	    
+	 // we cannot handle non-local 82489DX apics
+	 if ((pe->apic_ver & 0xf0) != 0x10) {
+	    return 0;
+	 }
+
+	 tab_entry_ptr += sizeof(mp_processor_entry_t);
+	 break;
+      }
+      case MP_BUS: {
+	 tab_entry_ptr += sizeof(mp_bus_entry_t);
+	 break;
+      }
+      case MP_IOAPIC: {
+	 tab_entry_ptr += sizeof(mp_io_apic_entry_t);
+	 break;
+      }
+      case MP_INTSRC:
+	 tab_entry_ptr += sizeof(mp_interrupt_entry_t);
+      case MP_LINTSRC:
+	 tab_entry_ptr += sizeof(mp_local_interrupt_entry_t);
+	 break;
+      default: 
+	 return FALSE;
+      }
+   }
+   return TRUE;
+}
+
+/* Search for a Floating Pointer structure */
+floating_pointer_struct_t *
+scan_for_floating_ptr_struct(uintptr_t addr, uint32_t length)
+{
+   floating_pointer_struct_t *fp;
+   uintptr_t end = addr + length;
+
+
+   while ((uintptr_t)addr < end) {
+      fp = (floating_pointer_struct_t*)addr;
+      if (*(unsigned int *)addr == FPSignature && fp->length == 1 && 
+		checksum((unsigned char*)addr, 16) == 0 &&
+		((fp->spec_rev == 1) || (fp->spec_rev == 4))) {
+		
+	   return fp;
+      }
+      addr += 4;
+   }
+   return NULL;
+}
+
+/* Search for a Root System Descriptor Pointer */
+rsdp_t *scan_for_rsdp(uintptr_t addr, uint32_t length)
+{
+   rsdp_t *rp;
+   uintptr_t end = addr + length;
+
+
+   while ((uintptr_t)addr < end) {
+      rp = (rsdp_t*)addr;
+      if (*(unsigned int *)addr == RSDPSignature && 
+		checksum((unsigned char*)addr, rp->length) == 0) {
+	   return rp;
+      }
+      addr += 4;
+   }
+   return NULL;
+}
+
+/* Parse a MADT table for processor entries */
+int parse_madt(uintptr_t addr) {
+
+   mp_config_table_header_t *mpc = (mp_config_table_header_t*)addr;
+   uint8_t *tab_entry_ptr;
+   uint8_t *mpc_table_end;
+
+   if (checksum((unsigned char*)mpc, mpc->length) != 0) {
+      return FALSE;
+   }
+
+   APIC = (volatile apic_register_t*)(uintptr_t)mpc->lapic_addr;
+
+   tab_entry_ptr = ((uint8_t*)mpc) + sizeof(mp_config_table_header_t);
+   mpc_table_end = ((uint8_t*)mpc) + mpc->length;
+   while (tab_entry_ptr < mpc_table_end) {
+
+      madt_processor_entry_t *pe = (madt_processor_entry_t*)tab_entry_ptr;
+      if (pe->type == MP_PROCESSOR) {
+	 if (pe->enabled) {
+	     if (num_cpus < MAX_CPUS) {
+		cpu_num_to_apic_id[num_cpus] = pe->apic_id;
+
+		/* the first CPU is the BSP, don't increment */
+		if (found_cpus) {
+		    num_cpus++;
+		}
+	     }
+	     found_cpus++;
+	 }
+      }
+       tab_entry_ptr += pe->length;
+   }
+   return TRUE;
+}
+
+/* This is where we search for SMP information in the following order
+ * look for a floating MP pointer
+ *   found:
+ *     check for a default configuration
+ * 	 found:
+ *	   setup config, return
+ *     check for a MP config table
+ *	 found:
+ *	   validate:
+ *           good:
+ *	        parse the MP config table
+ *		  good:
+ *		    setup config, return
+ *
+ * find & validate ACPI RSDP (Root System Descriptor Pointer)
+ *   found:
+ *     find & validate RSDT (Root System Descriptor Table)
+ *       found:
+ *         find & validate MADT
+ *	     found:
+ *             parse the MADT table
+ *               good:
+ *		   setup config, return
+ */
+void smp_init_bsp()
+{
+   floating_pointer_struct_t *fp;
+   rsdp_t *rp;
+   rsdt_t *rt;
+   uint8_t *tab_ptr, *tab_end;
+   unsigned int *ptr;
+
+   memset(&AP, 0, sizeof AP);
+
+   /* Search for the Floating MP structure pointer */
+   fp = scan_for_floating_ptr_struct(0x0, 0x400);
+   if (fp == NULL) {
+      fp = scan_for_floating_ptr_struct(639*0x400, 0x400);
+   }
+   if (fp == NULL) {
+         fp = scan_for_floating_ptr_struct(0xf0000, 0x10000);
+   }
+   if (fp == NULL) {
+        /* Search the BIOS ESDS area */
+        unsigned int address = *(unsigned short *)0x40E;
+        address <<= 4;
+	if (address) {
+       		fp = scan_for_floating_ptr_struct(address, 0x400);
+        }
+   }
+
+   if (fp != NULL) {
+	/* We have a floating MP pointer */
+
+	/* Is this a default configuration? */
+	if (fp->feature[0] > 0 && fp->feature[0] <=7) {
+	    /* This is a default config so plug in the numbers */
+	    num_cpus = 2;
+	    APIC = 0xfee00000;
+	    cpu_num_to_apic_id[0] = 0;
+	    cpu_num_to_apic_id[1] = 1;
+	    return;
+	}
+
+	/* Do we have a pointer to a MP configuration table? */
+	if ( fp->phys_addr != 0) {
+	    if (read_mp_config_table(fp->phys_addr)) {
+		/* Found a good MP table, done */
+		return;
+	    }
+	}
+    }
+
+   /* No MP table so far, try to find an ACPI MADT table
+    * We try to use the MP table first since there is no way to distinguish
+    * real cores from hyper-threads in the MADT */
+
+   /* Search for the RSDP */
+   rp = scan_for_rsdp(0xe0000, 0x20000);
+   if (rp == NULL) {
+        /* Search the BIOS ESDS area */
+        unsigned int address = *(unsigned short *)0x40E;
+        address <<= 4;
+	if (address) {
+       		rp = scan_for_rsdp(address, 0x400);
+        }
+   }
+
+   if (rp == NULL) {
+	/* RSDP not found, give up */
+	return;
+   }
+
+   /* Found the RSDP, now get either the RSDT or XSDT */
+   if (rp->revision >= 2) {
+		rt = (rsdt_t *)rp->xrsdt[0];
+		if (rt == 0) {
+			return;
+		}
+		/* Validate the XSDT */
+		if (*(unsigned int *)rt != XSDTSignature) {
+			return;
+		}
+		if ( checksum((unsigned char*)rt, rt->length) != 0) {
+			return;
+		}
+    } else {
+		rt = (rsdt_t *)rp->rsdt;
+		if (rt == 0) {
+			return;
+		}
+		/* Validate the RSDT */
+		if (*(unsigned int *)rt != RSDTSignature) {
+			return;
+		}
+		if ( checksum((unsigned char*)rt, rt->length) != 0) {
+			return;
+		}
+    }
+
+    /* Scan the RSDT or XSDT for a pointer to the MADT */
+    tab_ptr = ((uint8_t*)rt) + sizeof(rsdt_t);
+    tab_end = ((uint8_t*)rt) + rt->length;
+
+    while (tab_ptr < tab_end) {
+	ptr = *(unsigned int *)tab_ptr;
+	/* Check for the MADT signature */
+	if (ptr && *ptr == MADTSignature) {
+
+	    /* Found it, now parse it */
+	    if (parse_madt((uintptr_t)ptr)) {
+		return;
+	    }
+	}
+        tab_ptr += 4;
+    }
+}
+	
+unsigned my_apic_id()
+{
+   return (APIC[APICR_ID][0]) >> 24;
+}
+
+void smp_ap_booted(unsigned cpu_num) 
+{
+   AP[cpu_num].started = TRUE;
+}
+
+void smp_boot_ap(unsigned cpu_num)
+{
+   unsigned timeout;
+   extern bool smp_mode;
+   boot_ap(cpu_num);
+   timeout = 0;
+   do {
+      delay(1000 / DELAY_FACTOR);
+      timeout++;
+   } while (!AP[cpu_num].started && timeout < 100000 / DELAY_FACTOR);
+
+   if (!AP[cpu_num].started) {
+      cprint(LINE_STATUS+1, 0, "SMP: Boot timeout for");
+      dprint(LINE_STATUS+1, COL_MID, cpu_num,2,1);
+      cprint(LINE_STATUS+1, 26, "Turning off SMP");
+      smp_mode = FALSE;
+   }
+}
+
+unsigned smp_my_cpu_num()
+{
+   unsigned apicid = my_apic_id();
+   unsigned i;
+
+   for (i = 0; i < MAX_CPUS; i++) {
+      if (apicid == cpu_num_to_apic_id[i]) {
+	 break;
+      }
+   }
+   if (i == MAX_CPUS) {
+      i = 0;
+   }
+   return i;
+}
+
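The barrier above is a two-gate design: st1 blocks CPUs from re-entering while a rendezvous is in progress, st2 releases the waiters, and the last CPU to arrive flips both gates. Assuming barrier_init() has already been called with the number of participating CPUs, a test loop would line up on it roughly like this (do_chunk() and my_cpu are stand-ins, not functions in this tree):

    void run_pass(int my_cpu)
    {
        barrier();           /* all CPUs start the pattern together            */
        do_chunk(my_cpu);    /* each CPU works on its own slice of the segment */
        barrier();           /* nobody advances until every CPU has finished   */
    }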
diff --git a/smp.h b/smp.h
new file mode 100644
index 0000000..68b495b
--- /dev/null
+++ b/smp.h
@@ -0,0 +1,308 @@
+/* **********************************************************
+ * Copyright 2002 VMware, Inc.  All rights reserved. -- VMware Confidential
+ * **********************************************************/
+
+
+#ifndef _SMP_H_
+#define _SMP_H_
+#include "stdint.h"
+#include "defs.h"
+#define MAX_CPUS 16 // "16 CPUs ought to be enough for everybody."
+
+
+#define FPSignature ('_' | ('M' << 8) | ('P' << 16) | ('_' << 24))
+
+typedef struct {
+   uint32_t signature;   // "_MP_"
+   uint32_t phys_addr;
+   uint8_t  length;
+   uint8_t  spec_rev;
+   uint8_t  checksum;
+   uint8_t  feature[5];
+} floating_pointer_struct_t;
+
+#define MPCSignature ('P' | ('C' << 8) | ('M' << 16) | ('P' << 24))
+typedef struct {
+   uint32_t signature;   // "PCMP"
+   uint16_t length;
+   uint8_t  spec_rev;
+   uint8_t  checksum;
+   char   oem[8];
+   char   productid[12];
+   uint32_t oem_ptr;
+   uint16_t oem_size;
+   uint16_t oem_count;
+   uint32_t lapic_addr;
+   uint32_t reserved;
+} mp_config_table_header_t;
+
+/* Followed by entries */
+
+#define MP_PROCESSOR    0
+#define MP_BUS          1
+#define MP_IOAPIC       2
+#define MP_INTSRC       3
+#define MP_LINTSRC      4
+
+typedef struct {
+   uint8_t type;          /* MP_PROCESSOR */
+   uint8_t apic_id;       /* Local APIC number */
+   uint8_t apic_ver;      /* Its versions */
+   uint8_t cpu_flag;
+#define CPU_ENABLED             1       /* Processor is available */
+#define CPU_BOOTPROCESSOR       2       /* Processor is the BP */
+   uint32_t cpu_signature;           
+#define CPU_STEPPING_MASK 0x0F
+#define CPU_MODEL_MASK  0xF0
+#define CPU_FAMILY_MASK 0xF00
+   uint32_t featureflag;  /* CPUID feature value */
+   uint32_t reserved[2];
+} mp_processor_entry_t;
+
+typedef struct {
+   uint8_t type;   // has value MP_BUS
+   uint8_t busid;
+   char  bustype[6];
+} mp_bus_entry_t;
+
+/* We don't understand the others */
+
+typedef struct {
+   uint8_t  type;   // set to MP_IOAPIC
+   uint8_t  apicid;
+   uint8_t  apicver;
+   uint8_t  flags;
+#define MPC_APIC_USABLE         0x01
+   uint32_t apicaddr;
+} mp_io_apic_entry_t;
+
+
+typedef struct {
+   uint8_t  type;
+   uint8_t  irqtype;
+   uint16_t irqflag;
+   uint8_t  srcbus;
+   uint8_t  srcbusirq;
+   uint8_t  dstapic;
+   uint8_t  dstirq;
+} mp_interrupt_entry_t;
+
+#define MP_INT_VECTORED         0
+#define MP_INT_NMI              1
+#define MP_INT_SMI              2
+#define MP_INT_EXTINT           3
+
+#define MP_IRQDIR_DEFAULT       0
+#define MP_IRQDIR_HIGH          1
+#define MP_IRQDIR_LOW           3
+
+
+typedef struct {
+   uint8_t  type;
+   uint8_t  irqtype;
+   uint16_t irqflag;
+   uint8_t  srcbusid;
+   uint8_t  srcbusirq;
+   uint8_t  destapic;     
+#define MP_APIC_ALL     0xFF
+   uint8_t  destapiclint;
+} mp_local_interrupt_entry_t;
+
+#define RSDPSignature ('R' | ('S' << 8) | ('D' << 16) | (' ' << 24))
+typedef struct {
+   char signature[8];   // "RSD "
+   uint8_t  checksum;
+   char oemid[6];
+   uint8_t revision; 
+   uint32_t rsdt;
+   uint32_t length;
+   uint32_t xrsdt[2];
+   uint8_t  xsum;
+} rsdp_t;
+
+#define RSDTSignature ('R' | ('S' << 8) | ('D' << 16) | ('T' << 24))
+#define XSDTSignature ('X' | ('S' << 8) | ('D' << 16) | ('T' << 24))
+typedef struct {
+   char signature[4];   // "RSDT"
+   uint32_t length;
+   uint8_t revision; 
+   uint8_t  checksum;
+   char oemid[18];
+   char cid[4];
+   char cver[4];
+} rsdt_t;
+
+#define MADTSignature ('A' | ('P' << 8) | ('I' << 16) | ('C' << 24))
+typedef struct {
+   uint8_t type; 
+   uint8_t length;
+   uint8_t acpi_id;
+   uint8_t apic_id;       /* Local APIC number */
+   uint32_t enabled;
+} madt_processor_entry_t;
+
+/* APIC definitions */
+/*
+ * APIC registers
+ */
+#define APICR_ID         0x02
+#define APICR_ESR        0x28
+#define APICR_ICRLO      0x30
+#define APICR_ICRHI      0x31
+
+/* APIC destination shorthands */
+#define APIC_DEST_DEST        0
+#define APIC_DEST_LOCAL       1
+#define APIC_DEST_ALL_INC     2
+#define APIC_DEST_ALL_EXC     3
+
+/* APIC IPI Command Register format */
+#define APIC_ICRHI_RESERVED		0x00ffffff
+#define APIC_ICRHI_DEST_MASK		0xff000000
+#define APIC_ICRHI_DEST_OFFSET		24
+
+#define APIC_ICRLO_RESERVED		0xfff32000
+#define APIC_ICRLO_DEST_MASK		0x000c0000
+#define APIC_ICRLO_DEST_OFFSET		18
+#define APIC_ICRLO_TRIGGER_MASK		0x00008000
+#define APIC_ICRLO_TRIGGER_OFFSET	15
+#define APIC_ICRLO_LEVEL_MASK		0x00004000
+#define APIC_ICRLO_LEVEL_OFFSET		14
+#define APIC_ICRLO_STATUS_MASK		0x00001000
+#define APIC_ICRLO_STATUS_OFFSET	12
+#define APIC_ICRLO_DESTMODE_MASK	0x00000800
+#define APIC_ICRLO_DESTMODE_OFFSET	11
+#define APIC_ICRLO_DELMODE_MASK		0x00000700
+#define APIC_ICRLO_DELMODE_OFFSET	8
+#define APIC_ICRLO_VECTOR_MASK		0x000000ff
+#define APIC_ICRLO_VECTOR_OFFSET	0
+
+/* APIC trigger types (edge/level) */
+#define APIC_TRIGGER_EDGE     0
+#define APIC_TRIGGER_LEVEL    1
+
+/* APIC delivery modes */
+#define APIC_DELMODE_FIXED    0
+#define APIC_DELMODE_LOWEST   1
+#define APIC_DELMODE_SMI      2
+#define APIC_DELMODE_NMI      4
+#define APIC_DELMODE_INIT     5
+#define APIC_DELMODE_STARTUP  6
+#define APIC_DELMODE_EXTINT   7
+typedef uint32_t apic_register_t[4];
+
+extern volatile apic_register_t *APIC;
+
+unsigned smp_my_cpu_num();
+
+void smp_init_bsp(void);
+void smp_init_aps(void);
+
+void smp_boot_ap(unsigned cpu_num);
+void smp_ap_booted(unsigned cpu_num);
+
+typedef struct {
+        unsigned int slock;
+} spinlock_t;
+
+struct barrier_s
+{
+        spinlock_t mutex;
+        spinlock_t lck;
+        int maxproc;
+        volatile int count;
+        spinlock_t st1;
+        spinlock_t st2;
+        spinlock_t s_lck;
+        int s_maxproc;
+        volatile int s_count;
+        spinlock_t s_st1;
+        spinlock_t s_st2;
+};
+
+void barrier();
+void s_barrier();
+void barrier_init(int max);
+void s_barrier_init(int max);
+
+static inline void
+__GET_CPUID(int ax, uint32_t *regs)
+{
+   __asm__ __volatile__("\t"
+   	/* save ebx in case -fPIC is being used */
+      "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
+      : "=a" (regs[0]), "=D" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+      : "a" (ax)
+      : "memory"
+   );
+}
+
+#define GET_CPUID(_ax,_bx,_cx,_dx) { \
+   uint32_t regs[4];                   \
+   __GET_CPUID(_ax,regs);            \
+   _ax = regs[0];                    \
+   _bx = regs[1];                    \
+   _cx = regs[2];                    \
+   _dx = regs[3];                    \
+}
+
+/*
+ * Checked against the Intel manual and GCC --hpreg
+ *
+ * volatile because the tsc always changes without the compiler knowing it.
+ */
+static inline uint64_t
+RDTSC(void)
+{
+   uint64_t tim;
+
+   __asm__ __volatile__(
+      "rdtsc"
+      : "=A" (tim)
+   );
+
+   return tim;
+}
+
+static inline uint64_t __GET_MSR(int cx)
+{
+   uint64_t msr;
+
+   __asm__ __volatile__(
+      "rdmsr"
+      : "=A" (msr)
+      : "c" (cx)
+   );
+
+   return msr;
+}
+
+#define __GCC_OUT(s, s2, port, val) do { \
+   __asm__(                              \
+      "out" #s " %" #s2 "1, %w0"         \
+      :                                  \
+      : "Nd" (port), "a" (val)           \
+   );                                    \
+} while (0)
+#define OUTB(port, val) __GCC_OUT(b, b, port, val)
+
+static inline void spin_lock(spinlock_t *lock)
+{
+        asm volatile("\n1:\t"
+                     " ; lock;decb %0\n\t"
+                     "jns 3f\n"
+                     "2:\t"
+                     "rep;nop\n\t"
+                     "cmpb $0,%0\n\t"
+                     "jle 2b\n\t"
+                     "jmp 1b\n"
+                     "3:\n\t"
+                     : "+m" (lock->slock) : : "memory");
+}
+static inline void spin_unlock(spinlock_t *lock)
+{
+        asm volatile("movb $1,%0" : "+m" (lock->slock) :: "memory");
+}
+
+
+#endif /* _SMP_H_ */
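A small usage sketch for the GET_CPUID macro defined above: leaf 0 returns the vendor string spread across EBX, EDX and ECX (e.g. "GenuineIntel"); the output row and column here are picked arbitrarily:

    static void show_vendor(void)
    {
        uint32_t a = 0, b, c, d;   /* a = 0 selects CPUID leaf 0 */
        char vendor[13];
        int i;

        GET_CPUID(a, b, c, d);
        for (i = 0; i < 4; i++) {
            vendor[i]     = (b >> (8 * i)) & 0xff;
            vendor[i + 4] = (d >> (8 * i)) & 0xff;
            vendor[i + 8] = (c >> (8 * i)) & 0xff;
        }
        vendor[12] = '\0';
        cprint(0, 0, vendor);
    }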
diff --git a/spd.c b/spd.c
new file mode 100644
index 0000000..d0b9b0d
--- /dev/null
+++ b/spd.c
@@ -0,0 +1,189 @@
+/* Memtest86 SPD extension 
+ * added by Reto Sonderegger, 2004, reto@swissbit.com
+ * 
+ * Released under version 2 of the Gnu Public License
+ */
+ 
+#include "test.h"
+#include "io.h"
+#include "pci.h"
+#include "msr.h"
+#include "screen_buffer.h"
+
+#define SMBHSTSTS smbusbase
+#define SMBHSTCNT smbusbase + 2
+#define SMBHSTCMD smbusbase + 3
+#define SMBHSTADD smbusbase + 4
+#define SMBHSTDAT smbusbase + 5
+
+extern void wait_keyup();
+
+int smbdev, smbfun;
+unsigned short smbusbase;
+unsigned char spd[256];
+char s[] = {'/', 0, '-', 0, '\\', 0, '|', 0};
+
+static void ich5_get_smb(void)
+{
+    unsigned long x;
+    int result;
+    result = pci_conf_read(0, smbdev, smbfun, 0x20, 2, &x);
+    if (result == 0) smbusbase = (unsigned short) x & 0xFFFE;
+}
+
+unsigned char ich5_smb_read_byte(unsigned char adr, unsigned char cmd)
+{
+    int l1, h1, l2, h2;
+    unsigned long long t;
+    __outb(0x1f, SMBHSTSTS);			// reset SMBus Controller
+    __outb(0xff, SMBHSTDAT);
+    while(__inb(SMBHSTSTS) & 0x01);		// wait until ready
+    __outb(cmd, SMBHSTCMD);
+    __outb((adr << 1) | 0x01, SMBHSTADD);
+    __outb(0x48, SMBHSTCNT);
+    rdtsc(l1, h1);
+    cprint(POP2_Y, POP2_X + 16, s + cmd % 8);	// progress bar
+    while (!(__inb(SMBHSTSTS) & 0x02)) {	// wait til command finished
+	rdtsc(l2, h2);
+	t = ((h2 - h1) * 0xffffffff + (l2 - l1)) / v->clks_msec;
+	if (t > 10) break;			// break after 10ms
+    }
+    return __inb(SMBHSTDAT);
+}
+
+static int ich5_read_spd(int dimmadr)
+{
+    int x;
+    spd[0] = ich5_smb_read_byte(0x50 + dimmadr, 0);
+    if (spd[0] == 0xff)	return -1;		// no spd here
+    for (x = 1; x < 256; x++) {
+	spd[x] = ich5_smb_read_byte(0x50 + dimmadr, (unsigned char) x);
+    }
+    return 0;
+}
+
+static void us15w_get_smb(void)
+{
+    unsigned long x;
+    int result;
+    result = pci_conf_read(0, 0x1f, 0, 0x40, 2, &x);
+    if (result == 0) smbusbase = (unsigned short) x & 0xFFC0;
+}
+
+unsigned char us15w_smb_read_byte(unsigned char adr, unsigned char cmd)
+{
+    int l1, h1, l2, h2;
+    unsigned long long t;
+    //__outb(0x00, smbusbase + 1);			// reset SMBus Controller
+    //__outb(0x00, smbusbase + 6);
+    //while((__inb(smbusbase + 1) & 0x08) != 0);		// wait until ready
+    __outb(0x02, smbusbase + 0);    // Byte read
+    __outb(cmd, smbusbase + 5);     // Command
+    __outb(0x07, smbusbase + 1);    // Clear status
+    __outb((adr << 1) | 0x01, smbusbase + 4);   // DIMM address
+    __outb(0x12, smbusbase + 0);    // Start
+    //while (((__inb(smbusbase + 1) & 0x08) == 0)) {}	// wait til busy
+    rdtsc(l1, h1);
+    cprint(POP2_Y, POP2_X + 16, s + cmd % 8);	// progress bar
+    while (((__inb(smbusbase + 1) & 0x01) == 0) ||
+		((__inb(smbusbase + 1) & 0x08) != 0)) {	// wait til command finished
+	rdtsc(l2, h2);
+	t = ((h2 - h1) * 0xffffffff + (l2 - l1)) / v->clks_msec;
+	if (t > 10) break;			// break after 10ms
+    }
+    return __inb(smbusbase + 6);
+}
+
+static int us15w_read_spd(int dimmadr)
+{
+    int x;
+    spd[0] = us15w_smb_read_byte(0x50 + dimmadr, 0);
+    if (spd[0] == 0xff)	return -1;		// no spd here
+    for (x = 1; x < 256; x++) {
+	spd[x] = us15w_smb_read_byte(0x50 + dimmadr, (unsigned char) x);
+    }
+    return 0;
+}
+    
+struct pci_smbus_controller {
+    unsigned vendor;
+    unsigned device;
+    char *name;
+    void (*get_adr)(void);
+    int (*read_spd)(int dimmadr);
+};
+
+static struct pci_smbus_controller smbcontrollers[] = {
+{0x8086, 0x3B30, "Intel P55", 		ich5_get_smb, ich5_read_spd},
+{0x8086, 0x3A60, "Intel ICH10B", 	ich5_get_smb, ich5_read_spd},
+{0x8086, 0x3A30, "Intel ICH10R", 	ich5_get_smb, ich5_read_spd},
+{0x8086, 0x2930, "Intel ICH9", 		ich5_get_smb, ich5_read_spd},
+{0x8086, 0x283E, "Intel ICH8", 		ich5_get_smb, ich5_read_spd},
+{0x8086, 0x27DA, "Intel ICH7", 		ich5_get_smb, ich5_read_spd},
+{0x8086, 0x266A, "Intel ICH6", 		ich5_get_smb, ich5_read_spd},
+{0x8086, 0x24D3, "Intel ICH5", 		ich5_get_smb, ich5_read_spd},
+{0x8086, 0x24C3, "Intel ICH4", 		ich5_get_smb, ich5_read_spd},
+{0x8086, 0x25A4, "Intel 6300ESB", ich5_get_smb, ich5_read_spd},
+{0x8086, 0x269B, "Intel ESB2", 		ich5_get_smb, ich5_read_spd},
+{0x8086, 0x8119, "Intel US15W", 	us15w_get_smb, us15w_read_spd},
+{0x8086, 0x5032, "Intel EP80579", ich5_get_smb, ich5_read_spd},
+{0, 0, "", 0, 0}
+};
+
+
+int find_smb_controller(void)
+{
+    int i = 0;
+    unsigned long valuev, valued;
+    for (smbdev = 0; smbdev < 32; smbdev++) {
+	for (smbfun = 0; smbfun < 8; smbfun++) {
+	    pci_conf_read(0, smbdev, smbfun, 0, 2, &valuev);
+	    if (valuev != 0xFFFF) {					// if there is something look what's it..
+		for (i = 0; smbcontrollers[i].vendor > 0; i++) {	// check if this is a known smbus controller
+		    if (valuev == smbcontrollers[i].vendor) {
+			pci_conf_read(0, smbdev, smbfun, 2, 2, &valued);	// read the device id
+			if (valued == smbcontrollers[i].device) {
+			    return i;
+			}
+		    }
+		}
+	    }
+	}
+    }
+    return -1;
+}
+
+
+void show_spd(void)
+{
+    int index;
+    int i, j;
+    int flag = 0;
+    pop2up();
+    wait_keyup();
+    index = find_smb_controller();
+    if (index == -1) {
+	cprint(POP2_Y, POP2_X+1, "SMBus Controller not known");
+	while (!get_key());
+	wait_keyup();
+	pop2down();
+	return;
+    }
+    else cprint(POP2_Y, POP2_X+1, "SPD Data: Slot");
+    smbcontrollers[index].get_adr();
+    for (j = 0; j < 16; j++) {
+	if (smbcontrollers[index].read_spd(j) == 0) {
+	    dprint(POP2_Y, POP2_X + 15, j, 2, 0);
+    	    for (i = 0; i < 256; i++) {
+		hprint2(2 + POP2_Y + i / 16, 3 + POP2_X + (i % 16) * 3, spd[i], 2);
+	    }
+	    flag = 0;
+    	    while(!flag) {
+		if (get_key()) flag++;
+	    }
+	    wait_keyup();
+	}
+    }
+    pop2down();
+}
+

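ich5_smb_read_byte() above drives the ICH SMBus host controller directly: it writes the register offset to SMBHSTCMD, the DIMM EEPROM address (0x50 + slot, read bit set) to SMBHSTADD, starts the transaction with 0x48 in SMBHSTCNT, and polls SMBHSTSTS with a 10 ms timeout. For illustration, the same helpers could pull one module's image and show its memory-type byte (the slot number is an example; SPD byte 2 is the JEDEC fundamental-memory-type field, e.g. 0x0B for DDR3):

    static void dump_one_dimm(int slot)
    {
        if (ich5_read_spd(slot) != 0)
            return;                       /* no EEPROM answered at 0x50 + slot */
        /* spd[] now holds the 256-byte image; show the memory-type byte */
        hprint2(POP2_Y + 2, POP2_X + 3, spd[2], 2);
    }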
diff --git a/stddef.h b/stddef.h
new file mode 100644
index 0000000..213c89b
--- /dev/null
+++ b/stddef.h
@@ -0,0 +1,8 @@
+#ifndef I386_STDDEF_H
+#define I386_STDDEF_H
+
+#define NULL ((void *)0)
+
+typedef unsigned long size_t;
+
+#endif /* I386_STDDEF_H */
diff --git a/stdin.h b/stdin.h
new file mode 100644
index 0000000..e3a08e1
--- /dev/null
+++ b/stdin.h
@@ -0,0 +1,52 @@
+#ifndef I386_STDINT_H
+#define I386_STDINT_H
+
+/* Exact integral types */
+typedef unsigned char      uint8_t;
+typedef signed char        int8_t; 
+
+typedef unsigned short     uint16_t;
+typedef signed short       int16_t;
+
+typedef unsigned int       uint32_t;
+typedef signed int         int32_t;
+
+typedef unsigned long long uint64_t;
+typedef signed long long   int64_t;
+
+/* Small types */
+typedef unsigned char      uint_least8_t;
+typedef signed char        int_least8_t; 
+
+typedef unsigned short     uint_least16_t;
+typedef signed short       int_least16_t;
+
+typedef unsigned int       uint_least32_t;
+typedef signed int         int_least32_t;
+
+typedef unsigned long long uint_least64_t;
+typedef signed long long   int_least64_t;
+
+/* Fast Types */
+typedef unsigned char      uint_fast8_t;
+typedef signed char        int_fast8_t; 
+
+typedef unsigned int       uint_fast16_t;
+typedef signed int         int_fast16_t;
+
+typedef unsigned int       uint_fast32_t;
+typedef signed int         int_fast32_t;
+
+typedef unsigned long long uint_fast64_t;
+typedef signed long long   int_fast64_t;
+
+/* Types for `void *' pointers.  */
+typedef int                intptr_t;
+typedef unsigned int       uintptr_t;
+
+/* Largest integral types */
+typedef long long int      intmax_t;
+typedef unsigned long long uintmax_t;
+
+
+#endif /* I386_STDINT_H */
\ No newline at end of file
diff --git a/stdint.h b/stdint.h
new file mode 100644
index 0000000..1c136e0
--- /dev/null
+++ b/stdint.h
@@ -0,0 +1,60 @@
+#ifndef I386_STDINT_H
+#define I386_STDINT_H
+
+/* Exact integral types */
+typedef unsigned char      uint8_t;
+typedef signed char        int8_t; 
+
+typedef unsigned short     uint16_t;
+typedef signed short       int16_t;
+
+typedef unsigned int       uint32_t;
+typedef signed int         int32_t;
+
+typedef unsigned long long uint64_t;
+typedef signed long long   int64_t;
+
+/* Small types */
+typedef unsigned char      uint_least8_t;
+typedef signed char        int_least8_t; 
+
+typedef unsigned short     uint_least16_t;
+typedef signed short       int_least16_t;
+
+typedef unsigned int       uint_least32_t;
+typedef signed int         int_least32_t;
+
+typedef unsigned long long uint_least64_t;
+typedef signed long long   int_least64_t;
+
+/* Fast Types */
+typedef unsigned char      uint_fast8_t;
+typedef signed char        int_fast8_t; 
+
+typedef unsigned int       uint_fast16_t;
+typedef signed int         int_fast16_t;
+
+typedef unsigned int       uint_fast32_t;
+typedef signed int         int_fast32_t;
+
+typedef unsigned long long uint_fast64_t;
+typedef signed long long   int_fast64_t;
+
+/* Types for `void *' pointers.  */
+typedef int                intptr_t;
+typedef unsigned int       uintptr_t;
+
+/* Largest integral types */
+typedef long long int      intmax_t;
+typedef unsigned long long uintmax_t;
+
+typedef char           bool;
+#ifndef FALSE
+#define FALSE          0
+#endif
+
+#ifndef TRUE
+#define TRUE           1
+#endif
+
+#endif /* I386_STDINT_H */
diff --git a/test.c b/test.c
new file mode 100644
index 0000000..9bcc953
--- /dev/null
+++ b/test.c
@@ -0,0 +1,1644 @@
+/* test.c - MemTest-86  Version 3.4
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Chris Brady
+ */
+#include "test.h"
+#include "config.h"
+#include "smp.h"
+
+extern volatile int    mstr_cpu;
+extern volatile int    run_cpus;
+extern volatile int    test;
+extern volatile int segs, bail;
+extern int test_ticks, nticks;
+extern struct tseq tseq[];
+extern void update_err_counts(void);
+extern void print_err_counts(void);
+void rand_seed( unsigned int seed1, unsigned int seed2, int me);
+ulong rand(int me);
+void poll_errors();
+
+int ecount = 0;
+
+static inline ulong roundup(ulong value, ulong mask)
+{
+	return (value + mask) & ~mask;
+}
+/*
+ * Memory address test, walking ones
+ */
+void addr_tst1(int me)
+{
+	int i, j, k;
+	volatile ulong *p, *pt, *end;
+	ulong bad, mask, bank, p1;
+
+	/* Test the global address bits */
+	for (p1=0, j=0; j<2; j++) {
+        	hprint(LINE_PAT, COL_PAT, p1);
+
+		/* Set pattern in our lowest multiple of 0x20000 */
+		p = (ulong *)roundup((ulong)v->map[0].start, 0x1ffff);
+		*p = p1;
+	
+		/* Now write pattern complement */
+		p1 = ~p1;
+		end = v->map[segs-1].end;
+		for (i=0; i<100; i++) {
+			mask = 4;
+			do {
+				pt = (ulong *)((ulong)p | mask);
+				if (pt == p) {
+					mask = mask << 1;
+					continue;
+				}
+				if (pt >= end) {
+					break;
+				}
+				*pt = p1;
+				if ((bad = *p) != ~p1) {
+					ad_err1((ulong *)p, (ulong *)mask,
+						bad, ~p1);
+					i = 1000;
+				}
+				mask = mask << 1;
+			} while(mask);
+		}
+		do_tick(me);
+		BAILR
+	}
+
+	/* Now check the address bits in each bank */
+	/* If we have more than 8mb of memory then the bank size must be */
+	/* bigger than 256k.  If so use 1mb for the bank size. */
+	if (v->pmap[v->msegs - 1].end > (0x800000 >> 12)) {
+		bank = 0x100000;
+	} else {
+		bank = 0x40000;
+	}
+	for (p1=0, k=0; k<2; k++) {
+        	hprint(LINE_PAT, COL_PAT, p1);
+
+		for (j=0; j<segs; j++) {
+			p = v->map[j].start;
+			/* Force start address to be a multiple of 256k */
+			p = (ulong *)roundup((ulong)p, bank - 1);
+			end = v->map[j].end;
+			/* Redundant checks for overflow */
+                        while (p < end && p > v->map[j].start && p != 0) {
+				*p = p1;
+
+				p1 = ~p1;
+				for (i=0; i<50; i++) {
+					mask = 4;
+					do {
+						pt = (ulong *)
+						    ((ulong)p | mask);
+						if (pt == p) {
+							mask = mask << 1;
+							continue;
+						}
+						if (pt >= end) {
+							break;
+						}
+						*pt = p1;
+						if ((bad = *p) != ~p1) {
+							ad_err1((ulong *)p,
+							    (ulong *)mask,
+							    bad,~p1);
+							i = 200;
+						}
+						mask = mask << 1;
+					} while(mask);
+				}
+				if (p + bank > p) {
+					p += bank;
+				} else {
+					p = end;
+				}
+				p1 = ~p1;
+			}
+		}
+		do_tick(me);
+		BAILR
+		p1 = ~p1;
+	}
+}
+
+/*
+ * Memory address test, own address
+ */
+void addr_tst2(int me)
+{
+	int j, done;
+	ulong *p, *pe, *end, *start;
+
+        cprint(LINE_PAT, COL_PAT, "address ");
+
+	/* Write each address with its own address */
+	for (j=0; j<segs; j++) {
+		start = v->map[j].start;
+		end = v->map[j].end;
+		pe = (ulong *)start;
+		p = start;
+		done = 0;
+		do {
+			do_tick(me);
+			BAILR
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+				pe += SPINSZ;
+			} else {
+				pe = end;
+			}
+			if (pe >= end) {
+				pe = end;
+				done++;
+			}
+			if (p == pe ) {
+				break;
+			}
+
+/* Original C code replaced with hand tuned assembly code
+ *			for (; p <= pe; p++) {
+ *				*p = (ulong)p;
+ *			}
+ */
+			asm __volatile__ (
+				"jmp L91\n\t"
+				".p2align 4,,7\n\t"
+				"L90:\n\t"
+				"addl $4,%%edi\n\t"
+				"L91:\n\t"
+				"movl %%edi,(%%edi)\n\t"
+				"cmpl %%edx,%%edi\n\t"
+				"jb L90\n\t"
+				: : "D" (p), "d" (pe)
+			);
+			p = pe + 1;
+		} while (!done);
+	}
+
+	/* Each address should have its own address */
+	for (j=0; j<segs; j++) {
+		start = v->map[j].start;
+		end = v->map[j].end;
+		pe = (ulong *)start;
+		p = start;
+		done = 0;
+		do {
+			do_tick(me);
+			BAILR
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+                                pe += SPINSZ;
+                        } else {
+                                pe = end;
+                        }
+			if (pe >= end) {
+				pe = end;
+				done++;
+			}
+			if (p == pe ) {
+				break;
+			}
+/* Original C code replaced with hand tuned assembly code
+ *			for (; p <= pe; p++) {
+ *				if((bad = *p) != (ulong)p) {
+ *					ad_err2((ulong)p, bad);
+ *				}
+ *			}
+ */
+			asm __volatile__ (
+				"jmp L95\n\t"
+				".p2align 4,,7\n\t"
+				"L99:\n\t"
+				"addl $4,%%edi\n\t"
+				"L95:\n\t"
+				"movl (%%edi),%%ecx\n\t"
+				"cmpl %%edi,%%ecx\n\t"
+				"jne L97\n\t"
+				"L96:\n\t"
+				"cmpl %%edx,%%edi\n\t"
+				"jb L99\n\t"
+				"jmp L98\n\t"
+			
+				"L97:\n\t"
+				"pushl %%edx\n\t"
+				"pushl %%ecx\n\t"
+				"pushl %%edi\n\t"
+				"call ad_err2\n\t"
+				"popl %%edi\n\t"
+				"popl %%ecx\n\t"
+				"popl %%edx\n\t"
+				"jmp L96\n\t"
+
+				"L98:\n\t"
+				: : "D" (p), "d" (pe)
+				: "ecx"
+			);
+			p = pe + 1;
+		} while (!done);
+	}
+}
+
+/*
+ * Test all of memory using a "half moving inversions" algorithm using random
+ * numbers and their complement as the data pattern. Since we are not able to
+ * produce random numbers in reverse order, testing is only done in the forward
+ * direction.
+ */
+void movinvr(int me)
+{
+	int i, j, done, seed1, seed2;
+	ulong *p;
+	ulong *pe;
+	ulong *start,*end;
+	ulong num, chunk;
+
+	/* Initialize memory with initial sequence of random numbers.  */
+	if (v->rdtsc) {
+		asm __volatile__ ("rdtsc":"=a" (seed1),"=d" (seed2));
+	} else {
+		seed1 = 521288629 + v->pass;
+		seed2 = 362436069 - v->pass;
+	}
+
+	/* Display the current seed */
+        if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, seed1);
+	rand_seed(seed1, seed2, me);
+	for (j=0; j<segs; j++) {
+		if (run_cpus == 1) {
+		    start = v->map[j].start;
+		    end = v->map[j].end;
+		} else {
+		    chunk = v->map[j].end - v->map[j].start;
+		    chunk /= run_cpus;
+		    chunk++;
+		    start = v->map[j].start+(chunk*me);
+		    /* Set end addrs for the highest CPU num to the
+		     * end of the segment for rounding errors */
+		    if (me == mstr_cpu) {
+			end = v->map[j].end;
+		    } else {
+		    	end = start + chunk - 1;
+		    }
+		}
+		pe = start;
+		p = start;
+		done = 0;
+		do {
+			do_tick(me);
+			BAILR
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+				pe += SPINSZ;
+			} else {
+				pe = end;
+			}
+			if (pe >= end) {
+				pe = end;
+				done++;
+			}
+			if (p == pe ) {
+				break;
+			}
+/* Original C code replaced with hand tuned assembly code */
+/*
+			for (; p <= pe; p++) {
+				*p = rand();
+			}
+ */
+
+                        asm __volatile__ (
+                                "jmp L200\n\t"
+                                ".p2align 4,,7\n\t"
+                                "L201:\n\t"
+                                "addl $4,%%edi\n\t"
+                                "L200:\n\t"
+				"pushl %%ecx\n\t" \
+                                "call rand\n\t"
+				"popl %%ecx\n\t" \
+				"movl %%eax,(%%edi)\n\t"
+                                "cmpl %%ebx,%%edi\n\t"
+                                "jb L201\n\t"
+                                : : "D" (p), "b" (pe), "c" (me)
+				: "eax"
+                        );
+			p = pe + 1;
+		} while (!done);
+	}
+
+	/* Do moving inversions test. Check for initial pattern and then
+	 * write the complement for each memory location.
+	 */
+	for (i=0; i<2; i++) {
+		rand_seed(seed1, seed2, me);
+		for (j=0; j<segs; j++) {
+			if (run_cpus == 1) {
+			    start = v->map[j].start;
+			    end = v->map[j].end;
+			} else {
+			    chunk = v->map[j].end - v->map[j].start;
+			    chunk /= run_cpus;
+			    chunk++;
+			    start = v->map[j].start+(chunk*me);
+			    /* Set end addrs for the highest CPU num to the
+			     * end of the segment for rounding errors */
+			    if (me == mstr_cpu) {
+				end = v->map[j].end;
+			    } else {
+			    	end = start + chunk - 1;
+			    }
+			}
+			pe = start;
+			p = start;
+			done = 0;
+			do {
+				do_tick(me);
+				BAILR
+
+				/* Check for overflow */
+				if (pe + SPINSZ > pe && pe != 0) {
+					pe += SPINSZ;
+				} else {
+					pe = end;
+				}
+				if (pe >= end) {
+					pe = end;
+					done++;
+				}
+				if (p == pe ) {
+					break;
+				}
+/* Original C code replaced with hand tuned assembly code */
+/*
+				for (; p <= pe; p++) {
+					num = rand();
+					if (i) {
+						num = ~num;
+					}
+					if ((bad=*p) != num) {
+						error((ulong*)p, num, bad);
+					}
+					*p = ~num;
+				}
+*/
+				if (i) {
+					num = 0xffffffff;
+				} else {
+					num = 0;
+				}
+				asm __volatile__ (
+                                        "pushl %%ebp\n\t"
+					"jmp L26\n\t" \
+					".p2align 4,,7\n\t" \
+					"L27:\n\t" \
+					"addl $4,%%edi\n\t" \
+					"L26:\n\t" \
+					"pushl %%edx\n\t" \
+					"call rand\n\t"
+					"popl %%edx\n\t" \
+					"xorl %%ebx,%%eax\n\t" \
+					"movl (%%edi),%%ecx\n\t" \
+					"cmpl %%eax,%%ecx\n\t" \
+					"jne L23\n\t" \
+					"L25:\n\t" \
+					"movl $0xffffffff,%%ebp\n\t" \
+					"xorl %%ebp,%%eax\n\t" \
+					"movl %%eax,(%%edi)\n\t" \
+					"cmpl %%esi,%%edi\n\t" \
+					"jb L27\n\t" \
+					"jmp L24\n" \
+
+					"L23:\n\t" \
+					"pushl %%esi\n\t" \
+					"pushl %%ecx\n\t" \
+					"pushl %%eax\n\t" \
+					"pushl %%edi\n\t" \
+					"call error\n\t" \
+					"popl %%edi\n\t" \
+					"popl %%eax\n\t" \
+					"popl %%ecx\n\t" \
+					"popl %%esi\n\t" \
+					"jmp L25\n" \
+
+					"L24:\n\t" \
+                                        "popl %%ebp\n\t"
+					:: "D" (p), "S" (pe), "b" (num),
+						 "d" (me)
+					: "eax", "ecx"
+				);
+				p = pe + 1;
+			} while (!done);
+		}
+	}
+}
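The commented-out reference code above is what each assembly block implements; put together, one verify-and-invert sweep of movinvr looks like this in plain C (SPINSZ chunking, per-CPU address splitting and do_tick handling omitted for brevity):

    static void movinvr_sweep(ulong *start, ulong *end, int invert, int me)
    {
        ulong *p, num, bad;

        for (p = start; p <= end; p++) {
            num = rand(me);
            if (invert) {                 /* second sweep expects the complement */
                num = ~num;
            }
            if ((bad = *p) != num) {
                error((ulong *)p, num, bad);
            }
            *p = ~num;                    /* write complement for the next sweep */
        }
    }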
+
+/*
+ * Test all of memory using a "moving inversions" algorithm using the
+ * pattern in p1 and its complement in p2.
+ */
+void movinv1(int iter, ulong p1, ulong p2, int me)
+{
+	int i, j, done;
+	ulong *p, *pe, len, chunk, *start, *end;
+
+	/* Display the current pattern */
+        if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, p1);
+
+	/* Initialize memory with the initial pattern.  */
+	for (j=0; j<segs; j++) {
+		if (run_cpus == 1) {
+		    start = v->map[j].start;
+		    end = v->map[j].end;
+		} else {
+		    chunk = v->map[j].end - v->map[j].start;
+		    chunk /= run_cpus;
+		    chunk++;
+		    start = v->map[j].start + (chunk * me);
+		    /* Set the end addrs for the highest numbers CPU to the
+		     * end of the segment to take care of rounding errors */
+		    if (me == mstr_cpu) {
+			end = v->map[j].end;
+		    } else {
+		    	end = start + chunk - 1;
+		    }
+		}
+
+		pe = start;
+		p = start;
+		done = 0;
+		do {
+			do_tick(me);
+			BAILR
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+				pe += SPINSZ;
+			} else {
+				pe = end;
+			}
+			if (pe >= end) {
+				pe = end;
+				done++;
+			}
+			len = pe - p + 1;
+			if (p == pe ) {
+				break;
+			}
+/* Original C code replaced with hand tuned assembly code
+ *			for (; p <= pe; p++) {
+ *				*p = p1;
+ *			}
+ */
+			asm __volatile__ (
+				"rep\n\t" \
+				"stosl\n\t"
+				: : "c" (len), "D" (p), "a" (p1)
+			);
+			p = pe + 1;
+		} while (!done);
+	}
+
+	/* Do moving inversions test. Check for initial pattern and then
+	 * write the complement for each memory location. Test from bottom
+	 * up and then from the top down.  */
+	for (i=0; i<iter; i++) {
+		for (j=0; j<segs; j++) {
+			if (run_cpus == 1) {
+		    	    start = v->map[j].start;
+		            end = v->map[j].end;
+		        } else {
+		            chunk = v->map[j].end - v->map[j].start;
+		            chunk /= run_cpus;
+			    chunk++;
+		            start = v->map[j].start + (chunk * me);
+		        /* Set the end addrs for the highest numbers CPU to the
+		         * end of the segment to take care of rounding errors */
+		            if (me == mstr_cpu) {
+			        end = v->map[j].end;
+		            } else {
+		    	        end = start + chunk - 1;
+		            }
+			}
+			pe = start;
+			p = start;
+			done = 0;
+			do {
+				do_tick(me);
+				BAILR
+
+				/* Check for overflow */
+				if (pe + SPINSZ > pe && pe != 0) {
+					pe += SPINSZ;
+				} else {
+					pe = end;
+				}
+				if (pe >= end) {
+					pe = end;
+					done++;
+				}
+				if (p == pe ) {
+					break;
+				}
+/* Original C code replaced with hand tuned assembly code
+ *				for (; p <= pe; p++) {
+ *					if ((bad=*p) != p1) {
+ *						error((ulong*)p, p1, bad);
+ *					}
+ *					*p = p2;
+ *				}
+ */
+				asm __volatile__ (
+					"jmp L2\n\t" \
+					".p2align 4,,7\n\t" \
+					"L0:\n\t" \
+					"addl $4,%%edi\n\t" \
+					"L2:\n\t" \
+					"movl (%%edi),%%ecx\n\t" \
+					"cmpl %%eax,%%ecx\n\t" \
+					"jne L3\n\t" \
+					"L5:\n\t" \
+					"movl %%ebx,(%%edi)\n\t" \
+					"cmpl %%edx,%%edi\n\t" \
+					"jb L0\n\t" \
+					"jmp L4\n" \
+
+					"L3:\n\t" \
+					"pushl %%edx\n\t" \
+					"pushl %%ebx\n\t" \
+					"pushl %%ecx\n\t" \
+					"pushl %%eax\n\t" \
+					"pushl %%edi\n\t" \
+					"call error\n\t" \
+					"popl %%edi\n\t" \
+					"popl %%eax\n\t" \
+					"popl %%ecx\n\t" \
+					"popl %%ebx\n\t" \
+					"popl %%edx\n\t" \
+					"jmp L5\n" \
+
+					"L4:\n\t" \
+					:: "a" (p1), "D" (p), "d" (pe), "b" (p2)
+					: "ecx"
+				);
+				p = pe + 1;
+			} while (!done);
+		}
+		for (j=segs-1; j>=0; j--) {
+		        if (run_cpus == 1) {
+		    	    start = v->map[j].start;
+		            end = v->map[j].end;
+		        } else {
+		            chunk = v->map[j].end - v->map[j].start;
+		            chunk /= run_cpus;
+			    chunk++;
+		            start = v->map[j].start + (chunk * me);
+		            /* Set the end address for the highest numbered CPU to
+		             * the end of the segment for rounding errors */
+		            if (me == mstr_cpu) {
+			        end = v->map[j].end;
+		            } else {
+		    	        end = start + chunk - 1;
+		            }
+			}
+			pe = end;
+			p = end;
+			done = 0;
+			do {
+				do_tick(me);
+				BAILR
+
+				/* Check for underflow */
+				if (pe - SPINSZ < pe && pe != 0) {
+					pe -= SPINSZ;
+				} else {
+					pe = start;
+					done++;
+				}
+
+				/* Since we are using unsigned addresses, a
+				 * redundant check is required */
+				if (pe < start || pe > end) {
+					pe = start;
+					done++;
+				}
+				if (p == pe ) {
+					break;
+				}
+/* Original C code replaced with hand tuned assembly code
+ *				do {
+ *					if ((bad=*p) != p2) {
+ *						error((ulong*)p, p2, bad);
+ *					}
+ *					*p = p1;
+ *				} while (p-- >= pe);
+ */
+				asm __volatile__ (
+					"jmp L9\n\t"
+					".p2align 4,,7\n\t"
+					"L11:\n\t"
+					"subl $4, %%edi\n\t"
+					"L9:\n\t"
+					"movl (%%edi),%%ecx\n\t"
+					"cmpl %%ebx,%%ecx\n\t"
+					"jne L6\n\t"
+					"L10:\n\t"
+					"movl %%eax,(%%edi)\n\t"
+					"cmpl %%edi, %%edx\n\t"
+					"jne L11\n\t"
+					"jmp L7\n\t"
+
+					"L6:\n\t"
+					"pushl %%edx\n\t"
+					"pushl %%eax\n\t"
+					"pushl %%ecx\n\t"
+					"pushl %%ebx\n\t"
+					"pushl %%edi\n\t"
+					"call error\n\t"
+					"popl %%edi\n\t"
+					"popl %%ebx\n\t"
+					"popl %%ecx\n\t"
+					"popl %%eax\n\t"
+					"popl %%edx\n\t"
+					"jmp L10\n"
+
+					"L7:\n\t"
+					:: "a" (p1), "D" (p), "d" (pe), "b" (p2)
+					: "ecx"
+				);
+				p = pe - 1;
+			} while (!done);
+		}
+	}
+}
+
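+/*
+ * Moving inversions test using a walking 32-bit pattern.  Roughly:
+ * p1 is the starting pattern, sval the bit shifted in as the pattern
+ * walks, lb/hb the patterns reloaded when the walk wraps at the
+ * low/high end, and off the starting bit offset for this pass.
+ */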
+void movinv32(int iter, ulong p1, ulong lb, ulong hb, int sval, int off, int me)
+{
+	int i, j, k=0, n=0, done;
+	ulong *p, *pe, *start, *end, chunk, pat = 0, p3;
+
+	p3 = sval << 31;
+	/* Display the current pattern */
+	if (mstr_cpu == me) hprint(LINE_PAT, COL_PAT, p1);
+
+	/* Initialize memory with the initial pattern.  */
+	for (j=0; j<segs; j++) {
+		if (run_cpus == 1) {
+		    start = v->map[j].start;
+		    end = v->map[j].end;
+		} else {
+		    chunk = v->map[j].end - v->map[j].start;
+		    chunk /= run_cpus;
+		    /* Force chunk to be a multiple of 64 */
+		    chunk = (chunk + 63) & 0xffffffc0;
+		    start = v->map[j].start+(chunk*me);
+		    /* Set end addrs for the highest CPU num to the
+		     * end of the segment for rounding errors */
+		    if (me == mstr_cpu) {
+			end = v->map[j].end;
+		    } else {
+		    	end = start + chunk - 1;
+		    }
+		}
+		pe = start;
+		p = start;
+		done = 0;
+		k = off;
+		pat = p1;
+		do {
+			do_tick(me);
+			BAILR
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+				pe += SPINSZ;
+			} else {
+				pe = end;
+			}
+			if (pe >= end) {
+				pe = end;
+				done++;
+			}
+			if (p == pe ) {
+				break;
+			}
+			/* Do a SPINSZ section of memory */
+/* Original C code replaced with hand tuned assembly code
+ *			while (p <= pe) {
+ *				*p = pat;
+ *				if (++k >= 32) {
+ *					pat = lb;
+ *					k = 0;
+ *				} else {
+ *					pat = pat << 1;
+ *					pat |= sval;
+ *				}
+ *				p++;
+ *			}
+ */
+			asm __volatile__ (
+                                "jmp L20\n\t"
+                                ".p2align 4,,7\n\t"
+                                "L923:\n\t"
+                                "addl $4,%%edi\n\t"
+                                "L20:\n\t"
+                                "movl %%ecx,(%%edi)\n\t"
+                                "addl $1,%%ebx\n\t"
+                                "cmpl $32,%%ebx\n\t"
+                                "jne L21\n\t"
+                                "movl %%esi,%%ecx\n\t"
+                                "xorl %%ebx,%%ebx\n\t"
+                                "jmp L22\n"
+                                "L21:\n\t"
+                                "shll $1,%%ecx\n\t"
+                                "orl %%eax,%%ecx\n\t"
+                                "L22:\n\t"
+                                "cmpl %%edx,%%edi\n\t"
+                                "jb L923\n\t"
+                                : "=b" (k), "=c" (pat)
+                                : "D" (p),"d" (pe),"b" (k),"c" (pat),
+                                        "a" (sval), "S" (lb)
+			);
+			p = pe + 1;
+		} while (!done);
+	}
+
+	/* Do moving inversions test. Check for initial pattern and then
+	 * write the complement for each memory location. Test from bottom
+	 * up and then from the top down.  */
+	for (i=0; i<iter; i++) {
+		for (j=0; j<segs; j++) {
+			if (run_cpus == 1) {
+			    start = v->map[j].start;
+			    end = v->map[j].end;
+			} else {
+			    chunk = v->map[j].end - v->map[j].start;
+			    chunk /= run_cpus;
+			    /* Force chunk to be a multiple of 64 */
+			    chunk = (chunk + 63) & 0xffffffc0;
+			    start = v->map[j].start+(chunk*me);
+			    /* Set end addrs for the highest CPU num to the
+			     * end of the segment for rounding errors */
+			    if (me == mstr_cpu) {
+				end = v->map[j].end;
+			    } else {
+			    	end = start + chunk - 1;
+			    }
+			}
+			pe = start;
+			p = start;
+			done = 0;
+			k = off;
+			pat = p1;
+			do {
+				do_tick(me);
+				BAILR
+
+				/* Check for overflow */
+				if (pe + SPINSZ > pe && pe != 0) {
+					pe += SPINSZ;
+				} else {
+					pe = end;
+				}
+				if (pe >= end) {
+					pe = end;
+					done++;
+				}
+				if (p == pe ) {
+					break;
+				}
+/* Original C code replaced with hand tuned assembly code
+ *				while (1) {
+ *					if ((bad=*p) != pat) {
+ *						error((ulong*)p, pat, bad);
+ *					}
+ *					*p = ~pat;
+ *					if (p >= pe) break;
+ *					p++;
+ *
+ *					if (++k >= 32) {
+ *						pat = lb;
+ *						k = 0;
+ *					} else {
+ *						pat = pat << 1;
+ *						pat |= sval;
+ *					}
+ *				}
+ */
+				asm __volatile__ (
+                                        "pushl %%ebp\n\t"
+                                        "jmp L30\n\t"
+                                        ".p2align 4,,7\n\t"
+                                        "L930:\n\t"
+                                        "addl $4,%%edi\n\t"
+                                        "L30:\n\t"
+                                        "movl (%%edi),%%ebp\n\t"
+                                        "cmpl %%ecx,%%ebp\n\t"
+                                        "jne L34\n\t"
+
+                                        "L35:\n\t"
+                                        "notl %%ecx\n\t"
+                                        "movl %%ecx,(%%edi)\n\t"
+                                        "notl %%ecx\n\t"
+                                        "incl %%ebx\n\t"
+                                        "cmpl $32,%%ebx\n\t"
+                                        "jne L31\n\t"
+                                        "movl %%esi,%%ecx\n\t"
+                                        "xorl %%ebx,%%ebx\n\t"
+                                        "jmp L32\n"
+                                        "L31:\n\t"
+                                        "shll $1,%%ecx\n\t"
+                                        "orl %%eax,%%ecx\n\t"
+					"L32:\n\t"
+                                        "cmpl %%edx,%%edi\n\t"
+                                        "jb L930\n\t"
+                                        "jmp L33\n\t"
+
+                                        "L34:\n\t" \
+                                        "pushl %%esi\n\t"
+                                        "pushl %%eax\n\t"
+                                        "pushl %%ebx\n\t"
+                                        "pushl %%edx\n\t"
+                                        "pushl %%ebp\n\t"
+                                        "pushl %%ecx\n\t"
+                                        "pushl %%edi\n\t"
+                                        "call error\n\t"
+                                        "popl %%edi\n\t"
+                                        "popl %%ecx\n\t"
+                                        "popl %%ebp\n\t"
+                                        "popl %%edx\n\t"
+                                        "popl %%ebx\n\t"
+                                        "popl %%eax\n\t"
+                                        "popl %%esi\n\t"
+                                        "jmp L35\n"
+
+                                        "L33:\n\t"
+                                        "popl %%ebp\n\t"
+                                        : "=b" (k),"=c" (pat)
+                                        : "D" (p),"d" (pe),"b" (k),"c" (pat),
+                                                "a" (sval), "S" (lb)
+				);
+				p = pe + 1;
+			} while (!done);
+		}
+
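+		/* Recompute the shift count and pattern so the downward pass
+		 * starts with the pattern the upward pass used at the highest
+		 * address. */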
+                if (--k < 0) {
+                        k = 31;
+                }
+                for (pat = lb, n = 0; n < k; n++) {
+                        pat = pat << 1;
+                        pat |= sval;
+                }
+		k++;
+
+		for (j=segs-1; j>=0; j--) {
+			if (run_cpus == 1) {
+			    start = v->map[j].start;
+			    end = v->map[j].end;
+			} else {
+			    chunk = v->map[j].end - v->map[j].start;
+			    chunk /= run_cpus;
+			    /* Force chunk to be a multiple of 64 */
+			    chunk = (chunk + 63) & 0xffffffc0;
+			    start = v->map[j].start+(chunk*me);
+			    /* Set end addrs for the highest CPU num to the
+			     * end of the segment for rounding errors */
+			    if (me == mstr_cpu) {
+				end = v->map[j].end;
+			    } else {
+			    	end = start + chunk - 1;
+			    }
+			}
+			p = end;
+			pe = end;
+			done = 0;
+			do {
+				do_tick(me);
+				BAILR
+
+				/* Check for underflow */
+                                if (pe - SPINSZ < pe && pe != 0) {
+                                        pe -= SPINSZ;
+                                } else {
+                                        pe = start;
+					done++;
+                                }
+				/* We need this redundant check because we are
+				 * using unsigned longs for the address.
+				 */
+				if (pe < start || pe > end) {
+					pe = start;
+					done++;
+				}
+				if (p == pe ) {
+					break;
+				}
+/* Original C code replaced with hand tuned assembly code
+ *				while(1) {
+ *					if ((bad=*p) != ~pat) {
+ *						error((ulong*)p, ~pat, bad);
+ *					}
+ *					*p = pat;
+ *					if (p >= pe) break;
+ *					p++;
+ *					if (--k <= 0) {
+ *						pat = hb;
+ *						k = 32;
+ *					} else {
+ *						pat = pat >> 1;
+ *						pat |= p3;
+ *					}
+ *				};
+ */
+				asm __volatile__ (
+                                        "pushl %%ebp\n\t"
+                                        "jmp L40\n\t"
+                                        ".p2align 4,,7\n\t"
+                                        "L49:\n\t"
+                                        "subl $4,%%edi\n\t"
+                                        "L40:\n\t"
+                                        "movl (%%edi),%%ebp\n\t"
+                                        "notl %%ecx\n\t"
+                                        "cmpl %%ecx,%%ebp\n\t"
+                                        "jne L44\n\t"
+
+                                        "L45:\n\t"
+                                        "notl %%ecx\n\t"
+                                        "movl %%ecx,(%%edi)\n\t"
+                                        "decl %%ebx\n\t"
+                                        "cmpl $0,%%ebx\n\t"
+                                        "jg L41\n\t"
+                                        "movl %%esi,%%ecx\n\t"
+                                        "movl $32,%%ebx\n\t"
+                                        "jmp L42\n"
+                                        "L41:\n\t"
+                                        "shrl $1,%%ecx\n\t"
+                                        "orl %%eax,%%ecx\n\t"
+					"L42:\n\t"
+                                        "cmpl %%edx,%%edi\n\t"
+                                        "ja L49\n\t"
+                                        "jmp L43\n\t"
+
+                                        "L44:\n\t" \
+                                        "pushl %%esi\n\t"
+                                        "pushl %%eax\n\t"
+                                        "pushl %%ebx\n\t"
+                                        "pushl %%edx\n\t"
+                                        "pushl %%ebp\n\t"
+                                        "pushl %%ecx\n\t"
+                                        "pushl %%edi\n\t"
+                                        "call error\n\t"
+                                        "popl %%edi\n\t"
+                                        "popl %%ecx\n\t"
+                                        "popl %%ebp\n\t"
+                                        "popl %%edx\n\t"
+                                        "popl %%ebx\n\t"
+                                        "popl %%eax\n\t"
+                                        "popl %%esi\n\t"
+                                        "jmp L45\n"
+
+                                        "L43:\n\t"
+                                        "popl %%ebp\n\t"
+                                        : "=b" (k), "=c" (pat)
+                                        : "D" (p),"d" (pe),"b" (k),"c" (pat),
+                                                "a" (p3), "S" (hb)
+				);
+				p = pe - 1;
+			} while (!done);
+		}
+	}
+}
+
+/*
+ * Test all of memory using modulo X access pattern.
+ */
+void modtst(int offset, int iter, ulong p1, ulong p2, int me)
+{
+	int j, k, l, done;
+	ulong *p;
+	ulong *pe;
+	ulong *start, *end, chunk;
+
+	/* Display the current pattern */
+        if (mstr_cpu == me) {
+		hprint(LINE_PAT, COL_PAT-2, p1);
+		cprint(LINE_PAT, COL_PAT+6, "-");
+       		dprint(LINE_PAT, COL_PAT+7, offset, 2, 1);
+	}
+
+	/* Write every nth location with pattern */
+	for (j=0; j<segs; j++) {
+		if (run_cpus == 1) {
+		    start = v->map[j].start;
+		    end = v->map[j].end;
+		} else {
+		    chunk = v->map[j].end - v->map[j].start;
+		    chunk /= run_cpus;
+		    chunk++;
+		    start = v->map[j].start+(chunk*me);
+		    /* Set end addrs for the highest CPU num to the
+		     * end of the segment for rounding errors */
+		    if (me == mstr_cpu) {
+			end = v->map[j].end;
+		    } else {
+		    	end = start + chunk - 1;
+		    }
+		}
+		end -= MOD_SZ;	/* adjust the ending address */
+		pe = (ulong *)start;
+		p = start+offset;
+		done = 0;
+		do {
+			do_tick(me);
+			BAILR
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+				pe += SPINSZ;
+			} else {
+				pe = end;
+			}
+			if (pe >= end) {
+				pe = end;
+				done++;
+			}
+			if (p == pe ) {
+				break;
+			}
+/* Original C code replaced with hand tuned assembly code
+ *			for (; p <= pe; p += MOD_SZ) {
+ *				*p = p1;
+ *			}
+ */
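+			/* Note: the $80 stride in the assembly below is
+			 * MOD_SZ (20) ulongs, i.e. 80 bytes. */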
+			asm __volatile__ (
+				"jmp L60\n\t" \
+				".p2align 4,,7\n\t" \
+
+				"L60:\n\t" \
+				"movl %%eax,(%%edi)\n\t" \
+				"addl $80,%%edi\n\t" \
+				"cmpl %%edx,%%edi\n\t" \
+				"jb L60\n\t" \
+				: "=D" (p)
+				: "D" (p), "d" (pe), "a" (p1)
+			);
+		} while (!done);
+	}
+
+	/* Write the rest of memory "iter" times with the pattern complement */
+	for (l=0; l<iter; l++) {
+		for (j=0; j<segs; j++) {
+			if (run_cpus == 1) {
+			    start = v->map[j].start;
+			    end = v->map[j].end;
+			} else {
+			    chunk = v->map[j].end - v->map[j].start;
+			    chunk /= run_cpus;
+			    chunk++;
+			    start = v->map[j].start+(chunk*me);
+			    /* Set end addrs for the highest CPU num to the
+			     * end of the segment for rounding errors */
+			    if (me == mstr_cpu) {
+				end = v->map[j].end;
+			    } else {
+			    	end = start + chunk - 1;
+			    }
+			}
+			pe = (ulong *)start;
+			p = start;
+			done = 0;
+			k = 0;
+			do {
+				do_tick(me);
+				BAILR
+
+				/* Check for overflow */
+				if (pe + SPINSZ > pe && pe != 0) {
+					pe += SPINSZ;
+				} else {
+					pe = end;
+				}
+				if (pe >= end) {
+					pe = end;
+					done++;
+				}
+				if (p == pe ) {
+					break;
+				}
+/* Original C code replaced with hand tuned assembly code
+ *				for (; p <= pe; p++) {
+ *					if (k != offset) {
+ *						*p = p2;
+ *					}
+ *					if (++k > MOD_SZ-1) {
+ *						k = 0;
+ *					}
+ *				}
+ */
+				asm __volatile__ (
+					"jmp L50\n\t" \
+					".p2align 4,,7\n\t" \
+
+					"L54:\n\t" \
+					"addl $4,%%edi\n\t" \
+					"L50:\n\t" \
+					"cmpl %%ebx,%%ecx\n\t" \
+					"je L52\n\t" \
+					  "movl %%eax,(%%edi)\n\t" \
+					"L52:\n\t" \
+					"incl %%ebx\n\t" \
+					"cmpl $19,%%ebx\n\t" \
+					"jle L53\n\t" \
+					  "xorl %%ebx,%%ebx\n\t" \
+					"L53:\n\t" \
+					"cmpl %%edx,%%edi\n\t" \
+					"jb L54\n\t" \
+					: "=b" (k)
+					: "D" (p), "d" (pe), "a" (p2),
+						"b" (k), "c" (offset)
+				);
+				p = pe + 1;
+			} while (!done);
+		}
+	}
+
+	/* Now check every nth location */
+	for (j=0; j<segs; j++) {
+		if (run_cpus == 1) {
+		    start = v->map[j].start;
+		    end = v->map[j].end;
+		} else {
+		    chunk = v->map[j].end - v->map[j].start;
+		    chunk /= run_cpus;
+		    chunk++;
+		    start = v->map[j].start+(chunk*me);
+		    /* Set end addrs for the highest CPU num to the
+		     * end of the segment for rounding errors */
+		    if (me == mstr_cpu) {
+			end = v->map[j].end;
+		    } else {
+		    	end = start + chunk - 1;
+		    }
+		}
+		pe = (ulong *)start;
+		p = start+offset;
+		done = 0;
+		end -= MOD_SZ;	/* adjust the ending address */
+		do {
+			do_tick(me);
+			BAILR
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+				pe += SPINSZ;
+			} else {
+				pe = end;
+			}
+			if (pe >= end) {
+				pe = end;
+				done++;
+			}
+			if (p == pe ) {
+				break;
+			}
+/* Original C code replaced with hand tuned assembly code
+ *			for (; p <= pe; p += MOD_SZ) {
+ *				if ((bad=*p) != p1) {
+ *					error((ulong*)p, p1, bad);
+ *				}
+ *			}
+ */
+			asm __volatile__ (
+				"jmp L70\n\t" \
+				".p2align 4,,7\n\t" \
+
+				"L70:\n\t" \
+				"movl (%%edi),%%ecx\n\t" \
+				"cmpl %%eax,%%ecx\n\t" \
+				"jne L71\n\t" \
+				"L72:\n\t" \
+				"addl $80,%%edi\n\t" \
+				"cmpl %%edx,%%edi\n\t" \
+				"jb L70\n\t" \
+				"jmp L73\n\t" \
+
+				"L71:\n\t" \
+				"pushl %%edx\n\t"
+				"pushl %%ecx\n\t"
+				"pushl %%eax\n\t"
+				"pushl %%edi\n\t"
+				"call error\n\t"
+				"popl %%edi\n\t"
+				"popl %%eax\n\t"
+				"popl %%ecx\n\t"
+				"popl %%edx\n\t"
+				"jmp L72\n"
+
+				"L73:\n\t" \
+				: "=D" (p)
+				: "D" (p), "d" (pe), "a" (p1)
+				: "ecx"
+			);
+		} while (!done);
+	}
+}
+
+/*
+ * Test memory using block moves 
+ * Adapted from Robert Redelmeier's burnBX test
+ */
+void block_move(int iter, int me)
+{
+	int i, j, done;
+	ulong len;
+	ulong *p, *pe, pp;
+	ulong *start, *end, chunk;
+
+        cprint(LINE_PAT, COL_PAT-2, "          ");
+
+	/* Initialize memory with the initial pattern.  */
+	for (j=0; j<segs; j++) {
+		if (run_cpus == 1) {
+		    start = v->map[j].start;
+		    end = v->map[j].end;
+		} else {
+		    chunk = v->map[j].end - v->map[j].start;
+		    chunk /= run_cpus;
+		    /* Force chunk to be a multiple of 64 */
+		    chunk = (chunk + 63) & 0xffffffc0;
+		    start = v->map[j].start + (chunk * me);
+		    /* Set end addrs for the highest CPU num to the
+		     * end of the segment for rounding errors */
+		    if (me == mstr_cpu) {
+		        end = v->map[j].end;
+		    } else {
+		        end = start + chunk - 1;
+		    }
+		}
+#ifdef USB_WAR
+		/* We can't do the block move test on low memory because
+		 * BIOS USB support clobbers locations 0x410 and 0x4e0
+		 */
+		if (start < (ulong *)0x500) {
+			start = (ulong *)0x500;
+		}
+#endif
+		pe = start;
+		p = start;
+		done = 0;
+		do {
+			do_tick(me);
+			BAILR
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+				pe += SPINSZ;
+			} else {
+				pe = end;
+			}
+			if (pe >= end) {
+				pe = end;
+				done++;
+			}
+			if (p == pe ) {
+				break;
+			}
+			len  = ((ulong)pe - (ulong)p) / 64;
+			len++;
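+			/* Approximate C equivalent, reconstructed from the
+			 * assembly below (not the original code; the "rcll"
+			 * rotates the pattern through the carry flag, which
+			 * plain C cannot express exactly):
+			 *	for (i = 0; i < len; i++, p += 16) {
+			 *		p[0] = p[1] = p[2] = p[3] = pat;
+			 *		p[4] = p[5] = ~pat;
+			 *		p[6] = p[7] = p[8] = p[9] = pat;
+			 *		p[10] = p[11] = ~pat;
+			 *		p[12] = p[13] = pat;
+			 *		p[14] = p[15] = ~pat;
+			 *		pat = (pat << 1) | (pat >> 31);
+			 *	}
+			 * where pat starts at 1 for each chunk.
+			 */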
+			asm __volatile__ (
+				"jmp L100\n\t"
+
+				".p2align 4,,7\n\t"
+				"L100:\n\t"
+				"movl %%eax, %%edx\n\t"
+				"notl %%edx\n\t"
+				"movl %%eax,0(%%edi)\n\t"
+				"movl %%eax,4(%%edi)\n\t"
+				"movl %%eax,8(%%edi)\n\t"
+				"movl %%eax,12(%%edi)\n\t"
+				"movl %%edx,16(%%edi)\n\t"
+				"movl %%edx,20(%%edi)\n\t"
+				"movl %%eax,24(%%edi)\n\t"
+				"movl %%eax,28(%%edi)\n\t"
+				"movl %%eax,32(%%edi)\n\t"
+				"movl %%eax,36(%%edi)\n\t"
+				"movl %%edx,40(%%edi)\n\t"
+				"movl %%edx,44(%%edi)\n\t"
+				"movl %%eax,48(%%edi)\n\t"
+				"movl %%eax,52(%%edi)\n\t"
+				"movl %%edx,56(%%edi)\n\t"
+				"movl %%edx,60(%%edi)\n\t"
+				"rcll $1, %%eax\n\t"
+				"leal 64(%%edi), %%edi\n\t"
+				"decl %%ecx\n\t"
+				"jnz  L100\n\t"
+				: "=D" (p)
+				: "D" (p), "c" (len), "a" (1)
+				: "edx"
+			);
+		} while (!done);
+	}
+	s_barrier();
+
+	/* Now move the data around 
+	 * First move the data up half of the segment size we are testing
+	 * Then move the data to the original location + 32 bytes
+	 */
+	for (j=0; j<segs; j++) {
+		if (run_cpus == 1) {
+		    start = v->map[j].start;
+		    end = v->map[j].end;
+		} else {
+		    chunk = v->map[j].end - v->map[j].start;
+		    chunk /= run_cpus;
+		    /* Force chunk to be a multiple of 64 */
+		    chunk = (chunk + 63) & 0xffffffc0;
+		    start = v->map[j].start + (chunk * me);
+		    /* Set end addrs for the highest CPU num to the
+		     * end of the segment for rounding errors */
+		    if (me == mstr_cpu) {
+		        end = v->map[j].end;
+		    } else {
+		        end = start + chunk - 1;
+		    }
+		}
+#ifdef USB_WAR
+		/* We can't do the block move test on low memory because
+		 * BIOS USB support clobbers locations 0x410 and 0x4e0
+		 */
+		if (start < (ulong *)0x500) {
+			start = (ulong *)0x500;
+		}
+#endif
+		pe = start;
+		p = start;
+		done = 0;
+		do {
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+				pe += SPINSZ;
+			} else {
+				pe = (ulong *)((ulong)end & 0xfffffff0);
+			}
+			if (pe >= end) {
+				pe = (ulong *)((ulong)end & 0xfffffff0);
+				done++;
+			}
+			if (p == pe ) {
+				break;
+			}
+			pp = (ulong)p + (((ulong)pe - (ulong)p) / 2);
+			len  = ((ulong)pe - (ulong)p) / 8;
+			for(i=0; i<iter; i++) {
+				do_tick(me);
+				BAILR
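+				/* Approximate C equivalent, reconstructed from
+				 * the assembly below (pp holds an address; len
+				 * is a count of 32-bit words):
+				 *	memmove((ulong *)pp, p, len*4);
+				 *	memmove(p+8, (ulong *)pp, (len-8)*4);
+				 *	memmove(p, (ulong *)pp+len-8, 8*4);
+				 */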
+				asm __volatile__ (
+					"cld\n"
+					"jmp L110\n\t"
+
+					".p2align 4,,7\n\t"
+					"L110:\n\t"
+					"movl %1,%%edi\n\t"
+					"movl %0,%%esi\n\t"
+					"movl %2,%%ecx\n\t"
+					"rep\n\t"
+					"movsl\n\t"
+					"movl %0,%%edi\n\t"
+					"addl $32,%%edi\n\t"
+					"movl %1,%%esi\n\t"
+					"movl %2,%%ecx\n\t"
+					"subl $8,%%ecx\n\t"
+					"rep\n\t"
+					"movsl\n\t"
+					"movl %0,%%edi\n\t"
+					"movl $8,%%ecx\n\t"
+					"rep\n\t"
+					"movsl\n\t"
+					:: "g" (p), "g" (pp), "g" (len)
+					: "edi", "esi", "ecx"
+				);
+			}
+			p = pe;
+		} while (!done);
+	}
+	s_barrier();
+
+	/* Now check the data 
+	 * The error checking is rather crude.  We just check that the
+	 * adjacent words are the same.
+	 */
+	for (j=0; j<segs; j++) {
+		if (run_cpus == 1) {
+		    start = v->map[j].start;
+		    end = v->map[j].end;
+		} else {
+		    chunk = v->map[j].end - v->map[j].start;
+		    chunk /= run_cpus;
+		    /* Force chunk to be a multiple of 64 */
+		    chunk = (chunk + 63) & 0xffffffc0;
+		    start = v->map[j].start + (chunk * me);
+		    /* Set end addrs for the highest CPU num to the
+		     * end of the segment for rounding errors */
+		    if (me == mstr_cpu) {
+		        end = v->map[j].end;
+		    } else {
+		        end = start + chunk - 1;
+		    }
+		}
+#ifdef USB_WAR
+		/* We can't do the block move test on low memory because
+		 * BIOS USB support clobbers locations 0x4e0 and 0x410
+		 */
+		if (start < (ulong *)0x500) {
+			start = (ulong *)0x500;
+		}
+#endif
+		pe = start;
+		p = start;
+		done = 0;
+		do {
+			do_tick(me);
+			BAILR
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+				pe += SPINSZ;
+			} else {
+				pe = end;
+			}
+			if (pe >= end) {
+				pe = end;
+				done++;
+			}
+			if (p == pe ) {
+				break;
+			}
+			pe--;	/* adjust the end since we are testing pe+1 */
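+			/* Approximate C equivalent, reconstructed from the
+			 * assembly below:
+			 *	for (; p <= pe; p += 2) {
+			 *		if (p[0] != p[1]) {
+			 *			error(p, p[0], p[1]);
+			 *		}
+			 *	}
+			 */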
+			asm __volatile__ (
+				"jmp L120\n\t"
+
+				".p2align 4,,7\n\t"
+				"L124:\n\t"
+				"addl $8,%%edi\n\t"
+				"L120:\n\t"
+				"movl (%%edi),%%ecx\n\t"
+				"cmpl 4(%%edi),%%ecx\n\t"
+				"jnz L121\n\t"
+
+				"L122:\n\t"
+				"cmpl %%edx,%%edi\n\t"
+				"jb L124\n"
+				"jmp L123\n\t"
+
+				"L121:\n\t"
+				"pushl %%edx\n\t"
+				"pushl 4(%%edi)\n\t"
+				"pushl %%ecx\n\t"
+				"pushl %%edi\n\t"
+				"call error\n\t"
+				"popl %%edi\n\t"
+				"addl $8,%%esp\n\t"
+				"popl %%edx\n\t"
+				"jmp L122\n"
+				"L123:\n\t"
+				: "=D" (p)
+				: "D" (p), "d" (pe)
+				: "ecx"
+			);
+		} while (!done);
+	}
+}
+
+/*
+ * Test memory for bit fade, fill memory with pattern.
+ */
+void bit_fade_fill(ulong p1, int me)
+{
+	int j, done;
+	ulong *p, *pe;
+	ulong *start,*end;
+
+	/* Display the current pattern */
+	hprint(LINE_PAT, COL_PAT, p1);
+
+	/* Initialize memory with the initial pattern.  */
+	for (j=0; j<segs; j++) {
+		start = v->map[j].start;
+		end = v->map[j].end;
+		pe = (ulong *)start;
+		p = start;
+		done = 0;
+		do {
+			do_tick(me);
+			BAILR
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+				pe += SPINSZ;
+			} else {
+				pe = end;
+			}
+			if (pe >= end) {
+				pe = end;
+				done++;
+			}
+			if (p == pe ) {
+				break;
+			}
+			for (; p <= pe; p++) {
+				*p = p1;
+			}
+			p = pe + 1;
+		} while (!done);
+	}
+}
+
+void bit_fade_chk(ulong p1, int me)
+{
+	int j, done;
+	ulong *p, *pe, bad;
+	ulong *start,*end;
+
+	/* Make sure that nothing changed while sleeping */
+	for (j=0; j<segs; j++) {
+		start = v->map[j].start;
+		end = v->map[j].end;
+		pe = (ulong *)start;
+		p = start;
+		done = 0;
+		do {
+			do_tick(me);
+			BAILR
+
+			/* Check for overflow */
+			if (pe + SPINSZ > pe && pe != 0) {
+				pe += SPINSZ;
+			} else {
+				pe = end;
+			}
+			if (pe >= end) {
+				pe = end;
+				done++;
+			}
+			if (p == pe ) {
+				break;
+			}
+			for (; p <= pe; p++) {
+				if ((bad=*p) != p1) {
+					error((ulong*)p, p1, bad);
+				}
+			}
+			p = pe + 1;
+		} while (!done);
+	}
+}
+
+/* Sleep for N seconds */
+void sleep(long n, int flag, int me)
+{
+	ulong sh, sl, l, h, t, ip=0;
+
+	/* save the starting time */
+	asm __volatile__(
+		"rdtsc":"=a" (sl),"=d" (sh));
+
+	/* loop for n seconds */
+	while (1) {
+		asm __volatile__(
+			"rep ; nop\n\t"
+			"rdtsc":"=a" (l),"=d" (h));
+		asm __volatile__ (
+			"subl %2,%0\n\t"
+			"sbbl %3,%1"
+			:"=a" (l), "=d" (h)
+			:"g" (sl), "g" (sh),
+			"0" (l), "1" (h));
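+		/* Convert the 64-bit cycle count in h:l to whole seconds.
+		 * clks_msec is clocks per millisecond, so elapsed seconds are
+		 * cycles / (clks_msec * 1000); the high word is scaled by
+		 * 2^32 / clks_msec to avoid a 64-bit divide. */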
+		t = h * ((unsigned)0xffffffff / v->clks_msec) / 1000;
+		t += (l / v->clks_msec) / 1000;
+
+		/* Is the time up? */
+		if (t >= n) {
+			break;
+		}
+
+		/* Only display elapsed time if flag is set */
+		if (flag == 0) {
+			continue;
+		}
+
+		if (t != ip) {
+			do_tick(me);
+			BAILR
+			ip = t;
+		}
+	}
+}
diff --git a/test.h b/test.h
new file mode 100644
index 0000000..967f1f2
--- /dev/null
+++ b/test.h
@@ -0,0 +1,289 @@
+/* test.h - MemTest-86  Version 3.4
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Chris Brady
+ */
+
+#ifndef _TEST_H_
+#define _TEST_H_
+#define E88     0x00
+#define E801    0x04
+#define E820NR  0x08           /* # entries in E820MAP */
+#define E820MAP 0x0c           /* our map */
+#define E820MAX 32             /* number of entries in E820MAP */
+#define E820ENTRY_SIZE 20
+#define MEMINFO_SIZE 0x28c
+
+#ifndef __ASSEMBLY__
+
+#define E820_RAM        1
+#define E820_RESERVED   2
+#define E820_ACPI       3 /* usable as RAM once ACPI tables have been read */
+#define E820_NVS        4
+
+struct e820entry {
+        unsigned long long addr;        /* start of memory segment */
+        unsigned long long size;        /* size of memory segment */
+        unsigned long type;             /* type of memory segment */
+};
+
+struct mem_info_t {
+	unsigned long e88_mem_k;	/* 0x00 */
+	unsigned long e801_mem_k;	/* 0x04 */
+	unsigned long e820_nr;		/* 0x08 */
+	struct e820entry e820[E820MAX];	/* 0x0c */
+					/* 0x28c */
+};
+
+typedef unsigned long ulong;
+#define SPINSZ		0x4000000	/* 256 MB */
+#define MOD_SZ		20
+#define BAILOUT		if (bail) return(1);
+#define BAILR		if (bail) return;
+
+#define RES_START	0xa0000
+#define RES_END		0x100000
+#define SCREEN_ADR	0xb8000
+#define SCREEN_END_ADR  (SCREEN_ADR + 80*25*2)
+
+#define TITLE_WIDTH	28
+#define LINE_TST	3
+#define LINE_RANGE	4
+#define LINE_PAT        5
+#define LINE_STATUS	8
+#define LINE_INFO	10
+#define LINE_HEADER	12
+#define LINE_SCROLL	14
+#define LINE_MSG	18
+#define COL_INF1        15
+#define COL_INF2        32
+#define COL_INF3        51
+#define COL_INF4        70
+#define COL_MID		30
+#define COL_PAT		41
+#define BAR_SIZE	(78-COL_MID-9)
+#define COL_MSG		18
+
+#define POP_W	34
+#define POP_H	15
+#define POP_X	11
+#define POP_Y	8
+#define POP2_W  74
+#define POP2_H  21
+#define POP2_X  3
+#define POP2_Y  2
+
+/* CPU mode types */
+#define CPM_ALL    1
+#define CPM_RROBIN 2
+#define CPM_SEQ    3
+
+/* memspeed operations */
+#define MS_COPY		1
+#define MS_WRITE	2
+#define MS_READ		3
+
+#define SZ_MODE_BIOS		1
+#define SZ_MODE_PROBE		2
+
+#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
+int memcmp(const void *s1, const void *s2, ulong count);
+void *memmove(void *dest, const void *src, ulong n);
+int strncmp(const char *s1, const char *s2, ulong n);
+int query_linuxbios(void);
+int query_pcbios(void);
+int insertaddress(ulong);
+void printpatn(void);
+void itoa(char s[], int n); 
+void reverse(char *p);
+void serial_console_setup(char *param);
+void serial_echo_init(void);
+void serial_echo_print(const char *s);
+void ttyprint(int y, int x, const char *s);
+void ttyprintc(int y, int x, char c);
+void cprint(int y,int x, const char *s);
+void cplace(int y,int x, const char s);
+void hprint(int y,int x, ulong val);
+void hprint2(int y,int x, ulong val, int len);
+void hprint3(int y,int x, ulong val, int len);
+void xprint(int y,int x,ulong val);
+void aprint(int y,int x,ulong page);
+void dprint(int y,int x,ulong val,int len, int right);
+void movinv1(int iter, ulong p1, ulong p2, int cpu);
+void movinvr(int cpu);
+void movinv32(int iter, ulong p1, ulong lb, ulong hb, int sval, int off,
+	int cpu);
+void modtst(int off, int iter, ulong p1, ulong p2, int cpu);
+void error(ulong* adr, ulong good, ulong bad);
+void ad_err1(ulong *adr1, ulong *adr2, ulong good, ulong bad);
+void ad_err2(ulong *adr, ulong bad);
+void do_tick();
+void init(void);
+struct eregs;
+void inter(struct eregs *trap_regs);
+void set_cache(int val);
+void check_input(void);
+void footer(void);
+void scroll(void);
+void clear_scroll(void);
+void popup(void);
+void popdown(void);
+void popclear(void);
+void pop2up(void);
+void pop2down(void);
+void pop2clear(void);
+void get_config(void);
+void get_menu(void);
+void get_printmode(void);
+void addr_tst1(int cpu);
+void addr_tst2(int cpu);
+void sleep(long sec, int flag, int cpu);
+void block_move(int iter, int cpu);
+void find_ticks(void);
+void print_err(ulong *adr, ulong good, ulong bad, ulong xor);
+void print_ecc_err(ulong page, ulong offset, int corrected, 
+	unsigned short syndrome, int channel);
+void mem_size(void);
+void adj_mem(void);
+ulong getval(int x, int y, int result_shift);
+int get_key(void);
+int ascii_to_keycode(int in);
+void wait_keyup(void);
+void print_hdr(void);
+void restart(void);
+void parity_err(ulong edi, ulong esi);
+void start_config(void);
+void clear_screen(void);
+void paging_off(void);
+void show_spd(void);
+int map_page(unsigned long page);
+void *mapping(unsigned long page_address);
+void *emapping(unsigned long page_address);
+ulong memspeed(ulong src, ulong len, int iter);
+unsigned long page_of(void *ptr);
+ulong correct_tsc(ulong el_org);
+void bit_fade_fill(unsigned long n, int cpu);
+void bit_fade_chk(unsigned long n, int cpu);
+void find_ticks_for_pass(void);
+
+#define PRINTMODE_SUMMARY   0
+#define PRINTMODE_ADDRESSES 1
+#define PRINTMODE_PATTERNS  2
+#define PRINTMODE_NONE      3
+
+#define BADRAM_MAXPATNS 10
+
+struct pair {
+       ulong adr;
+       ulong mask;
+};
+
+static inline void cache_off(void)
+{
+        asm(
+		"push %eax\n\t"
+		"movl %cr0,%eax\n\t"
+                "orl $0x40000000,%eax\n\t"  /* Set CD */
+                "movl %eax,%cr0\n\t"
+		"wbinvd\n\t"
+		"pop  %eax\n\t");
+}
+
+static inline void cache_on(void)
+{
+        asm(
+		"push %eax\n\t"
+		"movl %cr0,%eax\n\t"
+                "andl $0x9fffffff,%eax\n\t" /* Clear CD and NW */ 
+                "movl %eax,%cr0\n\t"
+		"pop  %eax\n\t");
+}
+
+struct mmap {
+	ulong pbase_addr;
+	ulong *start;
+	ulong *end;
+};
+
+struct pmap {
+	ulong start;
+	ulong end;
+};
+
+struct tseq {
+	short cpu_sel;
+	short pat;
+	short iter;
+	short errors;
+	char *msg;
+};
+
+struct xadr {
+	ulong page;
+	ulong offset;
+};
+
+struct err_info {
+	struct xadr   low_addr;
+	struct xadr   high_addr;
+	unsigned long ebits;
+	long	      tbits;
+	short         min_bits;
+	short         max_bits;
+	unsigned long maxl;
+	unsigned long eadr;
+        unsigned long exor;
+        unsigned long cor_err;
+	short         hdr_flag;
+};
+
+#define X86_FEATURE_PAE		(0*32+ 6) /* Physical Address Extensions */
+
+#define MAX_MEM_SEGMENTS E820MAX
+
+/* Define common variables across relocations of memtest86 */
+struct vars {
+	int pass;
+	int msg_line;
+	int ecount;
+	int ecc_ecount;
+	int msegs;
+	int testsel;
+	int scroll_start;
+	int rdtsc;
+	int pae;
+	int pass_ticks;
+	int total_ticks;
+	int pptr;
+	int tptr;
+	struct err_info erri;
+	struct pmap pmap[MAX_MEM_SEGMENTS];
+	volatile struct mmap map[MAX_MEM_SEGMENTS];
+	ulong plim_lower;
+	ulong plim_upper;
+	ulong clks_msec;
+	ulong starth;
+	ulong startl;
+	ulong snaph;
+	ulong snapl;
+	int printmode;
+	int numpatn;
+	struct pair patn [BADRAM_MAXPATNS];
+	ulong test_pages;
+	ulong selected_pages;
+	ulong reserved_pages;
+};
+
+#define FIRMWARE_UNKNOWN   0
+#define FIRMWARE_PCBIOS    1
+#define FIRMWARE_LINUXBIOS 2
+
+extern struct vars * const v;
+extern unsigned char _start[], _end[], startup_32[];
+extern unsigned char _size, _pages;
+
+extern struct mem_info_t mem_info;
+
+#endif /* __ASSEMBLY__ */
+#endif /* _TEST_H_ */
diff --git a/vmem.c b/vmem.c
new file mode 100644
index 0000000..17e717e
--- /dev/null
+++ b/vmem.c
@@ -0,0 +1,139 @@
+/* vmem.c - MemTest-86 
+ *
+ * Virtual memory handling (PAE)
+ *
+ * Released under version 2 of the Gnu Public License.
+ * By Chris Brady
+ */
+#include "test.h"
+
+static unsigned long mapped_win = 1;
+void paging_off(void)
+{
+	if (!v->pae)
+		return;
+	mapped_win = 1;
+	__asm__ __volatile__ (
+		/* Disable paging */
+		"movl %%cr0, %%eax\n\t"
+		"andl $0x7FFFFFFF, %%eax\n\t"
+		"movl %%eax, %%cr0\n\t"
+		/* Disable pae  & pse */
+		"movl %%cr4, %%eax\n\t"
+    		"and $0xCF, %%al\n\t"
+    		"movl %%eax, %%cr4\n\t"
+		:
+		:
+		: "ax"
+		);
+}
+
+static void paging_on(void *pdp)
+{
+	if (!v->pae)
+		return;
+	__asm__ __volatile__(
+		/* Load the page table address */
+		"movl %0, %%cr3\n\t"
+		/* Enable pae */
+		"movl %%cr4, %%eax\n\t"
+		"orl $0x00000020, %%eax\n\t"
+		"movl %%eax, %%cr4\n\t"
+		/* Enable paging */
+		"movl %%cr0, %%eax\n\t"
+		"orl $0x80000000, %%eax\n\t"
+		"movl %%eax, %%cr0\n\t"
+		:
+		: "r" (pdp)
+		: "ax"
+		);
+}
+
+int map_page(unsigned long page)
+{
+	unsigned long i;
+	struct pde {
+		unsigned long addr_lo;
+		unsigned long addr_hi;
+	};
+	extern unsigned char pdp[];
+	extern struct pde pd2[];
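+	/* Each mapping window covers 2^19 4KB pages (2GB).  Windows 0 and 1
+	 * (the first 4GB) are reachable with paging disabled; higher windows
+	 * are reached by remapping the 2GB-4GB range with PAE below. */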
+	unsigned long win = page >> 19;
+	if (win == mapped_win) {
+		return 0;
+	}
+	/* Less than 2 GB so no mapping is required */
+	if (win == 0) {
+		return 0;
+	}
+	if (!v->pae || (page > 0x1000000)) {
+		/* Fail: either we don't have pae support or we want an
+		 * address that is out of bounds (> 64GB) even for pae.
+		 */
+		return -1;
+	}
+	/* Compute the page table entries... */
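+	/* Each of the 1024 PDEs maps a 2MB page.  Entry i points at physical
+	 * address win*2GB + i*2MB (split across addr_hi:addr_lo), so together
+	 * the entries alias window "win" into the 2GB-4GB virtual range. */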
+	for(i = 0; i < 1024; i++) {
+		/*-----------------10/30/2004 12:37PM---------------
+		 * 0xE3 --
+		 * Bit 0 = Present bit.      1 = PDE is present
+		 * Bit 1 = Read/Write.       1 = memory is writable
+		 * Bit 2 = Supervisor/User.  0 = Supervisor only (CPL 0-2)
+		 * Bit 3 = Writethrough.     0 = writeback cache policy
+		 * Bit 4 = Cache Disable.    0 = page level cache enabled
+		 * Bit 5 = Accessed.         1 = memory has been accessed.
+		 * Bit 6 = Dirty.            1 = memory has been written to.
+		 * Bit 7 = Page Size.        1 = page size is 2 MBytes
+		 * --------------------------------------------------*/
+		pd2[i].addr_lo = ((win & 1) << 31) + ((i & 0x3ff) << 21) + 0xE3;
+		pd2[i].addr_hi = (win >> 1);
+	}
+	paging_off();
+	if (win > 1) {
+		paging_on(pdp);
+	}
+	mapped_win = win;
+	return 0;
+}
+
+void *mapping(unsigned long page_addr)
+{
+	void *result;
+	if (page_addr < 0x80000) {
+		/* If the address is less than 2GB, use the address directly */
+		result = (void *)(page_addr << 12);
+	}
+	else {
+		unsigned long alias;
+		alias = page_addr & 0x7FFFF;
+		alias += 0x80000;
+		result = (void *)(alias << 12);
+	}
+	return result;
+}
+
+void *emapping(unsigned long page_addr)
+{
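+	/* Return a pointer to the last dword of the page just below
+	 * page_addr, i.e. the inclusive end of a range ending at page_addr. */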
+	void *result;
+	result = mapping(page_addr -1);
+	/* Fill in the low address bits */
+	result = ((unsigned char *)result) + 0xffc;
+	return result;
+}
+
+unsigned long page_of(void *addr)
+{
+	unsigned long page;
+	page = ((unsigned long)addr) >> 12;
+	if (page >= 0x80000) {
+		page &= 0x7FFFF;
+		page += mapped_win << 19;
+	}
+#if 0
+	cprint(LINE_SCROLL -2, 0, "page_of(        )->            ");
+	hprint(LINE_SCROLL -2, 8, ((unsigned long)addr));
+	hprint(LINE_SCROLL -2, 20, page);
+#endif	
+	return page;
+}