BUTTERFLY-FIRMWARE: x86: Add optimized memcpy implementation

This patch adds coreboot's speed-optimized memcpy()
to U-Boot on x86.

BUG=chrome-os-partner:12896
TEST=manual, verified basic firmware functions.
BRANCH=butterfly
Signed-off-by: Stefan Reinauer <reinauer@chromium.org>

Change-Id: Id874e57499f2073821401643d9624f04cc92caef
Original-Change-Id: I5ce04a1330711bcff25726bfbfdcecb193be9a29
Reviewed-on: https://gerrit.chromium.org/gerrit/35770
Reviewed-by: Stefan Reinauer <reinauer@chromium.org>
Commit-Ready: Shawn Nematbakhsh <shawnn@google.com>
Reviewed-by: Shawn Nematbakhsh <shawnn@google.com>
Tested-by: Shawn Nematbakhsh <shawnn@google.com>
diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index 3aa6c11..0ad612f 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -14,7 +14,7 @@
 #undef __HAVE_ARCH_STRCHR
 extern char * strchr(const char * s, int c);
 
-#undef __HAVE_ARCH_MEMCPY
+#define __HAVE_ARCH_MEMCPY
 extern void * memcpy(void *, const void *, __kernel_size_t);
 
 #undef __HAVE_ARCH_MEMMOVE
diff --git a/arch/x86/lib/string.c b/arch/x86/lib/string.c
index f2ea7e4..cfa0d69 100644
--- a/arch/x86/lib/string.c
+++ b/arch/x86/lib/string.c
@@ -85,3 +85,19 @@
 
 	return dstpp;
 }
+
+void *memcpy(void *dest, const void *src, size_t n)
+{
+	unsigned long d0, d1, d2;	/* unused sinks for ecx, edi, esi */
+	/* Copy n bytes from src to dest; no cld is issued, so DF decides direction */
+	asm volatile(
+		"rep ; movsl\n\t"	/* copy n >> 2 32-bit words */
+		"movl %4,%%ecx\n\t"	/* reload the count with n & 3 */
+		"rep ; movsb\n\t"	/* copy the remaining 0-3 bytes */
+		: "=&c" (d0), "=&D" (d1), "=&S" (d2)	/* regs the asm modifies */
+		: "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
+		: "memory"	/* the copy accesses memory not listed as an operand */
+	);
+	/* NOTE(review): movl/%ecx looks 32-bit specific - confirm before x86_64 use */
+	return dest;
+}