// blob: edc5e9ee4b9ef98088c93de09b2414c751259198 [file] [log] [blame]
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_UTILS_MEMCOPY_H_
#define V8_UTILS_MEMCOPY_H_
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <cstddef>
#include <type_traits>

#include "src/base/logging.h"
#include "src/base/macros.h"
#include "src/base/platform/wrappers.h"
namespace v8 {
namespace internal {
// Unsigned integer type used in this header to carry raw memory addresses
// (an alias of uintptr_t).
using Address = uintptr_t;
// ----------------------------------------------------------------------------
// Generated memcpy/memmove for ia32, arm, and mips.
// Only declared here; the definition is not part of this header.
// NOTE(review): presumably installs the architecture-specific copy routines
// declared below -- confirm against the definition.
void init_memcopy_functions();
#if defined(V8_TARGET_ARCH_IA32)
// Limit below which the extra overhead of the MemCopy function is likely
// to outweigh the benefits of faster copying.
const size_t kMinComplexMemCopy = 64;
// Copy memory area. No restrictions.
// Only declared here; the definition is not part of this header.
V8_EXPORT_PRIVATE void MemMove(void* dest, const void* src, size_t size);
// Function-pointer type with the same signature as MemMove.
using MemMoveFunction = void (*)(void* dest, const void* src, size_t size);
// In this configuration MemCopy simply forwards to MemMove. The "copy" and
// "move" entry points are kept as separate names because other architectures
// implement them differently.
V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
  MemMove(dest, src, size);
}
#elif defined(V8_HOST_ARCH_ARM)
// Signature shared by byte-copy routines such as MemCopyUint8Wrapper below:
// destination pointer, source pointer, and a size.
using MemCopyUint8Function = void (*)(uint8_t* dest, const uint8_t* src,
size_t size);
// Routine that MemCopy below calls through. Only declared here; it is defined
// and assigned outside this header.
V8_EXPORT_PRIVATE extern MemCopyUint8Function memcopy_uint8_function;
// Routine with the MemCopyUint8Function signature that is backed by the C
// library: copies |chars| bytes from |src| to |dest| with memcpy.
V8_INLINE void MemCopyUint8Wrapper(uint8_t* dest, const uint8_t* src,
                                   size_t chars) {
  memcpy(dest, src, chars);
}
// Below this many bytes the inlined C code is faster than the assembler
// function.
const size_t kMinComplexMemCopy = 16;
// Copies |size| bytes from |src| to |dest| by calling through
// |memcopy_uint8_function|.
V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
  uint8_t* const dest_bytes = reinterpret_cast<uint8_t*>(dest);
  const uint8_t* const src_bytes = reinterpret_cast<const uint8_t*>(src);
  memcopy_uint8_function(dest_bytes, src_bytes, size);
}
// Moves |size| bytes from |src| to |dest| with the C library memmove, so the
// two ranges are allowed to overlap.
V8_EXPORT_PRIVATE V8_INLINE void MemMove(void* dest, const void* src,
                                         size_t size) {
  memmove(dest, src, size);
}
// For values < 12, the assembler function is slower than the inlined C code.
// NOTE(review): not referenced anywhere in this header; presumably the
// threshold for a converting copy routine defined elsewhere -- confirm at the
// use sites. Declared as int, unlike the size_t kMinComplexMemCopy.
const int kMinComplexConvertMemCopy = 12;
#elif defined(V8_HOST_ARCH_MIPS)
// Signature shared by byte-copy routines such as MemCopyUint8Wrapper below:
// destination pointer, source pointer, and a size.
using MemCopyUint8Function = void (*)(uint8_t* dest, const uint8_t* src,
size_t size);
// Routine that MemCopy below calls through. Only declared here; it is defined
// and assigned outside this header.
V8_EXPORT_PRIVATE extern MemCopyUint8Function memcopy_uint8_function;
// C-library-backed routine matching MemCopyUint8Function: copies |chars|
// bytes from |src| to |dest| with memcpy.
V8_INLINE void MemCopyUint8Wrapper(uint8_t* dest, const uint8_t* src,
                                   size_t chars) {
  memcpy(dest, src, chars);
}
// Below this many bytes the inlined C code is faster than the assembler
// function.
const size_t kMinComplexMemCopy = 16;
// Copies |size| bytes from |src| to |dest| by calling through
// |memcopy_uint8_function|.
V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
  uint8_t* const dest_bytes = reinterpret_cast<uint8_t*>(dest);
  const uint8_t* const src_bytes = reinterpret_cast<const uint8_t*>(src);
  memcopy_uint8_function(dest_bytes, src_bytes, size);
}
// Moves |size| bytes from |src| to |dest| with the C library memmove, so the
// two ranges are allowed to overlap.
V8_EXPORT_PRIVATE V8_INLINE void MemMove(void* dest, const void* src,
                                         size_t size) {
  memmove(dest, src, size);
}
#else
// Copies |size| bytes from |src| to |dest|. The two areas must be disjoint.
inline void MemCopy(void* dest, const void* src, size_t size) {
  // Sizes 1..16 each get their own {memcpy} call with a literal length so
  // that the compiler can expand it into a short sequence of move
  // instructions instead of calling the general {memcpy} routine.
#define MEMCOPY_SMALL_SIZE_LIST(V)                                      \
  V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) V(9) V(10) V(11) V(12) V(13) \
  V(14) V(15) V(16)
#define MEMCOPY_FIXED_SIZE_CASE(N) \
  case N:                          \
    memcpy(dest, src, N);          \
    break;
  switch (size) {
    MEMCOPY_SMALL_SIZE_LIST(MEMCOPY_FIXED_SIZE_CASE)
    default:
      // Every other size (including 0) takes the general path.
      memcpy(dest, src, size);
      break;
  }
#undef MEMCOPY_FIXED_SIZE_CASE
#undef MEMCOPY_SMALL_SIZE_LIST
}
// Moves |size| bytes from |src| to |dest| using {memmove}, so the two areas
// are allowed to overlap.
V8_EXPORT_PRIVATE inline void MemMove(void* dest, const void* src,
                                      size_t size) {
  // Sizes 1..16 each get their own {memmove} call with a literal length so
  // that the compiler can expand it into a short sequence of move
  // instructions instead of calling the general {memmove} routine.
#define MEMMOVE_SMALL_SIZE_LIST(V)                                      \
  V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) V(9) V(10) V(11) V(12) V(13) \
  V(14) V(15) V(16)
#define MEMMOVE_FIXED_SIZE_CASE(N) \
  case N:                          \
    memmove(dest, src, N);         \
    break;
  switch (size) {
    MEMMOVE_SMALL_SIZE_LIST(MEMMOVE_FIXED_SIZE_CASE)
    default:
      // Every other size (including 0) takes the general path.
      memmove(dest, src, size);
      break;
  }
#undef MEMMOVE_FIXED_SIZE_CASE
#undef MEMMOVE_SMALL_SIZE_LIST
}
// Threshold that CopyBytes passes to CopyImpl: copies shorter than this are
// done with an element-by-element loop, all others go through MemCopy.
const size_t kMinComplexMemCopy = 8;
#endif // V8_TARGET_ARCH_IA32
// Copies |count| elements of type T from |src_ptr| to |dst_ptr|. The two
// ranges must not overlap and both pointers must be aligned to sizeof(T);
// debug builds check both requirements. Runs shorter than |kBlockCopyLimit|
// elements are copied one element at a time; longer runs go through MemCopy.
template <size_t kBlockCopyLimit, typename T>
inline void CopyImpl(T* dst_ptr, const T* src_ptr, size_t count) {
  constexpr int kTWordSize = sizeof(T);
#ifdef DEBUG
  Address dst = reinterpret_cast<Address>(dst_ptr);
  Address src = reinterpret_cast<Address>(src_ptr);
  DCHECK(IsAligned(dst, kTWordSize));
  DCHECK(IsAligned(src, kTWordSize));
  DCHECK(((src <= dst) && ((src + count * kTWordSize) <= dst)) ||
         ((dst <= src) && ((dst + count * kTWordSize) <= src)));
#endif
  if (count == 0) return;
  if (count >= kBlockCopyLimit) {
    // Large enough to justify the extra call/setup overhead of MemCopy.
    MemCopy(dst_ptr, src_ptr, count * kTWordSize);
    return;
  }
  // Short run: plain element-wise copy.
  for (size_t remaining = count; remaining != 0; --remaining) {
    *dst_ptr++ = *src_ptr++;
  }
}
// Copies |num_words| kSystemPointerSize-sized words from the address |src| to
// the address |dst|. The two spans must not overlap, and both addresses must
// be kSystemPointerSize-aligned.
inline void CopyWords(Address dst, const Address src, size_t num_words) {
  // Runs shorter than this are copied word by word inside CopyImpl.
  constexpr size_t kWordLoopLimit = 16;
  Address* const dst_words = reinterpret_cast<Address*>(dst);
  const Address* const src_words = reinterpret_cast<const Address*>(src);
  CopyImpl<kWordLoopLimit>(dst_words, src_words, num_words);
}
// Copies |num_bytes| bytes from |src| to |dst|. T must be a one-byte type and
// the two spans must not overlap.
template <typename T>
inline void CopyBytes(T* dst, const T* src, size_t num_bytes) {
  STATIC_ASSERT(sizeof(T) == 1);
  if (num_bytes != 0) {
    CopyImpl<kMinComplexMemCopy>(dst, src, num_bytes);
  }
}
// Writes |value| into each of the |counter| uint32_t slots starting at
// |dest|. On IA32/X64 hosts with a GNU-compatible compiler this is done with
// a `rep stosl` inline-assembly sequence; everywhere else (and under
// MemorySanitizer) a plain store loop is used.
inline void MemsetUint32(uint32_t* dest, uint32_t value, size_t counter) {
#if V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
#define STOS "stosl"
#endif
#if defined(MEMORY_SANITIZER)
  // MemorySanitizer cannot see through inline assembly, so take the C++ path.
#undef STOS
#endif
#if defined(__GNUC__) && defined(STOS)
  asm volatile(
      "cld;"
      "rep ; " STOS
      : "+&c"(counter), "+&D"(dest)
      : "a"(value)
      : "memory", "cc");
#else
  uint32_t* const end = dest + counter;
  while (dest != end) {
    *dest++ = value;
  }
#endif
#undef STOS
}
// Writes |value| into each of the |counter| Address-sized slots starting at
// |dest|. On IA32 (`stosl`) and X64 (`stosq`) hosts with a GNU-compatible
// compiler this is done with a `rep stos` inline-assembly sequence; everywhere
// else (and under MemorySanitizer) a plain store loop is used.
inline void MemsetPointer(Address* dest, Address value, size_t counter) {
#if V8_HOST_ARCH_IA32
#define STOS "stosl"
#elif V8_HOST_ARCH_X64
#define STOS "stosq"
#endif
#if defined(MEMORY_SANITIZER)
  // MemorySanitizer cannot see through inline assembly, so take the C++ path.
#undef STOS
#endif
#if defined(__GNUC__) && defined(STOS)
  asm volatile(
      "cld;"
      "rep ; " STOS
      : "+&c"(counter), "+&D"(dest)
      : "a"(value)
      : "memory", "cc");
#else
  Address* const end = dest + counter;
  while (dest != end) {
    *dest++ = value;
  }
#endif
#undef STOS
}
// Writes the pointer |value| into each of the |counter| pointer slots starting
// at |dest|, by forwarding to the Address-based MemsetPointer overload.
//
// U* must be assignable to T*. This is enforced with a static_assert so that
// the check applies to every build configuration, not only to DEBUG builds;
// without it the reinterpret_casts below would silently accept unrelated
// pointer types.
template <typename T, typename U>
inline void MemsetPointer(T** dest, U* value, size_t counter) {
  static_assert(std::is_assignable<T*&, U*>::value,
                "MemsetPointer: value must be assignable to the elements "
                "of dest");
  MemsetPointer(reinterpret_cast<Address*>(dest),
                reinterpret_cast<Address>(value), counter);
}
template <typename T>
inline void MemsetPointer(T** dest, std::nullptr_t, size_t counter) {
MemsetPointer(reinterpret_cast<Address*>(dest), Address{0}, counter);
}
// Copies |count| characters from |src| to |dst|; either side may hold 8-bit or
// 16-bit characters. Each value is converted through the unsigned variants of
// the two types, so widening zero-extends. The source and destination ranges
// must not overlap.
// V8_NONNULL is not allowed on a definition, which is why the function is
// declared separately first.
template <typename SrcType, typename DstType>
void CopyChars(DstType* dst, const SrcType* src, size_t count) V8_NONNULL(1, 2);
template <typename SrcType, typename DstType>
void CopyChars(DstType* dst, const SrcType* src, size_t count) {
  STATIC_ASSERT(std::is_integral<SrcType>::value);
  STATIC_ASSERT(std::is_integral<DstType>::value);
  using SrcTypeUnsigned = typename std::make_unsigned<SrcType>::type;
  using DstTypeUnsigned = typename std::make_unsigned<DstType>::type;
#ifdef DEBUG
  // Check for no overlap, otherwise {std::copy_n} cannot be used.
  Address src_start = reinterpret_cast<Address>(src);
  Address src_end = src_start + count * sizeof(SrcType);
  Address dst_start = reinterpret_cast<Address>(dst);
  Address dst_end = dst_start + count * sizeof(DstType);
  DCHECK(src_end <= dst_start || dst_end <= src_start);
#endif
  const auto* const unsigned_src =
      reinterpret_cast<const SrcTypeUnsigned*>(src);
  auto* const unsigned_dst = reinterpret_cast<DstTypeUnsigned*>(dst);
  // Counts 1..16 get a dedicated {std::copy_n} call with a literal length.
  // Especially Atom CPUs profit from this explicit instantiation; it gave up
  // to 20 percent improvement for microbenchmarks such as joining an array of
  // small integers (2019-10-16).
#define COPY_CHARS_SMALL_COUNT_LIST(V)                                  \
  V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) V(9) V(10) V(11) V(12) V(13) \
  V(14) V(15) V(16)
#define COPY_CHARS_FIXED_COUNT_CASE(N)          \
  case N:                                       \
    std::copy_n(unsigned_src, N, unsigned_dst); \
    break;
  switch (count) {
    COPY_CHARS_SMALL_COUNT_LIST(COPY_CHARS_FIXED_COUNT_CASE)
    default:
      std::copy_n(unsigned_src, count, unsigned_dst);
      break;
  }
#undef COPY_CHARS_FIXED_COUNT_CASE
#undef COPY_CHARS_SMALL_COUNT_LIST
}
} // namespace internal
} // namespace v8
#endif // V8_UTILS_MEMCOPY_H_