blob: f2c52cb5ee49f44e7e113c49041d7f0ca82baeef [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/cpu.h"
#include <inttypes.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <sstream>
#include <utility>
#include "base/stl_util.h"
#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_ANDROID) || \
defined(OS_AIX)
#include "base/containers/flat_set.h"
#include "base/files/file_util.h"
#include "base/no_destructor.h"
#include "base/notreached.h"
#include "base/process/internal_linux.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/system/sys_info.h"
#include "base/threading/thread_restrictions.h"
#endif
#if defined(ARCH_CPU_ARM_FAMILY) && \
(defined(OS_ANDROID) || defined(OS_LINUX) || defined(OS_CHROMEOS))
#include "base/files/file_util.h"
#endif
#if defined(ARCH_CPU_X86_FAMILY)
#if defined(COMPILER_MSVC)
#include <intrin.h>
#include <immintrin.h> // For _xgetbv()
#endif
#endif
namespace base {
#if defined(ARCH_CPU_X86_FAMILY)
namespace internal {
std::tuple<int, int, int, int> ComputeX86FamilyAndModel(
const std::string& vendor,
int signature) {
int family = (signature >> 8) & 0xf;
int model = (signature >> 4) & 0xf;
int ext_family = 0;
int ext_model = 0;
// The "Intel 64 and IA-32 Architectures Developer's Manual: Vol. 2A"
// specifies the Extended Model is defined only when the Base Family is
// 06h or 0Fh.
// The "AMD CPUID Specification" specifies that the Extended Model is
// defined only when Base Family is 0Fh.
// Both manuals define the display model as
// {ExtendedModel[3:0],BaseModel[3:0]} in that case.
if (family == 0xf || (family == 0x6 && vendor == "GenuineIntel")) {
ext_model = (signature >> 16) & 0xf;
model += ext_model << 4;
}
// Both the "Intel 64 and IA-32 Architectures Developer's Manual: Vol. 2A"
// and the "AMD CPUID Specification" specify that the Extended Family is
// defined only when the Base Family is 0Fh.
// Both manuals define the display family as {0000b,BaseFamily[3:0]} +
// ExtendedFamily[7:0] in that case.
if (family == 0xf) {
ext_family = (signature >> 20) & 0xff;
family += ext_family;
}
return {family, model, ext_family, ext_model};
}
} // namespace internal
#endif // defined(ARCH_CPU_X86_FAMILY)
CPU::CPU()
: signature_(0),
type_(0),
family_(0),
model_(0),
stepping_(0),
ext_model_(0),
ext_family_(0),
has_mmx_(false),
has_sse_(false),
has_sse2_(false),
has_sse3_(false),
has_ssse3_(false),
has_sse41_(false),
has_sse42_(false),
has_popcnt_(false),
has_avx_(false),
has_avx2_(false),
has_aesni_(false),
has_non_stop_time_stamp_counter_(false),
is_running_in_vm_(false),
cpu_vendor_("unknown") {
Initialize();
}
namespace {
#if defined(ARCH_CPU_X86_FAMILY)
#if !defined(COMPILER_MSVC)
#if defined(__pic__) && defined(__i386__)
void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile(
"mov %%ebx, %%edi\n"
"cpuid\n"
"xchg %%edi, %%ebx\n"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]),
"=d"(cpu_info[3])
: "a"(info_type), "c"(0));
}
#else
void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile("cpuid\n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
"=d"(cpu_info[3])
: "a"(info_type), "c"(0));
}
#endif
#endif // !defined(COMPILER_MSVC)
// xgetbv returns the value of an Intel Extended Control Register (XCR).
// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
uint64_t xgetbv(uint32_t xcr) {
#if defined(COMPILER_MSVC)
return _xgetbv(xcr);
#else
uint32_t eax, edx;
__asm__ volatile (
"xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
return (static_cast<uint64_t>(edx) << 32) | eax;
#endif // defined(COMPILER_MSVC)
}
#endif // ARCH_CPU_X86_FAMILY
#if defined(ARCH_CPU_ARM_FAMILY) && \
(defined(OS_ANDROID) || defined(OS_LINUX) || defined(OS_CHROMEOS))
std::string* CpuInfoBrand() {
static std::string* brand = []() {
// This function finds the value from /proc/cpuinfo under the key "model
// name" or "Processor". "model name" is used in Linux 3.8 and later (3.7
// and later for arm64) and is shown once per CPU. "Processor" is used in
// earler versions and is shown only once at the top of /proc/cpuinfo
// regardless of the number CPUs.
const char kModelNamePrefix[] = "model name\t: ";
const char kProcessorPrefix[] = "Processor\t: ";
std::string contents;
ReadFileToString(FilePath("/proc/cpuinfo"), &contents);
DCHECK(!contents.empty());
std::istringstream iss(contents);
std::string line;
while (std::getline(iss, line)) {
if (line.compare(0, strlen(kModelNamePrefix), kModelNamePrefix) == 0)
return new std::string(line.substr(strlen(kModelNamePrefix)));
if (line.compare(0, strlen(kProcessorPrefix), kProcessorPrefix) == 0)
return new std::string(line.substr(strlen(kProcessorPrefix)));
}
return new std::string();
}();
return brand;
}
#endif // defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) ||
// defined(OS_LINUX) || defined(OS_CHROMEOS))
} // namespace
void CPU::Initialize() {
#if defined(ARCH_CPU_X86_FAMILY)
int cpu_info[4] = {-1};
// This array is used to temporarily hold the vendor name and then the brand
// name. Thus it has to be big enough for both use cases. There are
// static_asserts below for each of the use cases to make sure this array is
// big enough.
char cpu_string[sizeof(cpu_info) * 3 + 1];
// __cpuid with an InfoType argument of 0 returns the number of
// valid Ids in CPUInfo[0] and the CPU identification string in
// the other three array elements. The CPU identification string is
// not in linear order. The code below arranges the information
// in a human readable form. The human readable order is CPUInfo[1] |
// CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped
// before using memcpy() to copy these three array elements to |cpu_string|.
__cpuid(cpu_info, 0);
int num_ids = cpu_info[0];
std::swap(cpu_info[2], cpu_info[3]);
static constexpr size_t kVendorNameSize = 3 * sizeof(cpu_info[1]);
static_assert(kVendorNameSize < base::size(cpu_string),
"cpu_string too small");
memcpy(cpu_string, &cpu_info[1], kVendorNameSize);
cpu_string[kVendorNameSize] = '\0';
cpu_vendor_ = cpu_string;
// Interpret CPU feature information.
if (num_ids > 0) {
int cpu_info7[4] = {0};
__cpuid(cpu_info, 1);
if (num_ids >= 7) {
__cpuid(cpu_info7, 7);
}
signature_ = cpu_info[0];
stepping_ = cpu_info[0] & 0xf;
type_ = (cpu_info[0] >> 12) & 0x3;
std::tie(family_, model_, ext_family_, ext_model_) =
internal::ComputeX86FamilyAndModel(cpu_vendor_, signature_);
has_mmx_ = (cpu_info[3] & 0x00800000) != 0;
has_sse_ = (cpu_info[3] & 0x02000000) != 0;
has_sse2_ = (cpu_info[3] & 0x04000000) != 0;
has_sse3_ = (cpu_info[2] & 0x00000001) != 0;
has_ssse3_ = (cpu_info[2] & 0x00000200) != 0;
has_sse41_ = (cpu_info[2] & 0x00080000) != 0;
has_sse42_ = (cpu_info[2] & 0x00100000) != 0;
has_popcnt_ = (cpu_info[2] & 0x00800000) != 0;
// "Hypervisor Present Bit: Bit 31 of ECX of CPUID leaf 0x1."
// See https://lwn.net/Articles/301888/
// This is checking for any hypervisor. Hypervisors may choose not to
// announce themselves. Hypervisors trap CPUID and sometimes return
// different results to underlying hardware.
is_running_in_vm_ = (cpu_info[2] & 0x80000000) != 0;
// AVX instructions will generate an illegal instruction exception unless
// a) they are supported by the CPU,
// b) XSAVE is supported by the CPU and
// c) XSAVE is enabled by the kernel.
// See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
//
// In addition, we have observed some crashes with the xgetbv instruction
// even after following Intel's example code. (See crbug.com/375968.)
// Because of that, we also test the XSAVE bit because its description in
// the CPUID documentation suggests that it signals xgetbv support.
has_avx_ =
(cpu_info[2] & 0x10000000) != 0 &&
(cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
(cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
(xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */;
has_aesni_ = (cpu_info[2] & 0x02000000) != 0;
has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0;
}
// Get the brand string of the cpu.
__cpuid(cpu_info, 0x80000000);
const int max_parameter = cpu_info[0];
static constexpr int kParameterStart = 0x80000002;
static constexpr int kParameterEnd = 0x80000004;
static constexpr int kParameterSize = kParameterEnd - kParameterStart + 1;
static_assert(kParameterSize * sizeof(cpu_info) + 1 == base::size(cpu_string),
"cpu_string has wrong size");
if (max_parameter >= kParameterEnd) {
size_t i = 0;
for (int parameter = kParameterStart; parameter <= kParameterEnd;
++parameter) {
__cpuid(cpu_info, parameter);
memcpy(&cpu_string[i], cpu_info, sizeof(cpu_info));
i += sizeof(cpu_info);
}
cpu_string[i] = '\0';
cpu_brand_ = cpu_string;
}
static constexpr int kParameterContainingNonStopTimeStampCounter = 0x80000007;
if (max_parameter >= kParameterContainingNonStopTimeStampCounter) {
__cpuid(cpu_info, kParameterContainingNonStopTimeStampCounter);
has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0;
}
if (!has_non_stop_time_stamp_counter_ && is_running_in_vm_) {
int cpu_info_hv[4] = {};
__cpuid(cpu_info_hv, 0x40000000);
if (cpu_info_hv[1] == 0x7263694D && // Micr
cpu_info_hv[2] == 0x666F736F && // osof
cpu_info_hv[3] == 0x76482074) { // t Hv
// If CPUID says we have a variant TSC and a hypervisor has identified
// itself and the hypervisor says it is Microsoft Hyper-V, then treat
// TSC as invariant.
//
// Microsoft Hyper-V hypervisor reports variant TSC as there are some
// scenarios (eg. VM live migration) where the TSC is variant, but for
// our purposes we can treat it as invariant.
has_non_stop_time_stamp_counter_ = true;
}
}
#elif defined(ARCH_CPU_ARM_FAMILY)
#if defined(OS_ANDROID) || defined(OS_LINUX) || defined(OS_CHROMEOS)
cpu_brand_ = *CpuInfoBrand();
#elif defined(OS_WIN)
// Windows makes high-resolution thread timing information available in
// user-space.
has_non_stop_time_stamp_counter_ = true;
#endif
#endif
}
CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const {
if (has_avx2()) return AVX2;
if (has_avx()) return AVX;
if (has_sse42()) return SSE42;
if (has_sse41()) return SSE41;
if (has_ssse3()) return SSSE3;
if (has_sse3()) return SSE3;
if (has_sse2()) return SSE2;
if (has_sse()) return SSE;
return PENTIUM;
}
#if defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_ANDROID) || \
defined(OS_AIX)
namespace {
constexpr char kTimeInStatePath[] =
"/sys/devices/system/cpu/cpu%d/cpufreq/stats/time_in_state";
constexpr char kPhysicalPackageIdPath[] =
"/sys/devices/system/cpu/cpu%d/topology/physical_package_id";
constexpr char kCoreIdleStateTimePath[] =
"/sys/devices/system/cpu/cpu%d/cpuidle/state%d/time";
bool SupportsTimeInState() {
// Reading from time_in_state doesn't block (it amounts to reading a struct
// from the cpufreq-stats kernel driver).
ThreadRestrictions::ScopedAllowIO allow_io;
// Check if the time_in_state path for the first core is readable.
FilePath time_in_state_path(StringPrintf(kTimeInStatePath, /*core_index=*/0));
ScopedFILE file_stream(OpenFile(time_in_state_path, "rb"));
return static_cast<bool>(file_stream);
}
bool ParseTimeInState(const std::string& content,
CPU::CoreType core_type,
uint32_t core_index,
CPU::TimeInState& time_in_state) {
const char* begin = content.data();
size_t max_pos = content.size() - 1;
// Example time_in_state content:
// ---
// 300000 1
// 403200 0
// 499200 15
// ---
// Iterate over the individual lines.
for (size_t pos = 0; pos <= max_pos;) {
int num_chars = 0;
// Each line should have two integer fields, frequency (kHz) and time (in
// jiffies), separated by a space, e.g. "2419200 132".
uint64_t frequency;
uint64_t time;
int matches = sscanf(begin + pos, "%" PRIu64 " %" PRIu64 "\n%n", &frequency,
&time, &num_chars);
if (matches != 2)
return false;
// Skip zero-valued entries in the output list (no time spent at this
// frequency).
if (time > 0) {
time_in_state.push_back({core_type, core_index, frequency,
internal::ClockTicksToTimeDelta(time)});
}
// Advance line.
DCHECK_GT(num_chars, 0);
pos += num_chars;
}
return true;
}
bool SupportsCoreIdleTimes() {
// Reading from the cpuidle driver doesn't block.
ThreadRestrictions::ScopedAllowIO allow_io;
// Check if the path for the idle time in state 0 for core 0 is readable.
FilePath idle_state0_path(
StringPrintf(kCoreIdleStateTimePath, /*core_index=*/0, /*idle_state=*/0));
ScopedFILE file_stream(OpenFile(idle_state0_path, "rb"));
return static_cast<bool>(file_stream);
}
std::vector<CPU::CoreType> GuessCoreTypes() {
// Try to guess the CPU architecture and cores of each cluster by comparing
// the maximum frequencies of the available (online and offline) cores.
const char kCPUMaxFreqPath[] =
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq";
int num_cpus = SysInfo::NumberOfProcessors();
std::vector<CPU::CoreType> core_index_to_type(num_cpus,
CPU::CoreType::kUnknown);
std::vector<uint32_t> max_core_frequencies_mhz(num_cpus, 0);
flat_set<uint32_t> frequencies_mhz;
{
// Reading from cpuinfo_max_freq doesn't block (it amounts to reading a
// struct field from the cpufreq kernel driver).
ThreadRestrictions::ScopedAllowIO allow_io;
for (int core_index = 0; core_index < num_cpus; ++core_index) {
std::string content;
uint32_t frequency_khz = 0;
auto path = StringPrintf(kCPUMaxFreqPath, core_index);
if (ReadFileToString(FilePath(path), &content))
StringToUint(content, &frequency_khz);
uint32_t frequency_mhz = frequency_khz / 1000;
max_core_frequencies_mhz[core_index] = frequency_mhz;
if (frequency_mhz > 0)
frequencies_mhz.insert(frequency_mhz);
}
}
size_t num_frequencies = frequencies_mhz.size();
for (int core_index = 0; core_index < num_cpus; ++core_index) {
uint32_t core_frequency_mhz = max_core_frequencies_mhz[core_index];
CPU::CoreType core_type = CPU::CoreType::kOther;
if (num_frequencies == 1u) {
core_type = CPU::CoreType::kSymmetric;
} else if (num_frequencies == 2u || num_frequencies == 3u) {
auto it = frequencies_mhz.find(core_frequency_mhz);
if (it != frequencies_mhz.end()) {
// flat_set is sorted.
size_t frequency_index = it - frequencies_mhz.begin();
switch (frequency_index) {
case 0:
core_type = num_frequencies == 2u
? CPU::CoreType::kBigLittle_Little
: CPU::CoreType::kBigLittleBigger_Little;
break;
case 1:
core_type = num_frequencies == 2u
? CPU::CoreType::kBigLittle_Big
: CPU::CoreType::kBigLittleBigger_Big;
break;
case 2:
DCHECK_EQ(num_frequencies, 3u);
core_type = CPU::CoreType::kBigLittleBigger_Bigger;
break;
default:
NOTREACHED();
break;
}
}
}
core_index_to_type[core_index] = core_type;
}
return core_index_to_type;
}
} // namespace
// static
const std::vector<CPU::CoreType>& CPU::GetGuessedCoreTypes() {
static NoDestructor<std::vector<CoreType>> kCoreTypes(GuessCoreTypes());
return *kCoreTypes.get();
}
// static
bool CPU::GetTimeInState(TimeInState& time_in_state) {
time_in_state.clear();
// The kernel may not support the cpufreq-stats driver.
static const bool kSupportsTimeInState = SupportsTimeInState();
if (!kSupportsTimeInState)
return false;
static const std::vector<CoreType>& kCoreTypes = GetGuessedCoreTypes();
// time_in_state is reported per cluster. Identify the first cores of each
// cluster.
static NoDestructor<std::vector<int>> kFirstCoresIndexes([]() {
std::vector<int> first_cores;
int last_core_package_id = 0;
for (int core_index = 0; core_index < SysInfo::NumberOfProcessors();
core_index++) {
// Reading from physical_package_id doesn't block (it amounts to reading a
// struct field from the kernel).
ThreadRestrictions::ScopedAllowIO allow_io;
FilePath package_id_path(
StringPrintf(kPhysicalPackageIdPath, core_index));
std::string package_id_str;
if (!ReadFileToString(package_id_path, &package_id_str))
return std::vector<int>();
int package_id;
base::StringPiece trimmed = base::TrimWhitespaceASCII(
package_id_str, base::TrimPositions::TRIM_ALL);
if (!base::StringToInt(trimmed, &package_id))
return std::vector<int>();
if (last_core_package_id != package_id || core_index == 0)
first_cores.push_back(core_index);
last_core_package_id = package_id;
}
return first_cores;
}());
if (kFirstCoresIndexes->empty())
return false;
// Reading from time_in_state doesn't block (it amounts to reading a struct
// from the cpufreq-stats kernel driver).
ThreadRestrictions::ScopedAllowIO allow_io;
// Read the time_in_state for each cluster from the /sys directory of the
// cluster's first core.
for (int cluster_core_index : *kFirstCoresIndexes) {
FilePath time_in_state_path(
StringPrintf(kTimeInStatePath, cluster_core_index));
std::string buffer;
if (!ReadFileToString(time_in_state_path, &buffer))
return false;
if (!ParseTimeInState(buffer, kCoreTypes[cluster_core_index],
cluster_core_index, time_in_state)) {
return false;
}
}
return true;
}
// static
bool CPU::GetCumulativeCoreIdleTimes(CoreIdleTimes& idle_times) {
idle_times.clear();
// The kernel may not support the cpufreq-stats driver.
static const bool kSupportsIdleTimes = SupportsCoreIdleTimes();
if (!kSupportsIdleTimes)
return false;
// Reading from the cpuidle driver doesn't block.
ThreadRestrictions::ScopedAllowIO allow_io;
int num_cpus = SysInfo::NumberOfProcessors();
bool success = false;
for (int core_index = 0; core_index < num_cpus; ++core_index) {
std::string content;
TimeDelta idle_time;
// The number of idle states is system/CPU dependent, so we increment and
// try to read each state until we fail.
for (int state_index = 0;; ++state_index) {
auto path = StringPrintf(kCoreIdleStateTimePath, core_index, state_index);
uint64_t idle_state_time = 0;
if (!ReadFileToString(FilePath(path), &content))
break;
StringToUint64(content, &idle_state_time);
idle_time += TimeDelta::FromMicroseconds(idle_state_time);
}
idle_times.push_back(idle_time);
// At least one of the cores should have some idle time, otherwise we report
// a failure.
success |= idle_time > base::TimeDelta();
}
return success;
}
#endif // defined(OS_LINUX) || defined(OS_CHROMEOS) || defined(OS_ANDROID) ||
// defined(OS_AIX)
} // namespace base