| /** |
| * Code used to generate manual values for 'correctly rounded' AbstractFloat |
| * tests in the CTS. |
| * |
| * These are generated in a C++ program, because it allows for easy access to |
| * higher than 64-bit floating point numbers (specifically 128-bit), which |
| * allows for calculating roundings when infinitely precise calculations are not |
| * precisely representable in 64-bit floats. This gets around the fact that |
| * numbers in Typescript are internally 64-bits, thus making it difficult to |
| * detect when rounding occurs for AbstractFloats without importing a higher |
| * precision floating point library. |
| * |
| * This code is not meant to be automatically built/used by the CTS, but |
| * instead is a reference for how the values in af_data.ts were generated. |
| */ |
#include <bit>
#include <cassert>
#include <cfenv>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <format>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <tuple>
#include <vector>
| |
| /** The 'magic' that allows for calculating both roundings */ |
| // #pragma STDC FENV_ACCESS ON |
| |
| /** Magic constants that should match the entries in constants.ts's kBit.f64 */ |
| constexpr double kF64NegativeMin = std::bit_cast<double>(0xFFEFFFFFFFFFFFFFull); |
| constexpr double kF64NegativeMax = std::bit_cast<double>(0x8010000000000000ull); |
| constexpr double kF64NegativeSubnormalMin = std::bit_cast<double>(0x800FFFFFFFFFFFFFull); |
| constexpr double kF64NegativeSubnormalMax = std::bit_cast<double>(0x8000000000000001ull); |
| constexpr double kF64PositiveSubnormalMin = std::bit_cast<double>(0x0000000000000001ull); |
| constexpr double kF64PositiveSubnormalMax = std::bit_cast<double>(0x000FFFFFFFFFFFFFull); |
| constexpr double kF64PositiveMin = std::bit_cast<double>(0x0010000000000000ull); |
| constexpr double kF64PositiveMax = std::bit_cast<double>(0x7FEFFFFFFFFFFFFFull); |
| |
| /** |
| * Mapping from Numeric value -> TS representation, should include all the |
| * values that appear in kInterestingF64Values in math.ts |
| */ |
| const std::map<double, std::string> kInterestingF64s = { |
| { kF64NegativeMin, "kValue.f64.negative.min" }, |
| { -10.0, "-10.0" }, |
| { -1.0, "-1.0" }, |
| { -0.125, "-0.125" }, |
| { kF64NegativeMax, "kValue.f64.negative.max"}, |
| { kF64NegativeSubnormalMin, "kValue.f64.negative.subnormal.min" }, |
| { kF64NegativeSubnormalMax, "kValue.f64.negative.subnormal.max" }, |
| { 0.0, "0.0" }, |
| { kF64PositiveSubnormalMin, "kValue.f64.positive.subnormal.min" }, |
| { kF64PositiveSubnormalMax, "kValue.f64.positive.subnormal.max" }, |
| { kF64PositiveMin, "kValue.f64.positive.min" }, |
| { 0.125, "0.125" }, |
| { 1.0, "1.0" }, |
| { 10.0, "10.0" }, |
| { kF64PositiveMax, "kValue.f64.positive.max"} |
| }; |
| |
/**
 * Extra inputs exercised only by the 'fract' cases, keyed by value and mapped
 * to the text emitted for that input. The trailing comments give the
 * input -> expected fract result.
 */
const std::map<double, std::string> kFractF64s{
    {0.5, "0.5"},    // 0.5 -> 0.5
    {1, "1"},        // 1 -> 0
    {2, "2"},        // 2 -> 0
    {-0.5, "-0.5"},  // -0.5 -> 0.5
    {-1, "-1"},      // -1 -> 0
    {-2, "-2"},      // -2 -> 0
    {10.0000999999999997669, "10.0000999999999997669"},    // ~10.0001 -> ~0.0001
    {-10.0000999999999997669, "-10.0000999999999997669"},  // -10.0001 -> ~0.9999
    // 3937509.87755102012306 -> ~0.877551..., not [0, 0.75],
    // https://github.com/gpuweb/gpuweb/issues/4523
    {3937509.87755102012306, "3937509.87755102012306"},
};
| |
| /** |
| * Print out a string representation of a specific value that can be copied in |
| * a CTS test |
| */ |
| std::string printAbstractFloat(const double val) { |
| if (!std::isfinite(val)) { |
| if (val > 0) { |
| return "kValue.f64.positive.infinity"; |
| } |
| if (val < 0) { |
| return "kValue.f64.negative.infinity"; |
| } |
| assert("Generated a NaN"); |
| } |
| |
| if (const auto iter = kInterestingF64s.find(val); iter != kInterestingF64s.end()) { |
| return iter->second; |
| } |
| |
| std::stringstream ss; |
| // Print 'easy' to read integers as literals, otherwise dump the hex value |
| if ( val == round(val) && fabs(val) < 100000) { |
| ss << val; |
| } else { |
| ss << "reinterpretU64AsF64(0x" << std::hex << std::setfill('0') << std::setw(16) << std::bit_cast<uint64_t>(val) << "n) /* " << val << " */"; |
| } |
| return ss.str(); |
| } |
| |
/**
 * Reports whether a value could be affected by FTZ behaviour, which is the
 * case exactly when it classifies as a subnormal.
 */
bool couldBeFlushed(const double val) {
  switch (std::fpclassify(val)) {
    case FP_SUBNORMAL:
      return true;
    default:
      return false;
  }
}
| |
/**
 * Generates the 64-bit float interval that a higher precision value will be
 * quantized down to.
 *
 * If the value is exactly representable in 64-bit floating point both
 * elements are that value, otherwise they are the two 64-bit values nearest
 * to it (rounded towards negative and positive infinity respectively).
 *
 * This is done by manipulating the global floating point rounding mode, so
 * the function is non-reentrant and should not be used in
 * concurrent/asynchronous processes. The rounding mode that was active on
 * entry is restored before returning.
 *
 * @param val the higher precision value to quantize
 * @return { val rounded downward, val rounded upward }
 */
std::tuple<double, double> quantizeToAbstractFloat(const long double val) {
  // The mode switch must happen regardless of NDEBUG, so the call is made
  // outside of assert() and only its status is asserted on.
  const auto setRoundingMode = [](const int mode) {
    [[maybe_unused]] const int status = std::fesetround(mode);
    assert(status == 0 && "Failed to change the floating point rounding mode");
  };

  const int original_mode = std::fegetround();

  // The compiler is not told that the rounding mode changes between the two
  // conversions. Routing the operand and both results through volatile
  // objects forces each conversion to be performed separately, at run time,
  // between the surrounding fesetround() calls.
  volatile long double operand = val;

  setRoundingMode(FE_DOWNWARD);
  volatile double rounded_down = static_cast<double>(operand);
  setRoundingMode(FE_UPWARD);
  volatile double rounded_up = static_cast<double>(operand);

  setRoundingMode(original_mode);

  const double downward = rounded_down;
  const double upward = rounded_up;
  return {downward, upward};
}
| |
| /** |
| * Generates a string for an unary operation result that can be copied into a |
| * CTS test file. |
| */ |
| std::string printBinaryCase(const std::string &input, const std::vector<double> &result) { |
| assert(!result.empty()); |
| std::stringstream ss; |
| ss << "{ input: "; |
| ss << input; |
| ss << ", "; |
| ss << "expected: [ "; |
| if (!result.empty()) { |
| for (auto i = 0; i < result.size() - 1; i++) { |
| ss << "" << printAbstractFloat(result[i]) << ", "; |
| } |
| ss << printAbstractFloat(result.back()); |
| } |
| ss << " ] }"; |
| return ss.str(); |
| } |
| |
| /** |
| * Generates a string for a binary operation result that can be copied into a |
| * CTS test file. |
| */ |
| std::string printBinaryCase(const std::string &lhs, const std::string &rhs, const std::vector<double> &result) { |
| assert(!result.empty()); |
| std::stringstream ss; |
| ss << "{ lhs: "; |
| ss << lhs; |
| ss << ", rhs: "; |
| ss << rhs; |
| ss << ", "; |
| ss << "expected: [ "; |
| if (!result.empty()) { |
| for (auto i = 0; i < result.size() - 1; i++) { |
| ss << "" << printAbstractFloat(result[i]) << ", "; |
| } |
| ss << printAbstractFloat(result.back()); |
| } |
| ss << " ] }"; |
| return ss.str(); |
| } |
| |
/**
 * Pointer to a function that performs a binary operation (addition, etc.) on
 * two long double operands and returns the long double result.
 */
using BinaryOp = long double (*)(long double, long double);

/** lhs + rhs, evaluated in long double. */
const BinaryOp kAdditionOp = [](const long double a, const long double b) -> long double {
  return a + b;
};

/** lhs - rhs, evaluated in long double. */
const BinaryOp kSubtractionOp = [](const long double a, const long double b) -> long double {
  return a - b;
};

/** lhs * rhs, evaluated in long double. */
const BinaryOp kMultiplicationOp = [](const long double a, const long double b) -> long double {
  return a * b;
};
| |
| /** |
| * Calculates all of the possible results for a binary operation given the |
| * provided inputs. This handles both quantization and flushing behaviours. |
| */ |
| std::vector<double> calculateBinaryResults(const BinaryOp op, long double lhs, long double rhs) { |
| // CTS needs to consider that subnormals may be flushed to zero at |
| // any point, so applying potential flushings to get additional |
| // results. |
| std::set<double> results; |
| for (const auto l: couldBeFlushed(lhs) ? std::vector{0, lhs} : std::vector{lhs}) { |
| for (const auto r: couldBeFlushed(rhs) ? std::vector{0, rhs} : std::vector{rhs}) { |
| const auto [downward, upward] = quantizeToAbstractFloat(op(l, r)); |
| results.insert(downward); |
| results.insert(upward); |
| } |
| } |
| |
| return { results.begin(), results.end() }; |
| } |
| |
| /** |
| * Generates a string, that can be copied into a CTS test file, for all of the |
| * tests cases for a binary operation. |
| */ |
| std::string printBinaryOpCases(const BinaryOp op, const std::string& name) { |
| std::stringstream ss; |
| ss << "BEGIN " << name << " CASES" << std::endl; |
| for (const auto& [lhs, lhs_str] : kInterestingF64s) { |
| for (const auto& [rhs, rhs_str] : kInterestingF64s) { |
| ss << printBinaryCase(lhs_str, rhs_str, calculateBinaryResults(op, lhs, rhs)) << "," << std::endl; |
| } |
| } |
| ss << "END " << name << " CASES" << std::endl; |
| return ss.str(); |
| } |
| |
| /** |
| * Generates a string, that can be copied into a CTS test file, for all of the |
| * tests cases for `fract`. WGSL defines frac(x) = x - floor(x). |
| */ |
| std::string printFractCases() { |
| std::stringstream ss; |
| ss << "BEGIN FRACT CASES" << std::endl; |
| // Do not have to calculate quantization/roundings for floor(input), |
| // because floor of a double is guaranteed to be a double, and all of |
| // the values in kInterestingF64s and kFractF64s are doubles. |
| for (const auto& [input, input_str] : kInterestingF64s) { |
| ss << printBinaryCase(input_str, calculateBinaryResults(kSubtractionOp, input, floor(input))) << "," << std::endl; |
| } |
| for (const auto& [input, input_str] : kFractF64s) { |
| ss << printBinaryCase(input_str, calculateBinaryResults(kSubtractionOp, input, floor(input))) << "," << std::endl; |
| } |
| ss << "END FRACT CASES" << std::endl; |
| return ss.str(); |
| } |
| |
| int main() { |
| assert(sizeof(double) < sizeof(long double) && "Need higher precision long double"); |
| assert(sizeof(long double) == 16 && "Code assume 'proper' quad support, not some other higher precision floating point implementation"); |
| |
| { |
| // Confirms that calculating f64 imprecise results generates two possible |
| // roundings. |
| const auto [begin, end] = |
| quantizeToAbstractFloat(static_cast<long double>(0.1) * static_cast<long double>(0.1)); |
| assert(std::bit_cast<uint64_t>(begin) == 0x3F847AE147AE147bull && |
| std::bit_cast<uint64_t>(end) == 0x3F847AE147AE147Cull && |
| "0.1 * 0.1 returned unexpected values"); |
| } |
| |
| std::cout << printBinaryOpCases(kAdditionOp, "ADDITION") << std::endl; |
| std::cout << printBinaryOpCases(kSubtractionOp, "SUBTRACTION") << std::endl; |
| std::cout << printBinaryOpCases(kMultiplicationOp, "MULTIPLICATION") << std::endl; |
| std::cout << printFractCases() << std::endl; |
| |
| return 0; |
| } |