components/language_detection/core/quantization_utils.h - chromium/src.git - Git at Google

 // Copyright 2021 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef COMPONENTS_LANGUAGE_DETECTION_CORE_QUANTIZATION_UTILS_H_
 #define COMPONENTS_LANGUAGE_DETECTION_CORE_QUANTIZATION_UTILS_H_

 #include <cmath>
 #include <cstdint>

 namespace language_detection {

 // Quantizes the floating point value `x` to an unsigned integer value with
 // `num_bits` of precision.
 //
 // The floating point range described by `min_val` and `max_val` is mapped
 // onto the quantized range as follows:
 //   - `min_val` (and all values below it) map to quantized 0.
 //   - `max_val` (and all values above it) map to quantized
 //     (1 << num_bits) - 1.
 //
 // `num_bits` must be greater than 1, and less than 32.
 //
 // Returns the quantized value.
 uint32_t FloatToQuantized(float x, float min_val, float max_val, int num_bits);

 // Converts the quantized value `x` back to a floating point value, with
 // `num_bits` of precision.
 //
 // `min_val`, `max_val` and `num_bits` describe the quantization range in the
 // same way as for `FloatToQuantized`: floating point `min_val` (and all values
 // below it) map to quantized 0, and floating point `max_val` (and all values
 // above it) map to quantized (1 << num_bits) - 1.
 // NOTE(review): presumably these must be the same values that were used when
 // `x` was quantized -- confirm against callers.
 //
 // `num_bits` must be greater than 1, and less than 32.
 //
 // Returns the dequantized floating point value.
 float QuantizedToFloat(uint32_t x, float min_val, float max_val, int num_bits);

 // Params required for quantizing / dequantizing a given value.
 // These are populated by the `GetQuantizationParams` method, and are
 // used internally by the `QuantizedToFloat` and `FloatToQuantized`
 // methods. Callers can also compute them once, cache them, and reuse them
 // when invoking the `QuantizedToFloatWithQuantParams` method repeatedly on
 // values from the same tensor.
 struct QuantizationParams {
   // Multiplier applied to the quantized value by
   // `QuantizedToFloatWithQuantParams`.
   float nudged_scale;
   // Offset added to the scaled quantized value by
   // `QuantizedToFloatWithQuantParams`.
   float nudged_min;
   // NOTE(review): presumably the upper end of the adjusted floating point
   // range (counterpart of `nudged_min`) -- confirm against
   // `GetQuantizationParams`.
   float nudged_max;
   // NOTE(review): presumably the largest quantized value, i.e.
   // (1 << num_bits) - 1 -- confirm against `GetQuantizationParams`.
   uint32_t quant_max_uint32;
 };

 // Computes the params required for quantization / dequantization from the
 // floating point range (`min_val`, `max_val`) and the precision `num_bits`.
 //
 // This is the first part of the `FloatToQuantized` and
 // `QuantizedToFloat` methods. It is useful to call it once up front when a
 // large number of values from the same tensor need to be quantized or
 // dequantized, rather than recomputing the params for every value.
 //
 // `num_bits` must be greater than 1, and less than 32.
 //
 // Returns the populated `QuantizationParams`.
 QuantizationParams GetQuantizationParams(float min_val,
                                          float max_val,
                                          int num_bits);

 // Dequantizes `x` using `params` previously obtained from the
 // `GetQuantizationParams` method.
 //
 // The result is `params.nudged_min + params.nudged_scale * x`. This is the
 // second part of the `QuantizedToFloat` method: once the scale and min have
 // been computed, it can be applied cheaply to a large number of quantized
 // values.
 inline float QuantizedToFloatWithQuantParams(uint32_t x,
                                              const QuantizationParams& params) {
   return params.nudged_min + params.nudged_scale * static_cast<float>(x);
 }

 }  // namespace language_detection

 #endif  // COMPONENTS_LANGUAGE_DETECTION_CORE_QUANTIZATION_UTILS_H_
	// Copyright 2021 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef COMPONENTS_LANGUAGE_DETECTION_CORE_QUANTIZATION_UTILS_H_
	#define COMPONENTS_LANGUAGE_DETECTION_CORE_QUANTIZATION_UTILS_H_

	#include <cmath>
	#include <cstdint>

	namespace language_detection {

	// Quantizes the floating point value `x` to an unsigned integer value with
	// `num_bits` of precision.
	//
	// The floating point range described by `min_val` and `max_val` is mapped
	// onto the quantized range as follows:
	//   - `min_val` (and all values below it) map to quantized 0.
	//   - `max_val` (and all values above it) map to quantized
	//     (1 << num_bits) - 1.
	//
	// `num_bits` must be greater than 1, and less than 32.
	//
	// Returns the quantized value.
	uint32_t FloatToQuantized(float x, float min_val, float max_val, int num_bits);

	// Converts the quantized value `x` back to a floating point value, with
	// `num_bits` of precision.
	//
	// `min_val`, `max_val` and `num_bits` describe the quantization range in the
	// same way as for `FloatToQuantized`: floating point `min_val` (and all values
	// below it) map to quantized 0, and floating point `max_val` (and all values
	// above it) map to quantized (1 << num_bits) - 1.
	// NOTE(review): presumably these must be the same values that were used when
	// `x` was quantized -- confirm against callers.
	//
	// `num_bits` must be greater than 1, and less than 32.
	//
	// Returns the dequantized floating point value.
	float QuantizedToFloat(uint32_t x, float min_val, float max_val, int num_bits);

	// Params required for quantizing / dequantizing a given value.
	// These are populated by the `GetQuantizationParams` method, and are
	// used internally by the `QuantizedToFloat` and `FloatToQuantized`
	// methods. Callers can also compute them once, cache them, and reuse them
	// when invoking the `QuantizedToFloatWithQuantParams` method repeatedly on
	// values from the same tensor.
	struct QuantizationParams {
	// Multiplier applied to the quantized value by
	// `QuantizedToFloatWithQuantParams`.
	float nudged_scale;
	// Offset added to the scaled quantized value by
	// `QuantizedToFloatWithQuantParams`.
	float nudged_min;
	// NOTE(review): presumably the upper end of the adjusted floating point
	// range (counterpart of `nudged_min`) -- confirm against
	// `GetQuantizationParams`.
	float nudged_max;
	// NOTE(review): presumably the largest quantized value, i.e.
	// (1 << num_bits) - 1 -- confirm against `GetQuantizationParams`.
	uint32_t quant_max_uint32;
	};

	// Computes the params required for quantization / dequantization from the
	// floating point range (`min_val`, `max_val`) and the precision `num_bits`.
	//
	// This is the first part of the `FloatToQuantized` and
	// `QuantizedToFloat` methods. It is useful to call it once up front when a
	// large number of values from the same tensor need to be quantized or
	// dequantized, rather than recomputing the params for every value.
	//
	// `num_bits` must be greater than 1, and less than 32.
	//
	// Returns the populated `QuantizationParams`.
	QuantizationParams GetQuantizationParams(float min_val,
	float max_val,
	int num_bits);

	// Dequantizes `x` using `params` previously obtained from the
	// `GetQuantizationParams` method.
	//
	// The result is `params.nudged_min + params.nudged_scale * x`. This is the
	// second part of the `QuantizedToFloat` method: once the scale and min have
	// been computed, it can be applied cheaply to a large number of quantized
	// values.
	inline float QuantizedToFloatWithQuantParams(uint32_t x,
	                                             const QuantizationParams& params) {
	  return params.nudged_min + params.nudged_scale * static_cast<float>(x);
	}

	} // namespace language_detection

	#endif // COMPONENTS_LANGUAGE_DETECTION_CORE_QUANTIZATION_UTILS_H_