| /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| // Unit test for TFLite FULLY_CONNECTED op. |
| |
| #include "tensorflow/lite/kernels/fully_connected.h" |
| |
| #include <stddef.h> |
| #include <stdint.h> |
| |
| #include <algorithm> |
| #include <initializer_list> |
| #include <limits> |
| #include <map> |
| #include <memory> |
| #include <random> |
| #include <string> |
| #include <vector> |
| |
| #include <gmock/gmock.h> |
| #include <gtest/gtest.h> |
| #include "absl/memory/memory.h" |
| #include "flatbuffers/flatbuffers.h" // from @flatbuffers |
| #include "tensorflow/lite/core/api/op_resolver.h" |
| #include "tensorflow/lite/interpreter.h" |
| #include "tensorflow/lite/kernels/internal/tensor_utils.h" |
| #include "tensorflow/lite/kernels/test_util.h" |
| #include "tensorflow/lite/schema/schema_generated.h" |
| #include "tensorflow/lite/string_type.h" |
| |
| namespace tflite { |
| namespace { |
| |
| using ::testing::ElementsAre; |
| using ::testing::ElementsAreArray; |
| |
| static float fully_connected_input[] = { |
| 0.503691, 0.196961, 0.521017, 0.554248, 0.288678, 0.792476, 0.561653, |
| 0.462230, 0.650736, 0.163132, 0.029658, 0.411544, 0.470539, 0.572390, |
| 0.538755, 0.212030, 0.264309, 0.193908, 0.777480, 0.745661, 0.423314, |
| 0.470804, 0.175501, 0.492225, 0.192743, 0.540183, 0.372514, 0.446550, |
| 0.498173, 0.126472, 0.132706, 0.001864, 0.323433, 0.653723, 0.556112, |
| 0.612111, 0.446199, 0.117765, 0.074341, 0.096935, 0.280897, 0.103999, |
| 0.508479, 0.751437, 0.676389, 0.047234, 0.963467, 0.940698, 0.241142, |
| 0.740947, 0.686359, 0.664456, 0.211751, 0.861860, 0.156681, 0.404494, |
| 0.402043, 0.529195, 0.851044, 0.900216, 0.655667, 0.983750, 0.902081, |
| 0.979100, 0.637473, 0.458193, 0.591211, 0.083671, 0.575958, 0.665552, |
| 0.180606, 0.856856, 0.769551, 0.689086, 0.608293, 0.445940, 0.736320, |
| 0.571760, 0.386637, 0.977461, 0.312707, 0.072996, 0.641918, 0.524458, |
| 0.934856, 0.798598, 0.928951, 0.336899, 0.327793, 0.779995, 0.237115, |
| 0.983460, 0.763746, 0.139196, 0.962560, 0.401218, 0.597389, 0.553771, |
| 0.484890, 0.173347, 0.219322, 0.665496, 0.030203, 0.988873, 0.354582, |
| 0.638496, 0.434813, 0.090902, 0.210256, 0.821450, 0.068363, 0.522962, |
| 0.894446, 0.710280, 0.047420, 0.829302, 0.508879, 0.976371, 0.166202, |
| 0.836672, 0.756367, 0.403317, 0.820132, 0.520112, 0.542513, 0.782691, |
| 0.921330, 0.139902}; |
| |
| static float fully_connected_golden_output[] = { |
| 0, 0.0732134, 0, 0, 0, 0.280859, |
| 0, 0.128927, 0, 0.0777251, 0, 0.270268, |
| 0.271435, 0.0173503, 0.335465, 0.235562, |
| |
| 0, 0.0745866, 0, 0.051611, 0, 0.253876, |
| 0, 0.0814873, 0, 0.104104, 0, 0.248529, |
| 0.264194, 0, 0.302973, 0.166252, |
| |
| 0, 0.0170409, 0, 0.0509851, 0, 0.212834, |
| 0, 0.0208326, 0, 0.129932, 0.203978, 0.103428, |
| 0.298051, 0, 0.332233, 0.00445903, |
| |
| 0, 0.125246, 0, 0.0735336, 0, 0.0910256, |
| 0, 0, 0, 0.18933, 0.378111, 0.0712443, |
| 0.277298, 0.0123414, 0.267454, 0, |
| |
| 0, 0.14687, 0, 0.155495, 0.0300215, 0.147256, |
| 0, 0, 0, 0.156412, 0.434914, 0.0461529, |
| 0.246508, 0, 0.363138, 0, |
| |
| 0, 0, 0, 0.0212949, 0, 0.301708, |
| 0, 0.35497, 0, 0.406223, 0.0260211, 0.049195, |
| 0.197161, 0, 0.37316, 0, |
| |
| 0, 0.221783, 0, 0, 0.0116515, 0.281945, |
| 0, 0, 0, 0, 0.285626, 0.181773, |
| 0.296401, 0.170452, 0.367135, 0.142597, |
| |
| 0, 0, 0, 0, 0, 0.418886, |
| 0, 0.291063, 0, 0.227541, 0.0424759, 0.27589, |
| 0.398286, 0.177146, 0.40359, 0.121452, |
| |
| 0, 0.0834884, 0, 0, 0, 0.287441, |
| 0, 0.0046838, 0, 0.0122087, 0, 0.217376, |
| 0.140183, 0.0948412, 0.436677, 0.0589876, |
| |
| 0, 0.0289969, 0, 0.0921397, 0, 0.396802, |
| 0, 0.0126157, 0, 0.0968433, 0, 0.172271, |
| 0.173295, 0.0664741, 0.53645, 0.00915603, |
| |
| 0, 0, 0, 0, 0, 0.147942, |
| 0, 0.263795, 0, 0.39782, 0, 0.382435, |
| 0.561072, 0.0579847, 0.145712, 0.13508, |
| |
| 0, 0, 0, 0.16382, 0, 0.322294, |
| 0, 0.163798, 0, 0.405211, 0.367953, 0.076852, |
| 0.342473, 0.0834118, 0.377537, 0, |
| |
| 0, 0.206, 0, 0, 0, 0.375769, |
| 0, 0, 0, 0, 0, 0.125165, |
| 0, 0.105591, 0.52055, 0.0536445, |
| |
| 0, 0.259261, 0, 0, 0, 0.247707, |
| 0, 0, 0, 0, 0, 0.215862, |
| 0.149153, 0.224678, 0.359519, 0.129419, |
| |
| 0, 0.17611, 0, 0.280895, 0, 0.576484, |
| 0, 0.000418848, 0, 0, 0, 0.151112, |
| 0.211902, 0, 0.566341, 0.106305, |
| |
| 0, 0.0246284, 0, 0, 0, 0.196267, |
| 0, 0.0248624, 0, 0.265635, 0, 0.436199, |
| 0.408079, 0.134514, 0.328489, 0.411368}; |
| |
| class BaseFullyConnectedOpModel : public SingleOpModel { |
| public: |
| // TODO(ahentz): test different activation types too. |
| BaseFullyConnectedOpModel( |
| TfLiteRegistration* registration, int units, int batches, |
| const TensorData& input, const TensorData& output = {TensorType_FLOAT32}, |
| bool keep_num_dims = false, bool bias_tensor_optional = false, |
| ActivationFunctionType activation_func = ActivationFunctionType_RELU, |
| FullyConnectedOptionsWeightsFormat weights_format = |
| FullyConnectedOptionsWeightsFormat_DEFAULT, |
| bool add_bias_for_quantized = true) |
| : batches_(batches), units_(units) { |
| int total_input_size = 1; |
| for (size_t i = 0; i < input.shape.size(); ++i) { |
| total_input_size *= input.shape[i]; |
| } |
| input_size_ = total_input_size / batches_; |
| |
| input_ = AddInput(input); |
| if (input.type == TensorType_INT16) { |
| weights_ = AddInput({TensorType_INT8, {units_, input_size_}, -63.5, 64}); |
| } else { |
| weights_ = |
| AddInput({input.type, {units_, input_size_}, input.min, input.max}); |
| } |
| |
| if (bias_tensor_optional) { |
| bias_ = AddNullInput(); |
| } else if (input.type == TensorType_FLOAT32) { |
| bias_ = AddInput({TensorType_FLOAT32, {units_}}); |
| } else if (add_bias_for_quantized) { |
| // This is a quantized version. The scale of 'bias' depends on the scales |
| // of input and filter. Supposedly this is correctly set during quantized |
| // training. |
| auto bias_scale = GetScale(input_) * GetScale(weights_); |
| if (input.type == TensorType_INT16) { |
| TensorData bias{TensorType_INT64, {units_}, 0, 0, bias_scale}; |
| bias_ = AddInput(bias); |
| } else { |
| TensorData bias{TensorType_INT32, {units_}, 0, 0, bias_scale}; |
| bias_ = AddInput(bias); |
| } |
| } |
| |
| output_ = AddOutput(output); |
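| // When a non-default (shuffled) weights format is used, the op exposes a |
| // second output, used by the kernel as a shuffled-input workspace. |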
| if (weights_format != FullyConnectedOptionsWeightsFormat_DEFAULT) { |
| AddOutput({TensorType_UINT8, input.shape}); |
| } |
| |
| SetBuiltinOp(BuiltinOperator_FULLY_CONNECTED, |
| BuiltinOptions_FullyConnectedOptions, |
| CreateFullyConnectedOptions(builder_, activation_func, |
| weights_format, keep_num_dims) |
| .Union()); |
| resolver_ = absl::make_unique<SingleOpResolver>( |
| BuiltinOperator_FULLY_CONNECTED, registration); |
| std::vector<std::vector<int>> inputs = {GetShape(input_), |
| GetShape(weights_)}; |
| if (add_bias_for_quantized) { |
| inputs.push_back((bias_ == kTfLiteOptionalTensor) ? std::vector<int>() |
| : GetShape(bias_)); |
| } |
| BuildInterpreter(inputs); |
| } |
| |
| int input_size() { return input_size_; } |
| int num_units() { return units_; } |
| int num_batches() { return batches_; } |
| |
| protected: |
| int input_; |
| int weights_; |
| int bias_; |
| int output_; |
| |
| int batches_; |
| int units_; |
| int input_size_; |
| }; |
| |
| class FloatFullyConnectedOpModel : public BaseFullyConnectedOpModel { |
| public: |
| using BaseFullyConnectedOpModel::BaseFullyConnectedOpModel; |
| |
| void SetBias(const std::vector<float>& f) { PopulateTensor(bias_, f); } |
| |
| void SetWeights(const std::vector<float>& f) { PopulateTensor(weights_, f); } |
| |
| void SetInput(const std::vector<float>& data) { |
| PopulateTensor(input_, data); |
| } |
| void SetInput(int offset, float* begin, float* end) { |
| PopulateTensor(input_, offset, begin, end); |
| } |
| |
| std::vector<float> GetOutput() { return ExtractVector<float>(output_); } |
| std::vector<int> GetOutputShape() { return GetTensorShape(output_); } |
| }; |
| |
| class QuantizedFullyConnectedOpModel : public BaseFullyConnectedOpModel { |
| public: |
| using BaseFullyConnectedOpModel::BaseFullyConnectedOpModel; |
| |
| void SetBias(const std::vector<float>& data) { |
| QuantizeAndPopulate<int32_t>(bias_, data); |
| } |
| void SetBias64(const std::vector<float>& data) { |
| QuantizeAndPopulate<int64_t>(bias_, data); |
| } |
| template <typename T> |
| void SetWeights(const std::vector<float>& data) { |
| QuantizeAndPopulate<T>(weights_, data); |
| } |
| |
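| // Rearranges the weights to match the SHUFFLED4x16INT8 weights format: the |
| // matrix is regrouped into 4x16 blocks (4 output rows by 16 input columns), |
| // and each quantized value is XOR'ed with 0x80 so the uint8 data is stored |
| // in the int8-style representation the shuffled kernel expects. |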
| template <typename T> |
| void ShuffleAndSetWeights(const std::vector<float>& data, int input_depth, |
| int output_depth) { |
| std::vector<float> shuffled_data(data.size()); |
| CHECK_EQ(input_depth % 16, 0); |
| CHECK_EQ(output_depth % 4, 0); |
| float* shuffled_data_ptr = shuffled_data.data(); |
| for (int block_o = 0; block_o < output_depth; block_o += 4) { |
| for (int block_i = 0; block_i < input_depth; block_i += 16) { |
| for (int o = 0; o < 4; o++) { |
| for (int i = 0; i < 16; i++) { |
| *shuffled_data_ptr++ = |
| data[(block_o + o) * input_depth + block_i + i]; |
| } |
| } |
| } |
| } |
| TfLiteTensor* t = interpreter_->tensor(weights_); |
| auto quantized_data = |
| Quantize<T>(shuffled_data, t->params.scale, t->params.zero_point); |
| for (T& q : quantized_data) { |
| q ^= 0x80; |
| } |
| PopulateTensor(weights_, 0, quantized_data.data(), |
| quantized_data.data() + quantized_data.size()); |
| } |
| |
| template <typename T> |
| void SetInput(const std::vector<float>& data) { |
| QuantizeAndPopulate<T>(input_, data); |
| } |
| |
| template <typename T> |
| std::vector<T> GetOutput() { |
| return ExtractVector<T>(output_); |
| } |
| |
| template <typename T> |
| std::vector<float> GetDequantizedOutput() { |
| return Dequantize<T>(ExtractVector<T>(output_), GetScale(output_), |
| GetZeroPoint(output_)); |
| } |
| }; |
| |
| // In the hybrid model the weights are quantized (to uint8 or int8), but the |
| // bias, input, and output are expected to be in float precision. |
| class HybridFullyConnectedOpModel : public SingleOpModel { |
| public: |
| HybridFullyConnectedOpModel(int units, int batches, const TensorData& input, |
| const TensorData& weights, |
| const TensorData& output = {TensorType_FLOAT32}, |
| bool asymmetric_inputs = false) |
| : batches_(batches), units_(units) { |
| int total_input_size = 1; |
| for (size_t i = 0; i < input.shape.size(); ++i) { |
| total_input_size *= input.shape[i]; |
| } |
| input_size_ = total_input_size / batches_; |
| |
| input_ = AddInput(input); |
| weights_ = AddInput(weights); |
| |
| TensorData bias{TensorType_FLOAT32, {units_}}; |
| bias_ = AddInput(bias); |
| |
| output_ = AddOutput(output); |
| |
| auto options = CreateFullyConnectedOptions( |
| builder_, ActivationFunctionType_RELU, |
| tflite::FullyConnectedOptionsWeightsFormat_DEFAULT, |
| false, asymmetric_inputs) |
| .Union(); |
| SetBuiltinOp(BuiltinOperator_FULLY_CONNECTED, |
| BuiltinOptions_FullyConnectedOptions, options); |
| resolver_ = absl::make_unique<SingleOpResolver>( |
| BuiltinOperator_FULLY_CONNECTED, |
| ops::builtin::Register_FULLY_CONNECTED_PIE()); |
| BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)}); |
| } |
| void SetBias(const std::vector<float>& f) { PopulateTensor(bias_, f); } |
| void SetWeights(const std::vector<float>& data) { |
| SymmetricQuantizeAndPopulate(weights_, data); |
| } |
| |
| void SetSignedWeights(std::initializer_list<float> f) { |
| SignedSymmetricQuantizeAndPopulate(weights_, f); |
| } |
| |
| void SetInput(const std::vector<float>& f) { PopulateTensor(input_, f); } |
| std::vector<float> GetOutput() { return ExtractVector<float>(output_); } |
| std::vector<int> GetOutputShape() { return GetTensorShape(output_); } |
| |
| int input_size() { return input_size_; } |
| int num_units() { return units_; } |
| int num_batches() { return batches_; } |
| |
| protected: |
| int input_; |
| int weights_; |
| int bias_; |
| int output_; |
| |
| int batches_; |
| int units_; |
| int input_size_; |
| }; |
| |
| const auto kKernelMap = new std::map<string, TfLiteRegistration*>({ |
| {"Reference", ops::builtin::Register_FULLY_CONNECTED_REF()}, |
| {"GenericOptimized", ops::builtin::Register_FULLY_CONNECTED_GENERIC_OPT()}, |
| {"Pie", ops::builtin::Register_FULLY_CONNECTED_PIE()}, |
| }); |
| |
| class FloatFullyConnectedOpTest : public SingleOpTest { |
| protected: |
| const std::map<string, TfLiteRegistration*>& GetKernelMap() override { |
| return *kKernelMap; |
| } |
| }; |
| |
| const auto kKernelMapNoPie = new std::map<string, TfLiteRegistration*>({ |
| {"Reference", ops::builtin::Register_FULLY_CONNECTED_REF()}, |
| {"GenericOptimized", ops::builtin::Register_FULLY_CONNECTED_GENERIC_OPT()}, |
| }); |
| |
| class QuantizedFullyConnectedOpTest : public SingleOpTest { |
| protected: |
| const std::map<string, TfLiteRegistration*>& GetKernelMap() override { |
| return *kKernelMapNoPie; |
| } |
| }; |
| |
| const auto kKernelMapHybrid = new std::map<string, TfLiteRegistration*>({ |
| {"Pie", ops::builtin::Register_FULLY_CONNECTED_PIE()}, |
| // Only Pie supports the hybrid path, so the optimized kernel should fall |
| // back to the Pie path in such cases. |
| {"GenericOptimized", ops::builtin::Register_FULLY_CONNECTED_GENERIC_OPT()}, |
| }); |
| |
| // Hybrid mode is used by the Pie quantized kernel. |
| class HybridFullyConnectedOpTest : public SingleOpTest { |
| protected: |
| const std::map<string, TfLiteRegistration*>& GetKernelMap() override { |
| return *kKernelMapHybrid; |
| } |
| }; |
| |
| // TODO(ahentz): add more small tests like this one, focused on making sure the |
| // calculations are correct. |
| TEST_P(FloatFullyConnectedOpTest, SimpleTest) { |
| FloatFullyConnectedOpModel m(GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 10}}); |
| m.SetWeights({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
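| // Worked arithmetic for the expected values: for batch 0 the dot product |
| // with each (identical) weight row is 1*1 + 2*2 + ... + 8*8 - 9*9 - 10*10 = |
| // 23, so adding the biases {1, 2, 3} gives {24, 25, 26}; for batch 1 the |
| // dot product is 57, giving {58, 59, 60}. |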
| EXPECT_THAT(m.GetOutputShape(), ElementsAre(2, 3)); |
| EXPECT_THAT(m.GetOutput(), ElementsAre(24, 25, 26, 58, 59, 60)); |
| } |
| |
| TEST_P(FloatFullyConnectedOpTest, SimpleTest2) { |
| FloatFullyConnectedOpModel m(GetRegistration(), /*units=*/1, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 2}}); |
| m.SetWeights({ |
| 2, 4, // u = 0 |
| }); |
| m.SetBias({1}); |
| |
| m.SetInput({ |
| 1, 2, // b = 0 |
| 2, 1, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutputShape(), ElementsAre(2, 1)); |
| EXPECT_THAT(m.GetOutput(), ElementsAre(11, 9)); |
| } |
| |
| TEST(FloatFullyConnectedOpTest, SimpleTestNoBias) { |
| // The optimized kernel assumes that the bias is specified, so only the Pie |
| // kernel is exercised here. |
| FloatFullyConnectedOpModel m(ops::builtin::Register_FULLY_CONNECTED_PIE(), |
| /*units=*/1, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 2}}, |
| /*output=*/{TensorType_FLOAT32}, |
| /*keep_num_dims=*/false, |
| /*bias_tensor_optional=*/true); |
| m.SetWeights({ |
| 2, 4, // u = 0 |
| }); |
| |
| m.SetInput({ |
| 1, 2, // b = 0 |
| 2, 1, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutputShape(), ElementsAre(2, 1)); |
| EXPECT_THAT(m.GetOutput(), ElementsAre(10, 8)); |
| } |
| |
| TEST_P(QuantizedFullyConnectedOpTest, SimpleTestQuantizedUint8) { |
| QuantizedFullyConnectedOpModel m( |
| GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_UINT8, {2, 10}, -63.5, 64}, |
| /*output=*/{TensorType_UINT8, {}, -127, 128}); |
| |
| // input_product_scale < output_scale was not true. |
| m.SetWeights<uint8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput<uint8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(), |
| ElementsAreArray(ArrayFloatNear({ |
| 24, 25, 26, // |
| 58, 59, 60, // |
| }))); |
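| // With the output range [-127, 128] the scale is (128 + 127) / 255 = 1.0 |
| // and the zero point is 127, so the float output 24 quantizes to |
| // 24 + 127 = 151 and 60 quantizes to 187, as checked below. |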
| EXPECT_THAT(m.GetOutput<uint8_t>(), |
| ElementsAre(151, 152, 153, 185, 186, 187)); |
| } |
| |
| TEST_P(QuantizedFullyConnectedOpTest, SimpleTestQuantizedUint8NoBias) { |
| QuantizedFullyConnectedOpModel m( |
| GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_UINT8, {2, 10}, -63.5, 64}, |
| /*output=*/{TensorType_UINT8, {}, -127, 128}, |
| /*keep_num_dims=*/false, /*bias_tensor_optional=*/false, |
| /*activation_func=*/ActivationFunctionType_RELU, |
| /*weights_format=*/FullyConnectedOptionsWeightsFormat_DEFAULT, |
| /*add_bias_for_quantized=*/false); |
| |
| // input_product_scale < output_scale was not true. |
| m.SetWeights<uint8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| |
| m.SetInput<uint8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(), |
| ElementsAreArray(ArrayFloatNear({ |
| 23, 23, 23, // |
| 57, 57, 57, // |
| }))); |
| EXPECT_THAT(m.GetOutput<uint8_t>(), |
| ElementsAre(150, 150, 150, 184, 184, 184)); |
| } |
| |
| TEST_P(QuantizedFullyConnectedOpTest, SimpleTestQuantizedInt8) { |
| QuantizedFullyConnectedOpModel m( |
| GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_INT8, {2, 10}, -63.5, 64}, |
| /*output=*/{TensorType_INT8, {}, -127, 128}); |
| |
| // input_product_scale < output_scale was not true. |
| m.SetWeights<int8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput<int8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetDequantizedOutput<int8_t>(), |
| ElementsAreArray(ArrayFloatNear({24, 25, 26, 58, 59, 60}))); |
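| // For the int8 output range [-127, 128] the scale is 1.0 and the zero point |
| // is -1, so the float outputs {24, 25, 26, 58, 59, 60} map to the int8 |
| // values {23, 24, 25, 57, 58, 59} checked below. |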
| EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAre(23, 24, 25, 57, 58, 59)); |
| } |
| |
| TEST_P(QuantizedFullyConnectedOpTest, SimpleTestQuantizedInt16) { |
| const float scale = 128.0 / 65536; |
| QuantizedFullyConnectedOpModel m( |
| GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_INT16, {2, 10}, 0, 0, scale, 0}, |
| /*output=*/{TensorType_INT16, {}, 0, 0, scale, 0}); |
| |
| // input_product_scale < output_scale was not true. |
| m.SetWeights<int8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias64({1, 2, 3}); |
| |
| m.SetInput<int16_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetDequantizedOutput<int16_t>(), |
| ElementsAreArray(ArrayFloatNear({24, 25, 26, 58, 59, 60}))); |
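| // With scale = 128.0 / 65536 and zero point 0, the float output 24 maps to |
| // 24 * 512 = 12288 and 60 maps to 30720, matching the int16 values below. |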
| EXPECT_THAT(m.GetOutput<int16_t>(), |
| ElementsAre(12288, 12800, 13312, 29696, 30208, 30720)); |
| } |
| |
| TEST_P(QuantizedFullyConnectedOpTest, SimpleTestQuantizedInt8NoBias) { |
| QuantizedFullyConnectedOpModel m( |
| GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_INT8, {2, 10}, -63.5, 64}, |
| /*output=*/{TensorType_INT8, {}, -127, 128}, |
| /*keep_num_dims=*/false, /*bias_tensor_optional=*/false, |
| /*activation_func=*/ActivationFunctionType_RELU, |
| /*weights_format=*/FullyConnectedOptionsWeightsFormat_DEFAULT, |
| /*add_bias_for_quantized=*/false); |
| |
| // input_product_scale < output_scale was not true. |
| m.SetWeights<int8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| |
| m.SetInput<int8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetDequantizedOutput<int8_t>(), |
| ElementsAreArray(ArrayFloatNear({23, 23, 23, 57, 57, 57}))); |
| EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAre(22, 22, 22, 56, 56, 56)); |
| } |
| |
| // Test the GEMV path. |
| TEST_P(QuantizedFullyConnectedOpTest, SimpleTestSingleBatchQuantizedInt8) { |
| QuantizedFullyConnectedOpModel m( |
| GetRegistration(), /*units=*/4, /*batches=*/1, |
| /*input=*/{TensorType_INT8, {1, 10}, -63.5, 64}, |
| /*output=*/{TensorType_INT8, {}, -127, 128}); |
| |
| // input_product_scale < output_scale was not true. |
| m.SetWeights<int8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 3 |
| }); |
| m.SetBias({1, 2, 3, 4}); |
| |
| m.SetInput<int8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10 // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetDequantizedOutput<int8_t>(), |
| ElementsAreArray(ArrayFloatNear({58, 59, 60, 61}))); |
| EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAre(57, 58, 59, 60)); |
| } |
| |
| TEST_P(QuantizedFullyConnectedOpTest, |
| SimpleTestQuantizedOutputMultiplierGreaterThan1Uint8) { |
| // real_multiplier = 2. |
| QuantizedFullyConnectedOpModel m( |
| GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_UINT8, {2, 10}, -127, 128}, |
| /*output=*/{TensorType_UINT8, {}, -63.5, 64}); |
| |
| m.SetWeights<uint8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput<uint8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(), |
| ElementsAreArray(ArrayFloatNear({ |
| 24, 25, 26, // first batch |
| 58, 59, 60, // second batch |
| }))); |
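| // Here the output scale is 0.5 with zero point 127, so 24 maps to |
| // 24 / 0.5 + 127 = 175 and 60 maps to 247; the real multiplier is |
| // input_scale * filter_scale / output_scale = 1.0 * 1.0 / 0.5 = 2. |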
| EXPECT_THAT(m.GetOutput<uint8_t>(), |
| ElementsAre(175, 177, 179, 243, 245, 247)); |
| } |
| |
| TEST_P(QuantizedFullyConnectedOpTest, |
| SimpleTestQuantizedOutputMultiplierGreaterThan1Int8) { |
| // real_multiplier = 2. |
| QuantizedFullyConnectedOpModel m( |
| GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_INT8, {2, 10}, -127, 128}, |
| /*output=*/{TensorType_INT8, {}, -63.5, 64}); |
| |
| m.SetWeights<int8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput<int8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetDequantizedOutput<int8_t>(), |
| ElementsAreArray(ArrayFloatNear({ |
| 24, 25, 26, // first batch |
| 58, 59, 60, // second batch |
| }))); |
| EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAre(47, 49, 51, 115, 117, 119)); |
| } |
| |
| void SimpleTestQuantizedInt16OutputCase( |
| TfLiteRegistration* registration, int input_depth, int output_depth, |
| int batches, FullyConnectedOptionsWeightsFormat weights_format) { |
| const uint8_t kWeightsZeroPoint = 128; |
| const float kWeightsScale = 1.f / 128.f; |
| const uint8_t kInputZeroPoint = 128; |
| const float kInputScale = 1.f / 128.f; |
| const float kInputMin = (0 - kInputZeroPoint) * kInputScale; |
| const float kInputMax = (255 - kInputZeroPoint) * kInputScale; |
| // Output ranges in [-8..8] encoded as int16 |
| const float kOutputScale = 8.f / 32768.f; |
| const float kOutputMin = -32768 * kOutputScale; |
| const float kOutputMax = 32767 * kOutputScale; |
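| // With these parameters, kInputMin = -1.0, kInputMax = 127.0 / 128.0, |
| // kOutputMin = -8.0 and kOutputMax = 32767.0 / 4096.0 (just under 8). |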
| |
| QuantizedFullyConnectedOpModel m( |
| registration, output_depth, batches, |
| /*input=*/ |
| {TensorType_UINT8, {batches, input_depth}, kInputMin, kInputMax}, |
| /*output=*/{TensorType_INT16, {}, kOutputMin, kOutputMax}, |
| /*keep_num_dims=*/false, |
| /*bias_tensor_optional=*/false, |
| /*activation_func=*/ActivationFunctionType_NONE, weights_format); |
| |
| std::mt19937 random_engine; |
| // Some compilers don't support uint8_t for uniform_distribution. |
| std::uniform_int_distribution<uint32_t> weights_dist( |
| 0, std::numeric_limits<uint8_t>::max()); |
| |
| std::vector<float> weights_data(input_depth * output_depth); |
| for (auto& w : weights_data) { |
| uint8_t q = static_cast<uint8_t>(weights_dist(random_engine)); |
| w = (q - kWeightsZeroPoint) * kWeightsScale; |
| } |
| |
| // Based on weights_format, enforce any shape requirement for that format/path |
| // and set the (possibly shuffled) weights. |
| switch (weights_format) { |
| case FullyConnectedOptionsWeightsFormat_DEFAULT: |
| m.SetWeights<uint8_t>(weights_data); |
| break; |
| case FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8: |
| // The shuffled path currently supports only a restrictive subset of |
| // shapes, described by the following assertions: |
| CHECK_EQ(input_depth % 16, 0); |
| CHECK_EQ(output_depth % 4, 0); |
| CHECK(batches == 1 || batches == 4); |
| m.ShuffleAndSetWeights<uint8_t>(weights_data, input_depth, output_depth); |
| break; |
| default: |
| LOG(FATAL) << "Unhandled weights format"; |
| } |
| |
| // Some compilers don't support uint8_t for uniform_distribution. |
| std::uniform_int_distribution<uint32_t> input_dist( |
| 0, std::numeric_limits<uint8_t>::max()); |
| std::vector<float> input_data(input_depth * batches); |
| for (auto& i : input_data) { |
| uint8_t q = static_cast<uint8_t>(input_dist(random_engine)); |
| i = (q - kInputZeroPoint) * kInputScale; |
| } |
| |
| std::vector<float> bias_data(output_depth); |
| // Since the output is in the [-8, 8] range, bias values in [-1, 1] are |
| // reasonable; they won't result in too much saturation. |
| std::uniform_real_distribution<float> bias_dist(-1.f, 1.f); |
| for (auto& b : bias_data) { |
| b = bias_dist(random_engine); |
| } |
| |
| m.SetBias(bias_data); |
| m.SetInput<uint8_t>(input_data); |
| |
| m.Invoke(); |
| |
| std::vector<float> expected_output_data(output_depth * batches); |
| for (int b = 0; b < batches; b++) { |
| for (int o = 0; o < output_depth; o++) { |
| float accum = bias_data[o]; |
| for (int i = 0; i < input_depth; i++) { |
| accum += |
| input_data[b * input_depth + i] * weights_data[o * input_depth + i]; |
| } |
| accum = std::min(accum, kOutputMax); |
| accum = std::max(accum, kOutputMin); |
| expected_output_data[b * output_depth + o] = accum; |
| } |
| } |
| |
| EXPECT_THAT(m.GetDequantizedOutput<int16_t>(), |
| ElementsAreArray(ArrayFloatNear(expected_output_data, 3e-4f))); |
| } |
| |
| TEST_P(QuantizedFullyConnectedOpTest, |
| SimpleTestQuantizedInt16OutputDefaultWeights) { |
| for (int input_depth : {1, 3, 10, 100}) { |
| for (int output_depth : {1, 3, 10, 100}) { |
| for (int batch : {1, 3, 10, 100}) { |
| SimpleTestQuantizedInt16OutputCase( |
| GetRegistration(), input_depth, output_depth, batch, |
| FullyConnectedOptionsWeightsFormat_DEFAULT); |
| } |
| } |
| } |
| } |
| |
| TEST_P(QuantizedFullyConnectedOpTest, |
| SimpleTestQuantizedInt16OutputShuffled4x16Int8Weights) { |
| // The shuffled weights block shape is 4x16. The shape of the weights matrix |
| // is: rows = output_depth, cols = input_depth. It must be a multiple of 4x16. |
| // This means that output_depth must be a multiple of 4, and input_depth must |
| // be a multiple of 16. |
| for (int input_depth_numblocks : {1, 3}) { |
| for (int output_depth_numblocks : {1, 3}) { |
| int input_depth = 16 * input_depth_numblocks; |
| int output_depth = 4 * output_depth_numblocks; |
| // The fast shuffled path currently supports only batch sizes of 1 and 4. |
| // The whole point of that path is to go as fast as possible for small |
| // batch sizes, which requires fully specializing it for each batch size; |
| // for larger batch sizes, the generic gemmlowp-based implementation is |
| // fast enough. |
| for (int batch : {1, 4}) { |
| SimpleTestQuantizedInt16OutputCase( |
| GetRegistration(), input_depth, output_depth, batch, |
| FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8); |
| } |
| } |
| } |
| } |
| |
| TEST(HybridFullyConnectedOpTest, SimpleTestQuantizedUint8) { |
| HybridFullyConnectedOpModel m( |
| /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 10}}, |
| /*weights=*/ |
| {TensorType_UINT8, {3, 10}, 0, 0, 10.0 / 127.0, 0}); // Hybrid |
| |
| m.SetWeights({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
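| // The relatively large tolerance below mainly absorbs the error introduced |
| // by quantizing the weights with scale 10.0 / 127. |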
| EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( |
| { |
| 24, 25, 26, // |
| 58, 59, 60, // |
| }, |
| /*max_abs_error=*/1.3f))); |
| } |
| |
| TEST(HybridFullyConnectedOpTest, SimpleTestQuantizedInt8) { |
| HybridFullyConnectedOpModel m( |
| /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 10}}, |
| /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0}); // Hybrid |
| |
| m.SetSignedWeights({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( |
| { |
| 24, 25, 26, // |
| 58, 59, 60, // |
| }, |
| /*max_abs_error=*/1.3f))); |
| } |
| |
| TEST(HybridAsymmetricInputFullyConnectedOpTest, SimpleTestQuantizedUint8) { |
| HybridFullyConnectedOpModel m( |
| /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 10}}, |
| /*weights=*/ |
| {TensorType_UINT8, {3, 10}, 0, 0, 10.0 / 127.0, 0}, |
| /*output=*/{TensorType_FLOAT32}, |
| /*asymmetric_inputs=*/true); // Hybrid asymmetric |
| |
| m.SetWeights({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( |
| { |
| 24, 25, 26, // |
| 58, 59, 60, // |
| }, |
| /*max_abs_error=*/0.64f))); |
| } |
| |
| TEST(HybridAsymmetricInputFullyConnectedOpTest, SimpleTestQuantizedInt8) { |
| HybridFullyConnectedOpModel m( |
| /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 10}}, |
| /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0}, |
| /*output=*/{TensorType_FLOAT32}, |
| /*asymmetric_inputs=*/true); |
| |
| m.SetSignedWeights({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( |
| { |
| 24, 25, 26, // |
| 58, 59, 60, // |
| }, |
| /*max_abs_error=*/1.3f))); |
| } |
| |
| TEST_P(FloatFullyConnectedOpTest, SimpleTest4DInput) { |
| // Note that it is not required that the first dimension be the number of |
| // batches. All that matters is that the input can be evenly divided into |
| // batches. In this case, the number of input elements must be a multiple |
| // of 2. |
| FloatFullyConnectedOpModel m(GetRegistration(), |
| /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {4, 1, 5, 1}}); |
| m.SetWeights({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // first batch |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // second batch |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutputShape(), ElementsAre(2, 3)); |
| EXPECT_THAT(m.GetOutput(), ElementsAreArray({ |
| 24, 25, 26, // first batch |
| 58, 59, 60, // second batch |
| })); |
| } |
| |
| TEST_P(FloatFullyConnectedOpTest, SimpleTest4DInput4DOutput) { |
| // Note that it is not required that the first dimension be the number of |
| // batches. All that matters is that the input can be evenly divided into |
| // batches. In this case, the number of input elements must be a multiple |
| // of 2. |
| FloatFullyConnectedOpModel m(GetRegistration(), |
| /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {1, 2, 1, 10}}, |
| /*output=*/{TensorType_FLOAT32}, |
| /*keep_num_dims=*/true); |
| m.SetWeights({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // first batch |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // second batch |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutputShape(), ElementsAre(1, 2, 1, 3)); |
| EXPECT_THAT(m.GetOutput(), ElementsAreArray({ |
| 24, 25, 26, // first batch |
| 58, 59, 60, // second batch |
| })); |
| } |
| |
| #ifdef GTEST_HAS_DEATH_TEST |
| TEST_P(FloatFullyConnectedOpTest, SimpleTest4DInputInvalidShape) { |
| // Note that it is not required that the first dimension be the number of |
| // batches. But it is required that the last dimension be the 'input_dim'. |
| // |
| // For this particular test, the output would have to be reshaped into a |
| // shape of the form {4, 1, 5, ?}, but since the output size (the product of |
| // the output dimensions, units times batches) is 6, this is not possible. |
| EXPECT_DEATH(FloatFullyConnectedOpModel m( |
| GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {4, 1, 5, 1}}, |
| /*output=*/{TensorType_FLOAT32}, |
| /*keep_num_dims=*/true), |
| "Cannot allocate tensors"); |
| } |
| #endif |
| |
| TEST_P(QuantizedFullyConnectedOpTest, SimpleTest4dInputQuantizedUint8) { |
| QuantizedFullyConnectedOpModel m( |
| GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_UINT8, {4, 1, 5, 1}, -63.5, 64}, |
| /*output=*/{TensorType_UINT8, {}, -127, 128}); |
| |
| // input_product_scale < output_scale was not true. |
| m.SetWeights<uint8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput<uint8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(), |
| ElementsAreArray(ArrayFloatNear({ |
| 24, 25, 26, // |
| 58, 59, 60, // |
| }))); |
| EXPECT_THAT(m.GetOutput<uint8_t>(), |
| ElementsAre(151, 152, 153, 185, 186, 187)); |
| } |
| |
| TEST_P(QuantizedFullyConnectedOpTest, |
| SimpleTest4dInputQuantizedOutputMultiplierGreaterThan1Uint8) { |
| // real_multiplier = 2. |
| QuantizedFullyConnectedOpModel m( |
| GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_UINT8, {4, 1, 5, 1}, -127, 128}, |
| /*output=*/{TensorType_UINT8, {}, -63.5, 64}); |
| |
| m.SetWeights<uint8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput<uint8_t>({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(), |
| ElementsAreArray(ArrayFloatNear({ |
| 24, 25, 26, // first batch |
| 58, 59, 60, // second batch |
| }))); |
| EXPECT_THAT(m.GetOutput<uint8_t>(), |
| ElementsAre(175, 177, 179, 243, 245, 247)); |
| } |
| |
| INSTANTIATE_TEST_SUITE_P( |
| FloatFullyConnectedOpTest, FloatFullyConnectedOpTest, |
| ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap))); |
| |
| INSTANTIATE_TEST_SUITE_P( |
| QuantizedFullyConnectedOpTest, QuantizedFullyConnectedOpTest, |
| ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMapNoPie))); |
| |
| // TODO(ahentz): Reconsider this test. Having arbitrary weights makes it hard |
| // to debug errors and doesn't necessarily test all the important details. |
| TEST_P(FloatFullyConnectedOpTest, BlackBoxTest) { |
| FloatFullyConnectedOpModel m(GetRegistration(), /*units=*/16, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 8}}); |
| m.SetWeights( |
| {0.091327, 0.103366, -0.316505, -0.083120, 0.149366, -0.196636, |
| -0.123672, 0.062800, 0.063031, 0.191670, -0.062001, -0.061504, |
| -0.275581, 0.059388, -0.118497, -0.079224, 0.109758, 0.008307, |
| -0.062657, -0.060962, -0.049782, -0.106719, -0.319482, -0.103650, |
| 0.266455, 0.051517, -0.123448, 0.322464, 0.043282, -0.173782, |
| -0.190381, 0.002013, 0.096086, 0.131157, 0.031164, 0.100638, |
| -0.312191, -0.080923, -0.101318, -0.116614, 0.142238, 0.086540, |
| -0.139154, 0.174268, -0.073161, 0.080072, 0.006874, 0.229382, |
| -0.104321, -0.176035, -0.208587, -0.001019, -0.162032, 0.080824, |
| -0.025021, 0.074460, -0.252595, -0.161750, -0.136403, 0.008308, |
| 0.005710, 0.096600, 0.289839, 0.218816, -0.304651, -0.070958, |
| 0.054598, 0.147113, -0.139112, -0.072798, -0.163335, -0.167863, |
| -0.128762, -0.035780, 0.117262, 0.017177, 0.263335, -0.176612, |
| 0.262961, -0.093654, -0.339283, 0.333071, 0.180827, 0.287583, |
| 0.066350, -0.197947, -0.114449, -0.236035, 0.103532, -0.034284, |
| 0.093299, -0.145361, 0.054001, 0.250570, 0.157010, -0.143480, |
| -0.139061, -0.048873, 0.067557, 0.139038, 0.324106, 0.227041, |
| 0.037793, -0.225747, -0.241619, 0.357835, 0.135762, -0.306764, |
| -0.125982, 0.091916, 0.266587, 0.030135, 0.265148, 0.141627, |
| 0.020120, 0.083815, -0.124556, -0.100124, -0.048159, 0.181172, |
| 0.302309, -0.041084, 0.146334, -0.061511, -0.232605, 0.281324, |
| 0.145408, -0.221897}); |
| m.SetBias({-0.160594, 0.205770, -0.078307, -0.077984, 0.001937, 0.015860, |
| 0.036810, 0.012346, 0.001028, 0.038551, 0.075415, 0.020804, |
| 0.048478, -0.032270, 0.175688, -0.085662}); |
| |
| const int input_sequence_size = sizeof(fully_connected_input) / |
| sizeof(float) / |
| (m.input_size() * m.num_batches()); |
| for (int i = 0; i < input_sequence_size; i++) { |
| // TODO(ahentz): This is what the original test was doing: two equal |
| // batches per invocation. We could instead use two different batches. |
| float* batch_start = fully_connected_input + i * m.input_size(); |
| float* batch_end = batch_start + m.input_size(); |
| m.SetInput(0, batch_start, batch_end); |
| m.SetInput(m.input_size(), batch_start, batch_end); |
| |
| m.Invoke(); |
| |
| float* golden_start = fully_connected_golden_output + i * m.num_units(); |
| float* golden_end = golden_start + m.num_units(); |
| std::vector<float> expected; |
| expected.insert(expected.end(), golden_start, golden_end); |
| expected.insert(expected.end(), golden_start, golden_end); |
| |
| EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear(expected))); |
| } |
| } |
| |
| template <typename T> |
| class SparseFullyConnectedOpModel : public SingleOpModel { |
| public: |
| SparseFullyConnectedOpModel(TfLiteRegistration* registration, int units, |
| int batches, const TensorData& input, |
| const TensorData& weights, |
| const std::vector<T>& weights_data, |
| int num_threads = 1) |
| : batches_(batches), units_(units) { |
| int total_input_size = 1; |
| for (size_t i = 0; i < input.shape.size(); ++i) { |
| total_input_size *= input.shape[i]; |
| } |
| input_size_ = total_input_size / batches_; |
| |
| input_ = AddInput(input); |
| weights_ = AddConstSparseInput(weights, weights_data); |
| |
| TensorData bias{input.type, {units_}}; |
| bias_ = AddInput(bias); |
| |
| output_ = AddOutput({input.type}); |
| |
| SetBuiltinOp( |
| BuiltinOperator_FULLY_CONNECTED, BuiltinOptions_FullyConnectedOptions, |
| CreateFullyConnectedOptions(builder_, ActivationFunctionType_RELU) |
| .Union()); |
| resolver_ = absl::make_unique<SingleOpResolver>( |
| BuiltinOperator_FULLY_CONNECTED, registration); |
| BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)}, |
| num_threads, /*allow_fp32_relax_to_fp16=*/false, |
| /*apply_delegate=*/false); |
| } |
| void SetBias(const std::vector<T>& data) { PopulateTensor(bias_, data); } |
| void SetInput(const std::vector<T>& data) { PopulateTensor(input_, data); } |
| std::vector<T> GetOutput() { return ExtractVector<T>(output_); } |
| std::vector<int> GetOutputShape() { return GetTensorShape(output_); } |
| |
| int input_size() { return input_size_; } |
| int num_units() { return units_; } |
| int num_batches() { return batches_; } |
| |
| protected: |
| int input_; |
| int weights_; |
| int bias_; |
| int output_; |
| |
| int batches_; |
| int units_; |
| int input_size_; |
| }; |
| |
| class SparseFullyConnectedOpTest : public SingleOpTest { |
| protected: |
| const std::map<string, TfLiteRegistration*>& GetKernelMap() override { |
| return *kKernelMapNoPie; |
| } |
| }; |
| |
| TEST_P(SparseFullyConnectedOpTest, SimpleTest) { |
| std::initializer_list<float> weight_data = { |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 |
| }; |
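| // Sparsity descriptor: the row dimension stays dense and the column |
| // dimension is stored in CSR form, traversed in {row, column} order. |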
| TensorData weight = {}; |
| weight.type = TensorType_FLOAT32; |
| weight.shape = {3, 10}; |
| weight.traversal_order = {0, 1}; |
| weight.format = {kTfLiteDimDense, kTfLiteDimSparseCSR}; |
| SparseFullyConnectedOpModel<float> m( |
| GetRegistration(), /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 10}}, weight, weight_data); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutputShape(), ElementsAre(2, 3)); |
| EXPECT_THAT(m.GetOutput(), ElementsAre(24, 25, 26, 58, 59, 60)); |
| } |
| |
| TEST_P(SparseFullyConnectedOpTest, SimpleTest2) { |
| std::initializer_list<float> weight_data = { |
| 2, 4 // u = 0 |
| }; |
| TensorData weight = {}; |
| weight.type = TensorType_FLOAT32; |
| weight.shape = {1, 2}; |
| weight.traversal_order = {0, 1}; |
| weight.format = {kTfLiteDimDense, kTfLiteDimSparseCSR}; |
| SparseFullyConnectedOpModel<float> m( |
| GetRegistration(), /*units=*/1, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 2}}, weight, weight_data); |
| m.SetBias({1}); |
| |
| m.SetInput({ |
| 1, 2, // b = 0 |
| 2, 1 // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutputShape(), ElementsAre(2, 1)); |
| EXPECT_THAT(m.GetOutput(), ElementsAre(11, 9)); |
| } |
| |
| TEST_P(SparseFullyConnectedOpTest, Simple1x4Test) { |
| std::initializer_list<float> weight_data = { |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, // u = 2 |
| }; |
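| // Block-sparse descriptor: 1x4 blocks along the input dimension |
| // (block_map = {1}, block_size = {4}), which adds a third traversal |
| // dimension for the elements within a block. |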
| TensorData weight = {}; |
| weight.type = TensorType_FLOAT32; |
| weight.shape = {3, 12}; |
| weight.traversal_order = {0, 1, 2}; |
| weight.format = {kTfLiteDimDense, kTfLiteDimSparseCSR}; |
| weight.block_map = {1}; |
| weight.block_size = {4}; |
| SparseFullyConnectedOpModel<float> m(GetRegistration(), |
| /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 12}}, |
| weight, weight_data); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, 11, 12, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, -11, 12, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutputShape(), ElementsAre(2, 3)); |
| EXPECT_THAT(m.GetOutput(), ElementsAre(289, 290, 291, 81, 82, 83)); |
| } |
| |
| TEST_P(SparseFullyConnectedOpTest, Simple1x4TestMultiThreaded) { |
| std::initializer_list<float> weight_data = { |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, // u = 2 |
| }; |
| TensorData weight = {}; |
| weight.type = TensorType_FLOAT32; |
| weight.shape = {3, 12}; |
| weight.traversal_order = {0, 1, 2}; |
| weight.format = {kTfLiteDimDense, kTfLiteDimSparseCSR}; |
| weight.block_map = {1}; |
| weight.block_size = {4}; |
| for (int num_threads = 1; num_threads <= 4; num_threads++) { |
| SparseFullyConnectedOpModel<float> m( |
| GetRegistration(), |
| /*units=*/3, /*batches=*/2, |
| /*input=*/{TensorType_FLOAT32, {2, 12}}, weight, weight_data, |
| num_threads); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, 11, 12, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, -11, 12, // b = 1 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutputShape(), ElementsAre(2, 3)); |
| EXPECT_THAT(m.GetOutput(), ElementsAre(289, 290, 291, 81, 82, 83)); |
| } |
| } |
| |
| TEST_P(SparseFullyConnectedOpTest, Simple1x4TestMultiThreadedMoreBatches) { |
| std::initializer_list<float> weight_data = { |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, // u = 0 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, // u = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, // u = 2 |
| }; |
| TensorData weight = {}; |
| weight.type = TensorType_FLOAT32; |
| weight.shape = {3, 12}; |
| weight.traversal_order = {0, 1, 2}; |
| weight.format = {kTfLiteDimDense, kTfLiteDimSparseCSR}; |
| weight.block_map = {1}; |
| weight.block_size = {4}; |
| for (int num_threads = 1; num_threads <= 4; num_threads++) { |
| SparseFullyConnectedOpModel<float> m( |
| GetRegistration(), |
| /*units=*/3, /*batches=*/6, |
| /*input=*/{TensorType_FLOAT32, {6, 12}}, weight, weight_data, |
| num_threads); |
| m.SetBias({1, 2, 3}); |
| |
| m.SetInput({ |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, 11, 12, // b = 0 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, -11, 12, // b = 1 |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, 11, 12, // b = 2 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, -11, 12, // b = 3 |
| 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, 11, 12, // b = 4 |
| 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, -11, 12, // b = 5 |
| }); |
| |
| m.Invoke(); |
| |
| EXPECT_THAT(m.GetOutputShape(), ElementsAre(6, 3)); |
| EXPECT_THAT(m.GetOutput(), ElementsAre(289, 290, 291, // b = 0 |
| 81, 82, 83, // b = 1 |
| 289, 290, 291, // b = 2 |
| 81, 82, 83, // b = 3 |
| 289, 290, 291, // b = 4 |
| 81, 82, 83 // b = 5 |
| )); |
| } |
| } |
| // TODO(b/148391360): Add tests for unsupported sparsity format. |
| // TEST_P(SparseFullyConnectedOpTest, TestUnsupportedSparsityFormat) |
| |
| INSTANTIATE_TEST_SUITE_P( |
| SparseFullyConnectedOpTest, SparseFullyConnectedOpTest, |
| ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMapNoPie))); |
| |
| } // namespace |
| } // namespace tflite |