Stop fusing clamps with binary operators
We are going to split these later anyway.
PiperOrigin-RevId: 677790032
diff --git a/src/subgraph.c b/src/subgraph.c
index 625c0de..54c7452 100644
--- a/src/subgraph.c
+++ b/src/subgraph.c
@@ -1153,17 +1153,13 @@
// Try to fuse Clamp Node upstream into producer Node
if (consumer->type == xnn_node_type_clamp) {
switch (producer->type) {
- case xnn_node_type_add2:
case xnn_node_type_average_pooling_2d:
case xnn_node_type_clamp:
case xnn_node_type_convolution_2d:
- case xnn_node_type_divide:
case xnn_node_type_deconvolution_2d:
case xnn_node_type_depthwise_convolution_2d:
case xnn_node_type_fully_connected:
- case xnn_node_type_multiply2:
case xnn_node_type_max_pooling_2d:
- case xnn_node_type_subtract:
xnn_log_info("fuse Clamp Node #%"PRIu32" into upstream Node #%"PRIu32, consumer_id, producer_id);
assert(producer->num_outputs == 1);
assert(consumer->num_inputs == 1);
diff --git a/test/fusion.cc b/test/fusion.cc
index 5d717d1..bccf3f5 100644
--- a/test/fusion.cc
+++ b/test/fusion.cc
@@ -17,36 +17,6 @@
namespace xnnpack {
-TEST(ADD_THEN_CLAMP, fusion) {
- RuntimeTester tester(4);
- float output_min = -0.5f;
- float output_max = 0.5f;
- uint32_t input1_id = 0;
- uint32_t input2_id = 1;
- uint32_t intermediate_id = 2;
- uint32_t output_id = 3;
- tester
- .AddInputTensorF32({1, 2, 2, 3}, input1_id)
- .AddInputTensorF32({1, 2, 2, 3}, input2_id)
- .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
- .AddOutputTensorF32({1, 2, 2, 3}, output_id)
- .AddAddition(input1_id, input2_id, intermediate_id)
- .AddClamp(output_min, output_max, intermediate_id, output_id);
-
- std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
- ASSERT_EQ(tester.NumOperators(), 2);
-
- std::vector<float> optimized_output = tester.RunWithFusion<float>();
-
- ASSERT_EQ(tester.NumOperators(), 1);
- ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
- ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
- ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
- ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
-
- ASSERT_EQ(unoptimized_output, optimized_output);
-}
-
TEST(AVERAGE_POOLING_2D_THEN_CLAMP, fusion) {
RuntimeTester tester(3);
float output_min = -0.5f;
@@ -148,36 +118,6 @@
ASSERT_EQ(unoptimized_output, optimized_output);
}
-TEST(DIVIDE_THEN_CLAMP, fusion) {
- RuntimeTester tester(4);
- float output_min = -0.5f;
- float output_max = 0.5f;
- uint32_t input1_id = 0;
- uint32_t input2_id = 1;
- uint32_t intermediate_id = 2;
- uint32_t output_id = 3;
- tester
- .AddInputTensorF32({1, 2, 2, 3}, input1_id)
- .AddInputTensorF32({1, 2, 2, 3}, input2_id)
- .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
- .AddOutputTensorF32({1, 2, 2, 3}, output_id)
- .AddDivide(input1_id, input2_id, intermediate_id)
- .AddClamp(output_min, output_max, intermediate_id, output_id);
-
- std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
- ASSERT_EQ(tester.NumOperators(), 2);
-
- std::vector<float> optimized_output = tester.RunWithFusion<float>();
-
- ASSERT_EQ(tester.NumOperators(), 1);
- ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
- ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
- ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
- ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
-
- ASSERT_EQ(unoptimized_output, optimized_output);
-}
-
TEST(DECONVOLUTION_2D_THEN_CLAMP, fusion) {
RuntimeTester tester(5);
float output_min = -0.5f;
@@ -328,36 +268,6 @@
ASSERT_EQ(unoptimized_output, optimized_output);
}
-TEST(MULTIPLY_THEN_CLAMP, fusion) {
- RuntimeTester tester(4);
- float output_min = -0.5f;
- float output_max = 0.5f;
- uint32_t input1_id = 0;
- uint32_t input2_id = 1;
- uint32_t intermediate_id = 2;
- uint32_t output_id = 3;
- tester
- .AddInputTensorF32({1, 2, 2, 3}, input1_id)
- .AddInputTensorF32({1, 2, 2, 3}, input2_id)
- .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
- .AddOutputTensorF32({1, 2, 2, 3}, output_id)
- .AddMultiply(input1_id, input2_id, intermediate_id)
- .AddClamp(output_min, output_max, intermediate_id, output_id);
-
- std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
- ASSERT_EQ(tester.NumOperators(), 2);
-
- std::vector<float> optimized_output = tester.RunWithFusion<float>();
-
- ASSERT_EQ(tester.NumOperators(), 1);
- ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
- ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
- ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
- ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
-
- ASSERT_EQ(unoptimized_output, optimized_output);
-}
-
TEST(MAX_POOLING_THEN_CLAMP, fusion) {
RuntimeTester tester(3);
float output_min = -0.5f;
@@ -386,36 +296,6 @@
ASSERT_EQ(unoptimized_output, optimized_output);
}
-TEST(SUBTRACT_THEN_CLAMP, fusion) {
- RuntimeTester tester(4);
- float output_min = -0.5f;
- float output_max = 0.5f;
- uint32_t input1_id = 0;
- uint32_t input2_id = 1;
- uint32_t intermediate_id = 2;
- uint32_t output_id = 3;
- tester
- .AddInputTensorF32({1, 2, 2, 3}, input1_id)
- .AddInputTensorF32({1, 2, 2, 3}, input2_id)
- .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
- .AddOutputTensorF32({1, 2, 2, 3}, output_id)
- .AddSubtract(input1_id, input2_id, intermediate_id)
- .AddClamp(output_min, output_max, intermediate_id, output_id);
-
- std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
- ASSERT_EQ(tester.NumOperators(), 2);
-
- std::vector<float> optimized_output = tester.RunWithFusion<float>();
-
- ASSERT_EQ(tester.NumOperators(), 1);
- ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
- ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
- ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
- ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
-
- ASSERT_EQ(unoptimized_output, optimized_output);
-}
-
TEST(CONSTANT_PAD_THEN_CONVOLUTION, fusion) {
RuntimeTester tester(5);
uint32_t input_id = 0;