// internal/compute.h - external/github.com/google/gemmlowp - Git at Google

 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // compute.h: the central stage of the Gemm computation, operates
 // on already-packed LHS and RHS blocks and calls the Gemm kernel
 // to compute a block of the product.

 #ifndef GEMMLOWP_INTERNAL_COMPUTE_H_
 #define GEMMLOWP_INTERNAL_COMPUTE_H_

 #include "block_params.h"
 #include "kernel.h"
 #include "pack.h"

 namespace gemmlowp {

 template <typename PackedLhs, typename PackedRhs, typename PackedResult>
 class ComputeImpl {
   typedef typename PackedLhs::KernelSideFormat KernelLhsFormat;
   typedef typename PackedRhs::KernelSideFormat KernelRhsFormat;
   typedef KernelFormat<KernelLhsFormat, KernelRhsFormat> Format;

   const KernelBase& kernel_;
   const BlockParams& block_params_;

   PackedResult* const packed_result_;
   const PackedLhs& packed_lhs_;
   const PackedRhs& packed_rhs_;

  public:
   ComputeImpl(const KernelBase& _kernel, const BlockParams& _block_params,
               PackedResult* _packed_result, const PackedLhs& _packed_lhs,
               const PackedRhs& _packed_rhs)
       : kernel_(_kernel),
         block_params_(_block_params),
         packed_result_(_packed_result),
         packed_lhs_(_packed_lhs),
         packed_rhs_(_packed_rhs) {}

   void Compute(int depth) {
     depth = RoundUp<Format::kDepth>(depth);
     assert(depth <= block_params_.l2_depth);
     for (int d = 0; d < depth; d += block_params_.l1_depth) {
       int ds = std::min(block_params_.l1_depth, depth - d);

       for (int r = 0; r < block_params_.l2_rows; r += block_params_.l1_rows) {
         int rs = std::min(block_params_.l1_rows, block_params_.l2_rows - r);

         ComputeL1(r, rs, 0, block_params_.l2_cols, d, ds);
       }
     }
   }

  private:
   static void MarkPackedResultBlockAsInitialized(
       const MatrixMap<std::int32_t, MapOrder::ColMajor>& packed_result_block) {
 #ifdef GEMMLOWP_MARK_MEMORY_AS_INITIALIZED
     for (int col = 0; col < packed_result_block.cols(); col++) {
       MarkMemoryAsInitialized(
           packed_result_block.data() + col * packed_result_block.cols_stride(),
           packed_result_block.rows());
     }
 #else
     (void)packed_result_block;
 #endif
   }

   void ComputeRun(int start_row, int start_col, int start_depth,
                   int depth) GEMMLOWP_NOINLINE {
     packed_lhs_.seek_run(start_row, start_depth);
     packed_rhs_.seek_run(start_col, start_depth);
     auto packed_result_block = packed_result_->Map().block(
         start_row, start_col, Format::kRows, Format::kCols);
     kernel_.Run(packed_result_block.data(), packed_result_block.rows_stride(),
                 packed_result_block.cols_stride(), packed_lhs_.current_data(),
                 packed_rhs_.current_data(), start_depth, depth);
     MarkPackedResultBlockAsInitialized(packed_result_block);
   }

   void ComputeL1(int start_row, int rows, int start_col, int cols,
                  int start_depth, int depth) {
     assert(rows % Format::kRows == 0);
     assert(cols % Format::kCols == 0);
     assert(depth % Format::kDepth == 0);

     for (int c = 0; c < cols; c += Format::kCols) {
       for (int r = 0; r < rows; r += Format::kRows) {
         ComputeRun(start_row + r, start_col + c, start_depth, depth);
       }
     }
   }
 };

 template <typename PackedLhs, typename PackedRhs, typename PackedResult>
 void Compute(const KernelBase& kernel, const BlockParams& block_params,
              PackedResult* packed_result, const PackedLhs& packed_lhs,
              const PackedRhs& packed_rhs, int depth) {
   ScopedProfilingLabel label("compute");
   ComputeImpl<PackedLhs, PackedRhs, PackedResult> impl(
       kernel, block_params, packed_result, packed_lhs, packed_rhs);

   impl.Compute(depth);
 }

 }  // namespace gemmlowp

 #endif  // GEMMLOWP_INTERNAL_COMPUTE_H_
	// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	// compute.h: the central stage of the Gemm computation, operates
	// on already-packed LHS and RHS blocks and calls the Gemm kernel
	// to compute a block of the product.

	#ifndef GEMMLOWP_INTERNAL_COMPUTE_H_
	#define GEMMLOWP_INTERNAL_COMPUTE_H_

	#include "block_params.h"
	#include "kernel.h"
	#include "pack.h"

	namespace gemmlowp {

	template <typename PackedLhs, typename PackedRhs, typename PackedResult>
	class ComputeImpl {
	typedef typename PackedLhs::KernelSideFormat KernelLhsFormat;
	typedef typename PackedRhs::KernelSideFormat KernelRhsFormat;
	typedef KernelFormat<KernelLhsFormat, KernelRhsFormat> Format;

	const KernelBase& kernel_;
	const BlockParams& block_params_;

	PackedResult* const packed_result_;
	const PackedLhs& packed_lhs_;
	const PackedRhs& packed_rhs_;

	public:
	ComputeImpl(const KernelBase& _kernel, const BlockParams& _block_params,
	PackedResult* _packed_result, const PackedLhs& _packed_lhs,
	const PackedRhs& _packed_rhs)
	: kernel_(_kernel),
	block_params_(_block_params),
	packed_result_(_packed_result),
	packed_lhs_(_packed_lhs),
	packed_rhs_(_packed_rhs) {}

	void Compute(int depth) {
	depth = RoundUp<Format::kDepth>(depth);
	assert(depth <= block_params_.l2_depth);
	for (int d = 0; d < depth; d += block_params_.l1_depth) {
	int ds = std::min(block_params_.l1_depth, depth - d);

	for (int r = 0; r < block_params_.l2_rows; r += block_params_.l1_rows) {
	int rs = std::min(block_params_.l1_rows, block_params_.l2_rows - r);

	ComputeL1(r, rs, 0, block_params_.l2_cols, d, ds);
	}
	}
	}

	private:
	static void MarkPackedResultBlockAsInitialized(
	const MatrixMap<std::int32_t, MapOrder::ColMajor>& packed_result_block) {
	#ifdef GEMMLOWP_MARK_MEMORY_AS_INITIALIZED
	for (int col = 0; col < packed_result_block.cols(); col++) {
	MarkMemoryAsInitialized(
	packed_result_block.data() + col * packed_result_block.cols_stride(),
	packed_result_block.rows());
	}
	#else
	(void)packed_result_block;
	#endif
	}

	void ComputeRun(int start_row, int start_col, int start_depth,
	int depth) GEMMLOWP_NOINLINE {
	packed_lhs_.seek_run(start_row, start_depth);
	packed_rhs_.seek_run(start_col, start_depth);
	auto packed_result_block = packed_result_->Map().block(
	start_row, start_col, Format::kRows, Format::kCols);
	kernel_.Run(packed_result_block.data(), packed_result_block.rows_stride(),
	packed_result_block.cols_stride(), packed_lhs_.current_data(),
	packed_rhs_.current_data(), start_depth, depth);
	MarkPackedResultBlockAsInitialized(packed_result_block);
	}

	void ComputeL1(int start_row, int rows, int start_col, int cols,
	int start_depth, int depth) {
	assert(rows % Format::kRows == 0);
	assert(cols % Format::kCols == 0);
	assert(depth % Format::kDepth == 0);

	for (int c = 0; c < cols; c += Format::kCols) {
	for (int r = 0; r < rows; r += Format::kRows) {
	ComputeRun(start_row + r, start_col + c, start_depth, depth);
	}
	}
	}
	};

	template <typename PackedLhs, typename PackedRhs, typename PackedResult>
	void Compute(const KernelBase& kernel, const BlockParams& block_params,
	PackedResult* packed_result, const PackedLhs& packed_lhs,
	const PackedRhs& packed_rhs, int depth) {
	ScopedProfilingLabel label("compute");
	ComputeImpl<PackedLhs, PackedRhs, PackedResult> impl(
	kernel, block_params, packed_result, packed_lhs, packed_rhs);

	impl.Compute(depth);
	}

	} // namespace gemmlowp

	#endif // GEMMLOWP_INTERNAL_COMPUTE_H_