tree: 9a1fc294d07f82a44553c5dc2abbbabb1a350f87 [path history] [tgz]
  1. data/
  2. llvm_gpu_backend/
  3. runtime/
  4. tests/
  5. alias_passthrough_params.cc
  6. alias_passthrough_params.h
  7. alias_passthrough_params_test.cc
  8. all_reduce_blueconnect.cc
  9. all_reduce_blueconnect.h
  10. all_reduce_blueconnect_test.cc
  11. amdgpu_compiler.cc
  12. amdgpu_compiler.h
  13. amdgpu_compiler_registration.cc
  14. backend_configs.proto
  15. buffer_allocations.cc
  16. buffer_allocations.h
  17. buffer_comparator.cc
  18. buffer_comparator.h
  19. buffer_comparator_test.cc
  20. BUILD
  21. cholesky_thunk.cc
  22. cholesky_thunk.h
  23. conditional_thunk.cc
  24. conditional_thunk.h
  25. conv_layout_normalization.cc
  26. conv_layout_normalization.h
  27. conv_layout_normalization_test.cc
  28. convolution_thunk.cc
  29. convolution_thunk.h
  30. copy_thunk.cc
  31. copy_thunk.h
  32. cublas_cudnn.cc
  33. cublas_cudnn.h
  34. cublas_lt_matmul_thunk.cc
  35. cublas_lt_matmul_thunk.h
  36. cublas_pad_for_gemms.cc
  37. cublas_pad_for_gemms.h
  38. cublas_pad_for_gemms_test.cc
  39. cudnn_fused_conv_rewriter.cc
  40. cudnn_fused_conv_rewriter.h
  41. cudnn_fused_conv_rewriter_test.cc
  42. cudnn_pad_for_convolutions.cc
  43. cudnn_pad_for_convolutions.h
  44. cudnn_pad_for_convolutions_test.cc
  45. cudnn_simplify_padding.cc
  46. cudnn_simplify_padding.h
  47. cudnn_simplify_padding_test.cc
  48. cudnn_support_utils.cc
  49. cudnn_support_utils.h
  50. cudnn_support_utils_test.cc
  51. cudnn_vectorize_convolutions.cc
  52. cudnn_vectorize_convolutions.h
  53. cudnn_vectorize_convolutions_test.cc
  54. cusolver_context.cc
  55. cusolver_context.h
  56. cusolver_rewriter.cc
  57. cusolver_rewriter.h
  58. custom_call_test.cc
  59. custom_call_thunk.cc
  60. custom_call_thunk.h
  61. elemental_ir_emitter.cc
  62. elemental_ir_emitter.h
  63. executable.proto
  64. fft_thunk.cc
  65. fft_thunk.h
  66. for_thunk.cc
  67. for_thunk.h
  68. fusion_merger.cc
  69. fusion_merger.h
  70. fusion_merger_test.cc
  71. gemm_algorithm_picker.cc
  72. gemm_algorithm_picker.h
  73. gemm_broadcast_folding_rewriter.cc
  74. gemm_broadcast_folding_rewriter.h
  75. gemm_rewriter.cc
  76. gemm_rewriter.h
  77. gemm_thunk.cc
  78. gemm_thunk.h
  79. gpu_aot_compilation_test.cc
  80. gpu_asm_opts_util.cc
  81. gpu_asm_opts_util.h
  82. gpu_autotuning.proto
  83. gpu_compiler.cc
  84. gpu_compiler.h
  85. gpu_compiler_test.cc
  86. gpu_constants.h
  87. gpu_conv_algorithm_picker.cc
  88. gpu_conv_algorithm_picker.h
  89. gpu_conv_padding_legalization.cc
  90. gpu_conv_padding_legalization.h
  91. gpu_conv_padding_legalization_test.cc
  92. gpu_conv_rewriter.cc
  93. gpu_conv_rewriter.h
  94. gpu_conv_rewriter_test.cc
  95. gpu_conv_runner.cc
  96. gpu_conv_runner.h
  97. gpu_device_info.cc
  98. gpu_device_info.h
  99. gpu_executable.cc
  100. gpu_executable.h
  101. gpu_executable_run_options.cc
  102. gpu_executable_run_options.h
  103. gpu_fusible.cc
  104. gpu_fusible.h
  105. gpu_fusible_test.cc
  106. gpu_hlo_cost_analysis.cc
  107. gpu_hlo_cost_analysis.h
  108. gpu_hlo_cost_analysis_test.cc
  109. gpu_hlo_schedule.cc
  110. gpu_hlo_schedule.h
  111. gpu_hlo_schedule_test.cc
  112. gpu_layout_assignment.cc
  113. gpu_layout_assignment.h
  114. gpu_layout_assignment_test.cc
  115. gpu_performance_model.cc
  116. gpu_performance_model.h
  117. gpu_performance_model_test.cc
  118. gpu_reduce_scatter_creator.cc
  119. gpu_reduce_scatter_creator.h
  120. gpu_sanitize_constant_names.cc
  121. gpu_sanitize_constant_names.h
  122. gpu_sanitize_constant_names_test.cc
  123. gpu_scatter_expander.cc
  124. gpu_scatter_expander.h
  125. gpu_shape_verifier.cc
  126. gpu_shape_verifier.h
  127. gpu_transfer_manager.cc
  128. gpu_transfer_manager.h
  129. gpu_types.h
  130. hlo_algorithm_denylist.cc
  131. hlo_algorithm_denylist.h
  132. hlo_algorithm_denylist_test.cc
  133. hlo_fusion_stats.cc
  134. hlo_fusion_stats.h
  135. hlo_fusion_stats_test.cc
  136. hlo_to_ir_bindings.cc
  137. hlo_to_ir_bindings.h
  138. horizontal_input_fusion.cc
  139. horizontal_input_fusion.h
  140. horizontal_input_fusion_test.cc
  141. horizontal_loop_fusion.cc
  142. horizontal_loop_fusion.h
  143. horizontal_loop_fusion_test.cc
  144. infeed_manager.cc
  145. infeed_manager.h
  146. infeed_thunk.cc
  147. infeed_thunk.h
  148. instruction_fusion.cc
  149. instruction_fusion.h
  150. instruction_fusion_test.cc
  151. ir_emission_utils.cc
  152. ir_emission_utils.h
  153. ir_emission_utils_test.cc
  154. ir_emitter.cc
  155. ir_emitter.h
  156. ir_emitter_context.cc
  157. ir_emitter_context.h
  158. ir_emitter_nested.cc
  159. ir_emitter_nested.h
  160. ir_emitter_unnested.cc
  161. ir_emitter_unnested.h
  162. jitrt_custom_calls.cc
  163. jitrt_custom_calls.h
  164. kernel_mapping_scheme.h
  165. kernel_thunk.cc
  166. kernel_thunk.h
  167. launch_dimensions.cc
  168. launch_dimensions.h
  169. matmul_utils.cc
  170. matmul_utils.h
  171. matmul_utils_test.cc
  172. memset_thunk.cc
  173. memset_thunk.h
  174. metrics.cc
  175. metrics.h
  176. move_copy_to_users.cc
  177. move_copy_to_users.h
  178. move_copy_to_users_test.cc
  179. multi_output_fusion.cc
  180. multi_output_fusion.h
  181. multi_output_fusion_test.cc
  182. nccl_all_gather_thunk.cc
  183. nccl_all_gather_thunk.h
  184. nccl_all_reduce_thunk.cc
  185. nccl_all_reduce_thunk.h
  186. nccl_all_to_all_thunk.cc
  187. nccl_all_to_all_thunk.h
  188. nccl_collective_permute_thunk.cc
  189. nccl_collective_permute_thunk.h
  190. nccl_collective_thunk.cc
  191. nccl_collective_thunk.h
  192. nccl_utils.cc
  193. nccl_utils.h
  194. nvptx_compiler.cc
  195. nvptx_compiler.h
  196. nvptx_compiler_registration.cc
  197. nvptx_compiler_test.cc
  198. nvptx_helper.cc
  199. nvptx_helper.h
  200. outfeed_manager.cc
  201. outfeed_manager.h
  202. outfeed_thunk.cc
  203. outfeed_thunk.h
  204. parallel_loop_emitter.cc
  205. parallel_loop_emitter.h
  206. precompiled_kernels.cc
  207. precompiled_kernels.h
  208. reduction_degenerate_dim_remover.cc
  209. reduction_degenerate_dim_remover.h
  210. reduction_dimension_grouper.cc
  211. reduction_dimension_grouper.h
  212. reduction_layout_normalizer.cc
  213. reduction_layout_normalizer.h
  214. reduction_splitter.cc
  215. reduction_splitter.h
  216. reduction_splitter_test.cc
  217. replica_id_thunk.cc
  218. replica_id_thunk.h
  219. runtime_intrinsics.cc
  220. runtime_intrinsics.h
  221. scatter_slice_simplifier.cc
  222. scatter_slice_simplifier.h
  223. scatter_slice_simplifier_test.cc
  224. sequential_thunk.cc
  225. sequential_thunk.h
  226. softmax_fusion.cc
  227. softmax_fusion.h
  228. softmax_fusion_test.cc
  229. stream_executor_util.cc
  230. stream_executor_util.h
  231. target_constants.h
  232. target_util.cc
  233. target_util.h
  234. target_util_test.cc
  235. thunk.cc
  236. thunk.h
  237. tree_reduction_rewriter.cc
  238. tree_reduction_rewriter.h
  239. triangular_solve_rewriter.cc
  240. triangular_solve_rewriter.h
  241. triangular_solve_thunk.cc
  242. triangular_solve_thunk.h
  243. variadic_op_splitter.cc
  244. variadic_op_splitter.h
  245. variadic_op_splitter_test.cc
  246. while_thunk.cc
  247. while_thunk.h
  248. while_transformer_test.cc
  249. xfeed_queue.h
  250. xla_executor_state.h