Add support for cl_arm_non_uniform_work_group_size (#720)

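Support non-uniform work-groups in OpenCL 1.x as defined in the
cl_arm_non_uniform_work_group_size extension specification.

The extension is disabled by default; pass
-cl-arm-non-uniform-work-group-size to clspv to enable it.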

Signed-off-by: Marco Antognini <marco.antognini@arm.com>
diff --git a/include/clspv/Option.h b/include/clspv/Option.h
index 3b08149..87d9f7d 100644
--- a/include/clspv/Option.h
+++ b/include/clspv/Option.h
@@ -173,11 +173,7 @@
 bool GlobalOffsetPushConstant();
 
 // Returns true when support for non uniform NDRanges is enabled.
-inline bool NonUniformNDRangeSupported() {
-  return (Language() == SourceLanguage::OpenCL_CPP) ||
-         (Language() == SourceLanguage::OpenCL_C_20) ||
-         (Language() == SourceLanguage::OpenCL_C_30);
-}
+bool NonUniformNDRangeSupported();
 
 enum class StorageClass : int {
   kSSBO = 0,
diff --git a/lib/Option.cpp b/lib/Option.cpp
index 9d971c0..724db79 100644
--- a/lib/Option.cpp
+++ b/lib/Option.cpp
@@ -155,6 +155,10 @@
     "long-vector", llvm::cl::init(false),
     llvm::cl::desc("Allow vectors of 8 and 16 elements. Experimental"));
 
+llvm::cl::opt<bool> cl_arm_non_uniform_work_group_size(
+    "cl-arm-non-uniform-work-group-size", llvm::cl::init(false),
+    llvm::cl::desc("Enable the cl_arm_non_uniform_work_group_size extension."));
+
 llvm::cl::opt<clspv::Option::SourceLanguage> cl_std(
     "cl-std", llvm::cl::desc("Select OpenCL standard"),
     llvm::cl::init(clspv::Option::SourceLanguage::OpenCL_C_12),
@@ -280,6 +284,12 @@
 bool WorkDim() { return work_dim; }
 bool GlobalOffset() { return global_offset; }
 bool GlobalOffsetPushConstant() { return global_offset_push_constant; }
+bool NonUniformNDRangeSupported() {
+  return (Language() == SourceLanguage::OpenCL_CPP) ||
+         (Language() == SourceLanguage::OpenCL_C_20) ||
+         (Language() == SourceLanguage::OpenCL_C_30) ||
+         cl_arm_non_uniform_work_group_size;
+}
 bool ClusterPodKernelArgs() { return cluster_non_pointer_kernel_args; }
 
 bool Supports16BitStorageClass(StorageClass sc) {
diff --git a/test/reqd_work_group_size-non-uniform.cl b/test/reqd_work_group_size-non-uniform.cl
index 98ee161..a0f927d 100644
--- a/test/reqd_work_group_size-non-uniform.cl
+++ b/test/reqd_work_group_size-non-uniform.cl
@@ -2,6 +2,11 @@
 // RUN: spirv-dis -o %t2.spvasm %t.spv
 // RUN: FileCheck %s < %t2.spvasm
 // RUN: spirv-val --target-env vulkan1.0 %t.spv
+//
+// RUN: clspv -cl-arm-non-uniform-work-group-size %s -o %t.spv
+// RUN: spirv-dis -o %t2.spvasm %t.spv
+// RUN: FileCheck %s < %t2.spvasm
+// RUN: spirv-val --target-env vulkan1.0 %t.spv
 
 // CHECK:     OpEntryPoint GLCompute %[[__original_id_16:[0-9]+]] "test"
 // CHECK:     OpDecorate %[[gl_WorkGroupSize:[0-9a-zA-Z_]+]] BuiltIn WorkgroupSize