longvector: add missing Builtins & Intrinsics (#807)

diff --git a/lib/LongVectorLoweringPass.cpp b/lib/LongVectorLoweringPass.cpp
index 9a11771..ce979a3 100644
--- a/lib/LongVectorLoweringPass.cpp
+++ b/lib/LongVectorLoweringPass.cpp
@@ -235,9 +235,11 @@
   case Intrinsic::ceil:
   case Intrinsic::copysign:
   case Intrinsic::cos:
+  case Intrinsic::ctlz:
   case Intrinsic::exp:
   case Intrinsic::fabs:
   case Intrinsic::fmuladd:
+  case Intrinsic::fshl:
   case Intrinsic::log:
   case Intrinsic::pow:
   case Intrinsic::sin: {
@@ -301,6 +303,7 @@
 
   // TODO Add support for other builtins by providing testcases and listing the
   // builtins here.
+  case clspv::Builtins::kAbs:
   case clspv::Builtins::kAcosh:
   case clspv::Builtins::kAcos:
   case clspv::Builtins::kAcospi:
@@ -336,6 +339,7 @@
   case clspv::Builtins::kLog2:
   case clspv::Builtins::kMax:
   case clspv::Builtins::kMin:
+  case clspv::Builtins::kPopcount:
   case clspv::Builtins::kPow:
   case clspv::Builtins::kPowr:
   case clspv::Builtins::kRadians:
diff --git a/test/LongVectorLowering/abs.ll b/test/LongVectorLowering/abs.ll
new file mode 100644
index 0000000..4c7bfb8
--- /dev/null
+++ b/test/LongVectorLowering/abs.ll
@@ -0,0 +1,24 @@
+; RUN: clspv-opt --LongVectorLowering %s -o %t
+; RUN: FileCheck %s < %t
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir-unknown-unknown"
+
+define spir_kernel void @abs(<8 x i32> addrspace(1)* %src, <8 x i32> addrspace(1)* %dst) { ; kernel fixture: one <8 x i32> input pointer, one <8 x i32> output pointer
+entry:
+  %0 = load <8 x i32>, <8 x i32> addrspace(1)* %src, align 32 ; fetch the 8-lane operand from %src
+  %call = call spir_func <8 x i32> @_Z3absDv8_i(<8 x i32> %0) ; 8-wide abs builtin; the patterns at the end of this file expect eight scalar @_Z3absi calls after the pass
+  store <8 x i32> %call, <8 x i32> addrspace(1)* %dst, align 32 ; write the result vector to %dst
+  ret void
+}
+
+declare spir_func <8 x i32> @_Z3absDv8_i(<8 x i32>)
+
+; CHECK: call spir_func i32 @_Z3absi(i32
+; CHECK: call spir_func i32 @_Z3absi(i32
+; CHECK: call spir_func i32 @_Z3absi(i32
+; CHECK: call spir_func i32 @_Z3absi(i32
+; CHECK: call spir_func i32 @_Z3absi(i32
+; CHECK: call spir_func i32 @_Z3absi(i32
+; CHECK: call spir_func i32 @_Z3absi(i32
+; CHECK: call spir_func i32 @_Z3absi(i32
diff --git a/test/LongVectorLowering/clz.ll b/test/LongVectorLowering/clz.ll
new file mode 100644
index 0000000..1da74ef
--- /dev/null
+++ b/test/LongVectorLowering/clz.ll
@@ -0,0 +1,24 @@
+; RUN: clspv-opt --LongVectorLowering %s -o %t
+; RUN: FileCheck %s < %t
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir-unknown-unknown"
+
+define spir_kernel void @clz(<8 x i32> addrspace(1)* %src, <8 x i32> addrspace(1)* %dst) { ; kernel fixture: one <8 x i32> input pointer, one <8 x i32> output pointer
+entry:
+  %0 = load <8 x i32>, <8 x i32> addrspace(1)* %src, align 32 ; fetch the 8-lane operand from %src
+  %call = call spir_func <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %0, i1 false) ; 8-wide ctlz intrinsic; the patterns at the end of this file expect eight scalar @llvm.ctlz.i32 calls after the pass
+  store <8 x i32> %call, <8 x i32> addrspace(1)* %dst, align 32 ; write the result vector to %dst
+  ret void
+}
+
+declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1 immarg) ; vector form of the intrinsic called by the kernel above
+
+; CHECK: call spir_func i32 @llvm.ctlz.i32(i32
+; CHECK: call spir_func i32 @llvm.ctlz.i32(i32
+; CHECK: call spir_func i32 @llvm.ctlz.i32(i32
+; CHECK: call spir_func i32 @llvm.ctlz.i32(i32
+; CHECK: call spir_func i32 @llvm.ctlz.i32(i32
+; CHECK: call spir_func i32 @llvm.ctlz.i32(i32
+; CHECK: call spir_func i32 @llvm.ctlz.i32(i32
+; CHECK: call spir_func i32 @llvm.ctlz.i32(i32
diff --git a/test/LongVectorLowering/popcount.ll b/test/LongVectorLowering/popcount.ll
new file mode 100644
index 0000000..f30ee69
--- /dev/null
+++ b/test/LongVectorLowering/popcount.ll
@@ -0,0 +1,24 @@
+; RUN: clspv-opt --LongVectorLowering %s -o %t
+; RUN: FileCheck %s < %t
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir-unknown-unknown"
+
+define spir_kernel void @popcount(<8 x i32> addrspace(1)* %src, <8 x i32> addrspace(1)* %dst) { ; kernel fixture: one <8 x i32> input pointer, one <8 x i32> output pointer
+entry:
+  %0 = load <8 x i32>, <8 x i32> addrspace(1)* %src, align 32 ; fetch the 8-lane operand from %src
+  %call = call spir_func <8 x i32> @_Z8popcountDv8_i(<8 x i32> %0) ; 8-wide popcount builtin; the patterns at the end of this file expect eight scalar @_Z8popcounti calls after the pass
+  store <8 x i32> %call, <8 x i32> addrspace(1)* %dst, align 32 ; write the result vector to %dst
+  ret void
+}
+
+declare spir_func <8 x i32> @_Z8popcountDv8_i(<8 x i32>)
+
+; CHECK: call spir_func i32 @_Z8popcounti(i32
+; CHECK: call spir_func i32 @_Z8popcounti(i32
+; CHECK: call spir_func i32 @_Z8popcounti(i32
+; CHECK: call spir_func i32 @_Z8popcounti(i32
+; CHECK: call spir_func i32 @_Z8popcounti(i32
+; CHECK: call spir_func i32 @_Z8popcounti(i32
+; CHECK: call spir_func i32 @_Z8popcounti(i32
+; CHECK: call spir_func i32 @_Z8popcounti(i32
diff --git a/test/LongVectorLowering/rotate.ll b/test/LongVectorLowering/rotate.ll
new file mode 100644
index 0000000..88b6ef6
--- /dev/null
+++ b/test/LongVectorLowering/rotate.ll
@@ -0,0 +1,25 @@
+; RUN: clspv-opt --LongVectorLowering %s -o %t
+; RUN: FileCheck %s < %t
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir-unknown-unknown"
+
+define spir_kernel void @rotate(<8 x i32> addrspace(1)* %srcA, <8 x i32> addrspace(1)* %srcB, <8 x i32> addrspace(1)* %dst) { ; kernel fixture: two <8 x i32> input pointers, one <8 x i32> output pointer
+entry:
+  %0 = load <8 x i32>, <8 x i32> addrspace(1)* %srcA, align 32 ; value operand, read from %srcA
+  %1 = load <8 x i32>, <8 x i32> addrspace(1)* %srcB, align 32 ; shift-amount operand, read from %srcB
+  %call = call spir_func <8 x i32> @llvm.fshl.v8i32(<8 x i32> %0, <8 x i32> %0, <8 x i32> %1) ; funnel shift left with %0 as both data operands (a rotate left per LangRef); eight scalar @llvm.fshl.i32 calls are expected after the pass
+  store <8 x i32> %call, <8 x i32> addrspace(1)* %dst, align 32 ; write the result vector to %dst
+  ret void
+}
+
+declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
+
+; CHECK: call spir_func i32 @llvm.fshl.i32(i32
+; CHECK: call spir_func i32 @llvm.fshl.i32(i32
+; CHECK: call spir_func i32 @llvm.fshl.i32(i32
+; CHECK: call spir_func i32 @llvm.fshl.i32(i32
+; CHECK: call spir_func i32 @llvm.fshl.i32(i32
+; CHECK: call spir_func i32 @llvm.fshl.i32(i32
+; CHECK: call spir_func i32 @llvm.fshl.i32(i32
+; CHECK: call spir_func i32 @llvm.fshl.i32(i32