PPC: [wasm-simd] Implement S8x16Swizzle

Change-Id: I0362b4123ccce5d2709b1705453a32697581e526
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2339551
Reviewed-by: Junliang Yan <jyan@ca.ibm.com>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#69279}
diff --git a/src/compiler/backend/ppc/code-generator-ppc.cc b/src/compiler/backend/ppc/code-generator-ppc.cc
index 053efc2..337cd79 100644
--- a/src/compiler/backend/ppc/code-generator-ppc.cc
+++ b/src/compiler/backend/ppc/code-generator-ppc.cc
@@ -3282,6 +3282,24 @@
                  i.InputSimd128Register(1));
       break;
     }
+    case kPPC_S8x16Swizzle: {
+      // Reverse input 0 (via the stack) to match IBM lane numbering.
+      Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
+      __ addi(sp, sp, Operand(-16));  // Reserve a 16-byte scratch slot.
+      __ stxvd(i.InputSimd128Register(0), MemOperand(r0, sp));  // Save input 0.
+      __ ldbrx(r0, MemOperand(r0, sp));  // r0 = byte-reversed [sp+0..7].
+      __ li(ip, Operand(8));  // ip = 8 (offset of second half).
+      __ ldbrx(ip, MemOperand(ip, sp));  // ip = byte-reversed [sp+8..15].
+      __ stdx(ip, MemOperand(r0, sp));  // [sp+0] = ip (r0 base presumably 0).
+      __ li(ip, Operand(8));  // ip = 8 again; ldbrx overwrote it.
+      __ stdx(r0, MemOperand(ip, sp));  // [sp+8] = r0; halves now swapped.
+      __ lxvd(kScratchDoubleReg, MemOperand(r0, sp));  // Load reversed input.
+      __ addi(sp, sp, Operand(16));  // Release the scratch slot.
+      __ vxor(tempFPReg1, tempFPReg1, tempFPReg1);  // Zero second source.
+      __ vperm(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
+               i.InputSimd128Register(1));  // TODO(review): index >= 32 -> 0?
+      break;
+    }
     case kPPC_StoreCompressTagged: {
       ASSEMBLE_STORE_INTEGER(StoreTaggedField, StoreTaggedFieldX);
       break;
diff --git a/src/compiler/backend/ppc/instruction-codes-ppc.h b/src/compiler/backend/ppc/instruction-codes-ppc.h
index 8e4dc9a..fdc8082 100644
--- a/src/compiler/backend/ppc/instruction-codes-ppc.h
+++ b/src/compiler/backend/ppc/instruction-codes-ppc.h
@@ -333,6 +333,7 @@
   V(PPC_I8x16AddSaturateU)           \
   V(PPC_I8x16SubSaturateU)           \
   V(PPC_S8x16Shuffle)                \
+  V(PPC_S8x16Swizzle)                \
   V(PPC_V64x2AnyTrue)                \
   V(PPC_V32x4AnyTrue)                \
   V(PPC_V16x8AnyTrue)                \
diff --git a/src/compiler/backend/ppc/instruction-scheduler-ppc.cc b/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
index 4400a61..1aab2df 100644
--- a/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
+++ b/src/compiler/backend/ppc/instruction-scheduler-ppc.cc
@@ -256,6 +256,7 @@
     case kPPC_I8x16AddSaturateU:
     case kPPC_I8x16SubSaturateU:
     case kPPC_S8x16Shuffle:
+    case kPPC_S8x16Swizzle:
     case kPPC_V64x2AnyTrue:
     case kPPC_V32x4AnyTrue:
     case kPPC_V16x8AnyTrue:
diff --git a/src/compiler/backend/ppc/instruction-selector-ppc.cc b/src/compiler/backend/ppc/instruction-selector-ppc.cc
index e448b88..50ca46c 100644
--- a/src/compiler/backend/ppc/instruction-selector-ppc.cc
+++ b/src/compiler/backend/ppc/instruction-selector-ppc.cc
@@ -2222,7 +2222,8 @@
   V(I8x16SubSaturateU)     \
   V(S128And)               \
   V(S128Or)                \
-  V(S128Xor)
+  V(S128Xor)               \
+  V(S8x16Swizzle)
 
 #define SIMD_UNOP_LIST(V)   \
   V(F64x2Abs)               \
@@ -2436,8 +2437,6 @@
 
 void InstructionSelector::VisitF32x4Max(Node* node) { UNIMPLEMENTED(); }
 
-void InstructionSelector::VisitS8x16Swizzle(Node* node) { UNIMPLEMENTED(); }
-
 void InstructionSelector::VisitF64x2Div(Node* node) { UNIMPLEMENTED(); }
 
 void InstructionSelector::VisitF64x2Min(Node* node) { UNIMPLEMENTED(); }