baby steps for aarch64 support
So far this is just as easy as I had hoped.
Change-Id: I5f69a900b32d9bf70156b55e334233d7376b820f
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/223340
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/core/SkVM.cpp b/src/core/SkVM.cpp
index 9a66c70..99df6f6 100644
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@@ -704,6 +704,29 @@
this->byte(mod_rm(Mod::Indirect, src&7, dst&7));
}
+ // Emit one 32-bit instruction word through byte().
+ // A64 instruction words are always stored least-significant byte first, so the four bytes
+ // are spelled out explicitly instead of handing byte() the address of w, whose in-memory
+ // layout is only LSB-first on little-endian hosts.
+ void Assembler::word(uint32_t w) {
+     const uint8_t bytes[4] = {
+         uint8_t(w >>  0), uint8_t(w >>  8), uint8_t(w >> 16), uint8_t(w >> 24),
+     };
+     this->byte(bytes, 4);
+ }
+
+ // https://static.docs.arm.com/ddi0596/a/DDI_0596_ARM_a64_instruction_set_architecture.pdf
+
+ // Build one instruction word from its five fields and emit it with word().
+ // Fields, most significant first: hi (11 bits), m (5), lo (6), n (5), d (5).
+ // Each argument is masked to its field width before being shifted into position.
+ void Assembler::op(uint32_t hi, V m, uint32_t lo, V n, V d) {
+     const uint32_t opcode    = (hi & 0x7ff) << 21 | (lo & 0x3f) << 10;
+     const uint32_t registers = (m & 0x1f) << 16 | (n & 0x1f) << 5 | (d & 0x1f) << 0;
+     this->word(opcode | registers);
+ }
+ // Integer ops on 4 x 32-bit lanes, d = op(n,m); e.g. add4s encodes `add vd.4s, vn.4s, vm.4s`.
+ void Assembler::add4s(V d, V n, V m) { this->op(0b0'1'0'01110'10'1, m, 0b10000'1, n, d); }
+ void Assembler::sub4s(V d, V n, V m) { this->op(0b0'1'1'01110'10'1, m, 0b10000'1, n, d); }
+ void Assembler::mul4s(V d, V n, V m) { this->op(0b0'1'0'01110'10'1, m, 0b10011'1, n, d); }
+ // Float ops on 4 x 32-bit lanes, d = op(n,m); e.g. fmul4s encodes `fmul vd.4s, vn.4s, vm.4s`.
+ void Assembler::fadd4s(V d, V n, V m) { this->op(0b0'1'0'01110'0'0'1, m, 0b11010'1, n, d); }
+ void Assembler::fsub4s(V d, V n, V m) { this->op(0b0'1'0'01110'1'0'1, m, 0b11010'1, n, d); }
+ void Assembler::fmul4s(V d, V n, V m) { this->op(0b0'1'1'01110'0'0'1, m, 0b11011'1, n, d); }
+ void Assembler::fdiv4s(V d, V n, V m) { this->op(0b0'1'1'01110'0'0'1, m, 0b11111'1, n, d); }
+
#if defined(SKVM_JIT)
static bool can_jit(int regs, int nargs) {
return true
diff --git a/src/core/SkVM.h b/src/core/SkVM.h
index 7f651f8..f9e57b6 100644
--- a/src/core/SkVM.h
+++ b/src/core/SkVM.h
@@ -25,7 +25,7 @@
// Order matters... GP64, Xmm, Ymm values match 4-bit register encoding for each.
enum GP64 {
rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
- r8 , r9, r10, r11, r12, r13, r14, r15,
+ r8 , r9 , r10, r11, r12, r13, r14, r15,
};
enum Xmm {
xmm0, xmm1, xmm2 , xmm3 , xmm4 , xmm5 , xmm6 , xmm7 ,
@@ -36,10 +36,28 @@
ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15,
};
+ // X and V values match 5-bit encoding for each (nothing tricky): x0..x31 and v0..v31 are 0..31.
+ enum X {
+ x0 , x1 , x2 , x3 , x4 , x5 , x6 , x7 ,  //  0.. 7
+ x8 , x9 , x10, x11, x12, x13, x14, x15,  //  8..15
+ x16, x17, x18, x19, x20, x21, x22, x23,  // 16..23
+ x24, x25, x26, x27, x28, x29, x30, x31,  // 24..31
+ };
+ enum V {  // register operands of the aarch64 vector ops (add4s, fmul4s, ...)
+ v0 , v1 , v2 , v3 , v4 , v5 , v6 , v7 ,  //  0.. 7
+ v8 , v9 , v10, v11, v12, v13, v14, v15,  //  8..15
+ v16, v17, v18, v19, v20, v21, v22, v23,  // 16..23
+ v24, v25, v26, v27, v28, v29, v30, v31,  // 24..31
+ };
+
void byte(const void*, int);
void byte(uint8_t);
template <typename... Rest> void byte(uint8_t, Rest...);
+ void word(uint32_t);  // emit one 32-bit word as 4 bytes
+
+ // x86-64
+
void nop();
void align(int mod);
@@ -80,6 +98,13 @@
void vmovups(GP64 dst, Ymm src);
void vmovq (GP64 dst, Xmm src);
+ // aarch64
+
+ // d = op(n,m), on vectors of four 32-bit lanes (the "4s" suffix)
+ using DOpNM = void(V d, V n, V m);  // a function type: each name below declares a member function
+ DOpNM add4s, sub4s, mul4s,  // integer add / subtract / multiply
+ fadd4s, fsub4s, fmul4s, fdiv4s;  // floating-point add / subtract / multiply / divide
+
private:
// dst = op(dst, imm)
void op(int opcode, int opcode_ext, GP64 dst, int imm);
@@ -104,6 +129,11 @@
// *ptr = ymm or ymm = *ptr, depending on opcode.
void load_store(int prefix, int map, int opcode, Ymm ymm, GP64 ptr);
+ // General layout top to bottom is:
+ // [11 bits hi] [5 bits m] [6 bits lo] [5 bits n] [5 bits d]
+ // where the opcode is split between hi and lo.
+ void op(uint32_t hi, V m, uint32_t lo, V n, V d);  // masks each field to its width, emits one word
+
uint8_t* fCode;
size_t fSize;
};
diff --git a/tests/SkVMTest.cpp b/tests/SkVMTest.cpp
index b44cfc6..d74d0fa 100644
--- a/tests/SkVMTest.cpp
+++ b/tests/SkVMTest.cpp
@@ -369,4 +369,26 @@
},{
0xc5, 0x9d, 0xdf, 0xda,
});
+
+ // e.g. echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
+
+ test_asm(r, [&](A& a) {
+ a.fadd4s(A::v4, A::v3, A::v1);
+ a.fsub4s(A::v4, A::v3, A::v1);
+ a.fmul4s(A::v4, A::v3, A::v1);
+ a.fdiv4s(A::v4, A::v3, A::v1);
+
+ a.add4s(A::v4, A::v3, A::v1);
+ a.sub4s(A::v4, A::v3, A::v1);
+ a.mul4s(A::v4, A::v3, A::v1);
+ },{
+ 0x64,0xd4,0x21,0x4e,
+ 0x64,0xd4,0xa1,0x4e,
+ 0x64,0xdc,0x21,0x6e,
+ 0x64,0xfc,0x21,0x6e,
+
+ 0x64,0x84,0xa1,0x4e,
+ 0x64,0x84,0xa1,0x6e,
+ 0x64,0x9c,0xa1,0x4e,
+ });
}