i#1551 port to ARM: add more debug sanity checks for the decoder and encoder

Refactors the existing reg_check_reg_fixer() into a general
decode_debug_checks() which calls into the encoder and decoder to verify
the sizes of various arrays and sanity check the decoding tables.

For ARM, adds checks of the encoder chains and checks that register lists
obey certain limitations to make decoding and encoding simpler.

git-svn-id: https://dynamorio.googlecode.com/svn/trunk@3022 49cc7528-f6fd-11dd-9d1a-b59b2e1864b6
diff --git a/core/arch/arch.c b/core/arch/arch.c
index a0f4787..62212ff 100644
--- a/core/arch/arch.c
+++ b/core/arch/arch.c
@@ -566,7 +566,7 @@
     ASSERT((uint)LINK_FINAL_INSTR_SHARED_FLAG <
            (uint)INSTR_FIRST_NON_LINK_SHARED_FLAG);
     ASSERT_TRUNCATE(byte, byte, OPSZ_LAST_ENUM);
-    DODEBUG({ reg_check_reg_fixer(); });
+    DODEBUG({ decode_debug_checks(); });
 
     /* Verify that the structures used for a register spill area and to hold IBT
      * table addresses & masks for IBL code are laid out as expected. We expect
diff --git a/core/arch/arm/decode.c b/core/arch/arm/decode.c
index a94f66d..1d65636 100644
--- a/core/arch/arm/decode.c
+++ b/core/arch/arm/decode.c
@@ -1136,6 +1136,135 @@
     return false;
 }
 
+/* Classifies an operand template type: returns true when optype is one of
+ * the TYPE_ constants enumerated in the switch below, and false for every
+ * other value.
+ */
+bool
+optype_is_reg(int optype)
+{
+    bool is_reg;
+
+    switch (optype) {
+    /* TYPE_R_* */
+    case TYPE_R_A: case TYPE_R_B:
+    case TYPE_R_C: case TYPE_R_D:
+    case TYPE_R_A_TOP: case TYPE_R_B_TOP:
+    case TYPE_R_C_TOP: case TYPE_R_D_TOP:
+    case TYPE_R_D_NEGATED:
+    case TYPE_R_B_EVEN: case TYPE_R_B_PLUS1:
+    case TYPE_R_D_EVEN: case TYPE_R_D_PLUS1:
+    /* TYPE_CR_* */
+    case TYPE_CR_A: case TYPE_CR_B:
+    case TYPE_CR_C: case TYPE_CR_D:
+    /* TYPE_V_* */
+    case TYPE_V_A: case TYPE_V_B: case TYPE_V_C:
+    case TYPE_V_C_3b: case TYPE_V_C_4b:
+    /* TYPE_W_* */
+    case TYPE_W_A: case TYPE_W_B:
+    case TYPE_W_C: case TYPE_W_C_PLUS1:
+    /* Individually named types */
+    case TYPE_SPSR: case TYPE_CPSR:
+    case TYPE_FPSCR:
+    case TYPE_LR: case TYPE_SP:
+        is_reg = true;
+        break;
+    default:
+        is_reg = false;
+        break;
+    }
+    return is_reg;
+}
+
+#ifdef DEBUG
+# ifndef STANDALONE_DECODER
+/* Returns true when optype is one of the TYPE_L_ operand types listed in the
+ * switch below, and false for every other value.
+ */
+static bool
+optype_is_reglist(int optype)
+{
+    switch (optype) {
+    case TYPE_L_8b: case TYPE_L_13b: case TYPE_L_16b:
+    case TYPE_L_CONSEC:
+    case TYPE_L_VBx2: case TYPE_L_VBx3: case TYPE_L_VBx4:
+    case TYPE_L_VBx2D: case TYPE_L_VBx3D: case TYPE_L_VBx4D:
+        break;
+    default:
+        /* Not one of the listed types. */
+        return false;
+    }
+    return true;
+}
+
+/* Asserts at most 1 reglist in optype[], and at most 1 reg right after a reglist */
+static void
+decode_check_opnds(int optype[], uint num_types)
+{
+    /* reglist_idx is initialized only to rule out maybe-uninitialized warnings */
+    uint i, num_reglist = 0, reglist_idx = 0;
+    bool post_reglist = false;
+    for (i = 0; i < num_types; i++) {
+        if (optype_is_reglist(optype[i])) {
+            num_reglist++;
+            reglist_idx = i;
+            post_reglist = true;
+        } else if (post_reglist && !optype_is_reg(optype[i])) {
+            post_reglist = false;
+        } else if (post_reglist) {
+            ASSERT(reglist_idx == i - 1);
+        }
+    }
+    ASSERT(num_reglist <= 1);
+}
+# endif /* STANDALONE_DECODER */
+
+/* Debug sanity checks of the A32 encoding chains and their operand templates. */
+void
+decode_debug_checks_arch(void)
+{
+#   define MAX_TYPES 8
+    DOCHECK(2, {
+        uint opc;
+        for (opc = OP_FIRST; opc < OP_AFTER_LAST; opc++) {
+            const instr_info_t *info = opcode_to_encoding_info(opc, DR_ISA_ARM_A32);
+            while (info != NULL && info != &invalid_instr && info->type != OP_CONTD) {
+                const instr_info_t *ops = info;
+                uint num_srcs = 0;
+                uint num_dsts = 0;
+                /* XXX: perhaps we should make an iterator and use it everywhere.
+                 * For now, for simplicity here we use two passes. */
+                int src_type[MAX_TYPES];
+                int dst_type[MAX_TYPES];
+                while (ops != NULL) {
+                    /* Each entry adds 5 types: bounds-check before storing them */
+                    uint new_dsts = 1 + (TEST(DECODE_4_SRCS, ops->flags) ? 0 : 1) +
+                        (TEST(DECODE_3_DSTS, ops->flags) ? 1 : 0);
+                    ASSERT(num_dsts + new_dsts <= MAX_TYPES);
+                    ASSERT(num_srcs + (5 - new_dsts) <= MAX_TYPES);
+                    dst_type[num_dsts++] = ops->dst1_type;
+                    if (TEST(DECODE_4_SRCS, ops->flags))
+                        src_type[num_srcs++] = ops->dst2_type;
+                    else
+                        dst_type[num_dsts++] = ops->dst2_type;
+                    if (TEST(DECODE_3_DSTS, ops->flags))
+                        dst_type[num_dsts++] = ops->src1_type;
+                    else
+                        src_type[num_srcs++] = ops->src1_type;
+                    src_type[num_srcs++] = ops->src2_type;
+                    src_type[num_srcs++] = ops->src3_type;
+                    ops = instr_info_extra_opnds(ops);
+                }
+                /* Sanity-check encoding chain */
+                ASSERT(info->type == opc);
+                decode_check_opnds(dst_type, num_dsts);
+                decode_check_opnds(src_type, num_srcs);
+                info = get_next_instr_info(info);
+            }
+        }
+    });
+}
+#endif
 
 #ifdef DECODE_UNIT_TEST
 /* FIXME i#1551: add unit tests here.  How divide vs suite/tests/api/ tests? */
diff --git a/core/arch/arm/decode_private.h b/core/arch/arm/decode_private.h
index c8f625a..d8f47f0 100644
--- a/core/arch/arm/decode_private.h
+++ b/core/arch/arm/decode_private.h
@@ -122,6 +122,9 @@
     byte *final_pc;
     byte *orig_pc;
 
+    /* For decoding reglists.  Max 1 reglist per template (we check this in
+     * decode_debug_checks_arch()).
+     */
     size_t reglist_sz;
 
     /* For instr_t* target encoding */
@@ -285,8 +288,11 @@
 
     TYPE_K,    /* integer constant, size ignored, value stored in size */
 
-   /* when adding new types, update type_names[] in encode.c */
+    /* when adding new types, update type_names[] in encode.c */
+    TYPE_BEYOND_LAST_ENUM,
 
+
+    /* Non-incremental-enum values */
     DECODE_INDEX_SHIFT_TYPE_BITPOS   = 5,
     DECODE_INDEX_SHIFT_TYPE_SIZE     = OPSZ_2b,
     DECODE_INDEX_SHIFT_AMOUNT_BITPOS = 7,
@@ -345,5 +351,7 @@
 opnd_size_t
 resolve_size_downward(opnd_size_t size);
 
+bool
+optype_is_reg(int optype);
 
 #endif /* DECODE_PRIVATE_H */
diff --git a/core/arch/arm/encode.c b/core/arch/arm/encode.c
index 561c943..5319a3b 100644
--- a/core/arch/arm/encode.c
+++ b/core/arch/arm/encode.c
@@ -301,10 +301,14 @@
 
 #ifdef DEBUG
 void
-reg_check_reg_fixer(void)
+encode_debug_checks(void)
 {
     CLIENT_ASSERT(sizeof(dr_reg_fixer)/sizeof(dr_reg_fixer[0]) == REG_LAST_ENUM + 1,
                   "internal register enum error");
+    CLIENT_ASSERT(sizeof(reg_names)/sizeof(reg_names[0]) == REG_LAST_ENUM + 1,
+                  "reg_names missing an entry");
+    CLIENT_ASSERT(sizeof(type_names)/sizeof(type_names[0]) == TYPE_BEYOND_LAST_ENUM,
+                  "type_names missing an entry");
 }
 #endif
 
diff --git a/core/arch/decode.h b/core/arch/decode.h
index 6b5278f..f438393 100644
--- a/core/arch/decode.h
+++ b/core/arch/decode.h
@@ -560,6 +560,11 @@
 #endif
 /* DR_API EXPORT END */
 
+#ifdef DEBUG
+void
+decode_debug_checks(void);
+#endif
+
 /* for debugging: printing out types and sizes */
 extern const char * const type_names[];
 extern const char * const size_names[];
diff --git a/core/arch/decode_shared.c b/core/arch/decode_shared.c
index 5878920..3a42bb2 100644
--- a/core/arch/decode_shared.c
+++ b/core/arch/decode_shared.c
@@ -54,6 +54,12 @@
 # define ASSERT_NOT_REACHED DO_NOT_USE_ASSERT_USE_CLIENT_ASSERT_INSTEAD
 #endif
 
+/* Arch-specific routines */
+#ifdef DEBUG
+void encode_debug_checks(void);
+void decode_debug_checks_arch(void);
+#endif
+
 const char * const size_names[] = {
 #ifdef X86
     "<invalid>"/* was <NULL> */,
@@ -270,3 +276,14 @@
     } else
         return dcontext->isa_mode;
 }
+
+#ifdef DEBUG
+void
+decode_debug_checks(void)
+{
+    CLIENT_ASSERT(sizeof(size_names)/sizeof(size_names[0]) == OPSZ_LAST_ENUM,
+                  "size_names missing an entry");
+    encode_debug_checks(); /* arch-specific encoder checks */
+    decode_debug_checks_arch(); /* arch-specific decoder checks */
+}
+#endif /* DEBUG */
diff --git a/core/arch/opnd.h b/core/arch/opnd.h
index caee636..3110f03 100644
--- a/core/arch/opnd.h
+++ b/core/arch/opnd.h
@@ -1620,11 +1620,6 @@
 
 /* utility functions */
 
-#ifdef DEBUG
-void
-reg_check_reg_fixer(void);
-#endif
-
 DR_API
 /**
  * Assumes that \p reg is a DR_REG_ 32-bit register constant.
diff --git a/core/arch/x86/decode.c b/core/arch/x86/decode.c
index 45a87a2..64030c9 100644
--- a/core/arch/x86/decode.c
+++ b/core/arch/x86/decode.c
@@ -2166,6 +2166,14 @@
     return op_instr[opc];
 }
 
+#ifdef DEBUG
+void
+decode_debug_checks_arch(void)
+{
+    /* empty: no x86-specific decoder checks are performed */
+}
+#endif /* DEBUG */
+
 #ifdef DECODE_UNIT_TEST
 # include "instr_create.h"
 
diff --git a/core/arch/x86/decode_private.h b/core/arch/x86/decode_private.h
index 790f7b4..dc79518 100644
--- a/core/arch/x86/decode_private.h
+++ b/core/arch/x86/decode_private.h
@@ -413,6 +413,7 @@
                                   * 3 * regular size for 32-bit, 5 * regular
                                   * size for 64-bit */
     /* when adding new types, update type_names[] in encode.c */
+    TYPE_BEYOND_LAST_ENUM,
 };
 
 #define MODRM_BYTE(mod, reg, rm) ((byte) (((mod) << 6) | ((reg) << 3) | (rm)))
diff --git a/core/arch/x86/encode.c b/core/arch/x86/encode.c
index e409e1b..fe85a18 100644
--- a/core/arch/x86/encode.c
+++ b/core/arch/x86/encode.c
@@ -172,10 +172,14 @@
 
 #ifdef DEBUG
 void
-reg_check_reg_fixer(void)
+encode_debug_checks(void)
 {
     CLIENT_ASSERT(sizeof(dr_reg_fixer)/sizeof(dr_reg_fixer[0]) == REG_LAST_ENUM + 1,
                   "internal register enum error");
+    CLIENT_ASSERT(sizeof(reg_names)/sizeof(reg_names[0]) == REG_LAST_ENUM + 1,
+                  "reg_names missing an entry");
+    CLIENT_ASSERT(sizeof(type_names)/sizeof(type_names[0]) == TYPE_BEYOND_LAST_ENUM,
+                  "type_names missing an entry");
 }
 #endif