blob: d99d0a257d3492649afc24d89f26a4314602d1c1 [file] [log] [blame]
#!/usr/bin/perl
# **********************************************************
# Copyright (c) 2014-2015 Google, Inc. All rights reserved.
# **********************************************************
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of Google, Inc. nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
# Feed this the text from the ARM manual for the A32 instructions.
# Driver: parse the command-line switches, then scan the manual text for
# "Encoding ..." sections.  For each section we locate the assembly syntax
# line and the bit-encoding line and hand them to generate_entry(), which
# prints the table entries.
#
# Every command-line argument must be one of the switches below (anything
# else is fatal), so the manual text is always read from standard input:
#   -nopred  look for "1 1 1 1" (unpredicated) encodings instead of "cond"
#   -v       verbose; repeat for more output (-v -v echoes each input line)
#   -simd    enable the SIMD-specific expansions in generate_entry()
#   -t32     look for "Encoding T..." sections instead of "Encoding A..."
my $verbose = 0; # Bumped once per -v
my $line = 0;    # Count of input lines read so far, used in diagnostics
my $pred = 1; # Process predicated instrs, or non-pred?
my $simd = 0; # Assume SIMD?
my $t32 = 0; # Look for T32 instrs

while (@ARGV) {
    my $arg = $ARGV[0];
    if ($arg eq '-nopred') {
        $pred = 0;
    } elsif ($arg eq '-v') {
        $verbose++;
    } elsif ($arg eq '-simd') {
        $simd = 1;
    } elsif ($arg eq '-t32') {
        $t32 = 1;
    } else {
        die "Unknown argument $ARGV[0]\n";
    }
    shift @ARGV;
}

while (<>) {
    $line++;
    chomp;
    # DOS line endings: chomp only strips "\n", so the "\r" has to be removed
    # explicitly or the end-anchored patterns below would never match.
    s/\r$//;
    print "xxx $line $_\n" if ($verbose > 1);
  startover:
    # Re-entered via goto whenever a nested scan below runs into the next
    # "Encoding" header, so that header is examined by this same test.
    if ((!$t32 && (/^Encoding A/ || /^Encoding ..\/A/)) ||
        ($t32 && (/^Encoding T/))) {
        my $name;
        my $asm;

        # Phase 1: skip forward to the "ARMv..." architecture line.
        while (<>) {
            $line++;
            chomp;
            s/\r$//; # DOS
            if (/^ARMv/) {
                # NOTE: $flags is a package global that is only appended to
                # here; nothing in this script reads it.
                $flags .= "|v8" if (/^ARMv8/);
                last;
            } elsif (/^[A-Z].*<.*<.*>$/) {
                # Sometimes the encoding is after the name in the .text version
                goto at_name;
            }
            goto startover if (/^Encoding /); # some descriptions have Encoding A...
        }

        # Phase 2: skip further "ARMv..." lines and anything that does not
        # look like an assembly syntax line (two capitals, or capital + '<').
        while (<>) {
            $line++;
            chomp;
            s/\r$//; # DOS
            next if (/^ARMv/);
            next if ($_ !~ /^[A-Z][A-Z]/ && $_ !~ /^[A-Z]</);
            last;
        }
        last if eof();

      at_name:
        # The current line is the assembly syntax; its leading word is the
        # mnemonic.
        if (/^(\w+)/) {
            $name = $1;
            $asm = $_;
        } else {
            print "unexpected asm on line $line: $_\n";
        }
        print "found $name: $asm\n" if ($verbose);

        # Phase 3: look for the encoding line.  $last holds the previous line
        # so the bit-number header row can be recognized.
        my $last = "";
        while (<>) {
            $line++;
            chomp;
            s/\r$//; # DOS
            # $prefix is interpolated into the patterns below as a regex, so
            # the '.' in the T32 prefix matches any single character.
            my $prefix = '';
            if ($t32) {
                if ($last =~ /^15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 15/) {
                    $prefix = "1 1 1 .";
                }
            } elsif (!$pred) {
                if ($last =~ /^31 30 29/ && /^1 1 1 1/) {
                    $prefix = "1 1 1 1";
                }
            } elsif (/^cond/) {
                $prefix = "cond";
            }
            if ($prefix ne '') {
                # We encode the "x x x P U {D,R} W S" specifiers either into
                # our opcodes or we have multiple entries with encoding chains.
                my $enc = $_;
                my $matched = 0;
                # Prefer the longest run of single-character specifiers after
                # the prefix; shorter runs are padded with zeros to 8 entries.
                for my $count (8, 6, 4, 2) {
                    next unless (/^$prefix\s+((\(?[01PUWSRDQi]\)? ){$count})(.*)/);
                    my $opc = $1 . join(' ', ('0') x (8 - $count));
                    my $rest = $3;
                    print "matched $name $enc\n" if ($verbose);
                    # Ignore parens: go w/ value inside.
                    $opc =~ s/\(//g;
                    $opc =~ s/\)//g;
                    generate_entry(lc($name), $asm, $enc, $opc, $rest, 0);
                    $matched = 1;
                    last;
                }
                print "no match for $name: $_\n" unless ($matched);
                # Whether or not it parsed, this section is finished.
                last;
            }
            goto startover if (/^Encoding /);
            $last = $_;
        }
    }
}
# Prints one or more decoder-table entries for a single encoding line.
#
# Parameters:
#   $name - mnemonic (the caller passes it lowercased); suffixes such as "s",
#           ".f32"/".f64" or ".<type>" are appended for expanded variants.
#   $asm  - assembly syntax line; only echoed in the trailing comment (and
#           consulted for "amode" and "<dt>").
#   $enc  - the full encoding line as space-separated tokens; the tokens'
#           bit widths must add up to exactly 32 or we die.
#   $opc  - eight space-separated specifiers that are OR-ed into bits 27..20.
#           Each is 0, 1, or one of the letters S P U W D R i Q; a letter
#           makes us recurse to emit the 0 variant while this call goes on
#           to emit the 1 variant.
#   $rest - the encoding tokens following the eight specifiers; these are
#           massaged into the operand columns of the entry.
#   $PUW  - nonzero once a P, U or W specifier has been expanded, which adds
#           a "/*PUW=...*/" comment to the printed entry.
#
# Reads the file-level $t32, $simd and $pred settings.  Output goes to the
# currently selected filehandle.  Dies on an unknown token, a bit count other
# than 32, or an opcode specifier that is not 0/1 after expansion.
#
# The former "($,$,$,$,$,$)" prototype has been dropped: commas are not
# valid prototype characters, and every caller already passes a
# parenthesized six-argument list.
sub generate_entry
{
    my ($name, $asm, $enc, $opc, $rest, $PUW) = @_;
    my $eflags = "x";
    my $other_name;
    my $other_enc;
    my $other_rest;
    my $negative = 0;
    my $hexopc = 0;
    # Ensure we've got all the bits, and fill in opcode for lower bits
    my %bitlen = (
        '(0)' => 1, '(1)' => 1, '(S)' => 1,
        'J1' => 1, 'J2' => 1,
        'Rn' => '4', 'Rd' => 4, 'Rt' => 4, 'Rt2' => 4, 'Rs' => 4, 'Rm' => 4, 'Ra' => 4,
        'RdHi' => 4, 'RdLo' => 4,
        'CRd' => 4, 'CRn' => 4, 'CRm' => 4,
        'Vn' => '4', 'Vd' => 4, 'Vt' => 4, 'Vm' => 4, 'Va' => 4,
        'imm2' => 2, 'imm3' => 3, 'imm4' => 4, 'imm5' => 5, 'imm6' => 6, 'imm8' => 8,
        'imm10' => 10, 'imm11' => 11, 'imm12' => 12, 'imm24' => 24,
        'imm10H' => 10, 'imm10L' => 10, 'imm4H' => 4, 'imm4L' => 4,
        'sat_imm4' => 4, 'sat_imm5' => 5,
        'type' => 2, # shift type
        'type_vld' => 4, # OP_vld1
        'cond' => 4,
        'option' => 4,
        'msb' => 5, 'lsb' => 5,
        'coproc' => 4, 'opc1' => 4, 'opc2' => 3, # OP_cdp
        'opc1_mcr' => 3, # OP_mcr
        'opc1_vmov' => 2, 'opc2_vmov' => 2, # OP_vmov
        'opt' => 2, # OP_dcps
        'register_list_t32' => 13, # for T32
        'register_list' => 16, # for A32
        'register_list_priv' => 15, # for A32 priv ldm
        'mask' => 2, # OP_msr
        'mask_priv' => 4, # OP_msr priv
        'tb' => 1, # OP_pkh
        'widthm1' => 5, # OP_sbfx
        'sh' => 1, # OP_ssat
        'rotate' => 2, # OP_sxtab
        'imod' => 2, 'mode' => 5, # OP_cps
        'M1' => 4, # OP_mrs
        'reg' => 4, # OP_vmrs
        'opcode' => 4, # OP_subs pc
        'sz_crc32' => 2, # OP_crc32
        'sz' => 1, # OP_vabs
        # SIMD
        'size' => 2, 'size=8' => 2, 'size=16' => 2, 'size=32' => 2, 'size=64' => 2,
        'size=s8' => 2, 'size=s16' => 2, 'size=s32' => 2, 'size=s64' => 2,
        'size=u8' => 2, 'size=u16' => 2, 'size=u32' => 2, 'size=u64' => 2,
        'size=i8' => 2, 'size=i16' => 2, 'size=i32' => 2, 'size=i64' => 2,
        'sz=0' => 1, 'sz=1' => 1, 'cmode' => 4,
        'op' => 1, # OP_vacge
        'op_2b' => 2, # OP_vbif, OP_vcvt, OP_vqmov, OP_vrev
        'op_3b' => 3, # OP_vrint
        'sf' => 1, 'sx' => 1, 'RM' => 2, # OP_vcvt
        'align' => 2, 'index_align' => 4,
        'cc' => 2, # OP_vsel
        'len' => 2, # OP_vtbl
    );

    # Walk the encoding left to right (bit 31 downward), summing field widths
    # and recording every literal 1 in $hexopc.
    my @encbits = split(' ', $enc);
    my $totlen = 0;
    for (my $i = 0; $i <= $#encbits; $i++) {
        # Some field names have a different width depending on the
        # instruction, so map them to an instruction-specific key first.
        my $token = $encbits[$i];
        $token =~ s/register_list/register_list_t32/ if ($t32);
        $token =~ s/register_list/register_list_priv/
            if ($name eq 'ldm' && $asm =~ /amode/);
        $token =~ s/opc1/opc1_mcr/ if ($name eq 'mcr' || $name eq 'mcr2' ||
                                       $name eq 'mrc' || $name eq 'mrc2');
        $token =~ s/mask/mask_priv/ if ($name eq 'msr' && $enc =~ / R /);
        $token =~ s/\bsz\b/sz_crc32/ if ($name eq 'crc32');
        $token =~ s/\bop\b/op_2b/
            if (($name eq 'v' && $enc =~ /D op V/) ||
                ($name =~ /^vcvt/ && $enc =~ /1 op Q/) ||
                ($name =~ /^vqmov/) ||
                ($name =~ /^vrev/));
        $token =~ s/\bop\b/op_3b/ if ($name =~ /^vrint/ && $enc =~ /1 op Q/);
        $token =~ s/\btype\b/type_vld/ if ($name =~ /^vld/ || $name =~ /^vst/);
        $token =~ s/\b(opc\d)\b/${1}_vmov/ if ($name =~ /^vmov/);
        my $len = 0;
        if (length($token) == 1) {
            $len = 1;
        } elsif (defined($bitlen{$token})) {
            $len = $bitlen{$token};
            my $unmod = $encbits[$i];
            if ($unmod eq 'type') {
                $rest =~ s/\btype\b/sh2/;
            } elsif ($unmod !~ /^R/ && $unmod !~ /^CR/ && $unmod !~ /^\(/) {
                # Rename the field in the operand list to imm<len>_<lowbit>.
                my $pos = 32 - $totlen - $len;
                my $repl = $len . "_" . $pos;
                $rest =~ s/\b\Q$unmod\E\b/imm$repl/;
            }
        } else {
            die "Unknown length for $name: \"$token\" ($enc)\n";
        }
        $totlen += $len;
        if ($token eq '1' || $token eq '(1)') {
            $hexopc |= 1 << (32 - $totlen);
        }
    }
    die "Missing chars (have $totlen) for $name $asm: $enc\n" unless ($totlen == 32);

    # Handle "x x x P U {D,R} W S" by expanding the chars
    my @bits = split(' ', $opc);
    for (my $i = 0; $i <= $#bits; $i++) {
        my $flag = $bits[$i];
        if ($flag =~ /^[SPUWDRiQ]$/) {
            # P, U and W mark the entry as an addressing-mode variant; the
            # marker is set before recursing so the 0 variant carries it too.
            $PUW = 1 if ($flag =~ /^[PUW]$/);
            # Emit the variant with this specifier cleared...
            my $zero_opc = $opc;
            $zero_opc =~ s/$flag/0/;
            generate_entry($name, $asm, $enc, $zero_opc, $rest, $PUW);
            # ...and carry on here as the variant with it set.  $opc is kept
            # in sync (for S as well) so that recursive calls made further
            # down do not expand the same specifier a second time.
            $opc =~ s/$flag/1/;
            $bits[$i] = '1';
            if ($flag eq 'S') {
                $name .= "s";
                $eflags = "fWNZCV";
            } elsif ($flag eq 'U') {
                $negative = 1;
            } elsif ($flag eq 'Q') {
                $rest =~ s/ V/ VQ/g;
            }
        }
        if ($bits[$i] eq '1' || $bits[$i] eq '0') {
            $hexopc |= $bits[$i] << (27 - $i);
        } else {
            die "invalid code $bits[$i]\n";
        }
    }

    # Floating-point precision bit: bit 8 == "sz"
    if ($simd && $enc =~ / sz /) {
        $other_name = $name . ".f32";
        $other_enc = $enc;
        $other_enc =~ s/ sz / sz=0 /;
        generate_entry($other_name, $asm, $other_enc, $opc, $rest, $PUW);
        $name .= ".f64";
        $enc =~ s/ sz / sz=1 /;
        $hexopc |= 0x100;
    }

    # For SIMD, Q bit is down low
    if ($simd && $rest =~ / Q /) {
        $other_rest = $rest;
        $other_rest =~ s/Q //;
        $other_rest =~ s/Vn/VAq/;
        $other_rest =~ s/Vd/VBq/;
        $other_rest =~ s/Vm/VCq/;
        generate_entry($name, $asm, $enc, $opc, $other_rest, $PUW);
        $rest =~ s/Q //;
        $rest =~ s/Vn/VAdq/;
        $rest =~ s/Vd/VBdq/;
        $rest =~ s/Vm/VCdq/;
        $hexopc |= 0x40;
    }

    # Data type: "<dt>" or "<size>"
    if ($simd && $enc =~ / size /) {
        # We bail on the precise hex encoding: we just try to pre-generate
        # entries that can be manually tweaked
        my @subtypes;
        if ($asm =~ /.<dt>/) {
            if ($enc =~ / U /) {
                @subtypes = ('s8', 's16', 's32', 'u8', 'u16', 'u32');
            } else {
                @subtypes = ('i8', 'i16', 'i32', 'i64');
            }
        } else {
            @subtypes = ('8', '16', '32', '64');
        }
        $rest =~ s/size\s*//;
        foreach my $sub (@subtypes) {
            $other_name = $name . "." . $sub;
            $other_enc = $enc;
            $other_enc =~ s/ size / size=$sub /;
            generate_entry($other_name, $asm, $other_enc, $opc, $rest, $PUW);
        }
    }

    # Top nibble: literal bits from the encoding for T32, all ones for
    # unpredicated A32, left alone for predicated A32.
    if ($t32) {
        my @topbits = split(' ', $enc);
        for (my $i = 0; $i < 4; $i++) {
            if ($topbits[$i] eq '1' || $topbits[$i] eq '0') {
                $hexopc |= $topbits[$i] << (31 - $i);
            }
        }
    } elsif (!$pred) {
        $hexopc |= 0xf0000000;
    }

    # The OP_ constant uses '_' where the printed name has '.'.
    my $opname = $name;
    $opname =~ s/\./_/g;
    $opname .= ",";
    $name .= "\",";
    printf " {OP_%-8s 0x%08x, \"%-8s ", $opname, $hexopc, $name;

    # Clean up extra spaces, parens, digits
    $enc =~ s/\s\s+/ /g;
    $rest =~ s/\s\s+/ /g;
    $rest =~ s/\(//g;
    $rest =~ s/\)//g;
    $rest =~ s/\s\d+\s/ /g;
    # Put Rd or Rt first, as dst
    $rest =~ s/(.*) (R[dt])/$2 $1/;
    # Put shift last, in disasm order
    $rest =~ s/imm5 type (.*)/$1 type imm5/;
    $rest =~ s/Rs type (.*)/$1 type Rs/;
    # Rn is (usually) before Rm
    $rest =~ s/Rm (.*) Rn/Rn $1 Rm/;
    # Names of types
    $rest =~ s/imm(\d+)/i$1/g;
    $rest =~ s/type/sh2/g;
    $rest =~ s/Rm/-Rm/ if ($negative);
    # Get the 2nd empty dest in there for SIMD with Q.
    # XXX: do the same for the others!
    $rest =~ s/(VA\w+) (VB\w+)/$2 xx $1/;

    # Print the operand columns, padding with "xx" up to five.  The first
    # two columns are written as a pair: either both "xx" (first operand is
    # not an Rd) or the first operand followed by "xx".
    my @opnds = split(' ', $rest);
    my $opcnt = 0;
    for (my $i = 0; $i <= $#opnds; $i++) {
        if ($opnds[$i] ne '0' && $opnds[$i] ne '1' &&
            (!$simd || ($opnds[$i] ne 'sz' && $opnds[$i] ne 'N' &&
                        $opnds[$i] ne 'M' && $opnds[$i] ne 'F'))) {
            if ($opcnt == 0 && $opnds[$i] !~ /Rd$/) {
                print "xx, xx, ";
                $opcnt += 2;
            }
            # Convert to the new types
            my $toprint = $opnds[$i];
            $toprint =~ s/Rn/RAw/;
            # XXX: convert these based on bit positions up above -- but keep dst
            # vs src info too
            if ($t32) {
                $toprint =~ s/Rd/RCw/;
                $toprint =~ s/Rt/RCw/;
                die "No Rs in T32!\n" if ($toprint =~ /Rs/);
            } else {
                $toprint =~ s/Rd/RBw/;
                $toprint =~ s/Rt/RBw/;
                $toprint =~ s/Rs/RCw/;
            }
            $toprint =~ s/Rm/RDw/;
            print "$toprint, ";
            $opcnt++;
            if ($opcnt == 1) {
                print "xx, ";
                $opcnt++;
            }
        }
    }
    for (my $i = $opcnt; $i < 5; $i++) {
        print "xx, ";
    }
    print (($pred && !$t32) ? "pred" : "no");
    print ", $eflags, END_LIST},";
    if ($PUW) {
        # Specifier indices 3, 4 and 6 are the P, U and W positions.
        my $PUW_str = $bits[3] . $bits[4] . $bits[6];
        print "/*PUW=$PUW_str*/";
    }
    print "/* ($asm) */ /* <$enc> */\n";
}