#!/usr/bin/perl
#
# optab.pl
# Generate source/include files from input file optab-x86.dat
#
# Note: this script sets up the assembler/disassembler files
#       in similar manner to NASM. However, because of NASM's
#       restricted licensing, I was forced to "reinvent the wheel"
#       so to speak. As a result, the overall idea of how this
#       works is similar to NASM, but the actual implementation
#       is different.
#
# $Id: optab.pl,v 1.1.1.1 2004/04/26 00:40:21 pa33 Exp $

my $InputFile          = "optab-x86.dat";
my $SourceOutputFileD  = "dtab-x86.c";
my $SourceOutputFileA  = "atab-x86.c";
my $IncludeOutputFileN = "inames-x86.h";
my $IncludeOutputFile  = "optab-x86.h";

# Announce the file being generated on STDERR, create it, write the
# common "do not edit" banner and return the open write handle.
sub _open_generated {
    my ($path) = @_;

    print STDERR "Generating $path\n";
    open(my $fh, '>', $path) or die "Unable to open $path: $!";
    print $fh "/*\n",
              " * This file was generated by optab.pl - do not edit.\n",
              " */\n\n";

    return $fh;
}

# Close a generated file; buffered write errors only surface here,
# so a failed close is fatal.
sub _close_generated {
    my ($fh, $path) = @_;

    close($fh) or die "Unable to write $path: $!";
    return;
}

# Current line number
my $linenum = 0;

# Offset in our opcode array
my $offset = 0;

# Instruction name -> list of formatted table entries
my %entries_for;

# First opcode byte (two hex digits) -> list of opcode array offsets
my %offsets_for;

# Instruction name -> number of input lines seen for it
my %oparray;

open(my $in, '<', $InputFile) or die "Unable to open $InputFile: $!";

while (my $line = <$in>) {
    $linenum++;

    # skip comments
    next if $line =~ /^\s*#/;

    # Remove newlines
    chomp $line;

    # Split line into whitespace separated fields. The result is
    # assigned explicitly: a bare "split;" no longer fills @_ on
    # modern perls.
    my @fields = split ' ', $line;

    # skip blank lines
    next if !@fields;

    # Make sure there are at least three fields in the line
    if (@fields < 3) {
        warn "Line $linenum contains less than three fields\n";
        next;
    }

    # Format the line - convert intel syntax to our bitmask names etc.
    # The & sigil is required: "format" is a Perl keyword, so a plain
    # format(...) call would be parsed as a format declaration.
    my ($entry) = &format(@fields);

    if ($entry) {
        push @{ $entries_for{$fields[0]} }, $entry;

        # Our opcodes are stored as two digits separated by
        # commas (XX,YY,ZZ), so extract the first byte of the
        # opcode, and file the current offset under it
        my $first_byte = substr($fields[2], 0, 2);
        push @{ $offsets_for{$first_byte} }, $offset;
    }

    $oparray{$fields[0]}++;

    # NOTE(review): offsets count entries in input-file order, while
    # Instructions[] below is emitted grouped by sorted instruction
    # name. The two only agree if the data file is already grouped and
    # sorted that way - confirm against optab-x86.dat.
    $offset++;
}

close $in;

my @opcodes = sort keys %oparray;

my ($path, $out);

# Generate assembly table file

$path = "../source/$SourceOutputFileA";
$out  = _open_generated($path);

print $out "#include \"common-x86.h\"\n",
           "#include \"operands-x86.h\"\n",
           "#include \"$IncludeOutputFile\"\n",
           "#include \"regs-x86.h\"\n",
           "\n";

# One terminated table per instruction name
for my $name (@opcodes) {
    print $out "static struct x86OpCode Instruction_${name}[] = {\n";
    print $out " $_\n" for @{ $entries_for{$name} || [] };
    print $out " { -1, -1, { -1, -1, -1 }, 0, -1, -1, { -1, -1, -1 } }\n};\n\n";
}

# Table of per-instruction tables, in the same (sorted) order as the
# I_xxx enum written to the include file below
print $out "struct x86OpCode *x86Instructions[] = {";
print $out join(",", map { "\n Instruction_${_}" } @opcodes);
print $out "\n};\n";

_close_generated($out, $path);

# Generate disassembly table file

$path = "../source/$SourceOutputFileD";
$out  = _open_generated($path);

print $out "#include \"common-x86.h\"\n",
           "#include \"operands-x86.h\"\n",
           "#include \"$IncludeOutputFile\"\n",
           "#include \"regs-x86.h\"\n";

# Every entry of every instruction in one flat array
print $out "\nstatic struct x86OpCode Instructions[] = {\n";
for my $name (@opcodes) {
    print $out " $_\n" for @{ $entries_for{$name} || [] };
}
print $out " { 0, 0, { 0, 0, 0 }, 0, 0, 0, { 0, 0, 0 } }\n";
print $out "};\n\n";

# For each possible first opcode byte, a null terminated list of
# pointers into Instructions[]
for my $byte (0 .. 255) {
    my $hex = sprintf "%02X", $byte;

    print $out "static struct x86OpCode *OpCode_${hex}[] = {\n";
    print $out " Instructions + $_,\n" for @{ $offsets_for{$hex} || [] };
    print $out " 0\n};\n\n";
}

print $out "struct x86OpCode **x86OpCodes[] = {\n";
printf $out " OpCode_%02X,\n", $_ for 0 .. 255;
print $out "};\n";

_close_generated($out, $path);

# Generate instruction names include file

$path = "../include/$IncludeOutputFileN";
$out  = _open_generated($path);

print $out "#ifndef INCLUDED_inames_x86_h\n",
           "#define INCLUDED_inames_x86_h\n\n";

# Lower case instruction names, in the same order as the I_xxx enum
print $out "static const char *x86InstructionNames[] = {";
print $out join(",", map { (my $lower = $_) =~ tr/A-Z/a-z/; "\n \"$lower\"" } @opcodes);
print $out "\n};\n";

print $out "\n#endif /* INCLUDED_inames_x86_h */\n";

_close_generated($out, $path);

# Generate opcode table include file

$path = "../include/$IncludeOutputFile";
$out  = _open_generated($path);

print $out "#ifndef INCLUDED_optab_x86_h\n",
           "#define INCLUDED_optab_x86_h\n\n";

print $out "enum\n{\n";
print $out join(",\n", map { " I_$_" } @opcodes), "\n";
print $out "};\n";
print $out "\n";

print $out "extern struct x86OpCode **x86OpCodes[];\n";
print $out "extern struct x86OpCode *x86Instructions[];\n";
#print $out "extern const char *x86InstructionNames[];\n";

print $out "\n#endif /* INCLUDED_optab_x86_h */\n";

_close_generated($out, $path);

# This function takes an array of strings as input, formats
# them, and returns an array entry for our Instructions[]
# array.
#
# Inputs: arg1 - instruction name
#         arg2 - list of operands separated by commas
#         arg3 - list of opcode bytes separated by commas
#         arg4 - (optional) additional info such as modrm values
#
# Returns: the C initializer text for one table entry, of the form
#          { I_<name>, <operand count>, { <operand flags> },
#            "<opcode bytes>", <byte count>, <modrm>, { <operand args> } },
#
# Note: "format" is also a Perl keyword, so this sub has to be called
#       with an explicit sigil: &format(...)
sub format {
    my ($inst, $operands, $codes, $modrm) = @_;

    # format the operands
    #
    # The order of these rewrites matters: e.g. "mem" must become
    # "memory" before "moffs"/"rm" introduce "memoffs"/"regmem",
    # otherwise those would be rewritten a second time.
    for ($operands) {
        s/fpureg/reg_fpu/g;
        s/imm(\d+)/imm|bits$1/g;
        s/imm/immediate/g;
        s/mem(\d+)/mem|bits$1/g;
        s/mem/memory/g;
        s/mmxreg/reg_mmx/g;
        s/moffs/memoffs/g;
        s/rel(\d+)/relative|bits$1/g;
        s/rm(\d+)/regmem|bits$1/g;
        s/segreg/reg_sr/g;
        s/xmmreg/reg_xmm/g;
        s/conreg/reg_control/g;
        s/debreg/reg_debug/g;
        s/ptr(\d+):(\d+)/seg$1|off$2/g;
    }

    # A plain "rm" is a register/memory operand, unless the operand
    # list uses the regNN_rm notation handled per operand below
    if ($operands !~ /_rm/) {
        $operands =~ s/rm/regmem/g;
    }

    # Add a BITS32 to OFF32 opcodes so they will get priority over
    # their 16 bit versions, if any
    $operands =~ s/off32/off32|bits32/g;

    # Handle rDD(xxx) situations, one operand at a time
    my @ops  = split(/,/, $operands, 3);
    my @args = ('-1') x 3;

    for my $i (0 .. 2) {
        if (defined $ops[$i]) {
            ($ops[$i], $args[$i]) = _format_operand($ops[$i]);
        }
        else {
            $ops[$i] = '';
        }
    }

    # The operand count is the position of the last non-empty operand;
    # unused trailing slots are filled with 0
    my ($opcnt, $finalop);

    if (length($ops[2]) != 0) {
        $opcnt   = 3;
        $finalop = "$ops[0], $ops[1], $ops[2]";
    }
    elsif (length($ops[1]) != 0) {
        $opcnt   = 2;
        $finalop = "$ops[0], $ops[1], 0";
    }
    elsif (length($ops[0]) != 0) {
        $opcnt   = 1;
        $finalop = "$ops[0], 0, 0";
    }
    else {
        $opcnt   = 0;
        $finalop = "0, 0, 0";
    }

    # "void" and "void32" are placeholders for "no operands"
    if ($operands eq 'void') {
        $finalop = '0, 0, 0';
        $opcnt   = 0;
    }
    if ($operands eq 'void32') {
        $finalop = 'BITS32, 0, 0';
        $opcnt   = 0;
    }

    $finalop =~ tr/a-z/A-Z/;

    # We must convert opargs to uppercase in case they hold
    # a register index (ie: r_al -> R_AL)
    tr/a-z/A-Z/ for @args;

    # codes will come in looking something like: F1,F2,F3
    # Replace the commas with a \x and stick a \x on the front
    # to get the first opcode (\xF1\xF2\xF3)
    my $oplen = ($codes =~ tr/,//) + 1;    # bytes = commas + 1
    $codes =~ s/,/\\x/g;
    $codes = "\\x" . $codes;

    # If there is no digit associated with the opcode, make the field
    # -1. If the digit is /r, make the field "REGRM", otherwise it's
    # numerical value. A value of "c" indicates a register code
    # that is added to the opcode.
    my %modrm_name = (
        r => 'REGRM',
        c => 'REGCODE',
        f => 'FPUCODE',
    );

    if (!defined($modrm) || $modrm eq '') {
        $modrm = -1;
    }
    elsif (exists $modrm_name{$modrm}) {
        $modrm = $modrm_name{$modrm};
    }

    return ("{ I_$inst, $opcnt, { $finalop }, \"$codes\", $oplen, $modrm, { $args[0], $args[1], $args[2] } },");
}

# Rewrite a single operand for sub format above.
#
# Inputs: arg1 - one operand, after the global rewrites
#
# Returns: (operand text, operand argument). The argument is "-1"
#          unless the operand names a specific register (rDD(xxx) or
#          r(xxx) -> "r_xxx"), a constant (constDD -> DD) or uses the
#          regDD_rm notation (-> "MODFIELD_RM").
sub _format_operand {
    my ($op) = @_;
    my $arg = "-1";

    if ($op =~ s/r(\d+)\((\w+)\)/register|bits$1/g) {
        $arg = "r_$2";
    }

    if ($op =~ s/r\((\w+)\)/register/g) {
        $arg = "r_$1";
    }

    if ($op =~ s/const(\d+)/constant/g) {
        $arg = $1;
    }

    if ($op =~ s/reg(\d+)_rm/reg$1/g) {
        $arg = "MODFIELD_RM";
    }

    return ($op, $arg);
}