/*
 * libOp
 *
 * Copyright (C) 2000 Patrick Alken
 * This library comes with absolutely NO WARRANTY
 *
 * Should you choose to use and/or modify this source code, please
 * do so under the terms of the GNU General Public License under which
 * this program is distributed.
 *
 * $Id: args-x86.c,v 1.6 2002/01/01 03:47:31 cosine Exp $
 */

#include <stdio.h>
#include <assert.h>

#include "disasm-x86.h"
#include "modsib-x86.h"
#include "prefix-x86.h"

/*
 * libString includes
 */
#include "Strn.h"

/*
 * Forward declarations for the file-local helpers defined below
 * ConstructArguments(). Note that the last parameter of
 * OperandRegmemMemory() is spelled 'operands' here but 'operand'
 * in its definition; both refer to the same single operand flag
 * word.
 */
static int GetRegisterIndex(struct OpCode *ptr, int operand,
                            int digit, int byte);
static unsigned long GetImmediateValue(long flags,
                                       unsigned char **data, int *err);
static int GetSegmentRegister(int modrm);
static int OperandRegister(unsigned char **data, char *str, int *len,
                           struct OpCode *match, unsigned int operand,
                           int opnum);
static void OperandRegmemMemory(unsigned char **data, char *str,
                               int *len, struct OpCode *match,
                               unsigned int operands);
static int OperandRelative(unsigned char **data, char *str, int *len,
                           struct OpCode *match, unsigned int operand);

/*
 * Segment register names, indexed by the 3-bit "Sreg" value that
 * GetSegmentRegister() extracts from the ModR/M byte. The order
 * follows the Intel Instruction Set Reference (ES = 0 ... GS = 5).
 * Values 6 and 7 have no segment register assigned in that list;
 * they map to "?s" so every possible 3-bit index stays inside the
 * table.
 *
 * Both the pointers and the strings are read-only.
 */
static const char *const SegmentRegisters[] = {
  "es",   /* 0 */
  "cs",   /* 1 */
  "ss",   /* 2 */
  "ds",   /* 3 */
  "fs",   /* 4 */
  "gs",   /* 5 */
  "?s",   /* 6 - unassigned */
  "?s"    /* 7 - unassigned */
};

/*
 * Names of all Intel registers known to the disassembler, kept in
 * ASCII-alphabetical order. Code in this file indexes the table
 * with R_* values (for example R_ST0 + FPUCode), so the order here
 * presumably has to match the R_* constants declared outside this
 * file - do not reorder or insert entries without checking them.
 *
 * Both the pointers and the strings are read-only.
 */
static const char *const Registers[] = {
  "ah",
  "al",
  "ax",
  "bh",
  "bl",
  "bp",
  "bx",
  "ch",
  "cl",
  "cr0",
  "cr2",
  "cr3",
  "cr4",
  "cs",
  "cx",
  "dh",
  "di",
  "dl",
  "dr0",
  "dr1",
  "dr2",
  "dr3",
  "dr4",
  "dr5",
  "dr6",
  "dr7",
  "ds",
  "dx",
  "eax",
  "ebp",
  "ebx",
  "ecx",
  "edi",
  "edx",
  "es",
  "esi",
  "esp",
  "fs",
  "gs",
  "mm0",
  "mm1",
  "mm2",
  "mm3",
  "mm4",
  "mm5",
  "mm6",
  "mm7",
  "si",
  "sp",
  "ss",
  "st(0)",
  "st(1)",
  "st(2)",
  "st(3)",
  "st(4)",
  "st(5)",
  "st(6)",
  "st(7)",
  "tr3",
  "tr4",
  "tr5",
  "tr6",
  "tr7",
  "xmm0",
  "xmm1",
  "xmm2",
  "xmm3",
  "xmm4",
  "xmm5",
  "xmm6",
  "xmm7"
};

/*
ConstructArguments()
  Build the textual form of a decoded instruction: an optional
  rep/repe/repne prefix, the instruction name, and then each
  operand of the opcode 'match' in order.

Inputs: data   - address of the pointer into the data buffer; the
                 pointer is advanced past the ModR/M, SIB,
                 immediate, displacement and offset bytes that
                 are consumed while formatting the operands
        outstr - string to store results in. No buffer size is
                 passed, so the caller must supply a buffer large
                 enough for the whole line.
        match  - OpCode structure containing matching opcode

Return: 1 upon success
        0 upon failure

Side effects: On success, 'outstr' will be modified to contain
              the instruction name and its arguments

              On failure, 'outstr' will be modified to contain
              the error message

              Reads decoder state that is not defined in this
              file (PrefixFlags, ModRMOffset, SibOffset,
              SegmentOverride, FPUCode).
*/

int
ConstructArguments(unsigned char **data, char *outstr,
                   struct OpCode *match)

{
  int olen;            /* number of bytes written to outstr */
  int ii;              /* looping */
  int colon;           /* did the previous operand have COLON set? */
  char colonbuf[50];   /* immediate saved from a COLON operand */

  assert(data && *data && outstr && match);

  olen = 0;
  colon = 0;
  *colonbuf = '\0';

  /*
   * If the opcode has a rep prefix, insert "rep"
   * (or "repe" / "repne"; only the first matching flag is used)
   */
  if (PrefixFlags & PX_REP)
    olen += Sprintf(outstr + olen, "rep ");
  else if (PrefixFlags & PX_REPE)
    olen += Sprintf(outstr + olen, "repe ");
  else if (PrefixFlags & PX_REPNE)
    olen += Sprintf(outstr + olen, "repne ");

  /*
   * Store instruction name into outstr
   */
  olen += Sprintf(outstr + olen, "%s", InstructionNames[match->name]);

  for (ii = 0; ii < match->OperandCount; ++ii)
  {
    /*
     * Emit the separator that precedes this operand. At this
     * point 'colon' still describes the PREVIOUS operand.
     */
    if (colon)
    {
      /*
       * The code that would write ':' followed by the value saved
       * in colonbuf is compiled out, so currently no separator at
       * all is written after a COLON operand and colonbuf is
       * never printed.
       */
    #if 0 /* FIXME */
      assert(*colonbuf != '\0');

      outstr[olen++] = ':';
      olen += Sprintf(outstr + olen, "%s", colonbuf);
      *colonbuf = '\0';
    #endif
    }
    else if (ii == 0)
      outstr[olen++] = ' ';  /* not terminated here; the operand
                              * text written next is expected to
                              * supply the terminator */
    else
    {
      outstr[olen++] = ',';
      outstr[olen++] = ' ';
      outstr[olen] = '\0';
    }

    /*
     * Optional distance keyword in front of the operand
     */
    if (match->operands[ii] & NEAR)
      olen += Sprintf(outstr + olen, "near ");
    else if (match->operands[ii] & FAR)
      olen += Sprintf(outstr + olen, "far ");
    else if (match->operands[ii] & SHORT)
      olen += Sprintf(outstr + olen, "short ");

    /*
     * From here on 'colon' describes the CURRENT operand
     */
    if (match->operands[ii] & COLON)
      colon = 1;
    else
      colon = 0;

    /*
     * Format the operand itself. The tests below form one
     * else-if chain, so only the first matching operand kind
     * is used.
     */
    if (match->operands[ii] & (REGISTER | REG_MMX | REG_XMM))
    {
      /* On failure OperandRegister() has stored the message */
      int opret = OperandRegister(data, outstr, &olen, match,
                                  match->operands[ii], ii);
      if (opret == (-1))
        return (0);
    } /* if (match->operands[ii] & (REGISTER | REG_MMX | REG_XMM)) */
    else if (match->operands[ii] & IMMEDIATE)
    {
      unsigned long value;
      int err = 0;

      value = GetImmediateValue(match->operands[ii], data, &err);
      if (err)
      {
        /* Overwrites outstr from the start with the message */
        Sprintf(outstr,
          "GetImmediateValue() failed for instruction: %s",
          InstructionNames[match->name]);
        return (0);
      }

      /*
       * My Sprintf() doesn't do %X :-)
       * (hence the C library sprintf() for the hex output below)
       */
      if (colon)
      {
        /*
         * If this opcode uses a colon, it indicates an
         * address, so the operand orderings are reversed -
         * save this value for later.
         *
         * Note: the only reader of colonbuf is the disabled
         * block at the top of the loop, so the value is
         * currently dropped from the output.
         */
        sprintf(colonbuf, "0x%lx", value);
      }
      else
      {
        olen += sprintf(outstr + olen, "0x%lx", value);
      }
    } /* if (match->operands[ii] & IMMEDIATE) */
    else if (match->operands[ii] & (REGMEM | MEMORY))
    {
      OperandRegmemMemory(data, outstr, &olen, match,
                          match->operands[ii]);
    } /* if (match->operands[ii] & (REGMEM | MEMORY)) */
    else if (match->operands[ii] & RELATIVE)
    {
      /* On failure OperandRelative() has stored the message */
      int opret = OperandRelative(data, outstr, &olen, match,
                                  match->operands[ii]);
      if (opret == (-1))
        return (0);
    } /* if (match->operands[ii] & RELATIVE) */
    else if (match->operands[ii] & REG_FPU)
    {
      /*
       * Floating point stack register - get the register
       * index of Registers[] and add it to our string.
       */

      assert(FPUCode >= 0);
      assert(FPUCode <= 7);

      olen += Sprintf(outstr + olen, "%s",
                      Registers[R_ST0 + FPUCode]);
    } /* if (match->operands[ii] & REG_FPU) */
    else if (match->operands[ii] & REG_SR)
    {
      int sindex;

      if (match->digit == REGRM)
      {
        assert(ModRMOffset != 0);

        /*
         * Step *data over the ModR/M and SIB bytes so they are
         * not taken for immediate data later on
         */
        if (*data == ModRMOffset)
          ++*data;
        if (*data == SibOffset)
          ++*data;
      }

      /*
       * NOTE(review): ModRMOffset is dereferenced here even when
       * match->digit != REGRM, a case the assert above does not
       * cover - confirm it is always set for REG_SR operands.
       */
      sindex = GetSegmentRegister((unsigned char) *ModRMOffset);
      olen += Sprintf(outstr + olen, "%s", SegmentRegisters[sindex]);
    } /* else if (match->operands[ii] & REG_SR) */
    else if (match->operands[ii] & MEMOFFS)
    {
      unsigned long value;
      int err = 0;

      /*
       * For arguments of moffs8, moffs16, and moffs32, we
       * always use a memory offset value of size 16 or 32 bits,
       * depending on the instruction size attributes
       */
      if (PrefixFlags & PX_32BITADDR)
        value = GetImmediateValue(BITS32, data, &err);
      else
        value = GetImmediateValue(BITS16, data, &err);

      assert(err == 0);

      if (PrefixFlags & PX_SEGOVER)
      {
        assert(SegmentOverride != (-1));
        olen += Sprintf(outstr + olen, "%s:",
                        Registers[SegmentOverride]);
      }

      olen += sprintf(outstr + olen, "[+0x%lx]", value);
    } /* else if (match->operands[ii] & MEMOFFS) */
    else if (match->operands[ii] & CONSTANT)
    {
      /*
       * This operand is a numerical constant. The constant's
       * value will be stored in opinfo[ii]
       */
      assert(match->opinfo[ii] != NOOPARG);
      olen += Sprintf(outstr + olen, "%d", match->opinfo[ii]);
    } /* else if (match->operands[ii] & CONSTANT) */
    else if (match->operands[ii] & SEG16)
    {
      unsigned long segment,
                    offset;
      int err = 0;

      /*
       * This operand is ptr16:16, ptr16:32, m16:16, or m16:32. This
       * means we need an expression of the form "segment:offset" where
       * the size of segment is the number of bits on the left of
       * the colon, and the offset is the number of bits on the right.
       *
       * The offset is read from the data first, followed by the
       * 16-bit segment; the output prints them in the opposite
       * order.
       */
      if (match->operands[ii] & OFF16)
        offset = GetImmediateValue(BITS16, data, &err);
      else if (match->operands[ii] & OFF32)
        offset = GetImmediateValue(BITS32, data, &err);
      else
      {
        /*
         * We should *never* get here
         */
        assert(0);
        offset = 0;
      }

      assert(err == 0);

      segment = GetImmediateValue(BITS16, data, &err);
      assert(err == 0);

      olen += sprintf(outstr + olen,
                      "0x%lx:+0x%lx",
                      segment,
                      offset);
    } /* else if (match->operands[ii] & SEG16) */
  } /* for (ii = 0; ii < match->OperandCount; ++ii) */

  return 1;
} /* ConstructArguments() */

/*
SelectRegisterBySize()
  Pick one of three register indices according to the operand
  size bit set in 'flags'. BITS8 is tested first, then BITS16,
  then BITS32.

Return: reg8, reg16 or reg32; -1 if none of the size bits is set
*/

static int
SelectRegisterBySize(unsigned int flags, int reg8, int reg16,
                     int reg32)

{
  if (flags & BITS8)
    return (reg8);
  if (flags & BITS16)
    return (reg16);
  if (flags & BITS32)
    return (reg32);

  return (-1);
} /* SelectRegisterBySize() */

/*
GetRegisterIndex()
  Work out which entry of Registers[] names the given register
  operand.

Input: ptr     - pointer to OpCode we are working on
       operand - index of the operand inside ptr->operands[] and
                 ptr->opinfo[] (first, second, etc)
       digit   - a value from 0 through REGRM means the register
                 is encoded in the reg field of the ModR/M byte;
                 REGCODE means it is encoded as a register code
                 and a fixed code/size mapping is used
       byte    - the ModR/M byte (digit in 0..REGRM) or the
                 register code (digit == REGCODE); ignored
                 otherwise

Return: Index of Registers[] corresponding to this register. If no
        register is matched, returns -1
*/

static int
GetRegisterIndex(struct OpCode *ptr, int operand,
                 int digit, int byte)

{
  unsigned int opflags;

  assert(ptr != 0);

  opflags = ptr->operands[operand];

  /*
   * An operand that names one specific register (R_AL etc)
   * carries the answer in opinfo[] already
   */
  if ((opflags & REGISTER) && (ptr->opinfo[operand] != NOOPARG))
    return (ptr->opinfo[operand]);

  if ((digit >= 0) && (digit <= REGRM))
  {
    unsigned char regfield;

    assert(byte != (-1));

    /*
     * The opcode is defined with /r or /digit: bits 3-5 of the
     * ModR/M byte select the register column, and
     * FindModRegister() maps that column plus the operand flags
     * to the register. See Intel Architecture Software
     * Developer's Manual Vol. 2, table 2-1.
     */
    regfield = ((unsigned char) byte >> 3) & 0x07;
    return (FindModRegister(regfield, opflags));
  }

  if (digit != REGCODE)
    return (-1);

  assert(byte != (-1));

  /*
   * Register code -> register, by operand size. The values come
   * straight from Table 3.1 in the Intel instruction set
   * reference.
   */
  switch (byte)
  {
    case 0:
      return (SelectRegisterBySize(opflags, R_AL, R_AX, R_EAX));
    case 1:
      return (SelectRegisterBySize(opflags, R_CL, R_CX, R_ECX));
    case 2:
      return (SelectRegisterBySize(opflags, R_DL, R_DX, R_EDX));
    case 3:
      return (SelectRegisterBySize(opflags, R_BL, R_BX, R_EBX));
    case 4:
      return (SelectRegisterBySize(opflags, R_AH, R_SP, R_ESP));
    case 5:
      return (SelectRegisterBySize(opflags, R_CH, R_BP, R_EBP));
    case 6:
      return (SelectRegisterBySize(opflags, R_DH, R_SI, R_ESI));
    case 7:
      return (SelectRegisterBySize(opflags, R_BH, R_DI, R_EDI));
    default:
      break;
  } /* switch (byte) */

  /*
   * Register code outside 0-7: we should *never* get here
   */
  return (-1);
} /* GetRegisterIndex() */

/*
GetImmediateValue()
  Read a little-endian immediate value of 1, 2 or 4 bytes from
  '*data' and advance '*data' past it.

Inputs: flags - bitmask variable containing size of immediate
                byte value; BITS8 is tested first, then BITS16,
                then BITS32
        data  - address of the pointer to the bytes to read
        err   - set to 1 if none of the size bits is present in
                'flags'. It is never cleared here, so the caller
                must zero it before the call.

Return: value of the immediate byte(s); 0 if 'err' was set, in
        which case '*data' is left untouched
*/

static unsigned long
GetImmediateValue(long flags, unsigned char **data, int *err)

{
  const unsigned char *bytes;
  unsigned long ret = 0;
  int nbytes;
  int ii;

  if (flags & BITS8)
    nbytes = 1;
  else if (flags & BITS16)
    nbytes = 2;
  else if (flags & BITS32)
    nbytes = 4;
  else
  {
    *err = 1;
    return (ret);
  }

  bytes = *data;

  /*
   * Assemble the value least significant byte first. Each byte
   * is widened to unsigned long BEFORE it is shifted: doing the
   * arithmetic in int would overflow for a top byte of 0x80 or
   * more and, where long is wider than int, sign-extend garbage
   * into the upper half of the result.
   */
  for (ii = 0; ii < nbytes; ++ii)
    ret |= (unsigned long) bytes[ii] << (8 * ii);

  *data += nbytes;

  return (ret);
} /* GetImmediateValue() */

/*
GetSegmentRegister()
  Extract the "Sreg" field (bits 3-5) from a ModR/M byte.

Input: modrm - ModR/M byte value

Return: a value from 0 through 7, usable as an index into
        SegmentRegisters[]
*/

static int
GetSegmentRegister(int modrm)

{
  /*
   * The Intel Instruction Set Reference numbers the segment
   * registers ES = 0, CS = 1, SS = 2, DS = 3, FS = 4, GS = 5,
   * and SegmentRegisters[] is laid out in that same order, so
   * the isolated field can be used as the index directly.
   */
  enum
  {
    SREG_FIELD_MASK = 0x38,   /* binary 00111000 */
    SREG_FIELD_SHIFT = 3
  };
  int field;

  field = modrm & SREG_FIELD_MASK;

  return (field >> SREG_FIELD_SHIFT);
} /* GetSegmentRegister() */

/*
OperandRegister()
  Handles register operands: appends the register name to 'str'.

Inputs: data     - opcode data string; '*data' is stepped over the
                   ModR/M and SIB bytes if it currently points at
                   them
        str      - output string
        len      - output string length, advanced by the number of
                   characters appended
        match    - opcode match
        operand  - match's operand flags (only used in the error
                   message)
        opnum    - index of the operand within 'match'

Return: 1 upon success
        -1 if no register could be determined; 'str' is then
        overwritten from its start with an error message and
        '*len' is left unchanged
*/

static int
OperandRegister(unsigned char **data, char *str, int *len,
                struct OpCode *match, unsigned int operand, int opnum)

{
  int regindex;
  int byteval;

  assert(data && *data && str && len && match);

  if ((match->digit >= 0) && (match->digit <= REGRM))
  {
    /*
     * Register is encoded in the ModR/M byte. Step *data over
     * the ModR/M and SIB bytes so that they are not taken for
     * immediate data later on.
     */
    assert (ModRMOffset != 0);
    byteval = (unsigned char) *ModRMOffset;

    if (*data == ModRMOffset)
      ++*data;
    if (*data == SibOffset)
      ++*data;
  }
  else if (match->digit == REGCODE)
  {
    assert (RegisterCode != (-1));
    byteval = RegisterCode;
  }
  else
    byteval = (-1);

  regindex = GetRegisterIndex(match,
                              opnum,
                              match->digit,
                              byteval);

  if (regindex < 0)
  {
    /*
     * Invalid register - should never happen. 'operand' is an
     * unsigned int, so it is converted explicitly to match the
     * %ld conversion.
     */
    Sprintf(str,
      "Invalid register operand for instruction %s (%ld)",
      InstructionNames[match->name],
      (long) operand);
    return (-1);
  }

  *len += Sprintf(str + *len, "%s", Registers[regindex]);

  return (1);
} /* OperandRegister() */

/*
AppendSizeKeyword()
  Append the size keyword selected by the size bits of 'operand'
  to 'str' and advance '*len'. Only the first matching size bit
  is used; nothing is appended when none is set.
*/

static void
AppendSizeKeyword(char *str, int *len, unsigned int operand)

{
  if (operand & BITS8)
    *len += Sprintf(str + *len, "byte ");
  else if (operand & BITS16)
    *len += Sprintf(str + *len, "word ");
  else if (operand & BITS32)
    *len += Sprintf(str + *len, "dword ");
  else if (operand & BITS64)
    *len += Sprintf(str + *len, "qword ");
  else if (operand & BITS80)
    *len += Sprintf(str + *len, "tword "); /* FPU */
} /* AppendSizeKeyword() */

/*
OperandRegmemMemory()
  Handles rm and mem operands: appends either a register name
  (ModR/M "mod" field of 3) or a bracketed effective address,
  preceded by a size keyword.

Inputs: data    - opcode data string; '*data' is stepped over the
                  ModR/M and SIB bytes and over any displacement
                  bytes that are read
        str     - output string
        len     - length of output string, advanced as text is
                  appended
        match   - opcode match
        operand - match's operand flags

Return: none
*/

static void
OperandRegmemMemory(unsigned char **data, char *str, int *len,
                    struct OpCode *match, unsigned int operand)

{
  unsigned long disp;
  unsigned int dispflags = 0;
  unsigned char mod,
                rmfield;
  int failed;

  assert(*data && str && len && match);

  /*
   * ModRMOffset and ModRMPtr are expected to have been set up
   * by FindOpcode() already. If not, we are in trouble.
   */
  assert (match->digit != (-1));
  assert (ModRMPtr != 0);
  assert (ModRMOffset != 0);

  /*
   * Step *data over the ModR/M and SIB bytes so that they are
   * not taken for immediate values etc. later on
   */
  if (*data == ModRMOffset)
    ++*data;
  if (*data == SibOffset)
    ++*data;

  AppendSizeKeyword(str, len, operand);

  mod = *ModRMOffset >> 6;
  rmfield = *ModRMOffset & 0x07;

  if (mod == 3)
  {
    /*
     * ModR/M bytes C0 -> FF name a register rather than memory,
     * and which register depends on the operand's size
     * attribute. There is no SIB byte to worry about when "mod"
     * is 3.
     */
    int regidx = FindModRegister(rmfield, operand);

    if (regidx != (-1))
      *len += Sprintf(str + *len, "%s", Registers[regidx]);

    return;
  }

  /*
   * Memory operand from here on: optional segment override,
   * then the effective address inside []'s
   */
  if (PrefixFlags & PX_SEGOVER)
  {
    assert(SegmentOverride != (-1));
    *len += Sprintf(str + *len, "%s:",
                    Registers[SegmentOverride]);
  }

  str[*len] = '[';
  ++*len;

  if (!SibPtr)
  {
    if ((mod == 0) && (ModRMPtr->flags != 0))
    {
      /*
       * The disp16 and disp32 entries of tables 2-1 and 2-2:
       * no register, only a displacement whose size is given
       * by the table entry
       */
      dispflags = ModRMPtr->flags;
    }
    else
    {
      *len += Sprintf(str + *len, "%s",
                      ModSibOffsets[ModRMPtr->index]);
    }
  }
  else
  {
    unsigned char base = *SibOffset & 0x07;
    int have_index = (SibPtr->index != M_NONE);

    if (have_index)
    {
      /*
       * Index register, followed by "*factor" when the top two
       * bits of the SIB byte select a scale factor above 1
       */
      unsigned char scale = *SibOffset >> 6;

      *len += Sprintf(str + *len, "%s",
                      ModSibOffsets[SibPtr->index]);

      if (scale > 0)
        *len += Sprintf(str + *len, "*%d", 1 << scale);
    }

    if ((base == 5) && (mod == 0))
    {
      /*
       * The [*] case of the SIB table (Table 2-3 in the
       * Instruction Set Reference): a 32-bit displacement with
       * no base register
       */
      failed = 0;
      disp = GetImmediateValue(BITS32, data, &failed);
      if (failed == 0)
        *len += sprintf(str + *len, "+0x%lx", disp);
    }
    else
    {
      if (have_index)
      {
        str[*len] = '+';
        ++*len;
      }

      *len += Sprintf(str + *len, "%s",
                      SibBaseRegisters[base]);
    }
  } /* if (!SibPtr) ... else */

  /*
   * Unless a displacement size was chosen above, derive it from
   * "mod": 1 means 8 bits, 2 means 16 or 32 bits depending on
   * the address size, anything else means no displacement
   */
  if (!dispflags)
  {
    switch (mod)
    {
      case 1:
        dispflags = BITS8;
        break;

      case 2:
        if (PrefixFlags & PX_32BITADDR)
          dispflags = BITS32;
        else
          dispflags = BITS16;
        break;

      default:
        dispflags = 0;
        break;
    }
  }

  /*
   * With no size bit set GetImmediateValue() reports an error
   * and reads nothing, which is how "no displacement" is
   * handled here
   */
  failed = 0;
  disp = GetImmediateValue(dispflags, data, &failed);
  if (failed == 0)
    *len += sprintf(str + *len, "+0x%lx", disp);

  str[*len] = ']';
  ++*len;
  str[*len] = '\0';
} /* OperandRegmemMemory() */

/*
OperandRelative()
  Handles relative (code offset) operands: reads the offset from
  the data and appends it to 'str' with a leading '+'.

Inputs: data    - opcode data string; '*data' is advanced past
                  the offset bytes
        str     - output string
        len     - length of output string, advanced by the number
                  of characters appended
        match   - opcode match
        operand - match's operand flags, which carry the size of
                  the offset

Return: 1 upon success
        -1 if the offset could not be read; 'str' is then
        overwritten from its start with an error message
*/

static int
OperandRelative(unsigned char **data, char *str, int *len,
                struct OpCode *match, unsigned int operand)

{
  int failed = 0;
  unsigned long offset;

  assert(*data && str && len && match);

  /*
   * A relative address is read exactly like an immediate value;
   * it is only printed differently, with a '+' in front to
   * emphasize that it is a code offset.
   */
  offset = GetImmediateValue(operand, data, &failed);

  if (!failed)
  {
    str[*len] = '+';
    ++*len;
    *len += sprintf(str + *len, "0x%lx", offset);

    return (1);
  }

  Sprintf(str,
    "GetImmediateValue() failed for instruction: %s",
    InstructionNames[match->name]);

  return (-1);
} /* OperandRelative() */
