-- as.e -- Assembler, oh no, not another one!
-- Pete Eberlein -- 13 Sep 2002
--
-- A tiny x86 (32-bit) assembler.  Instructions are described as Euphoria
-- sequences such as {"mov", eax, {edi,ebx,10}} and turned into a sequence
-- of machine-code bytes by assemble().

include machine.e

-- there is a small problem with declaring registers this way;
-- the values #BEEF90..#BEEF97 cannot be used as literal constants
-- or within an address
global constant
    eax = #BEEF90,
    ecx = eax+1,
    edx = eax+2,
    ebx = eax+3,
    esp = eax+4,
    ebp = eax+5,
    esi = eax+6,
    edi = eax+7

-- Every register value that may appear inside a memory expression.
-- Each register occupies four consecutive slots (scale 1, 2, 4, 8), so for
-- a 1-based position p the scale exponent is and_bits(p-1, 3) and an
-- unscaled register satisfies and_bits(p, 3) = 1.  esp has no scaled forms.
constant operands = {
    eax, eax*2, eax*4, eax*8,
    ecx, ecx*2, ecx*4, ecx*8,
    edx, edx*2, edx*4, edx*8,
    ebx, ebx*2, ebx*4, ebx*8,
    ebp, ebp*2, ebp*4, ebp*8,
    esi, esi*2, esi*4, esi*8,
    edi, edi*2, edi*4, edi*8,
    esp}

function autosize(object word, object byte, integer i)
-- Returns 'byte' followed by i as a single element when i fits in a signed
-- byte, otherwise 'word' followed by the 4 bytes from int_to_bytes(i).
    if i < -128 or i > 127 then
        return word & int_to_bytes(i)
    else
        return byte & i
    end if
end function

function is_immediate(object o)
-- True for any atom that is not one of the eight register constants.
    if sequence(o) then
        return 0
    end if
    return find(o, {eax,ecx,edx,ebx,esp,ebp,esi,edi}) = 0
end function

function is_register(object o)
-- True when o is one of the eight register constants.
    return find(o, {eax,ecx,edx,ebx,esp,ebp,esi,edi}) != 0
end function

function is_memory_address(object o)
-- True for a one-element sequence whose element is not a (scaled)
-- register, i.e. a bare address such as {#1234}.
    if atom(o) then
        return 0
    end if
    if length(o) != 1 then
        return 0
    end if
    return find(o[1], operands) = 0
end function

function modrm(integer mod, object rm)
-- Builds the ModR/M byte (plus SIB byte and displacement when needed).
-- 'mod' is placed in bits 3..5 of the first byte (the register number or
-- opcode extension); despite its name it is not the 2-bit mod field.
-- rm can be a register atom, or a memory expression:
-- {eax}, {eax,ebx}, {eax,ebx*4,5}, {ebx*4}, {ebx*4,5}, or {5}
-- {reg,reg,n} means [reg+reg+n] (almost like GAS syntax vs NASM)
    integer reg1, reg2, scale, disp, e
    sequence result

    reg1 = 0    -- base register (0 = none)
    reg2 = 0    -- index register (0 = none)
    scale = 0   -- scale exponent for the index register
    disp = 0    -- sum of all non-register elements

    if atom(rm) then
        -- register direct
        return {#C0 + mod*8 + and_bits(rm,7)}
    end if

    for i = 1 to length(rm) do
        e = find(rm[i], operands)
        if rm[i] = esp then
            if reg1 = 0 and reg2 = 0 then
                -- esp as base needs a SIB byte; index field 4 means "none"
                reg1 = esp
                reg2 = esp
            else
                puts(2, "modrm: Warning! esp must be alone in effective address\n")
            end if
        elsif e then
            if and_bits(e, 3) = 1 and reg1 = 0 then
                -- first unscaled register becomes the base
                reg1 = rm[i]
            elsif reg2 = 0 then
                scale = and_bits(e-1,3)
                reg2 = rm[i] / power(2,scale)
            else
                puts(2, "modrm: Warning! Too many registers in effective address\n")
            end if
        else
            disp += rm[i]
        end if
    end for

    if reg2 then
        result = {mod*8 + 4,
                  scale*#40 + and_bits(reg1,7) + 8*and_bits(reg2,7)}
    else
        result = {mod*8 + and_bits(reg1,7)}
    end if

    if reg1 = 0 then
        -- No base register: field value 5 with mod bits 00 selects a bare
        -- 32-bit displacement.  This must be tested before the "no
        -- displacement" case, otherwise {ebx*4} or {0} would be encoded
        -- with base field 0, which means eax.
        result[length(result)] += 5
        result &= int_to_bytes(disp)
    elsif disp != 0 or reg1 = ebp then
        -- [ebp] has no displacement-free encoding, so it gets a zero disp8
        if disp <= 127 and disp >= -128 then
            result[1] += #40
            result &= disp
        else
            result[1] += #80
            result &= int_to_bytes(disp)
        end if
    end if
    return result
end function

-- when non-zero, smodrm() prints numbers of 10 and above in hexadecimal
global integer hex_preference
hex_preference = 1

function smodrm(object rm)
-- converts register/memory/immediate to a string
    sequence result, regs
    integer index, scale

    regs = {"eax","ecx","edx","ebx","esp","ebp","esi","edi"}
    if is_register(rm) then
        result = regs[1+rm-eax]
    elsif sequence(rm) then
        result = "["
        for i = 1 to length(rm) do
            if rm[i] then   -- zero terms are not printed
                if i > 1 then
                    result &= "+"
                end if
                index = find(rm[i], operands)
                if index then
                    scale = power(2,and_bits(index-1,3))
                    result &= regs[1-eax + rm[i] / scale]
                    if scale > 1 then
                        result &= sprintf("*%d",{scale})
                    end if
                elsif rm[i] < 10 or hex_preference = 0 then
                    result &= sprintf("%d",{rm[i]})
                else
                    result &= sprintf("0x%x",{rm[i]})
                end if
            end if
        end for
        result &= "]"
    elsif rm < 10 or hex_preference = 0 then
        result = sprintf("%d", {rm})
    else
        result = sprintf("0x%x", {rm})
    end if
    return result
end function

function multi_op(integer op, object dst, object src)
-- Encodes one of the eight two-operand ALU instructions.
-- ops0-7: add, or, adc, sbb, and, sub, xor, cmp
-- Prints a message (and then falls off the end of the function) when the
-- operand combination is not supported.
    if atom(dst) then
        if dst >= eax and dst <= edi then
            dst = and_bits(dst, 7)
            if dst = 0 and is_immediate(src) then
                -- op eax, immediate (short accumulator form, 32-bit immediate)
                return (op*8+#05) & int_to_bytes(src)
            elsif is_immediate(src) then
                -- op reg, immediate (8-bit or 32-bit immediate)
                return autosize({#81,op*8+dst+#C0},{#83,op*8+dst+#C0},src)
            else
                -- op reg, register or memory expression
                return (op*8+#03) & modrm(dst, src)
            end if
        end if
        -- op immediate, ... is not encodable
    else
        if is_register(src) then
            -- op memory expression, register
            return (op*8+#01) & modrm(and_bits(src,7), dst)
        elsif is_immediate(src) then
            -- op memory expression, immediate
            return autosize(#81 & modrm(op, dst), #83 & modrm(op, dst), src)
        end if
    end if
    puts(1, "multi_op: Not a valid combination of operands\n")
end function

function add(object dst, object src)
    return multi_op(0, dst, src)
end function

function Or (object dst, object src)
    return multi_op(1, dst, src)
end function

function adc(object dst, object src)
    return multi_op(2, dst, src)
end function

function sbb(object dst, object src)
    return multi_op(3, dst, src)
end function

function And(object dst, object src)
    return multi_op(4, dst, src)
end function

function sub(object dst, object src)
    return multi_op(5, dst, src)
end function

function Xor(object dst, object src)
    return multi_op(6, dst, src)
end function

function cmp(object dst, object src)
    return multi_op(7, dst, src)
end function

-- Single-operand group: opcode #F7 is the 32-bit form, matching the 32-bit
-- registers and encodings used everywhere else in this file (#F6 would
-- operate on 8-bit operands).  The second modrm() argument selects the
-- operation.
function Not(object dst)
    return #F7 & modrm(2,dst)
end function

function neg(object dst)
    return #F7 & modrm(3,dst)
end function

function mul (object dst)
    return #F7 & modrm(4,dst)
end function

function imul(object dst)
    return #F7 & modrm(5,dst)
end function

function div (object dst)
    return #F7 & modrm(6,dst)
end function

function idiv(object dst)
    return #F7 & modrm(7,dst)
end function

function mov (object dst, object src)
-- Encodes mov dst, src.  Prints a message (and then falls off the end of
-- the function) when the operand combination is not supported.
    if atom(dst) then
        if dst >= eax and dst <= edi then
            dst = and_bits(dst, 7)
            if is_immediate(src) then
                -- mov reg, immediate
                return (#B8 + dst) & int_to_bytes(src)
            elsif dst = 0 and is_memory_address(src) then
                -- mov eax, mem
                return #A1 & int_to_bytes(src[1])
            else
                -- mov reg, register or memory expression
                return #8B & modrm(dst, src)
            end if
        end if
        -- mov immediate, ... is not encodable
    else
        if is_memory_address(dst) and equal(src, eax) then
            -- mov mem, eax
            return #A3 & int_to_bytes(dst[1])
        elsif is_register(src) then
            -- mov memory expression, register
            return #89 & modrm(and_bits(src,7), dst)
        elsif is_immediate(src) then
            -- mov memory expression, immediate
            return #C7 & modrm(0, dst) & int_to_bytes(src)
        end if
    end if
    puts(1, "mov: Not a valid combination of operands\n")
end function

function lea (object dst, object src)
-- Encodes lea dst, src where dst is a register and src a memory expression.
    if is_register(dst) and sequence(src) then
        -- the destination register goes in the reg field and the memory
        -- expression is the r/m operand
        return #8D & modrm(and_bits(dst,7), src)
    end if
    puts(1, "lea: Not a valid combination of operands\n")
end function

function push (object src)
-- Encodes push of an immediate, a register, or a memory expression.
    if is_immediate(src) then
        return #68 & int_to_bytes(src)
    elsif is_register(src) then
        return {#50 + and_bits(src,7)}
    end if
    return #FF & modrm(6,src)
end function

function pushad ()
    return {#60}
end function

function popad ()
    return {#61}
end function

function call_near (object dst)
    return #FF & modrm(2, dst)
end function

function ret (integer i)
-- Plain ret when i is 0, otherwise ret with a 16-bit immediate (low byte
-- first).
    if i then
        return #C2 & and_bits({i,floor(i/256)},255)
    else
        return {#C3}
    end if
end function

function label(sequence s)
-- Labels produce no code.
    return {}
end function

-- Jump mnemonics and their opcodes.  The three tables are parallel: the
-- position of a mnemonic in kind_jumps selects the entry of short_jumps
-- (8-bit displacement) and near_jumps (32-bit displacement).
constant
--  kind_jumps={"JO","JNO","JB","JNAE","JNB","JAE","JZ","JE","JNZ","JNE","JBE",
--  "JNA","JNBE","JA","JS","JNS","JP","JPE","JNP","JPO","JL","JNGE","JNL","JGE",
--  "JLE","JNG","JNLE","JG","JMP"},
    kind_jumps={"jo","jno","jb","jnae","jnb","jae","jz","je","jnz","jne","jbe",
    "jna","jnbe","ja","js","jns","jp","jpe","jnp","jpo","jl","jnge","jnl","jge",
    "jle","jng","jnle","jg","jmp"},
    short_jumps={#70,#71,#72,#72,#73,#73,#74,#74,#75,#75,#76,#76,#77,#77,
    #78,#79,#7A,#7A,#7B,#7B,#7C,#7C,#7D,#7D,#7E,#7E,#7F,#7F,#EB},
    near_jumps={{#0F,#80},{#0F,#81},{#0F,#82},{#0F,#82},{#0F,#83},{#0F,#83},
    {#0F,#84},{#0F,#84},{#0F,#85},{#0F,#85},{#0F,#86},{#0F,#86},{#0F,#87},
    {#0F,#87},{#0F,#88},{#0F,#89},{#0F,#8A},{#0F,#8A},{#0F,#8B},{#0F,#8B},
    {#0F,#8C},{#0F,#8C},{#0F,#8D},{#0F,#8D},{#0F,#8E},{#0F,#8E},{#0F,#8F},
    {#0F,#8F},{#E9}}

include smart.e

function jump_blocks(sequence blocks)
-- Lays out code blocks and resolves the jump that ends each of them.
-- blocks: {{{code},condition,target1[,target2]},...}
-- target is now relative: it is a block count, where 0 is the start of the
-- block itself and 1 is the following block.  A target of 1 is a plain
-- fall-through and emits no jump at all.
-- Returns the concatenated machine code.
    sequence result
    sequence offsets, deltas
    integer ok, kind

    deltas = repeat(0, length(blocks))
    -- offsets[i] is the byte position where block i starts; the extra last
    -- entry is the total size.  Start by assuming every jump is short
    -- (2 bytes).
    offsets = repeat(0, length(blocks)+1)
    for i = 1 to length(blocks) do
        offsets[i+1] = offsets[i] + length(blocks[i][1]) + 2*(blocks[i][3]!=1)
    end for

    -- Widen jumps that do not fit in a signed byte until nothing changes.
    ok = 0
    while not ok do
        ok = 1
        --? {deltas,offsets}
        for i = 1 to length(blocks) do
            if deltas[i] < -128 or deltas[i] > 127 then
                -- already widened: just refresh the distance
                deltas[i] = offsets[i+blocks[i][3]] - offsets[i+1]
            else
                deltas[i] = offsets[i+blocks[i][3]] - offsets[i+1]
                if deltas[i] < -128 or deltas[i] > 127 then
                    -- a near jump is opcode + 4 bytes instead of 2 bytes
                    kind = find(blocks[i][2], kind_jumps)
                    offsets[i+1..length(offsets)] += length(near_jumps[kind])+2
                    ok = 0
                    --exit
                end if
            end if
        end for
    end while

    result = repeat(0,offsets[length(offsets)])
    for i = 1 to length(blocks) do
        if blocks[i][3] = 1 then
            -- Fall-through: no room was reserved for a jump.  This tests
            -- the target rather than deltas[i] = 0, because a real jump
            -- can also have a zero distance (when the blocks in between
            -- are empty) and still occupies its 2 reserved bytes.
            result[offsets[i]+1..offsets[i+1]] = blocks[i][1]
        elsif deltas[i] < -128 or deltas[i] > 127 then
            result[offsets[i]+1..offsets[i+1]] = blocks[i][1] &
                near_jumps[find(blocks[i][2], kind_jumps)] &
                int_to_bytes(deltas[i])
        else
            --puts(1,blocks[i][2]&"\n")
            result[offsets[i]+1..offsets[i+1]] = blocks[i][1] &
                short_jumps[find(blocks[i][2], kind_jumps)] & deltas[i]
        end if
    end for
    return result
end function

function insert_blocks(sequence blocks, integer index)
-- Inserts index into the ascending list 'blocks' unless it is already
-- present, and returns the updated list.
    for b = 1 to length(blocks)+1 do
        if b=length(blocks)+1 then
            -- larger than every element: append
            blocks &= index
        elsif blocks[b] = index then
            exit
        elsif blocks[b] > index then
            blocks = blocks[1..b-1] & index & blocks[b..length(blocks)]
            exit
        end if
    end for
    return blocks
end function

global function assemble(sequence st)
-- Assembles a list of statements into machine code and prints a listing
-- on standard output while doing so.
-- st is {{"mov", eax, {esp}},{"jmp", -1},...)
-- A statement whose name ends in ':' is a label; a jump takes either a
-- label name (without the colon) or a numeric statement offset.
-- jump offsets are relative to the jmp instruction (0 is infinite loop)
    sequence result, blocks, offsets, code
    integer index

    -- 'blocks' collects, in ascending order, every statement index where a
    -- new block starts: each jump target and each statement following a
    -- jump.  length(st)+1 marks the end of the program.
    blocks = {length(st)+1}
    for i = 1 to length(st) do
        if find(st[i][1], kind_jumps) then
            if sequence(st[i][2]) then
                blocks = insert_blocks(blocks, find({st[i][2]&':'},st))
            else
                blocks = insert_blocks(blocks, i+st[i][2])
            end if
            blocks = insert_blocks(blocks, i+1)
        end if
    end for
    if blocks[1] = 1 then
        -- the first block implicitly starts at statement 1
        blocks = blocks[2..length(blocks)]
    end if

    -- result[i] = {code, jump kind, target}; the default is a fall-through
    -- into the next block.
    result = repeat({{},"jmp",1}, length(blocks))
    -- offsets[s] is the number of the block containing statement s.  The
    -- extra last entry stands for "past the last statement": it must be
    -- the block after the last one, so that a final block which falls off
    -- the end gets relative target 1 (no jump emitted) and not 0 (a jump
    -- back to its own start).
    offsets = repeat(0, length(st)) & (length(blocks)+1)
    index = 1
    for i = 1 to length(blocks) do
        result[i][3] = blocks[i]
        while index < blocks[i] do
            offsets[index] = i
            if find(st[index][1], kind_jumps) then
                -- a jump always ends its block
                result[i][2] = st[index][1]
                if sequence(st[index][2]) then
                    printf(1, "\t\t%s %s\n", st[index])
                    result[i][3] = find({st[index][2]&':'},st)
                else
                    printf(1, "\t\t%s %d\n", st[index])
                    result[i][3] = st[index][2]+index
                end if
                index += 1
                exit
            elsif st[index][1][length(st[index][1])] = ':' then
                printf(1, "\t\t%s\n", st[index])
            else
                -- ordinary instruction: call the encoder of the same name
                code = call_func(routine_id(st[index][1]),
                                 st[index][2..length(st[index])])
                result[i][1] &= code
                for j = 1 to length(code) do
                    printf(1, "%02x", {code[j]})
                end for
                if length(code) < 4 then
                    puts(1, "\t")
                end if
                puts(1, "\t"&st[index][1]&" ")
                for j = 2 to length(st[index]) do
                    if j > 2 then
                        puts(1, ", ")
                    end if
                    puts(1, smodrm(st[index][j]))
                end for
                puts(1, "\n")
            end if
            index += 1
        end while
        puts(1, "\n")
    end for

    -- turn absolute statement targets into block-relative targets
    for i = 1 to length(result) do
        result[i][3] = offsets[result[i][3]] - offsets[blocks[i]-1]
    end for
    --register optimization would happen here
    return jump_blocks(result)
end function

--? (assemble({
--{"mov",eax,5},
--{"mov",{edi},3},
--{"mov",eax,{edi,ebx,10}},
--{"jmp","label0"},
--{"label0:"},
--{"mov",esp,eax},
--{"jmp",-1}
--}))