From 8beb98dd715bf64b7c2a8a4c3f517dc7d05c568b Mon Sep 17 00:00:00 2001
From: Enrico Lumetti
Date: Sat, 7 May 2022 16:31:33 +0200
Subject: [PATCH] Parse header of serialized bytecode

---
Review note: the contents of microLua.lua, readbc.lua, tests/generate.sh
and vm.lua were amended after this patch was generated, so the blob ids on
their "index" lines are stale.

 .gitignore        |  1 +
 metamethods.txt   | 22 +++++++++++
 microLua.lua      | 10 ++++-
 opcodes.txt       | 99 +++++++++++++++++++++++++++++++++++++++++++++++
 readbc.lua        | 76 ++++++++++++++++++++++++++++++++++++
 tests/generate.sh | 12 ++++++
 tests/test1.lua   |  1 +
 vm.lua            | 89 ++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 309 insertions(+), 1 deletion(-)
 create mode 100644 .gitignore
 create mode 100644 metamethods.txt
 create mode 100644 opcodes.txt
 create mode 100644 readbc.lua
 create mode 100755 tests/generate.sh
 create mode 100644 tests/test1.lua
 create mode 100644 vm.lua

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..07b0e87
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.luo
diff --git a/metamethods.txt b/metamethods.txt
new file mode 100644
index 0000000..e75d0a0
--- /dev/null
+++ b/metamethods.txt
@@ -0,0 +1,22 @@
+_(index)
+_(newindex)
+_(gc)
+_(mode)
+_(eq)
+_(len)
+_(lt)
+_(le)
+_(concat)
+_(call)
+_(add)
+_(sub)
+_(mul)
+_(div)
+_(mod)
+_(pow)
+_(unm)
+_(metatable)
+_(tostring)
+MMDEF_FFI(_)
+MMDEF_PAIRS(_)
+
diff --git a/microLua.lua b/microLua.lua
index f301245..02e9e11 100644
--- a/microLua.lua
+++ b/microLua.lua
@@ -1 +1,9 @@
-print("Hello World!")
+-- Entry point: parse the header of a serialized bytecode file and print
+-- the chunk name stored in it.
+local readbc = require('readbc')
+
+-- Bytecode is binary data, so open in 'rb'; fail loudly if it is missing.
+local fp = assert(io.open('tests/test1.luo', 'rb'))
+local header = readbc.read_header(fp)
+fp:close()
+print(header.name)
diff --git a/opcodes.txt b/opcodes.txt
new file mode 100644
index 0000000..f5060b1
--- /dev/null
+++ b/opcodes.txt
@@ -0,0 +1,99 @@
+0 _(ISLT, var, ___, var, lt) \
+1 _(ISGE, var, ___, var, lt) \
+2 _(ISLE, var, ___, var, le) \
+3 _(ISGT, var, ___, var, le) \
+4 _(ISEQV, var, ___, var, eq) \
+5 _(ISNEV, var, ___, var, eq) \
+6 _(ISEQS, var, ___, str, eq) \
+7 _(ISNES, var, ___, str, eq) \
+8 _(ISEQN, var, ___, num, eq) \
+9 _(ISNEN, var, ___, num, eq) \
+10 _(ISEQP, var, ___, pri, eq) \
+11 _(ISNEP, var, ___, pri, eq) \
+12 _(ISTC, dst, ___, var, ___) \
+13 _(ISFC, dst, ___, var, ___) \
+14 _(IST, ___, ___, var, ___) \
+15 _(ISF, ___, ___, var, ___) \
+16 _(ISTYPE, var, ___, lit, ___) \
+17 _(ISNUM, var, ___, lit, ___) \
+18 _(MOV, dst, ___, var, ___) \
+19 _(NOT, dst, ___, var, ___) \
+20 _(UNM, dst, ___, var, unm) \
+21 _(LEN, dst, ___, var, len) \
+22 _(ADDVN, dst, var, num, add) \
+23 _(SUBVN, dst, var, num, sub) \
+24 _(MULVN, dst, var, num, mul) \
+25 _(DIVVN, dst, var, num, div) \
+26 _(MODVN, dst, var, num, mod) \
+27 _(ADDNV, dst, var, num, add) \
+28 _(SUBNV, dst, var, num, sub) \
+29 _(MULNV, dst, var, num, mul) \
+30 _(DIVNV, dst, var, num, div) \
+31 _(MODNV, dst, var, num, mod) \
+32 _(ADDVV, dst, var, var, add) \
+33 _(SUBVV, dst, var, var, sub) \
+34 _(MULVV, dst, var, var, mul) \
+35 _(DIVVV, dst, var, var, div) \
+36 _(MODVV, dst, var, var, mod) \
+37 _(POW, dst, var, var, pow) \
+38 _(CAT, dst, rbase, rbase, concat) \
+39 _(KSTR, dst, ___, str, ___) \
+40 _(KCDATA, dst, ___, cdata, ___) \
+41 _(KSHORT, dst, ___, lits, ___) \
+42 _(KNUM, dst, ___, num, ___) \
+43 _(KPRI, dst, ___, pri, ___) \
+44 _(KNIL, base, ___, base, ___) \
+45 _(UGET, dst, ___, uv, ___) \
+46 _(USETV, uv, ___, var, ___) \
+47 _(USETS, uv, ___, str, ___) \
+48 _(USETN, uv, ___, num, ___) \
+49 _(USETP, uv, ___, pri, ___) \
+50 _(UCLO, rbase, ___, jump, ___) \
+51 _(FNEW, dst, ___, func, gc) \
+52 _(TNEW, dst, ___, lit, gc) \
+53 _(TDUP, dst, ___, tab, gc) \
+54 _(GGET, dst, ___, str, index) \
+55 _(GSET, var, ___, str, newindex) \
+56 _(TGETV, dst, var, var, index) \
+57 _(TGETS, dst, var, str, index) \
+58 _(TGETB, dst, var, lit, index) \
+59 _(TGETR, dst, var, var, index) \
+60 _(TSETV, var, var, var, newindex) \
+61 _(TSETS, var, var, str, newindex) \
+62 _(TSETB, var, var, lit, newindex) \
+63 _(TSETM, base, ___, num, newindex) \
+64 _(TSETR, var, var, var, newindex) \
+65 _(CALLM, base, lit, lit, call) \
+66 _(CALL, base, lit, lit, call) \
+67 _(CALLMT, base, ___, lit, call) \
+68 _(CALLT, base, ___, lit, call) \
+69 _(ITERC, base, lit, lit, call) \
+70 _(ITERN, base, lit, lit, call) \
+71 _(VARG, base, lit, lit, ___) \
+72 _(ISNEXT, base, ___, jump, ___) \
+73 _(RETM, base, ___, lit, ___) \
+74 _(RET, rbase, ___, lit, ___) \
+75 _(RET0, rbase, ___, lit, ___) \
+76 _(RET1, rbase, ___, lit, ___) \
+77 _(FORI, base, ___, jump, ___) \
+78 _(JFORI, base, ___, jump, ___) \
+79 _(FORL, base, ___, jump, ___) \
+80 _(IFORL, base, ___, jump, ___) \
+81 _(JFORL, base, ___, lit, ___) \
+82 _(ITERL, base, ___, jump, ___) \
+83 _(IITERL, base, ___, jump, ___) \
+84 _(JITERL, base, ___, lit, ___) \
+85 _(LOOP, rbase, ___, jump, ___) \
+86 _(ILOOP, rbase, ___, jump, ___) \
+87 _(JLOOP, rbase, ___, lit, ___) \
+88 _(JMP, rbase, ___, jump, ___) \
+89 _(FUNCF, rbase, ___, ___, ___) \
+90 _(IFUNCF, rbase, ___, ___, ___) \
+91 _(JFUNCF, rbase, ___, lit, ___) \
+92 _(FUNCV, rbase, ___, ___, ___) \
+93 _(IFUNCV, rbase, ___, ___, ___) \
+94 _(JFUNCV, rbase, ___, lit, ___) \
+95 _(FUNCC, rbase, ___, ___, ___) \
+96 _(FUNCCW, rbase, ___, ___, ___)
+
+
diff --git a/readbc.lua b/readbc.lua
new file mode 100644
index 0000000..70994f5
--- /dev/null
+++ b/readbc.lua
@@ -0,0 +1,76 @@
+-- module for reading serialized lua bytecode
+
+local bit = require('bit')
+
+local HEADER_MAGIC = {27, string.byte('L'), string.byte('J')}
+local HEADER_FLAG_STRIP = 0x02
+
+-- Read exactly n bytes from fp; raise an error on a short read (EOF).
+local function read_bytes(fp, n)
+    if n == 0 then
+        return ''
+    end
+    local data = fp:read(n)
+    if data == nil or #data < n then
+        error('unexpected end of bytecode stream')
+    end
+    return data
+end
+
+-- Decode one unsigned LEB128 integer from fp.
+-- Each byte carries 7 payload bits, least significant group first; bit 7
+-- is clear only on the last byte.
+local function read_uleb128(fp)
+    local res = 0
+    -- Weight of the current 7-bit group. Plain arithmetic replaces
+    -- bit.lshift, whose results are signed 32-bit integers.
+    local scale = 1
+    while true do
+        local uleb_byte = string.byte(read_bytes(fp, 1))
+        res = res + bit.band(uleb_byte, 0x7f) * scale
+        if bit.band(uleb_byte, 0x80) == 0 then
+            return res
+        end
+        scale = scale * 128
+    end
+end
+
+-- Parse the header of a bytecode dump: magic, version, flags and, unless
+-- the STRIP flag is set, the chunk name.
+-- Returns a table with the fields magic (array of 3 byte values),
+-- version, flags and name (empty string when STRIP is set).
+-- Raises an error if the stream is truncated or the magic does not match.
+local function read_header(fp)
+    local magic = { string.byte(read_bytes(fp, 3), 1, 3) }
+    for i = 1, #HEADER_MAGIC do
+        if magic[i] ~= HEADER_MAGIC[i] then
+            error('bad bytecode header magic')
+        end
+    end
+    local version = string.byte(read_bytes(fp, 1))
+    local flags = read_uleb128(fp)
+
+    local name
+    if bit.band(flags, HEADER_FLAG_STRIP) ~= 0 then
+        name = ''
+    else
+        local nameLen = read_uleb128(fp)
+        name = read_bytes(fp, nameLen)
+    end
+
+    return {
+        magic = magic,
+        version = version,
+        flags = flags,
+        name = name,
+    }
+end
+
+-- TODO: prototype parsing is not implemented yet.
+local function read_proto(fp)
+end
+
+return {
+    read_header = read_header,
+    read_proto = read_proto,
+}
diff --git a/tests/generate.sh b/tests/generate.sh
new file mode 100755
index 0000000..4f23b79
--- /dev/null
+++ b/tests/generate.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+# Compile every Lua test in this directory to LuaJIT bytecode (.luo),
+# keeping debug info (-g).
+
+# Work relative to the script so it can be run from any directory.
+cd "$(dirname "$0")" || exit 1
+
+for f in *.lua; do
+    # An unmatched glob stays literal; skip it.
+    [ -e "$f" ] || continue
+    luajit -bg "$f" "${f%.lua}.luo"
+done
diff --git a/tests/test1.lua b/tests/test1.lua
new file mode 100644
index 0000000..5315e97
--- /dev/null
+++ b/tests/test1.lua
@@ -0,0 +1 @@
+local x = 24
diff --git a/vm.lua b/vm.lua
new file mode 100644
index 0000000..e994dbb
--- /dev/null
+++ b/vm.lua
@@ -0,0 +1,89 @@
+-- Bytecode instruction metadata: operand modes, metamethods, opcode table.
+
+local funcbc = require('jit.util').funcbc
+
+-- Build an opcode descriptor: numeric id, mnemonic, the modes of operands
+-- a, b and c, and the metamethod associated with the instruction.
+function opcode(id, name, a, b, c, metamethod)
+    return {
+        id = id,
+        name = name,
+        a = a,
+        b = b,
+        c = c,
+        metamethod = metamethod,
+    }
+end
+
+-- Returns an array with as many entries as tbl, where entry i is i + 1.
+-- NOTE(review): the descriptors in tbl are discarded, which looks like a
+-- placeholder; presumably they should be indexed (e.g. by id) - confirm.
+function gen_opcodes(tbl)
+    local res = {}
+    for i = 1, #tbl do
+        res[i] = i + 1
+    end
+    return res
+end
+
+-- Operand modes.
+-- NOTE(review): values appear to mirror LuaJIT's BCMode enum - confirm.
+Mode = {
+    none = 0,
+    dst = 1,
+    base = 2,
+    var = 3,
+    rbase = 4,
+    uv = 5,
+    lit = 6,
+    lits = 7,
+    pri = 8,
+    num = 9,
+    str = 10,
+    tab = 11,
+    func = 12,
+    jump = 13,
+    cdata = 14,
+    max = 15,
+}
+
+-- Metamethod ids, in the order they are listed in metamethods.txt.
+Metamethod = {
+    index = 0,
+    newindex = 1,
+    gc = 2,
+    mode = 3,
+    eq = 4,
+    len = 5,
+    lt = 6,
+    le = 7,
+    concat = 8,
+    call = 9,
+    add = 10,
+    sub = 11,
+    mul = 12,
+    div = 13,
+    mod = 14,
+    pow = 15,
+    unm = 16,
+    metatable = 17,
+    tostring = 18,
+}
+
+-- Operand modes and metamethods follow the entries in opcodes.txt.
+-- NOTE(review): Metamethod has no `none` member, so KSHORT's metamethod
+-- field is nil - confirm whether a `none`/`max` member is intended.
+Opcodes = gen_opcodes {
+    opcode(37, 'POW', Mode.dst, Mode.var, Mode.var, Metamethod.pow),
+    opcode(41, 'KSHORT', Mode.dst, Mode.none, Mode.lits, Metamethod.none),
+}
+
+print(Opcodes[1])
+-- NOTE(review): `test1` was never defined; loading tests/test1.lua is
+-- assumed to be the intent - confirm.
+local test1 = assert(loadfile('tests/test1.lua'))
+-- Per LuaJIT's lib_jit.c, funcbc returns the instruction word at pc and
+-- the operand-mode word of its opcode, or nothing when pc is out of range.
+local ins, m = funcbc(test1, 3)
+print(ins)
+print(m)