diff --git a/BUILDING.txt b/BUILDING.txt new file mode 100644 index 0000000..7f97007 --- /dev/null +++ b/BUILDING.txt @@ -0,0 +1,9 @@ +Mrifk should build on any platform which supports GHC 6.2 (i.e. Win32 or +Posix). GHC versions prior to 6.2 will almost work, except that showHex +will prepend "0x" to hexadecimal numbers, which uglifies the output a bit. + +On UNIX platforms you'll probably want to delete the ".exe" from the first +line of the Makefile. + +The code is licensed under the GPL, and is Copyright 2004 Ben Rudiak-Gould. +You can contact me at br276@cl.cam.ac.uk. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..971f000 --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +EXE = .exe +HFLAGS = -v -O + +all : mrifk$(EXE) + +mrifk$(EXE) : Mrifk*.hs + ghc --make -XParallelListComp -o $@ ${HFLAGS} Mrifk.hs \ No newline at end of file diff --git a/Mrifk.hs b/Mrifk.hs new file mode 100644 index 0000000..4eff1fa --- /dev/null +++ b/Mrifk.hs @@ -0,0 +1,80 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Main ( + main +) where + + +import Mrifk_cmdline +import Mrifk_storyfile +import Mrifk_disasm +import Mrifk_decompile +import Mrifk_strings +import Mrifk_memmap +import Mrifk_grammar +import Mrifk_objects +import Mrifk_print + +import Control.Monad.State (evalState) +import Data.Array +import Data.Tree +import Data.Maybe (fromJust) +import Numeric (showHex) + + +mrifkRelease = "1-patched-v2" +mrifkSerial = "160321" + + +main = + do putStrLn ("! \""++storyFileName++"\"") + putStrLn ("! Decompiled by Mrifk release "++mrifkRelease++", serial "++mrifkSerial) +-- putStrLn "\n! Dictionary\n" +-- mapM_ print (elems dictionary) + putStrLn "\n! Grammar" + mapM_ putStrLn (ppVerbs verbs) + putStrLn "\n! Object tree\n" + mapM_ putStrLn (concatMap ppObjectTree objectForest) + putStrLn "\n! Routines" + mapM_ putStrLn ppRoutines + putStrLn "\n! Strings\n" + mapM_ putStrLn ppStrings + + +routines = map (onFth4 maybeDecompile) $ evalState disasmRoutines informCode + +maybeDecompile | disassembleOnly = updateLabels + | otherwise = decompile + +onFth4 f (a,b,c,d) = (a,b,c,f d) + + +ppRoutines = concatMap ppRoutine routines + +ppStrings = map ppString strings + +ppString (addr,str) = "! 0x" ++ showHex addr (' ' : ppQuotedString str) + + +ppObjectTree = ppObjectTree' 0 + +ppObjectTree' depth (Node obj children) = + ppObject depth obj (fromJust (lookup obj objects)) + ++ concatMap (ppObjectTree' (depth+1)) children diff --git a/Mrifk_cmdline.hs b/Mrifk_cmdline.hs new file mode 100644 index 0000000..48fde4a --- /dev/null +++ b/Mrifk_cmdline.hs @@ -0,0 +1,48 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Mrifk_cmdline ( + storyFileName, disassembleOnly +) where + + +import System.Environment (getArgs) +import System.Exit (exitFailure) +import System.IO (hPutStrLn,stderr) +import System.IO.Unsafe (unsafePerformIO) + + +storyFileName :: String +disassembleOnly :: Bool + +(storyFileName,disassembleOnly) = unsafePerformIO parseArgs + + +parseArgs = + do args <- getArgs + case args of + [story] -> return (story,False) + ["-S",story] -> return (story,True) + _ -> usage + + +usage = + do hPutStrLn stderr "usage: mrifk [-S] storyfile.ulx" + hPutStrLn stderr " -S disassemble only (omit decompilation pass)" + exitFailure diff --git a/Mrifk_code.hs b/Mrifk_code.hs new file mode 100644 index 0000000..d02e782 --- /dev/null +++ b/Mrifk_code.hs @@ -0,0 +1,75 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Mrifk_code ( + Statement(..), LabelType(..), Expr(..), Opcode(..), BinaryOp (..), + binopEQ, binopNE, binopLT, binopGT, binopLE, binopGE, + binopHas, binopOr, binopAnd, + binopName, binopPrec +) where + + +data Statement = + Push Expr | Eval String Expr | + Print String Expr | NewLine | + IfThenElse Expr [Statement] [Statement] | + Label Int LabelType | JCond Expr Int | Jump Int | + Give Expr Bool Int | + GInstr (String,Int,Int,Int,Opcode) [Expr] + deriving (Show,Eq) + +data LabelType = Single | Multi | Phantom deriving (Show,Eq) + +data Expr = + SP | Imm Int | ImmString Int | Mem Int | Local Int | + Unary String Int Expr | PostIncDec Expr String | + Binary Expr BinaryOp Expr | + Assign Expr Expr | Call Expr [Expr] | + SpecialName String + deriving (Show,Eq) + +data Opcode = + OCopy | OJCond BinaryOp | OJump | OCall | OCallI | OReturn | + OALoadBit | OAStore | OAStoreB | OAStoreBit | + OStreamChar | OStreamNum | OStreamStr | + OBinary BinaryOp | OGlk | OStkSwap | OSpecial + deriving (Show,Eq) + + +data BinaryOp = + NormalOp String Int | PredicateOp String String | LogicalOp String String deriving (Show,Eq) + + +binopEQ = PredicateOp " == " " ~= " +binopNE = PredicateOp " ~= " " == " +binopLT = PredicateOp " < " " >= " +binopGT = PredicateOp " > " " <= " +binopLE = PredicateOp " <= " " > " +binopGE = PredicateOp " >= " " < " +binopHas = PredicateOp " has " " hasnt " +binopOr = LogicalOp " || " " && " +binopAnd = LogicalOp " && " " || " + +binopName (PredicateOp s _) = s +binopName (LogicalOp s _) = s +binopName (NormalOp s _) = s + +binopPrec (PredicateOp _ _) = 3 +binopPrec (LogicalOp _ _) = 2 +binopPrec (NormalOp _ p) = p diff --git a/Mrifk_decompile.hs b/Mrifk_decompile.hs new file mode 100644 index 0000000..8bd0272 --- /dev/null +++ b/Mrifk_decompile.hs @@ -0,0 +1,237 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Mrifk_decompile ( + decompile, updateLabels +) where + + +import Mrifk_code +import Mrifk_util + +import Data.List (sort) + + +updateLabels code = updateLabels' (sort (findJumps code)) code + +updateLabels' jumps (Label addr oldType : rest) = + let (jumpsHere,jumpsLater) = break (> addr) (dropWhile (< addr) jumps) + rest' = updateLabels' jumpsLater rest + in updateLabel addr oldType jumpsHere ++ rest' + +updateLabels' jumps (IfThenElse expr a b : rest) = + IfThenElse expr (updateLabels' jumps a) (updateLabels' jumps b) : updateLabels' jumps rest + +updateLabels' jumps (x:xs) = x : updateLabels' jumps xs +updateLabels' jumps [] = [] + +updateLabel ad _ [] = [] +updateLabel ad Phantom _ = [Label ad Phantom] +updateLabel ad _ [_] = [Label ad Single] +updateLabel ad _ _ = [Label ad Multi] + + +findJumps = concatMap findJumps' + +findJumps' (GInstr (_,_,_,0,_) _) = [] +findJumps' (GInstr (_,loads,stores,_,_) args) = + [x | Imm x <- drop (loads+stores) args] +findJumps' (JCond _ target) = [target] +findJumps' (Jump target) = [target] +findJumps' (IfThenElse _ a b) = findJumps a ++ findJumps b +findJumps' (Give _ _ _) = [] +findJumps' (Label _ _) = [] +findJumps' (Push _) = [] +findJumps' (Eval _ _) = [] +findJumps' (Print _ _) = [] +findJumps' NewLine = [] +-- findJumps' x = error (show x) + + +{------------------------------- decompiler ----------------------------------} + + +doToDeath f code = + findFixedPoint (iterate (f . updateLabels) code) + where findFixedPoint (x:y:z) + | x == y = x + | otherwise = findFixedPoint (y:z) + + +decompile = doToDeath decompileJumps . doToDeath decompile' + + +decompile' (JCond cond l : Push (Imm 0) : Jump m : Label l' Single : Push (Imm 1) : rest@(Label m' _ : _)) | l == l' && m == m' = + decompile' (Push (negateExpr (negateExpr cond)) : rest) + + +decompile' (IfThenElse cond thenClause elseClause : rest) = + IfThenElse cond (decompile' thenClause) (decompile' elseClause) : decompile' rest + + +decompile' (JCond cond1 label : JCond cond2 label' : rest) | label == label' = + decompile' (JCond (Binary cond1 binopOr cond2) label : rest) + +decompile' (JCond cond1 label1 : JCond cond2 label2 : rest@(Label label1' _ : _)) | label1 == label1' = + decompile' (JCond (Binary (negateExpr cond1) binopAnd cond2) label2 : rest) + + +decompile' (Push (Local n) : Eval "" (Assign (Local n') (Binary (Local n'') (NormalOp op 5) (Imm 1))) : rest) | n == n' && n == n'' = + decompile' (Push (PostIncDec (Local n) (incDecOp op)) : rest) + +decompile' (Push (Mem n) : Eval "" (Assign (Mem n') (Binary (Mem n'') (NormalOp op 5) (Imm 1))) : rest) | n == n' && n == n'' = + decompile' (Push (PostIncDec (Mem n) (incDecOp op)) : rest) + + +-- FIXME: Might want to use stkswap as a clue that +-- the compiler is doing something special +decompile' (Push expr1 : Push expr2 : GInstr (_,_,_,_,OStkSwap) _ : rest) = + decompile' (Push expr2 : Push expr1 : rest) + + +decompile' (Push expr : (GInstr info@(_,loads,_,_,_) args) : rest) + | not (ready loads args) = decompile' (GInstr info (substSP expr args) : rest) + +decompile' (instr@(GInstr (_,loads,_,_,type_) args) : rest) + | ready loads args = case decompileReady type_ args of + Just instr' -> decompile' (instr' : rest) + Nothing -> instr : decompile' rest + | otherwise = instr : decompile' rest + +decompile' (x:xs) = x : decompile' xs +decompile' [] = [] + + +incDecOp " + " = "++" +incDecOp " - " = "--" + + +ready loads args = + all (/= SP) (take loads args) + +substSP expr (SP:rest) = expr:rest +substSP expr (x:rest) = x:substSP expr rest + + +decompileJumps (first@(JCond cond midLabel) : rest) = + case findMatch (findIfThenElse midLabel) rest of + Just (thenClause,(elseClause,(endLabel,rest'))) -> + let exit = [Label endLabel Phantom] in + IfThenElse (negateExpr cond) (thenClause ++ exit) (elseClause ++ exit) : decompileJumps rest' + Nothing -> + case findMatch (findIfThen midLabel) rest of + Just (thenClause,rest') -> + let exit = [Label midLabel Phantom] in + IfThenElse (negateExpr cond) (thenClause ++ exit) [] : decompileJumps rest' + Nothing -> first : decompileJumps rest + +decompileJumps (IfThenElse cond thenClause elseClause : rest) = + IfThenElse cond (decompileJumps thenClause) (decompileJumps elseClause) : decompileJumps rest + +decompileJumps (x:xs) = x : decompileJumps xs +decompileJumps [] = [] + + + +findMatch = findMatch' id +findMatch' before f [] = Nothing +findMatch' before f (x:xs) = + case f (x:xs) of + Just result -> Just (before [],result) + Nothing -> findMatch' (before.(x:)) f xs + +findIfThenElse l (Jump endLabel : Label l' Single : rest) | l == l' = + findMatch findElse rest where + findElse all@(Label endLabel' _ : rest) | endLabel == endLabel' = Just (endLabel,all) + findElse _ = Nothing + +findIfThenElse _ _ = Nothing + +findIfThen l all@(Label l' _ : _) | l == l' = Just all +findIfThen _ _ = Nothing + + +negateExpr (Binary left (LogicalOp op notOp) right) = + Binary (negateExpr left) (LogicalOp notOp op) (negateExpr right) + +negateExpr (Binary left (PredicateOp op notOp) right) = + Binary left (PredicateOp notOp op) right + +negateExpr x = Unary "~~" 2 x + + +decompileReady type_@(OJCond _) [val,label] = + decompileReady type_ [val,Imm 0,label] + +decompileReady (OJCond op) [val1,val2,Imm label] = + Just (JCond (Binary val1 op val2) label) + +decompileReady (OBinary op) [left,right,dst] = + Just (pushOrStore dst (Binary left op right)) + +decompileReady OJump [Imm label] = + Just (Jump label) + +decompileReady OReturn [expr] = Just (Eval "return " expr) + +decompileReady OStreamStr [Imm s] = Just (Print "" (ImmString s)) +decompileReady OStreamStr [s] = Just (Print "(s) " s) + +decompileReady OStreamNum [n] = Just (Print "" n) + +decompileReady OStreamChar [Imm 10] = Just NewLine +decompileReady OStreamChar [ch] = Just (Print "(char) " ch) + +decompileReady OCopy [src,dst] = + Just (pushOrStore dst src) + +decompileReady OCallI (func:rest) = + let (args,store) = separateStore rest + in Just (pushOrStore store (Call func args)) + +decompileReady OCall [func,Imm argc,dest] = + Just (GInstr ("callfi*",argc+1,1,0,OCallI) (func : replicate argc SP ++ [dest])) + +decompileReady OGlk [func,Imm argc,dest] = + Just (GInstr ("callfi*",argc+2,1,0,OCallI) (SpecialName "glk" : func : replicate argc SP ++ [dest])) + +decompileReady OAStoreB [array,offset,value] = + Just $ Eval "" (Assign (Binary array (NormalOp "->" 7) offset) value) + +decompileReady OAStore [array,offset,value] = + Just $ Eval "" (Assign (Binary array (NormalOp "-->" 7) offset) value) + +decompileReady OALoadBit [obj,Imm attrPlus8,store] | attrPlus8 >= 8 = + Just (pushOrStore store (Binary obj binopHas (Imm (attrPlus8 - 8)))) + +decompileReady OALoadBit [obj,Binary attr (NormalOp " + " _) (Imm 8),store] = + Just (pushOrStore store (Binary obj binopHas attr)) + +decompileReady OAStoreBit [obj,Imm attrPlus8,Imm bit] | attrPlus8 >= 8 && (bit == 0 || bit == 1) = + Just $ Give obj (odd bit) (attrPlus8 - 8) + +decompileReady _ _ = Nothing + + +separateStore [x] = ([],x) +separateStore (x:xs) = onFst (x:) (separateStore xs) + +pushOrStore SP expr = Push expr +pushOrStore (Imm 0) expr = Eval "" expr +pushOrStore dest expr = Eval "" (Assign dest expr) diff --git a/Mrifk_disasm.hs b/Mrifk_disasm.hs new file mode 100644 index 0000000..f7c3c31 --- /dev/null +++ b/Mrifk_disasm.hs @@ -0,0 +1,263 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Mrifk_disasm ( + disasmRoutines +) where + + +import Mrifk_storyfile +import Mrifk_code + +import Numeric (showHex) +import Control.Monad (replicateM,liftM) +import Data.Bits ((.&.),shiftR) + + +{------------------------------- disassembly ----------------------------------} + + +disasmRoutines = do + eos <- isEOS + if eos then return [] else do + type_ <- peekUByte + if type_ `elem` [0xC0,0xC1] then + do r <- disasmRoutine + rs <- disasmRoutines + return (r:rs) + else if type_ == 0x31 then + -- hack: skip over unreachable trailing "return" statements generated by Inform + getBytes 3 >> disasmRoutines + else + do pos <- getPos + error ("Unexpected byte during code disassembly: " ++ showHex type_ (" at offset " ++ showHex pos "")) + + +disasmRoutine = do + addr <- getPos + type_ <- getUByte + localTypes <- disasmLocalTypes + body <- disasmInstrs + return (addr,type_,localTypes,body) + +disasmLocalTypes = do + type_ <- getUByte + count <- getUByte + if type_ == 0 && count == 0 + then return [] + else do rest <- disasmLocalTypes + return (replicate count type_ ++ rest) + + +disasmInstrs = + do eof <- endOfFunction + if eof then + return [] + else + do addr <- getPos + instr <- disasmInstr + rest <- disasmInstrs + return (Label addr Multi : instr : rest) + +endOfFunction = + do eos <- isEOS + if eos then return True + else do byte <- peekUByte + return (byte >= 0xC0) + + +disasmInstr = do + opcode1 <- peekUByte + opcode <- if opcode1 < 0x80 then + getUByte + else if opcode1 < 0xC0 then + liftM (subtract 0x8000) getUWord + else + liftM (+ 0x40000000) getDword + case lookup opcode opcodes of + Just info -> disasmInstr' info + Nothing -> error ("Invalid Glulx opcode " ++ showHex opcode "") + + +disasmInstr' info@(_,loads,stores,branches,_) = do + let operandCount = loads+stores+branches + operandByteCount = (operandCount+1) `div` 2 + x <- replicateM operandByteCount getOperandByte + let operandDescs = take operandCount $ concat x + operands <- mapM getOperand operandDescs + endAddr <- getPos + let operands' = zipWith ($) (replicate (loads+stores) id ++ repeat (cvtBranch endAddr)) operands + return (GInstr (cvtInfo info) operands') + +cvtBranch endAddr (Imm 0) = Imm 0 +cvtBranch endAddr (Imm 1) = Imm 1 +cvtBranch endAddr (Imm n) = Imm (endAddr + n - 2) +cvtBranch endAddr x = error "Unexpected branch target mode" + +-- Hack to handle jumpabs, which takes a target label but doesn't +-- follow the format rule of other branch instructions +cvtInfo ("jumpabs",1,0,0,type_) = ("jumpabs",0,0,1,type_) +cvtInfo x = x + + +getOperandByte = do + x <- getUByte + return [x .&. 15, x `shiftR` 4] + +getOperand 0 = return (Imm 0) +getOperand 1 = liftM Imm getSByte +getOperand 2 = liftM Imm getSWord +getOperand 3 = liftM Imm getDword +getOperand 5 = liftM Mem getUByte +getOperand 6 = liftM Mem getUWord +getOperand 7 = liftM Mem getDword +getOperand 8 = return SP +getOperand 9 = liftM Local getUByte +getOperand 10 = liftM Local getUWord +getOperand 11 = liftM Local getDword +getOperand 13 = liftM (Mem . (+ hdrRAMStart)) getUByte +getOperand 14 = liftM (Mem . (+ hdrRAMStart)) getUWord +getOperand 15 = liftM (Mem . (+ hdrRAMStart)) getDword + + +opcodes = + [(0x00,("nop", 0,0,0, OSpecial)), + (0x10,("add", 2,1,0, OBinary (NormalOp " + " 5))), + (0x11,("sub", 2,1,0, OBinary (NormalOp " - " 5))), + (0x12,("mul", 2,1,0, OBinary (NormalOp " * " 6))), + (0x13,("div", 2,1,0, OBinary (NormalOp " / " 6))), + (0x14,("mod", 2,1,0, OBinary (NormalOp " % " 6))), + (0x15,("neg", 1,1,0, OSpecial)), + (0x18,("bitand", 2,1,0, OBinary (NormalOp " & " 6))), + (0x19,("bitor", 2,1,0, OBinary (NormalOp " | " 6))), + (0x1A,("bitxor", 2,1,0, OSpecial)), + (0x1B,("bitnot", 1,1,0, OSpecial)), + (0x1C,("shiftl", 2,1,0, OSpecial)), + (0x1D,("sshiftr", 2,1,0, OSpecial)), + (0x1E,("ushiftr", 2,1,0, OSpecial)), + (0x20,("jump", 0,0,1, OJump)), + (0x22,("jz", 1,0,1, OJCond binopEQ)), + (0x23,("jnz", 1,0,1, OJCond binopNE)), + (0x24,("jeq", 2,0,1, OJCond binopEQ)), + (0x25,("jne", 2,0,1, OJCond binopNE)), + (0x26,("jlt", 2,0,1, OJCond binopLT)), + (0x27,("jge", 2,0,1, OJCond binopGE)), + (0x28,("jgt", 2,0,1, OJCond binopGT)), + (0x29,("jle", 2,0,1, OJCond binopLE)), + (0x2A,("jltu", 2,0,1, OSpecial)), + (0x2B,("jgeu", 2,0,1, OSpecial)), + (0x2C,("jgtu", 2,0,1, OSpecial)), + (0x2D,("jleu", 2,0,1, OSpecial)), + (0x30,("call", 2,1,0, OCall)), + (0x31,("return", 1,0,0, OReturn)), + (0x32,("catch", 0,1,1, OSpecial)), + (0x33,("throw", 2,0,0, OSpecial)), + (0x34,("tailcall", 2,0,0, OSpecial)), + (0x40,("copy", 1,1,0, OCopy)), + (0x41,("copys", 1,1,0, OSpecial)), + (0x42,("copyb", 1,1,0, OSpecial)), + (0x44,("sexs", 1,1,0, OSpecial)), + (0x45,("sexb", 1,1,0, OSpecial)), + (0x48,("aload", 2,1,0, OBinary (NormalOp "-->" 7))), + (0x49,("aloads", 2,1,0, OSpecial)), + (0x4A,("aloadb", 2,1,0, OBinary (NormalOp "->" 7))), + (0x4B,("aloadbit", 2,1,0, OALoadBit)), + (0x4C,("astore", 3,0,0, OAStore)), + (0x4D,("astores", 3,0,0, OSpecial)), + (0x4E,("astoreb", 3,0,0, OAStoreB)), + (0x4F,("astorebit", 3,0,0, OAStoreBit)), + (0x50,("stkcount", 0,1,0, OSpecial)), + (0x51,("stkpeek", 1,1,0, OSpecial)), + (0x52,("stkswap", 0,0,0, OStkSwap)), + (0x53,("stkroll", 2,0,0, OSpecial)), + (0x54,("stkcopy", 1,0,0, OSpecial)), + (0x70,("streamchar", 1,0,0, OStreamChar)), + (0x71,("streamnum", 1,0,0, OStreamNum)), + (0x72,("streamstr", 1,0,0, OStreamStr)), + (0x73,("streamunichar", 1,0,0, OStreamChar)), + (0x100,("gestalt", 2,1,0, OSpecial)), + (0x101,("debugtrap", 1,0,0, OSpecial)), + (0x102,("getmemsize", 0,1,0, OSpecial)), + (0x103,("setmemsize", 1,1,0, OSpecial)), + (0x104,("jumpabs", 1,0,0, OSpecial)), + (0x110,("random", 1,1,0, OSpecial)), + (0x111,("setrandom", 1,0,0, OSpecial)), + (0x120,("quit", 0,0,0, OSpecial)), + (0x121,("verify", 0,1,0, OSpecial)), + (0x122,("restart", 0,0,0, OSpecial)), + (0x123,("save", 1,1,0, OSpecial)), + (0x124,("restore", 1,1,0, OSpecial)), + (0x125,("saveundo", 0,1,0, OSpecial)), + (0x126,("restoreundo",0,1,0, OSpecial)), + (0x127,("protect", 2,0,0, OSpecial)), + (0x130,("glk", 2,1,0, OGlk)), + (0x140,("getstringtbl",0,1,0, OSpecial)), + (0x141,("setstringtbl",1,0,0, OSpecial)), + (0x148,("getiosys", 0,2,0, OSpecial)), + (0x149,("setiosys", 2,0,0, OSpecial)), + (0x150,("linearsearch",7,1,0, OSpecial)), + (0x151,("binarysearch",7,1,0, OSpecial)), + (0x152,("linkedsearch",6,1,0, OSpecial)), + (0x160,("callf", 1,1,0, OCallI)), + (0x161,("callfi", 2,1,0, OCallI)), + (0x162,("callfii", 3,1,0, OCallI)), + (0x163,("callfiii", 4,1,0, OCallI)), + (0x170,("mzero", 2,0,0, OSpecial)), + (0x171,("mcopy", 3,0,0, OSpecial)), + (0x178,("malloc", 1,1,0, OSpecial)), + (0x179,("mfree", 1,0,0, OSpecial)), + (0x180,("accelfunc", 2,0,0, OSpecial)), + (0x181,("accelparam", 2,0,0, OSpecial)), + (0x190,("numtof", 1,1,0, OSpecial)), + (0x191,("ftonumz", 1,1,0, OSpecial)), + (0x192,("ftonumn", 1,1,0, OSpecial)), + (0x198,("ceil", 1,1,0, OSpecial)), + (0x199,("floor", 1,1,0, OSpecial)), + (0x1A0,("fadd", 2,1,0, OBinary (NormalOp " + " 5))), + (0x1A1,("fsub", 2,1,0, OBinary (NormalOp " - " 5))), + (0x1A2,("fmul", 2,1,0, OBinary (NormalOp " * " 6))), + (0x1A3,("fdiv", 2,1,0, OBinary (NormalOp " / " 6))), + (0x1A4,("fmod", 2,2,0, OBinary (NormalOp " % " 6))), + (0x1A8,("sqrt", 1,1,0, OSpecial)), + (0x1A9,("exp", 1,1,0, OSpecial)), + (0x1AA,("log", 1,1,0, OSpecial)), + (0x1AB,("pow", 2,1,0, OSpecial)), + (0x1B0,("sin", 1,1,0, OSpecial)), + (0x1B1,("cos", 1,1,0, OSpecial)), + (0x1B2,("tan", 1,1,0, OSpecial)), + (0x1B3,("asin", 1,1,0, OSpecial)), + (0x1B4,("acos", 1,1,0, OSpecial)), + (0x1B5,("atan", 1,1,0, OSpecial)), + (0x1B6,("atan2", 2,1,0, OSpecial)), + (0x1C0,("jfeq", 3,0,1, OSpecial)), + (0x1C1,("jfne", 3,0,1, OSpecial)), + (0x1C2,("jflt", 2,0,1, OJCond binopLT)), + (0x1C3,("jfle", 2,0,1, OJCond binopLE)), + (0x1C4,("jfgt", 2,0,1, OJCond binopGT)), + (0x1C5,("jfge", 2,0,1, OJCond binopGE)), + (0x1C8,("jisnan", 1,0,1, OSpecial)), + (0x1C9,("jisinf", 1,0,1, OSpecial)), +-- Support the FyreVM specific opcode based on https://github.com/ChicagoDave/fyrevm-dotnet/blob/master/Opcodes.cs. 0x1000 to 0x10FF were reserved for this usage in version 3.1.2 of the Glulx specification. + (0x1000,("fyrecall", 3,1,0, OSpecial)), +-- Support the @parchment extension by Dannii Willis based on draft specification here http://curiousdannii.github.io/if/op-parchment.html which officially took this opcode in version 3.1.2 of the Glulx specification. 0x1100 to 0x11FF are reserved for his usage. + (0x1110,("parchment", 2,1,0, OSpecial)), +-- Support Andrew Plotkin's iOS extensions... 0x1200 to 0x12FF are reserved for this purpose in version 3.1.2 but any specification or code relating to these doesn't seem to be public. +-- Support Git specific opcodes based on https://github.com/DavidKinder/Git/blob/master/opcodes.c and https://github.com/DavidKinder/Git/blob/master/opcodes.h, 0x7900 to 0x79FF are reserved for these as of 1.3.2 + (0x7940,("setcacheram", 1,0,0, OSpecial)), + (0x7941,("prunecache", 2,0,0, OSpecial))] \ No newline at end of file diff --git a/Mrifk_grammar.hs b/Mrifk_grammar.hs new file mode 100644 index 0000000..59bb363 --- /dev/null +++ b/Mrifk_grammar.hs @@ -0,0 +1,200 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Mrifk_grammar ( + GrammarLine(..), GrammarToken(..), + verbs, actionRoutines, + DictWordTag(..), + dictionary, + dictWordAt, isDictWord +) where + + +import Mrifk_storyfile +import Mrifk_memmap + + +import Data.Char (chr) +import Control.Monad +import Data.Array +import Data.Bits +import Data.Maybe (isJust) +import Numeric (showHex) + + +-- words after verb action reverse +data GrammarLine = GrammarLine [GrammarToken] Int Bool + deriving Show + + +verbs :: [([String],[GrammarLine])] + +verbs = reverse (zip verbWords grammarEntries) + +verbWords = + [[x | (x,_,tags) <- elems dictionary, DictVerb n <- tags, n == n'] + | n' <- [255,254..1]] + + +grammarEntries :: [[GrammarLine]] +actionRoutines :: [Int] +dictionary :: Array Int (String,String,[DictWordTag]) +numEntries,entryBaseAddr,entryLength :: Int + +(grammarEntries, actionRoutines, + (dictionary,numEntries,entryBaseAddr,entryLength)) = + evalFrom informGrammarTable parseGV2 + + +data GrammarToken = ElementaryToken String | Preposition String + | Attribute Int | ParseRoutine String Int + | Alternatives [GrammarToken] + deriving (Show,Eq,Ord) + + +{-----------} + + +parseGV2 = + do numVerbs <- getDword + verbGrammars <- replicateM numVerbs getDword + entries <- mapM parseGV2Verb verbGrammars + numActionRoutines <- getDword + actionRoutines <- replicateM numActionRoutines getDword + dictionary <- parseDictionary + return (entries, actionRoutines, dictionary) + +parseGV2Verb addr = + do pos <- getPos + if pos == addr then + do numLines <- getUByte + replicateM numLines parseGV2Line + else + error ("Grammar table not contiguous: next pointer is 0x" ++ showHex addr (", but expected 0x" ++ showHex pos "")) + +parseGV2Line = do + actionNum <- getUWord + flags <- getUByte + let reverseParams = testBit flags 0 + tokens <- parseGV2Tokens + return $ GrammarLine (groupGV2Tokens tokens) actionNum reverseParams + +parseGV2Tokens = do + tokenType <- getUByte + if tokenType == 15 + then return [] + else do + tokenData <- getDword + tokens <- parseGV2Tokens + return ((tokenType .&. 0x30,parseGV2Token (tokenType .&. 15) tokenData) : tokens) + +parseGV2Token 1 d = ElementaryToken (elementaryTokenTypes !! d) +parseGV2Token 2 d = Preposition (dictWordAt d) +parseGV2Token 3 d = ParseRoutine "noun=" d +parseGV2Token 4 d = Attribute d +parseGV2Token 5 d = ParseRoutine "scope=" d +parseGV2Token 6 d = ParseRoutine "" d + + +elementaryTokenTypes = + ["noun", "held", "multi", "multiheld", "multiexcept", + "multiinside", "creature", "special", "number", "topic"] + + +groupGV2Tokens ((0,x):rest) = + x : groupGV2Tokens rest +groupGV2Tokens ((32,x):rest) = + Alternatives (x : map snd xs) : groupGV2Tokens rest' + where (xs,rest') = break (\(n,_) -> not (testBit n 4)) rest +groupGV2Tokens ((n,x):rest) = error (show n) +groupGV2Tokens [] = [] + + +{---------------} + + +-- Dictionary + + +data DictWordTag + = DictVerb Int | DictPrep + | DictNoun | DictPlural | DictMeta + | DictMaybeTruncated + deriving (Show,Eq) + + +parseDictionary = do + numWords <- getDword + pos <- getPos + case filter (possibleDictLength pos numWords) [8..100] of + [] -> error "Unable to determine dictionary word length" + (len:_) -> + do words <- replicateM numWords (getDictWord len) + return (listArray (0,numWords-1) words, numWords, pos, len) + +getDictWord len = + do _ <- getUByte -- 0x60, already checked + wordBytes <- replicateM (len-7) getUByte + let word = map chr (takeWhile (/= 0) wordBytes) + truncated = if last wordBytes == 0 then [] else [DictMaybeTruncated] + flags <- getUWord + verbNum <- getUWord + unused <- getUWord + let tags = truncated ++ parseFlags flags verbNum + return (mungeDictWord word tags, word, tags) + + +parseFlags flags verbNum = + (if testBit flags 0 then [DictVerb verbNum] else []) + ++ (if testBit flags 3 then [DictPrep] else []) + ++ (if testBit flags 7 then [DictNoun] else []) + ++ (if testBit flags 2 then [DictPlural] else []) + ++ (if testBit flags 1 then [DictMeta] else []) + + +mungeDictWord word tags = word' ++ attr + where word' = {-sfFullDictWord-} word + attr | '/' `elem` word' = "" + | DictPlural `elem` tags = "//p" + | length word == 1 = "//" + | otherwise = "" + + +possibleDictLength pos numWords len = + (pos + numWords * len <= snd wholeFile) + && all (== 0x60) (map byteAt (take numWords [pos,pos+len..])) + + +dictWordAt :: Int -> String + +dictWordAt addr = + case addrToDictWordIndex addr of + Just n -> case dictionary!n of (word,_,_) -> word + Nothing -> "unknownDictWord" ++ show addr + + +addrToDictWordIndex addr + | addr < entryBaseAddr = Nothing + | otherwise = + case (addr - entryBaseAddr) `divMod` entryLength of + (d,0) -> if d < numEntries then Just d else Nothing + (_,_) -> Nothing + + +isDictWord addr = isJust (addrToDictWordIndex addr) diff --git a/Mrifk_memmap.hs b/Mrifk_memmap.hs new file mode 100644 index 0000000..4aee2a3 --- /dev/null +++ b/Mrifk_memmap.hs @@ -0,0 +1,198 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Mrifk_memmap ( + informCode, informObjectTable, informGrammarTable, + numAttribBytes, + commonPropNames, indivPropNames, + attribNames, actionNames, arrayNames +) where + + +import Mrifk_storyfile +import Mrifk_strings + +import Data.Array (Array,listArray) +import Data.Bits (shiftR) +import Data.Maybe (mapMaybe) +import Numeric (showHex) + + +{- + +Here I use various heuristics to try to figure out +the addresses of Inform data structures. + +This is the Glulx memory layout generated by Inform: + + +---------------------+ 000000 +Read- | header | + only +=====================+ 000024 +memory | memory layout id | + +---------------------+ hdrStartFunc + | code | + +---------------------+ hdrDecodingTbl + | string decode table | + + - - - - - - - - - - + + | strings | + +=====================+ hdrRAMStart +Dynamic | global variables | +memory + - - - - - - - - - - + + | arrays | + +---------------------+ + | printing variables | + +---------------------+ + | objects | + + - - - - - - - - - - + + | property values | + + - - - - - - - - - - + + | property defaults | + + - - - - - - - - - - + + | class numbers table | + + - - - - - - - - - - + + | id names table | + +=====================+ +Readable| grammar table | +memory + - - - - - - - - - - + + | actions | + +---------------------+ + | dictionary | + +---------------------+ hdrExtStart + +globals: array of longs, no length info +arrays: raw data, no length info (sigh...) +printing variables: long count, followed by count*long absolute string addresses + should be able to get addr from string decode table +objects, property values: see technical.txt +property defaults: array of longs, length is count of common properties below +class numbers: array of long absolute ptrs to objects, 0-terminated +id names table: + header of 8 longs: + absaddr,count of common properties + absaddr,count of individual properties (#s starting with 256) + absaddr,count of attributes + absaddr,count of actions + (note: addr of common props is always just past the header, + addr of indiv props just past common, etc.) + count+count+count+count long (possibly null) abs ptrs to strings + count, string ptrs for array names (but no array addresses???) + +I try to find the id names table first. The grammar table follows it. +The object table can be found by pattern (using the known count of attributes) + +-} + + +indivPropStart = 256 -- FIXME: make this variable? + + +informCode = fromTo hdrStartFunc hdrDecodingTbl + + +informIdNamesTable :: Int + +commonPropNames, indivPropNames, attribNames, actionNames, arrayNames :: Array Int (Maybe String) + +(informIdNamesTable, informGrammarTable, numAttribBytes, + commonPropNames, indivPropNames, attribNames, actionNames, arrayNames) = + case possibleIdNamesTables of + [x] -> x + [] -> error "No identifier names table found (not compiled with Inform 6.21?)" + xs -> error ("More than one candidate for identifier names table. File offsets:\n" + ++ concat ['\t' : showHex addr "\n" | (addr,_,_,_,_,_,_,_) <- xs]) + + +possibleIdNamesTables = + mapMaybe maybeIdNamesTableAt [hdrRAMStart .. hdrExtStart-32] + +maybeIdNamesTableAt addr = + if addrCommon == expectCommon && addrIndiv == expectIndiv + && addrAttrib == expectAttrib && addrAction == expectAction + && all isStringPtr (dwordsFromTo expectCommon expectArray) + && all isStringPtr (dwordsFromTo addrArray addrArrayEnd) + then + Just (addr, addrArrayEnd, (numAttrib + 7) `shiftR` 3, + nameTable addrCommon numCommon 0, + nameTable addrIndiv numIndiv indivPropStart, + nameTable addrAttrib numAttrib 0, + nameTable addrAction numAction 0, + nameTable addrArray numArray 0) + else + Nothing + + where + addrCommon = dwordAt addr + numCommon = dwordAt (addr+4) + addrIndiv = dwordAt (addr+8) + numIndiv = dwordAt (addr+12) + addrAttrib = dwordAt (addr+16) + numAttrib = dwordAt (addr+20) + addrAction = dwordAt (addr+24) + numAction = dwordAt (addr+28) + expectCommon = addr + 32 + expectIndiv = expectCommon + 4 * numCommon + expectAttrib = expectIndiv + 4 * numIndiv + expectAction = expectAttrib + 4 * numAttrib + expectArray = expectAction + 4 * numAction + numArray = dwordAt expectArray + addrArray = expectArray + 4 + addrArrayEnd = addrArray + 4 * numArray + +nameTable addr count base = + listArray (base,base+count-1) + [maybeStringAt (dwordAt (addr + n * 4)) | n <- [0..count-1]] + + +{-----------} + + +-- take the earliest match for the object table, because the list +-- starting with any subsequent object also looks like a valid +-- object table + +informObjectTable = + case mapMaybe couldBeObjectTable [hdrRAMStart .. informIdNamesTable-25] of + [] -> fromTo 0 0 + ((a,b):_) -> fromTo a b + +couldBeObjectTable addr = + if byteAt addr == 0x70 && isStringPtr (dwordAt (addr+numAttribBytes+5)) then + if dwordAt (addr+numAttribBytes+1) == 0 then + Just (addr,expectNextAddr) + else if dwordAt (addr+numAttribBytes+1) == expectNextAddr then + case couldBeObjectTable expectNextAddr of + Just (from,to) -> Just (addr,to) + Nothing -> Nothing + else + Nothing + else + Nothing + where expectNextAddr = addr + numAttribBytes + 25 + + +{-----------} + + +isStringPtr p = + p == 0 || (p >= hdrDecodingTbl && p < hdrExtStart && byteAt p `elem` [0xE0,0xE1,0xE2]) + +maybeStringAt 0 = Nothing +maybeStringAt addr = Just (evalFrom addr decodeString) + +dwordsFromTo n k = evalFromTo n k (repeatUntilEmpty getDword) diff --git a/Mrifk_objects.hs b/Mrifk_objects.hs new file mode 100644 index 0000000..51fcef1 --- /dev/null +++ b/Mrifk_objects.hs @@ -0,0 +1,101 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Mrifk_objects ( + Object(..), + objects, numObjects, objectForest +) where + + +import Mrifk_strings +import Mrifk_storyfile +import Mrifk_memmap +import Mrifk_util + +import Control.Monad (replicateM) +import Control.Monad.State (evalState) +import Data.Array +import Data.Bits (testBit) +import Data.Tree +import Data.Maybe (fromJust) + + +-- name attrs prop# priv data +data Object = Object String [Int] [(Int,Bool,[Int])] + +type Object' = ((Int,Object),Int,Int,Int) + + +objectForest :: Forest Int + +objectForest = + map treeFrom roots + where parents = makeLookupTable [(a,p) | ((a,_),p,_,_) <- objectList] + siblings = makeLookupTable [(a,s) | ((a,_),_,s,_) <- objectList] + children = makeLookupTable [(a,c) | ((a,_),_,_,c) <- objectList] + roots = [a | ((a,_),0,_,_) <- objectList] + treeFrom n = + let kids = takeWhile (/= 0) $ + iterate (fromJust . flip tableLookup siblings) + (fromJust $ tableLookup n children) + in Node n (map treeFrom kids) + + +objects :: [(Int,Object)] +objects = [o | (o,_,_,_) <- objectList] + +numObjects :: Int +numObjects = length objectList + + +objectList :: [Object'] +objectList = + evalState (repeatUntilEmpty getObject) informObjectTable + +getObject = + do addr <- getPos + typeByte <- getUByte -- we already verified this is 0x70 when searching for the object table + attribBytes <- replicateM numAttribBytes getUByte + nextAddr <- getDword + nameAddr <- getDword + propAddr <- getDword + parent <- getDword + sibling <- getDword + child <- getDword + let name = evalFrom nameAddr decodeString + attribs = decodeAttribsFrom 0 attribBytes + props = evalFrom propAddr getProps + return ((addr, Object name attribs props), parent, sibling, child) + +decodeAttribsFrom n [] = [] +decodeAttribsFrom n (b:bs) = + [n+i | i <- [0..7], testBit b i] ++ decodeAttribsFrom (n+8) bs + +getProps = + do numProps <- getDword + replicateM numProps getProp + +getProp = + do id <- getUWord + len <- getUWord + addr <- getDword + flags <- getUWord + let private = testBit flags 0 + data_ = evalFrom addr (replicateM len getDword) + return (id,private,data_) diff --git a/Mrifk_print.hs b/Mrifk_print.hs new file mode 100644 index 0000000..924653f --- /dev/null +++ b/Mrifk_print.hs @@ -0,0 +1,362 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Mrifk_print ( + ppRoutine, ppObject, ppQuotedString, ppVerbs +) where + + +import Mrifk_code +import Mrifk_grammar +import Mrifk_objects +import Mrifk_memmap +import Mrifk_storyfile +import Mrifk_strings +import Mrifk_util + +import Data.Char (ord,isDigit) +import Data.Array +import Data.Ix (inRange) +import Data.Maybe (fromMaybe) +import Numeric (showHex) + + +{--------------} + + +ppRoutine (addr,type_,params,body) = + (if type_ == 193 then [] else ["! Stack-parameter routine"]) + ++ (if all (== 4) params + then [] + else ["! Local lengths: " ++ unwords [show n | n <- params]]) + ++ (unwords ("[" : nameRoutine addr : [nameLocal n | n <- scanl (+) 0 params | _ <- params] ++ [";"])) + : indentBlock (ppStatements body) + ++ ["];",""] + + +ppStatements x@(Print _ _ : _) = + let (prints,rest) = break (not.isPrint) x + (isRet,rest') = case rest of + (NewLine : Eval "return " (Imm 1) : rest') -> (True,rest') + _ -> (False,rest) + in (ppPrints prints isRet ++ ";") : ppStatements rest' + +ppStatements (x : xs) = ppStatement x ++ ppStatements xs + +ppStatements [] = [] + + +ppPrints [Print "" s@(ImmString _)] True = + ppExpr 0 s + +ppPrints prints isRet = + let prefix = if isRet then "print_ret " else "print " + args = map ppPrint prints + in prefix ++ join ", " args + +isPrint (Print _ _) = True +isPrint _ = False +ppPrint (Print prefix val) = prefix ++ ppExpr 1 val + + +ppStatement (Label addr Phantom) = [] +ppStatement (Label addr _) = ["<." ++ nameLabel addr ++ ":"] + +ppStatement (GInstr (name,loads,stores,branches,_) args) = + let prefixes = replicate loads "" ++ replicate stores "-> " ++ replicate branches "label" + printableArgs = zipWith (++) prefixes (map (ppExpr 99) args) + in [join " " (('@' : name) : printableArgs) ++ ";"] + +ppStatement (Eval cmd expr) = [cmd ++ ppExpr 0 expr ++ ";"] + +ppStatement (Push expr) = ["{{{PUSH}}} " ++ ppExpr 0 expr ++ ";"] + +ppStatement NewLine = ["new_line;"] + +ppStatement (IfThenElse cond thenClause []) = + ("if (" ++ ppExpr 0 cond ++ ") {") : indentBlock (ppStatements thenClause) ++ ["}"] + +ppStatement (IfThenElse cond thenClause elseClause) = + let elseClauseText = ppStatements elseClause + elseText = case elseClauseText of + (line@('i':'f':' ':_) : lines@(_:_)) -> ("} else " ++ line) : lines + _ -> "} else {" : indentBlock elseClauseText ++ ["}"] + in ("if (" ++ ppExpr 0 cond ++ ") {") : indentBlock (ppStatements thenClause) ++ elseText + +ppStatement (JCond cond label) = + ["if (" ++ ppExpr 0 cond ++ ") " ++ ppJump label ++ ";"] + +ppStatement (Jump label) = + [ppJump label ++ ";"] + +ppStatement (Give obj b attr) = + ["give " ++ ppExpr 0 obj ++ (if b then " " else " ~") ++ nameAttr attr ++ ";"] + +ppStatement x = ["{{{" ++ show x ++ "}}}"] + + +ppJump 0 = "rfalse" +ppJump 1 = "rtrue" +ppJump label = "jump " ++ nameLabel label + + +ppExpr prec (Assign dst src) = + parenIf (1 <= prec) (ppExpr 1 dst ++ " = " ++ ppExpr 0 src) + +ppExpr prec (Call func args) = + parenIf (11 <= prec) (ppExpr 10 func ++ "(" ++ join "," (map (ppExpr 0) args) ++ ")") + +ppExpr prec (Binary left op right) = + let p = binopPrec op in + parenIf (p <= prec) (ppExpr (p-1) left ++ binopName op ++ ppExpr p right) + +ppExpr prec (Unary op opPrec expr) = + parenIf (opPrec <= prec) (op ++ ppExpr (opPrec-1) expr) + +ppExpr prec (PostIncDec expr op) = + parenIf (9 <= prec) (ppExpr 8 expr ++ op) + +ppExpr prec SP = "sp" +ppExpr prec (Imm n) = ppValue n +ppExpr prec (ImmString n) = ppQuotedString (evalFrom n decodeString) -- FIXME: quote +ppExpr prec (Mem n) = "mem" ++ show n +ppExpr prec (Local n) = nameLocal n +ppExpr prec (SpecialName x) = x + +-- ppExpr prec foo = "{{{" ++ show foo ++ "}}}" + + +{--------------} + + +ppObject treeDepth n (Object name attribs props) = + "" : firstLine : indentWith (map ppProp props) + ++ [" has\t" ++ ppAttribs attribs ++ ";"] + where + firstLine = "Object" ++ take (3*treeDepth+1) (cycle " ->") + ++ nameObject n ++ ' ' : ppQuotedString name + + +ppProp (n, private, vals) = + unwords (nameProp n : map ppValue vals) ++ ";" + + +ppAttribs attribs = + join " " [nameAttr attr | attr <- attribs] + +indentWith [] = [] +indentWith (firstLine : lines) = + (" with\t" ++ firstLine) : map ('\t' :) lines + + +{--------------} + + +-- FIXME: leaves something to be desired + +ppValue val + | val <= 16 || not (inRange wholeFile val) + = show val -- FIXME: negatives? + | otherwise + = case byteAt val of + 0x70 -> nameObject val + 0x60 -> ppDictWord (dictWordAt val) + 0xC0 -> nameRoutine val + 0xC1 -> nameRoutine val + 0xE0 -> ppQuotedString (evalFrom val decodeString) + 0xE1 -> ppQuotedString (evalFrom val decodeString) + _ -> show val + + +{--------------} + + +ppQuotedString s = '"' : ppString s ++ "\"" + +ppString "" = "" +ppString (x : rest) + | x == '\10' = '^' : ppString rest + | x == '"' = '~' : ppString rest + | x <= '\x153' = (informEscapes ! x) ++ ppString rest + | otherwise = informEscapeChar x (ppString rest) + +ppString' "" = "" +ppString' (x : rest) + | x == '\'' = '^' : ppString' rest + | x == '"' = '"' : ppString' rest + | x <= '\x153' = (informEscapes ! x) ++ ppString' rest + | otherwise = informEscapeChar x (ppString' rest) + + +-- correct behavior of showHex requires GHC 6.2 +informEscapeChar x rest = + "@{" ++ showHex (ord x) ('}' : rest) + + +informEscapes :: Array Char String + +informEscapes = + accumArray (\a b -> b) undefined ('\0','\x153') $ + [(x,[x]) | x <- ['\32'..'\126']] + ++ [(x,informEscapeChar x "") | x <- ['\0'..'\31'] ++ "^~@\"" ++ ['\127'..'\x153']] + ++ informSpecialEscapes + + +informSpecialEscapes :: [(Char,String)] +informSpecialEscapes = + [('\xE4',"@:a"), ('\xF6',"@:o"), ('\xFC',"@:u"), + ('\xC4',"@:A"), ('\xD6',"@:O"), ('\xDC',"@:U"), + ('\xDF',"@ss"), ('\xAB',"@>>"), ('\xBB',"@<<"), + ('\xEB',"@:e"), ('\xEF',"@:i"), ('\xFF',"@:y"), + ('\xCB',"@:E"), ('\xCF',"@:I"), ('\xE1',"@'a"), + ('\xE9',"@'e"), ('\xED',"@'i"), ('\xF3',"@'o"), + ('\xFA',"@'u"), ('\xFD',"@'y"), ('\xC1',"@'A"), + ('\xC9',"@'E"), ('\xCD',"@'I"), ('\xD3',"@'O"), + ('\xDA',"@'U"), ('\xDD',"@'Y"), ('\xE0',"@`a"), + ('\xE8',"@`e"), ('\xEC',"@`i"), ('\xF2',"@`o"), + ('\xF9',"@`u"), ('\xC0',"@`A"), ('\xC8',"@`E"), + ('\xCC',"@`I"), ('\xD2',"@`O"), ('\xD9',"@`U"), + ('\xE2',"@^a"), ('\xEA',"@^e"), + ('\xEE',"@^i"), ('\xF4',"@^o"), + ('\xFB',"@^u"), ('\xC2',"@^A"), + ('\xCA',"@^E"), ('\xCE',"@^I"), + ('\xD4',"@^O"), ('\xDB',"@^U"), + ('\xE5',"@oa"), ('\xC5',"@oA"), + ('\xF8',"@/o"), ('\xD8',"@/O"), + ('\xE3',"@~a"), ('\xF1',"@~n"), ('\xF5',"@~o"), + ('\xC3',"@~A"), ('\xD1',"@~N"), ('\xD5',"@~O"), + ('\xE6',"@ae"), ('\xC6',"@AE"), + ('\xE7',"@cc"), ('\xC7',"@cC"), + ('\xFE',"@th"), ('\xF0',"@et"), ('\xDE',"@Th"), ('\xD0',"@Et"), + ('\xA3',"@LL"), + ('\x0153',"@oe"), ('\x0152',"@OE"), + ('\xA1',"@!!"), ('\xBF',"@??")] + + +{---------------} + + +ppVerbs :: [([String],[GrammarLine])] -> [String] + +ppVerbs = concatMap ppVerb + +ppVerb (verbs,grammars) = + -- FIXME: check meta + "" : unwords ("Verb" : map ppDictWord verbs) + : map ppVerbGrammarLine grammars ++ [";"] + +ppVerbGrammarLine (GrammarLine tokens action reverse) = + tab 7 (" * " ++ unwords (map ppVerbToken tokens)) + ("-> " ++ nameAction action ++ if reverse then " reverse" else "") + +-- FIXME +ppVerbToken (ElementaryToken s) = s +ppVerbToken (Preposition p) = ppDictWord p +ppVerbToken (Attribute n) = nameAttr n +ppVerbToken (ParseRoutine prefix n) = prefix ++ nameRoutine n +ppVerbToken (Alternatives tokens) = + join "/" (map ppVerbToken tokens) + + +ppDictWord w = '\'' : ppString' w ++ "'" + + +{--------------} + + +parenIf True x = '(' : x ++ ")" +parenIf False x = x + + +nameObject :: Int -> String +nameObject 0 = "nothing" +nameObject n = + case tableLookup n objectNames of + Just name -> name + Nothing -> "unknownObj" ++ show n + +objectNames = + makeLookupTable $ + -- earlier in the list takes precedence + map guessObjName objects + +guessObjName (n,obj) = + case obj of + Object "" _ _ -> (n,baseName) + Object desc _ _ -> (n,baseName ++ '_' : makeIdent desc) + where baseName = "obj" ++ show n + +makeIdent name = + map helper name + where helper c = if isLegalIdentChar c then c else '_' + +isLegalIdentChar c = + inRange ('0','9') c || inRange ('A','Z') c || inRange ('a','z') c + + +nameProp n = + maybeIdx commonPropNames n $ + maybeIdx indivPropNames n $ + "prop" ++ show n + +nameAttr n = + maybeIdx attribNames n $ + "attr" ++ show n + +nameAction n = + maybeIdx actionNames n $ + "Action" ++ show n + +nameRoutine :: Int -> String +nameRoutine addr = + fromMaybe ("routine" ++ show addr) + (addr `tableLookup` routineNames) + +routineNames = + makeLookupTable $ + -- earlier in the list takes precedence + [(addr, nameAction n ++ "Sub") + | addr <- actionRoutines | n <- [0..]] + +nameLocal n = "local" ++ show n + +nameLabel n = "label" ++ show n + + +maybeIdx array n def + | inRange (bounds array) n = fromMaybe def (array ! n) + | otherwise = def + + +indentBlock = map indentBlock' + +indentBlock' ('<':rest) = ' ':' ':rest +indentBlock' rest = ' ':' ':' ':' ':rest + + +join sep [] = [] +join sep xs = foldr1 (\x y -> x ++ sep ++ y) xs + + +tab :: Int -> String -> String -> String +tab n x y = + x ++ makeTabs (n - length x `div` 8) ++ y + +makeTabs n | n <= 0 = " " + | otherwise = replicate n '\t' diff --git a/Mrifk_storyfile.hs b/Mrifk_storyfile.hs new file mode 100644 index 0000000..aa63db2 --- /dev/null +++ b/Mrifk_storyfile.hs @@ -0,0 +1,177 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Mrifk_storyfile ( + wholeFile, + byteAt, dwordAt, bytesFrom, + getPos, isEOS, peekUByte, getUByte, getBytes, + getSByte, getUWord, getSWord, getDword, + repeatUntilEmpty, evalFrom, evalFromTo, fromTo, + hdrMagic, hdrVersion, hdrRAMStart, + hdrExtStart, hdrStartFunc, hdrDecodingTbl +) where + + +import qualified ReadBinary + +import Mrifk_util +import Mrifk_cmdline + +import System.IO.Unsafe (unsafePerformIO) +import Control.Monad.State (State,get,put,evalState) +import Foreign (plusPtr) + + +(inputFile,inputFileLen) = + handleBlorb $ unsafePerformIO $ + ReadBinary.readBinaryFile storyFileName + +byteAt n = ReadBinary.byteAt inputFile n + +wholeFile :: DataBlock +wholeFile = (0,inputFileLen) + +from :: Int -> DataBlock +from n = onFst (+n) wholeFile + +fromTo :: Int -> Int -> DataBlock +fromTo n k = (n,k) + +evalFrom n act = evalState act (from n) +evalFromTo n k act = evalState act (fromTo n k) + +dwordAt n = evalFrom n getDword + +bytesFrom n = evalFrom n (repeatUntilEmpty getUByte) + + +{-----------} + + +hdrMagic = dwordAt 0 -- "Glul" +hdrVersion = dwordAt 4 +hdrRAMStart = dwordAt 8 +hdrExtStart = dwordAt 12 +hdrEndMem = dwordAt 16 +hdrStackSize = dwordAt 20 +hdrStartFunc = dwordAt 24 +hdrDecodingTbl = dwordAt 28 + +hdrExtensionMagic = dwordAt 36 -- "Info" +hdrExtensionFormatVersion = dwordAt 40 +hdrExtensionInformVersion = dwordAt 44 + + +{-------------} + + +handleBlorb :: (ReadBinary.BinaryData, Int) + -> (ReadBinary.BinaryData, Int) + +handleBlorb (p,size) = + if dwordAt 0 /= 0x464F524D then + (p,size) + else if dwordAt 8 /= 0x49465253 || dwordAt 12 /= 0x52496478 then + error "Unrecognized blorb file format" + else + let numResources = dwordAt 16 + resources = take numResources + [(dwordAt n, dwordAt (n+8)) | n <- [24,36..]] + in case [pos | (0x45786563,pos) <- resources] of + [] -> error "No story file in blorb" + [pos] -> case dwordAt pos of + 0x5A434F44 -> error "This appears to be a Z-machine blorb. Try Reform." + 0x474C554C -> (p `plusPtr` (pos+8), dwordAt (pos+4)) + _ -> error "Unrecognized blorb file format" + _ -> error "More than one story file found. You'll have to extract one by hand." + where + byteAt n = ReadBinary.byteAt p n + dwordAt n = byteAt n * 16777216 + + byteAt (n+1) * 65536 + + byteAt (n+2) * 256 + + byteAt (n+3) + + +{----------} + + +type DataBlock = (Int,Int) + +type StreamReader a = State DataBlock a + +getPos :: StreamReader Int +getPos = + do (a,z) <- get + return a + +isEOS :: StreamReader Bool +isEOS = + do (a,z) <- get + return (a >= z) + +peekUByte :: StreamReader Int +peekUByte = + do (a,z) <- get + return (byteAt a) + +getUByte :: StreamReader Int +getUByte = + do (a,z) <- get + put (a+1,z) -- should probably bounds-check + return (byteAt a) + +getBytes :: Int -> StreamReader DataBlock +getBytes n = + do (a,z) <- get + put (a+n,z) -- definitely bounds-check + return (a,a+n) + +getSByte :: StreamReader Int +getSByte = + do x <- getUByte + return (if x < 128 then x else x - 256) + +getUWord :: StreamReader Int +getUWord = + do a <- getUByte + b <- getUByte + return (a*256+b) + +getSWord :: StreamReader Int +getSWord = + do a <- getSByte + b <- getUByte + return (a*256+b) + +getDword :: StreamReader Int +getDword = + do hi <- getSWord + lo <- getUWord + return (hi*65536+lo) + +repeatUntilEmpty :: StreamReader a -> StreamReader [a] +repeatUntilEmpty action = + do eos <- isEOS + if eos then + return [] + else do + first <- action + s <- get + let lazyRest = evalState (repeatUntilEmpty action) s + return (first : lazyRest) diff --git a/Mrifk_strings.hs b/Mrifk_strings.hs new file mode 100644 index 0000000..cad8e0a --- /dev/null +++ b/Mrifk_strings.hs @@ -0,0 +1,108 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Mrifk_strings ( + decodeString, strings +) where + + +import Mrifk_storyfile + +import Data.Char (chr) +import Data.Bits (testBit) +import Control.Monad.State (evalState) + + +{------------------------------- Huffman table ----------------------------------} + +data HuffNode = + HuffBranch HuffNode HuffNode | + HuffLiteral String | HuffStop | + HuffIndir Int [Int] | HuffDoubleIndir Int [Int] + deriving Show + + +huffmanTree = huffmanSubtreeAt (dwordAt (hdrDecodingTbl+8)) + +huffmanSubtreeAt n = + case byteAt n of + 0 -> HuffBranch (huffmanSubtreeAt (dwordAt (n+1))) + (huffmanSubtreeAt (dwordAt (n+5))) + 1 -> HuffStop + 2 -> HuffLiteral [chr (byteAt (n+1))] + 3 -> HuffLiteral (map chr (takeWhile (/= 0) (bytesFrom (n+1)))) + 8 -> HuffIndir (dwordAt (n+1)) [] + 9 -> HuffDoubleIndir (dwordAt (n+1)) [] + 10 -> HuffIndir (dwordAt (n+1)) (take (dwordAt (n+5)) (dwordsFrom (n+9))) + 11 -> HuffDoubleIndir (dwordAt (n+1)) (take (dwordAt (n+5)) (dwordsFrom (n+9))) + + +dwordsFrom n = evalFrom n (repeatUntilEmpty getDword) + + +-- I do this in the monad so that I can find where +-- the string ends by using getPos afterwards + +huffDecode = huffDecode' huffmanTree [] + +huffDecode' branch@(HuffBranch _ _) [] = + do bits <- getUByte + huffDecode' branch (map (testBit bits) [0..7]) + +huffDecode' branch@(HuffBranch zero one) (bit:bits) = + huffDecode' (if bit then one else zero) bits + +huffDecode' (HuffLiteral s) bits = + do rest <- huffDecode' huffmanTree bits + return (s ++ rest) + +huffDecode' HuffStop bits = return [] + + +{------------------------------- ----------------------------------} + + +strings :: [(Int,String)] + +strings = evalState decodeStrings (hdrDecodingTbl + dwordAt hdrDecodingTbl, hdrRAMStart) + + +decodeStrings = + do eos <- isEOS + if eos then return [] else do + type_ <- peekUByte + if type_ < 0xE0 then return [] else do + pos <- getPos + s <- decodeString + ss <- decodeStrings + return ((pos,s):ss) + + +decodeString = + do type_ <- getUByte + case type_ of + 0xE0 -> getCString + 0xE1 -> huffDecode + 0xE2 -> getCString + +getCString = + do x <- getUByte + if x == 0 then return [] + else do rest <- getCString + return (chr x : rest) diff --git a/Mrifk_util.hs b/Mrifk_util.hs new file mode 100644 index 0000000..9522bfc --- /dev/null +++ b/Mrifk_util.hs @@ -0,0 +1,61 @@ +{- +Mrifk, a decompiler for Glulx story files. +Copyright 2004 Ben Rudiak-Gould. + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You can read the GNU General Public License at this URL: + http://www.gnu.org/copyleft/gpl.html +-} + + +module Mrifk_util ( + makeLookupTable, tableLookup, + sortFst, uniqBy, + onFst +) where + + +import Data.List (sortBy) + + +onFst f (a,b) = (f a,b) + + +makeLookupTable :: (Ord a) => [(a,b)] -> LookupTable a b +tableLookup :: (Ord a) => a -> LookupTable a b -> Maybe b + +data LookupTable a b = + LookupTableBranch (LookupTable a b) a b (LookupTable a b) | LookupTableLeaf + +makeLookupTable = makeLookupTable' . uniqFst . sortFst + +makeLookupTable' [] = LookupTableLeaf +makeLookupTable' x = + let (left,((p,q):right)) = splitAt (length x `div` 2) x + in LookupTableBranch (makeLookupTable' left) p q (makeLookupTable' right) + +tableLookup x LookupTableLeaf = Nothing +tableLookup x (LookupTableBranch left x' y right) = + case x `compare` x' of + EQ -> Just y + LT -> tableLookup x left + GT -> tableLookup x right + + +uniqBy eq (x:xs) = x : uniqBy eq (dropWhile (eq x) xs) +uniqBy eq [] = [] + +uniqFst :: (Eq a) => [(a,b)] -> [(a,b)] +uniqFst = uniqBy (\(a,_) (b,_) -> a == b) + +sortFst :: (Ord a) => [(a,b)] -> [(a,b)] +sortFst = sortBy (\(a,_) (b,_) -> a `compare` b) diff --git a/ReadBinary.hs b/ReadBinary.hs new file mode 100644 index 0000000..3829061 --- /dev/null +++ b/ReadBinary.hs @@ -0,0 +1,47 @@ +module ReadBinary ( + BinaryData, + readBinaryFile, + byteAt +) where + +import System.IO.Unsafe (unsafePerformIO) +import Foreign (peekElemOff) +import Foreign.Marshal.Alloc (mallocBytes) +import Foreign.Ptr (Ptr,nullPtr) +import Foreign.C.Types (CUChar,CInt(..),CLong(..),CSize(..),CFile) +import Foreign.C.String (CString,withCString) + + +type BinaryData = Ptr CUChar +readBinaryFile :: String -> IO (BinaryData,Int) +byteAt :: BinaryData -> Int -> Int + + +foreign import ccall "stdio.h fopen" fopen :: CString -> CString -> IO (Ptr CFile) +foreign import ccall "stdio.h fseek" fseek :: Ptr CFile -> CLong -> CInt -> IO CInt +foreign import ccall "stdio.h ftell" ftell :: Ptr CFile -> IO CLong +foreign import ccall "stdio.h fread" fread :: Ptr a -> CSize -> CSize -> Ptr CFile -> IO CSize +foreign import ccall "stdio.h fclose" fclose :: Ptr CFile -> IO CInt + +fopen' name access = + withCString name (\n -> + withCString access (\a -> + do f <- fopen n a + if f == nullPtr + then ioError (userError ("Unable to open the file " ++ name)) + else return f)) + + +readBinaryFile name = do + f <- fopen' name "rb" + fseek f 0 2 -- seek to end of file + len <- ftell f + fseek f 0 0 -- seek back to beginning + buf <- mallocBytes (fromIntegral len) + fread buf 1 (fromIntegral len) f + fclose f + return (buf, fromIntegral len) + + +{-# INLINE byteAt #-} +buf `byteAt` ofs = fromEnum (unsafePerformIO (peekElemOff buf ofs))