diff --git a/decompile/src/JSRF/Core.hpp b/decompile/src/JSRF/Core.hpp index 838a2fb..90be3fe 100644 --- a/decompile/src/JSRF/Core.hpp +++ b/decompile/src/JSRF/Core.hpp @@ -215,7 +215,7 @@ enum GlobalIndex { }; #pragma pack(4) -extern struct Game { +struct Game { char unknown0x4[4]; unsigned unknown0x8; unsigned * unknown0xC; @@ -350,7 +350,8 @@ extern struct Game { void draw(); void frame(); int mainLoop(); -} * g_game; +}; +extern Game * g_game; // Root of the exec GameObj tree struct RootExecObj : GameObj { diff --git a/decompile/src/JSRF/GameData.hpp b/decompile/src/JSRF/GameData.hpp index 7896835..75f3e65 100644 --- a/decompile/src/JSRF/GameData.hpp +++ b/decompile/src/JSRF/GameData.hpp @@ -75,7 +75,13 @@ enum FlagList { }; // Numeric IDs for tag sizes (should maybe put somewhere else?) -enum TagSize { TAGSIZE_SS, TAGSIZE_S, TAGSIZE_M, TAGSIZE_L, TAGSIZE_XL }; +enum TagSize { + TAGSIZE_SS, + TAGSIZE_S, + TAGSIZE_M, + TAGSIZE_L, + TAGSIZE_XL +}; // Numeric IDs for different test run categories enum TestRunType { @@ -87,22 +93,31 @@ enum TestRunType { // Unpacked version of TestRunScoreSaved struct TestRunScore { - unsigned score; - unsigned character; - unsigned rank1; // Used by Jet Tech - unsigned rank2; // Used by other test runs + unsigned score; + unsigned character; + unsigned rank1; // Used by Jet Tech + unsigned rank2; // Used by other test runs }; // Numeric IDs for different timers -enum Timer { TIMER_DEATHBALLPRACTICE, TIMER_CLUTCH, TIMER_UNUSED }; +enum Timer { + TIMER_DEATHBALLPRACTICE, + TIMER_CLUTCH, + TIMER_UNUSED +}; // Info showed in save/load menu -struct SaveDescription { unsigned chapter, playtimeSeconds; }; +struct SaveDescription { + unsigned chapter, playtimeSeconds; +}; -union FlagListOrPtr { FlagList list; unsigned * ptr; }; +union FlagListOrPtr { + FlagList list; + unsigned * ptr; +}; // Save data-ish data structure used at runtime -extern struct GameData { +struct GameData { SaveData saveActive; SaveData saveStashed; // Holds save 
data during test runs/tutorials @@ -188,6 +203,7 @@ extern struct GameData { virtual ~GameData(); void addHighScore(unsigned stageId, TestRunType type, TestRunScore * score); -} g_gameData; +}; +extern GameData g_gameData; #endif diff --git a/decompile/src/XDK/D3D.hpp b/decompile/src/XDK/D3D.hpp index b89f1bd..cea0682 100644 --- a/decompile/src/XDK/D3D.hpp +++ b/decompile/src/XDK/D3D.hpp @@ -10,9 +10,15 @@ Direct3D8 declarations. typedef DWORD D3DCOLOR; -struct D3DVECTOR { float x, y, z ; }; -struct D3DVECTOR4 { float x, y, z, w; }; +struct D3DVECTOR { + float x, y, z ; +}; +struct D3DVECTOR4 { + float x, y, z, w; +}; -struct D3DRECT { LONG x1, y1, x2, y2; }; +struct D3DRECT { + LONG x1, y1, x2, y2; +}; #endif diff --git a/documentation/gettingstarted.md b/documentation/gettingstarted.md index 9c34c68..5767e38 100644 --- a/documentation/gettingstarted.md +++ b/documentation/gettingstarted.md @@ -83,15 +83,31 @@ executable where objdiff doesn't expect them to be, which will mess up our diffs. To correct this, open the memory map (`Window > Memory Map`) and uncheck the "X" column for `.rdata`, `.data`, and `DOLBY`. -Now we'll import symbols from the JSRF decompilation repository. After running -the analysis, open the script manager (`Window > Script Manager`) and select -the "Data" folder in the left pane. Double click the script titled -`ImportSymbolsScript.py`, and a file picker will open after a moment. Select -`symboltable.tsv` from the `delink/` directory of your cloned JSRF -decompilation repository, and you should see a bunch of `Created function...` -and `Created label...` in the scripting console window. Save your changes -(save icon in the top left of the CodeBrowser window), and your Ghidra project -should be all ready for creating object files for objdiff. +Now we'll import data types from the decompilation. 
Open a shell in the +`ghidra/` directory of your copy of the repository and run `make_header.sh`, +which will produce a `jsrf.h` in the same directory with the combined contents +of every header in a format suitable for Ghidra. Then, in Ghidra, select +`File > Parse C Source...` to open a window for importing C headers. Remove +everything from the "Source files to parse" and "Parse options" boxes, and add +`jsrf.h` to the former (click the green + symbol on the right and select the +`jsrf.h` file). Click the "..." on the "Program Architecture:" box and select +the row with the values "x86," "default," "32," "little," and "Visual Studio." +Finally, click the "Parse to Program" button, "Continue" to confirm, and +"Don't Use Open Archives" (the header is completely self-contained and doesn't +need any information from any other data type archives). You should then see a +window reporting successful import, and you'll be able to find `jsrf.h` with +all of its definitions under `default.xbe` in the Data Type Manager window in +the bottom left. + +Lastly, we'll import symbols from the JSRF decompilation repository. Open the +script manager (`Window > Script Manager`) and select the "Data" folder in the +left pane. Double click the script titled `ImportSymbolsScript.py`, and a file +picker will open after a moment. Select `symboltable.tsv` from the `ghidra/` +directory of your cloned JSRF decompilation repository, and you should see a +bunch of `Created function...` and `Created label...` printed to the scripting +console window. Save your changes (save icon in the top left of the +CodeBrowser window), and your Ghidra project should be all ready for creating +object files for objdiff. ### Producing Object Files @@ -198,12 +214,12 @@ request to merge it back into the online copy. ## Contributing to Delinking Getting the JSRF binary delinked is just as important as decompiling the resulting object files, but takes a bit more investment. 
The concrete task of -a delinking contributor is to populate `symboltable.tsv` and `objects.csv` in -the `delink/` directory, which together enable consistent delinking of object -files. The former lists symbols at different addresses through the whole -executable, while the latter lists the address ranges that have been identified -as separable objects. Both of these things are figured out by combing over the -whole executable in Ghidra. +a delinking contributor is to populate `symboltable.tsv` in the `ghidra/` +directory and `objects.csv` in the `delink/` directory, which together enable +consistent delinking of object files. The former lists symbols at different +addresses through the whole executable, while the latter lists the address +ranges that have been identified as separable objects. Both of these things +are figured out by combing over the whole executable in Ghidra. ### Updating `symboltable.tsv` @@ -243,12 +259,37 @@ Now, to actually export the table, right-click on one of the table cells, click to CSV..." before selecting where to save your exported symbol table. The final step is converting this CSV file to the format expected by -`ImportSymbolsScript.py`. Open a shell in the repository's `delink/` directory +`ImportSymbolsScript.py`. Open a shell in the repository's `ghidra/` directory and run `make_symboltable.sh` with the path of your exported CSV as an argument, and `symboltable.tsv` will be overwritten with a new table containing your exported symbols. +### Updating `make_header.sh` +If you've added any header files, you'll want to add them to the `HEADERS` +variable in `ghidra/make_header.sh`. Make sure that any other header files +they depend on are earlier in the list, as this script combines everything into +one file without any `#include` directives. Make sure the script runs +successfully and Ghidra is able to import the resulting `jsrf.h`. 
+ +Keep in mind that `make_header.sh` uses a fairly rudimentary `awk` script to +convert C++ headers to C, which places some gentle constraints on how +declarations need to be written. In general, it's enough to just keep things +simple and not do anything unusual (keep data type and variable declarations +separate, don't use macros for declarations, etc.), but the one big catch is +that the body of a data type definition must not be on the same line as the +opening or closing braces. That is, do not write +```c++ +struct X { unsigned x; }; +``` +but rather +```c++ +struct X { + unsigned x; +}; +``` + + ### Updating `objects.csv` `objects.csv` is a listing of addresses for each object file or group of object files that we've identified. Each column after the first two corresponds to a diff --git a/ghidra/.gitignore b/ghidra/.gitignore new file mode 100644 index 0000000..2fb0644 --- /dev/null +++ b/ghidra/.gitignore @@ -0,0 +1 @@ +jsrf.h diff --git a/ghidra/headerconvert.awk b/ghidra/headerconvert.awk new file mode 100644 index 0000000..c8383aa --- /dev/null +++ b/ghidra/headerconvert.awk @@ -0,0 +1,115 @@ +# awk script to convert C++ header files into valid C for Ghidra +# This script naturally isn't 100% robust, but so long as the header files +# stay simple without the formatting getting too weird, this works fairly well. 
+
+function fn_ptr(cls, signature,    ret, fname, args) {
+# Convert the C++ method declaration in "signature" (a member of class "cls") to a thiscall C function pointer member; ret, fname and args are local scratch variables
+    if (signature ~ /^[ \t]*~/) # Special case for virtual destructor (test the argument, not the current input record)
+        return "\t\t" cls " * __attribute__((thiscall)) "\
+               "(*scalar_deleting_destructor)("\
+               cls " *, "\
+               "BOOL"\
+               ");\n"
+    else {
+        ret = signature # Return type: everything before the method name
+        sub(/[a-zA-Z_][a-zA-Z_0-9]*\(.*/, "", ret) # "*" rather than "+" so one-character method names also match
+
+        fname = substr(signature, length(ret)+1) # Method name: text between return type and "("
+        sub(/\(.*/, "", fname)
+
+        args = signature
+        sub(/.*\(/ , "(" cls " *, ", args) # Add "this" pointer
+        sub(/,[ \t]\)/, ")" , args) # Remove trailing ','
+
+        return ret "__attribute__((thiscall)) (*" fname ")" args "\n"
+    }
+}
+
+# Exclude lines with no code
+NF == 0 || $1 ~ /^\/\// { next }
+
+# Include preprocessor directives, except some unneeded ones
+$1 ~ /^#/ && !($1 ~ /^#(if|endif|include)/) && !/_HPP$/
+
+# Include typedefs
+/^typedef/
+
+# Make enums and unions typedef'd
+/^(enum|union)/,/^\}/ {
+    # Pull name from first line and prefix with "typedef"
+    if (/^(enum|union)/) {
+        name = $2
+        printf("typedef %s\n", $0)
+        next
+    }
+
+    if (!/^\}/) print # Print body unchanged
+    else printf("} %s;\n\n", name) # Add name to complete typedef
+}
+
+# The main event: turn classes and structs into typedef'd C structs
+# This means turning parent classes into members and defining vtable members
+# pointing to structs of appropriately-typed function pointers.
+/^(class|struct)/,/^\}/ {
+    # Initialize some data describing struct
+    if (/^(class|struct)/) {
+        name = $2
+        parent = $3 == ":" ? $4 : ""
+        body = ""
+        vtable = ""
+
+        next
+    }
+
+    # Read struct members
+    if (/\(/) { # Method start
+        in_method = 1
+
+        if ($1 == "virtual") { # Record virtual method for vtable
+            sub(/virtual[ \t]+/, "")
+            method = "\t" $0
+        }
+    }
+
+    if (/\)/) { # Method end
+        if (method) { # Add method to vtable
+            if (!/\(/) { # Add line if not added already
+                sub(/^[ \t]+/, "")
+                method = method $0
+            }
+
+            vtable = vtable fn_ptr(name, method)
+        }
+
+        in_method = 0
+        method = ""
+        next
+    }
+
+    if (in_method) { # Method arguments
+        # Add line if not added already and method is virtual
+        if (method && !/\(/) {
+            sub(/^[ \t]+/, "")
+            method = method $0
+        }
+
+        next
+    }
+
+    if (!/^\}/) body = body $0 "\n" # Add to body
+    else { # Reached end; output struct
+        # Add parent as first member, if there is one
+        if (parent) printf("typedef struct %s {\n\t%s super;\n", name, parent)
+        else printf("typedef struct %s {\n" , name )
+
+        # Add vtable as first member, if there is one
+        # We assume that if a class has virtual methods and a parent,
+        # the parent already has a vtable, and we don't handle the case
+        # of a derived class having virtual methods not on its parent.
+        if (vtable && !parent) {
+            printf("\tstruct %sVtbl {\n%s\t} * vtable;\n", name, vtable)
+        }
+
+        printf("%s} %s;\n\n", body, name)
+    }
+}
diff --git a/ghidra/make_header.sh b/ghidra/make_header.sh
new file mode 100755
index 0000000..b99ae22
--- /dev/null
+++ b/ghidra/make_header.sh
@@ -0,0 +1,22 @@
+#!/bin/sh -eu
+# Merges all header files in the decompilation into a C header file called
+# jsrf.h for importing into Ghidra
+
+# Create output file
+printf '%s\n' '// Automatically generated mass header file for Ghidra' > jsrf.h
+
+# Figuring out include order programmatically is awful, so we'll have to add
+# all the headers here by hand in an order that functions properly
+HEADERS="
+    Std.hpp
+    XDK/Win32.hpp
+    XDK/D3D.hpp
+    Smilebit/MMatrix.hpp
+    JSRF/Core.hpp
+    JSRF/GameData.hpp
+"
+
+# Process each header file into jsrf.h
+for header in $HEADERS; do
+    awk -f headerconvert.awk "../decompile/src/$header" >> jsrf.h
+done
diff --git a/delink/make_symboltable.sh b/ghidra/make_symboltable.sh
similarity index 100%
rename from delink/make_symboltable.sh
rename to ghidra/make_symboltable.sh
diff --git a/delink/symboltable.tsv b/ghidra/symboltable.tsv
similarity index 100%
rename from delink/symboltable.tsv
rename to ghidra/symboltable.tsv