Add data type import for Ghidra

This commit is contained in:
KeybadeBlox 2026-02-04 19:52:12 -05:00
parent 30f8a5879e
commit 63002e0f08
9 changed files with 233 additions and 31 deletions

View file

@ -215,7 +215,7 @@ enum GlobalIndex {
};
#pragma pack(4)
extern struct Game {
struct Game {
char unknown0x4[4];
unsigned unknown0x8;
unsigned * unknown0xC;
@ -350,7 +350,8 @@ extern struct Game {
void draw();
void frame();
int mainLoop();
} * g_game;
};
extern Game * g_game;
// Root of the exec GameObj tree
struct RootExecObj : GameObj {

View file

@ -75,7 +75,13 @@ enum FlagList {
};
// Numeric IDs for tag sizes (should maybe put somewhere else?)
enum TagSize { TAGSIZE_SS, TAGSIZE_S, TAGSIZE_M, TAGSIZE_L, TAGSIZE_XL };
enum TagSize {
TAGSIZE_SS,
TAGSIZE_S,
TAGSIZE_M,
TAGSIZE_L,
TAGSIZE_XL
};
// Numeric IDs for different test run categories
enum TestRunType {
@ -87,22 +93,31 @@ enum TestRunType {
// Unpacked version of TestRunScoreSaved
struct TestRunScore {
unsigned score;
unsigned character;
unsigned rank1; // Used by Jet Tech
unsigned rank2; // Used by other test runs
unsigned score;
unsigned character;
unsigned rank1; // Used by Jet Tech
unsigned rank2; // Used by other test runs
};
// Numeric IDs for different timers
enum Timer { TIMER_DEATHBALLPRACTICE, TIMER_CLUTCH, TIMER_UNUSED };
enum Timer {
TIMER_DEATHBALLPRACTICE,
TIMER_CLUTCH,
TIMER_UNUSED
};
// Info shown in save/load menu
struct SaveDescription { unsigned chapter, playtimeSeconds; };
struct SaveDescription {
unsigned chapter, playtimeSeconds;
};
union FlagListOrPtr { FlagList list; unsigned * ptr; };
// Overlapping storage holding either a FlagList value or a pointer to unsigned
union FlagListOrPtr {
FlagList list;
unsigned * ptr;
};
// Save data-ish data structure used at runtime
extern struct GameData {
struct GameData {
SaveData saveActive;
SaveData saveStashed; // Holds save data during test runs/tutorials
@ -188,6 +203,7 @@ extern struct GameData {
virtual ~GameData();
void addHighScore(unsigned stageId, TestRunType type, TestRunScore * score);
} g_gameData;
};
extern GameData g_gameData;
#endif

View file

@ -10,9 +10,15 @@ Direct3D8 declarations.
typedef DWORD D3DCOLOR;
struct D3DVECTOR { float x, y, z ; };
struct D3DVECTOR4 { float x, y, z, w; };
// Three-component float vector (x, y, z)
struct D3DVECTOR {
float x, y, z ;
};
// Four-component float vector (x, y, z, w)
struct D3DVECTOR4 {
float x, y, z, w;
};
struct D3DRECT { LONG x1, y1, x2, y2; };
// Rectangle given as four LONG coordinates
// NOTE(review): presumably (x1, y1) and (x2, y2) are opposite corners, as in
// the Direct3D 8 D3DRECT -- confirm against the SDK documentation
struct D3DRECT {
LONG x1, y1, x2, y2;
};
#endif

View file

@ -83,15 +83,31 @@ executable where objdiff doesn't expect them to be, which will mess up our
diffs. To correct this, open the memory map (`Window > Memory Map`) and
uncheck the "X" column for `.rdata`, `.data`, and `DOLBY`.
Now we'll import symbols from the JSRF decompilation repository. After running
the analysis, open the script manager (`Window > Script Manager`) and select
the "Data" folder in the left pane. Double click the script titled
`ImportSymbolsScript.py`, and a file picker will open after a moment. Select
`symboltable.tsv` from the `delink/` directory of your cloned JSRF
decompilation repository, and you should see a bunch of `Created function...`
and `Created label...` in the scripting console window. Save your changes
(save icon in the top left of the CodeBrowser window), and your Ghidra project
should be all ready for creating object files for objdiff.
Now we'll import data types from the decompilation. Open a shell in the
`ghidra/` directory of your copy of the repository and run `make_header.sh`,
which will produce a `jsrf.h` in the same directory with the combined contents
of every header in a format suitable for Ghidra. Then, in Ghidra, select
`File > Parse C Source...` to open a window for importing C headers. Remove
everything from the "Source files to parse" and "Parse options" boxes, and add
`jsrf.h` to the former (click the green + symbol on the right and select the
`jsrf.h` file). Click the "..." on the "Program Architecture:" box and select
the row with the values "x86", "default", "32", "little", and "Visual Studio".
Finally, click the "Parse to Program" button, "Continue" to confirm, and
"Don't Use Open Archives" (the header is completely self-contained and doesn't
need any information from any other data type archives). You should then see a
window reporting successful import, and you'll be able to find `jsrf.h` with
all of its definitions under `default.xbe` in the Data Type Manager window in
the bottom left.
Lastly, we'll import symbols from the JSRF decompilation repository. Open the
script manager (`Window > Script Manager`) and select the "Data" folder in the
left pane. Double click the script titled `ImportSymbolsScript.py`, and a file
picker will open after a moment. Select `symboltable.tsv` from the `ghidra/`
directory of your cloned JSRF decompilation repository, and you should see a
bunch of `Created function...` and `Created label...` printed to the scripting
console window. Save your changes (save icon in the top left of the
CodeBrowser window), and your Ghidra project should be all ready for creating
object files for objdiff.
### Producing Object Files
@ -198,12 +214,12 @@ request to merge it back into the online copy.
## Contributing to Delinking
Getting the JSRF binary delinked is just as important as decompiling the
resulting object files, but takes a bit more investment. The concrete task of
a delinking contributor is to populate `symboltable.tsv` and `objects.csv` in
the `delink/` directory, which together enable consistent delinking of object
files. The former lists symbols at different addresses through the whole
executable, while the latter lists the address ranges that have been identified
as separable objects. Both of these things are figured out by combing over the
whole executable in Ghidra.
a delinking contributor is to populate `symboltable.tsv` in the `ghidra/`
directory and `objects.csv` in the `delink/` directory, which together enable
consistent delinking of object files. The former lists symbols at different
addresses through the whole executable, while the latter lists the address
ranges that have been identified as separable objects. Both of these things
are figured out by combing over the whole executable in Ghidra.
### Updating `symboltable.tsv`
@ -243,12 +259,37 @@ Now, to actually export the table, right-click on one of the table cells, click
to CSV..." before selecting where to save your exported symbol table.
The final step is converting this CSV file to the format expected by
`ImportSymbolsScript.py`. Open a shell in the repository's `delink/` directory
`ImportSymbolsScript.py`. Open a shell in the repository's `ghidra/` directory
and run `make_symboltable.sh` with the path of your exported CSV as an
argument, and `symboltable.tsv` will be overwritten with a new table containing
your exported symbols.
### Updating `make_header.sh`
If you've added any header files, you'll want to add them to the `HEADERS`
variable in `ghidra/make_header.sh`. Make sure that any other header files
they depend on are earlier in the list, as this script combines everything into
one file without any `#include` directives. Make sure the script runs
successfully and Ghidra is able to import the resulting `jsrf.h`.
Keep in mind that `make_header.sh` uses a fairly rudimentary `awk` script to
convert C++ headers to C, which places some gentle constraints on how
declarations need to be written. In general, it's enough to just keep things
simple and not do anything unusual (keep data type and variable declarations
separate, don't use macros for declarations, etc.), but the one big catch is
that the body of a data type definition must not be on the same line as the
opening or closing braces. That is, do not write
```c++
struct X { unsigned x; };
```
but rather
```c++
struct X {
unsigned x;
};
```
### Updating `objects.csv`
`objects.csv` is a listing of addresses for each object file or group of object
files that we've identified. Each column after the first two corresponds to a

1
ghidra/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
jsrf.h

115
ghidra/headerconvert.awk Normal file
View file

@ -0,0 +1,115 @@
# awk script to convert C++ header files into valid C for Ghidra
# This script naturally isn't 100% robust, but so long as the header files
# stay simple without the formatting getting too weird, this works fairly well.
# Convert a C++ method declaration into a C function pointer member.
#   cls       - name of the class the method belongs to; a "cls *" argument is
#               inserted as the first parameter to stand in for "this"
#   signature - the method declaration with "virtual" already removed, e.g.
#               "\tvoid draw();" (multi-line declarations joined into one)
# ret, fname, and args are local variables, not parameters.
# Returns the member declaration followed by a newline.
function fn_ptr(cls, signature,    ret, fname, args) {
	# Special case for virtual destructor.  The check looks at the signature
	# that was passed in rather than the current input record, so it gives the
	# same answer no matter which line awk happens to be on.
	if (signature ~ /^[ \t]*~/)
		return "\t\t" cls " * __attribute__((thiscall)) "\
		       "(*scalar_deleting_destructor)("\
		       cls " *, "\
		       "BOOL"\
		       ");\n"

	# Return type: everything before the identifier that is directly followed
	# by "(".  The identifier may be a single character long.
	ret = signature
	sub(/[a-zA-Z_][a-zA-Z_0-9]*\(.*/, "", ret)

	# Method name: what follows the return type, up to the parameter list
	fname = substr(signature, length(ret)+1)
	sub(/\(.*/, "", fname)

	# Parameter list: starts at the FIRST "(" so that parentheses inside the
	# parameters (e.g. function pointer arguments) are kept intact
	args = signature
	sub(/^[^(]*\(/, "(" cls " *, ", args)	# Add "this" pointer
	sub(/,[ \t]\)/, ")"           , args)	# Remove trailing ','

	return ret "__attribute__((thiscall)) (*" fname ")" args "\n"
}
# Exclude lines with no code (blank lines and whole-line // comments)
NF == 0 || $1 ~ /^\/\// { next }

# Include preprocessor directives, except some unneeded ones:
#  - conditionals; #else/#elif are dropped together with #if*/#endif so that
#    no orphaned branch directive ends up in the output
#  - #include, since the headers are merged into a single file
#  - lines ending in _HPP (presumably the include guard #define)
$1 ~ /^#/ && $1 !~ /^#(if|else|elif|endif|include)/ && !/_HPP$/

# Include typedefs
/^typedef/
# Make enums and unions typedef'd
# "enum X {" ... "};" becomes "typedef enum X {" ... "} X;" so that the bare
# name X can be used as a type in C.  A definition without a name cannot be
# typedef'd to anything, so it is passed through as-is.
/^(enum|union)/,/^\}/ {
	# Pull name from first line and prefix with "typedef"
	if (/^(enum|union)/) {
		name = $2
		sub(/\{.*/, "", name)	# "X{" -> "X"; a lone "{" -> "" (anonymous)
		if (name != "") printf("typedef %s\n", $0)
		else            print
		next
	}

	if (!/^\}/)          print                      # Print body unchanged
	else if (name != "") printf("} %s;\n\n", name)  # Add name to complete typedef
	else                 print "};\n"               # Anonymous: just close it
}
# The main event: turn classes and structs into typedef'd C structs
# This means turning parent classes into members and defining vtable members
# pointing to structs of appropriately-typed function pointers.
#
# State kept while reading one definition:
#   name, parent - type name and base class name ("" if there is none)
#   body         - data member lines collected so far
#   vtable       - function pointer members built from virtual methods
#   in_method    - nonzero while between a method's "(" and ")"
#   method       - text of the virtual method currently being collected
#
# Known limitation: any member line containing parentheses outside of a
# trailing // comment is treated as (part of) a method and is not added to
# the body.
/^(class|struct)/,/^\}/ {
	# Initialize some data describing struct
	if (/^(class|struct)/) {
		name   = $2
		parent = $3 == ":" ? $4 : ""
		# "class X : public Y {": the base class follows the specifier
		if (parent ~ /^(public|protected|private)$/) parent = $5
		body   = ""
		vtable = ""
		# Start clean even if an earlier definition ended mid-method
		in_method = 0
		method    = ""
		next
	}

	# Methods are recognized by their parentheses, so look for them in a
	# copy of the line with any trailing // comment removed.  Otherwise a
	# data member with a comment like "// (unused)" would be mistaken for a
	# method and dropped.
	code = $0
	sub(/[ \t]*\/\/.*/, "", code)

	# Access specifiers have no meaning in a C struct
	if (code ~ /^[ \t]*(public|protected|private)[ \t]*:[ \t]*$/) next

	has_open  = (code ~ /\(/)
	has_close = (code ~ /\)/)

	# Read struct members
	if (has_open) {	# Method start
		in_method = 1
		if ($1 == "virtual") {	# Record virtual method for vtable
			# The record itself is updated (not just the copy) so that
			# $1 reflects the declaration without "virtual" or comment
			$0 = code
			sub(/virtual[ \t]+/, "")
			code   = $0
			method = "\t" code
		}
	}
	if (has_close) {	# Method end
		if (method) {	# Add method to vtable
			if (!has_open) {	# Add line if not added already
				sub(/^[ \t]+/, "", code)
				method = method code
			}
			vtable = vtable fn_ptr(name, method)
		}
		in_method = 0
		method    = ""
		next
	}
	if (in_method) {	# Method arguments
		# Add line if not added already and method is virtual
		if (method && !has_open) {
			sub(/^[ \t]+/, "", code)
			method = method code
		}
		next
	}

	# Ordinary member: keep the original line, comment included
	if (!/^\}/) {
		body = body $0 "\n"
		next
	}

	# Reached end; output struct
	# Add parent as first member, if there is one
	if (parent) printf("typedef struct %s {\n\t%s super;\n", name, parent)
	else        printf("typedef struct %s {\n"             , name        )

	# Add vtable as first member, if there is one
	# We assume that if a class has virtual methods and a parent,
	# the parent already has a vtable, and we don't handle the case
	# of a derived class having virtual methods not on its parent.
	if (vtable && !parent) {
		printf("\tstruct %sVtbl {\n%s\t} * vtable;\n", name, vtable)
	}

	printf("%s} %s;\n\n", body, name)
}

22
ghidra/make_header.sh Executable file
View file

@ -0,0 +1,22 @@
#!/bin/sh
# Merges all header files in the decompilation into a C header file called
# jsrf.h for importing into Ghidra

# Abort on any failed command or unset variable.  This is set here rather than
# on the shebang line so it also applies when run as "sh make_header.sh".
set -eu

# headerconvert.awk, the decompile/ tree, and the output file are all located
# relative to this script, so work from its directory regardless of where it
# was invoked from
cd -- "$(dirname -- "$0")"

# Create output file
printf '%s\n' '// Automatically generated mass header file for Ghidra' > jsrf.h

# Figuring out include order programmatically is awful, so we'll have to add
# all the headers here by hand in an order that functions properly
HEADERS="
	Std.hpp
	XDK/Win32.hpp
	XDK/D3D.hpp
	Smilebit/MMatrix.hpp
	JSRF/Core.hpp
	JSRF/GameData.hpp
"

# Process each header file into jsrf.h
# (one awk run per header, so no parser state carries over between files)
for header in $HEADERS; do
	awk -f headerconvert.awk "../decompile/src/$header" >> jsrf.h
done