Compare commits

...

4 commits

Author SHA1 Message Date
KeybadeBlox
febf9fc172 Finish function demangling
Data demangling should be much easier.
2026-02-10 00:06:29 -05:00
KeybadeBlox
f193fef3ce Enable string pooling
JSRF evidently has string pooling enabled (i.e. program-wide
deduplication of strings), so we'll want it in our compiler settings as
well.
2026-02-09 23:53:24 -05:00
KeybadeBlox
e6d7acfa05 Fix some disconnected symbols in objdiff
This will be so much easier when we can export with name mangling.
2026-02-07 22:09:26 -05:00
KeybadeBlox
576a60d331 Mostly finish function name mangling
Still need to do function pointers.
2026-02-07 22:08:02 -05:00
3 changed files with 310 additions and 55 deletions

View file

@ -3,7 +3,7 @@ all: src/JSRF/Jet2.obj
# Simple inference rule for producing object files
.SUFFIXES: .cpp .obj
.cpp.obj:
CL.EXE /nologo /Wall /TP /W3 /Ogityb0 /MT /GX /Fo$@ /c $<
CL.EXE /nologo /Wall /TP /W3 /Ogityb0 /MT /Gf /GX /Fo$@ /c $<
# Header files used for each object
src/JSRF/Jet2.obj: src/JSRF/Core.hpp src/Std.hpp src/XDK/D3D.hpp\

View file

@ -15,10 +15,10 @@
},
"symbol_mappings": {
"[.rdata-0]": "[.xdata$x-0]",
"_main_funcinfo": "$T745",
"_main_handler": "$L749",
"_main_handler_unwind1": "$L741",
"_main_unwindmap": "$T751"
"_main_funcinfo": "$T754",
"_main_handler": "$L758",
"_main_handler_unwind1": "$L750",
"_main_unwindmap": "$T760"
}
},
{
@ -48,6 +48,7 @@
"Game::drawObjs": "?drawObjs@Game@@QAEXXZ",
"Game::drawTree1": "?drawTree1@Game@@QAEXPAUGameObj@@@Z",
"Game::enableDrawChildren": "?enableDrawChildren@Game@@QAEXXZ",
"Game::enableSkipDraw": "?enableSkipDraw@Game@@QAEXXZ",
"Game::enableSomeExtraDrawListCode": "?enableSomeExtraDrawListCode@Game@@QAEXXZ",
"Game::exec": "?exec@Game@@QAEXXZ",
"Game::fatal": "?fatal@Game@@QAEXXZ",
@ -67,7 +68,6 @@
"Game::setGlobal": "?setGlobal@Game@@QAEXW4GlobalIndex@@I@Z",
"Game::setLogosStarted": "?setLogosStarted@Game@@QAEXH@Z",
"Game::setObj": "?setObj@Game@@QAEXW4GameObjIndex@@PAUGameObj@@@Z",
"Game::setSkipDraw": "?enableSkipDraw@Game@@QAEXXZ",
"Game::setUncoveredPauseNextFrame": "?setUncoveredPauseNextFrame@Game@@QAEXH@Z",
"Game::sortDrawPriorityList": "?sortDrawPriorityList@Game@@QAEXXZ",
"Game::sortDrawPriorityListSingleLevel": "?sortDrawPriorityListSingleLevel@Game@@QAEXD@Z",

View file

@ -1,25 +1,59 @@
// Applies Visual C++ 7.0 name mangling to the symbols within the selected
// address range (or the whole program if nothing is selected).
//
// Be aware that the mangling implementation is only partial.
// The implementation is missing a few obscure corners but is mostly complete.
// Keep in mind that certain qualities that aren't visible to Ghidra, like
// visibility or CV qualifiers, will always be assumed to be their most
// permissive form (public, non-const, etc.).
//
// Special symbol names like "operator new" or "scalar deleting destructor"
// are given unique mangling. To properly mangle these, name them as they
// appear in objdiff, replacing spaces with underscores, e.g. "operator_new"
// and "`scalar_deleting_destructor'" (notice the ` and ').
//
// @category Symbol
import ghidra.app.script.GhidraScript;
import ghidra.program.model.address.Address;
import ghidra.program.model.listing.Data;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.FunctionSignature;
import ghidra.program.model.data.BooleanDataType;
import ghidra.program.model.data.CharDataType;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.DoubleDataType;
import ghidra.program.model.data.Enum;
import ghidra.program.model.data.FloatDataType;
import ghidra.program.model.data.IntegerDataType;
import ghidra.program.model.data.LongDataType;
import ghidra.program.model.data.LongDoubleDataType;
import ghidra.program.model.data.LongLongDataType;
import ghidra.program.model.data.ParameterDefinition;
import ghidra.program.model.data.Pointer;
import ghidra.program.model.data.ShortDataType;
import ghidra.program.model.data.SignedCharDataType;
import ghidra.program.model.data.Structure;
import ghidra.program.model.data.TypeDef;
import ghidra.program.model.data.Union;
import ghidra.program.model.data.UnsignedCharDataType;
import ghidra.program.model.data.UnsignedIntegerDataType;
import ghidra.program.model.data.UnsignedLongDataType;
import ghidra.program.model.data.UnsignedLongLongDataType;
import ghidra.program.model.data.UnsignedShortDataType;
import ghidra.program.model.data.VoidDataType;
import ghidra.program.model.data.WideCharDataType;
import ghidra.program.model.symbol.Namespace;
import ghidra.program.model.symbol.Reference;
import ghidra.program.model.symbol.SourceType;
import ghidra.program.model.symbol.Symbol;
import ghidra.program.model.symbol.SymbolIterator;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
public class MSVC7Mangle extends GhidraScript{
@ -31,81 +65,302 @@ public class MSVC7Mangle extends GhidraScript{
while (iter.hasNext() && !monitor.isCancelled()) {
final Symbol s = iter.next();
switch (s.getObject()) {
case Function f -> demangleFn(f);
case Data d -> demangleData(s);
default -> {}
// Skip symbols that are already mangled. This has to be `continue`:
// a `return` here would abandon every remaining symbol as soon as the
// first already-mangled name is encountered.
if (s.getName().startsWith("?")) continue;

// Get mangled name (null for symbol kinds that are not mangled)
final String mangled = switch (s.getObject()) {
    case Function f -> mangleFn(f);
    case Data _ -> mangleData(s);
    default -> null;
};

// Apply new name, then move the symbol to the global namespace since
// the mangled name already encodes its scope
if (mangled != null) {
    s.setName(mangled, SourceType.USER_DEFINED);
    makeGlobal(s);
}
}
}
// NOTE(review): this span interleaves lines of the removed demangleFn()
// (which built the mangled name inline and renamed the function itself)
// with lines of the added mangleFn() (which returns the mangled name and
// leaves renaming to its caller).
private void demangleFn(final Function f) throws Exception {
// Gather everything needed for mangling
final List<String> name = Arrays.asList(f.getName(true)
.split("::"));
Collections.reverse(name);
final String callc = f.getCallingConventionName();
final DataType ret = f.getReturnType();
final DataType[] args = Arrays.stream(f.getSignature(true)
.getArguments())
.map(x -> x.getDataType())
.toArray(DataType[]::new);
private String mangleFn(final Function f) throws Exception {
/* Produce and return the mangled version of the function's name */
// Name dictionary handed to every mangling helper called below, so that
// names repeated within this one symbol can become backreferences
final ArrayList<String> dict = new ArrayList<>();
// Construct mangled name
final String mangled =
"?" + String.join("@", name) + "@@" +
switch (callc) {
case "__cdecl" -> "YA";
case "__thiscall" -> isVirtual(f) ? "UAE" :
"QAE";
case "__fastcall" -> ""; // TODO
default -> throw new Exception(
"Need to specify calling convention"
);
} +
mangleType(ret) +
mangleArgs(args) +
"Z";
// Qualified name split on "::" and reversed, so index 0 is the
// unqualified name and index 1 (if present) is the enclosing scope
final List<String> nameParts = Arrays.asList(f.getName(true).split("::"));
Collections.reverse(nameParts);
final String unqualified = nameParts.get(0);
// Only a __thiscall function with an enclosing scope is treated as a method
final boolean isMethod = f.getCallingConventionName().equals("__thiscall") &&
nameParts.size() >= 2;
final String name = mangleIdentifier(f.getName(true), isMethod, f.getReturnType(), dict);
f.setName(mangled, SourceType.USER_DEFINED);
// Special methods with unique formats
if (isMethod) {
final String clsName = nameParts.get(1);
// Both special cases emit "@" in the position where the general path
// below emits the mangled return type
if (unqualified.equals( clsName)) { // Constructor
return "?" + name + "QAE@" + mangleArgs(f.getSignature(true), dict) + "Z";
} else if (unqualified.equals("~" + clsName)) { // Destructor
return "?" + name + (isVirtual(f) ? "UAE" : "QAE") + "@XZ";
}
}
// General case: name, visibility/linkage attributes, then the function type
return "?" + name + mangleFnAttrs(f, nameParts) +
mangleFnType(f.getSignature(true), dict);
}
private void demangleData(final Symbol s) {
// TODO
printf("TODO: data symbol \"%s\"\n", s.getName(true));
private static String mangleIdentifier(
    final String ident,
    final boolean isMethod,
    final DataType retType, // Function return type, nullable
    final List<String> dict
) {
    /* Mangle a fully qualified identifier
       Identifiers like X::Y::Z are mangled with names in reverse order each
       terminated by '@', and the whole identifier is terminated by another
       '@', e.g. Z@Y@X@@. Previously encountered names are kept in a
       dictionary to turn repeated names into backreferences, e.g. X::Y::X
       would become X@Y@0@ (if starting with an empty dictionary).

       ident:    "::"-separated qualified name
       isMethod: whether method-only special names (operators, constructor,
                 destructor, ...) should be recognized; ignored when ident
                 has no enclosing scope to act as the class name
       retType:  return type, used only to recognize user-defined
                 conversion operators; may be null
       dict:     name dictionary for backreferences, updated in place
       Returns the mangled identifier including its terminating '@'.
    */

    // Break up names into their mangled order (innermost name first)
    final List<String> parts = Arrays.asList(ident.split("::"));
    Collections.reverse(parts);

    // Non-method special names
    parts.set(0, switch (parts.get(0)) {
        case "operator_new"      -> "?2";
        case "operator_delete"   -> "?3";
        case "operator_new[]"    -> "?_U";
        case "operator_delete[]" -> "?_V";
        default                  -> parts.get(0);
    });

    // Method special names. The size check keeps an unqualified name from
    // failing on the class name lookup below.
    if (isMethod && parts.size() >= 2) {
        final String clsName = parts.get(1);
        parts.set(0, switch (parts.get(0)) {
            // Definitely some cases missing
            case "operator_="   -> "?4";
            case "operator_>>"  -> "?5";
            case "operator_<<"  -> "?6";
            case "operator_!"   -> "?7";
            case "operator_=="  -> "?8";
            case "operator_!="  -> "?9";
            case "operator_[]"  -> "?A";
            case "operator_->"  -> "?C";
            case "operator_*"   -> "?D";
            case "operator_++"  -> "?E";
            case "operator_--"  -> "?F";
            case "operator_-"   -> "?G";
            case "operator_+"   -> "?H";
            case "operator_&"   -> "?I";
            case "operator_->*" -> "?J";
            case "operator_/"   -> "?K";
            case "operator_%"   -> "?L";
            case "operator_<"   -> "?M";
            case "operator_<="  -> "?N";
            case "operator_>"   -> "?O";
            case "operator_>="  -> "?P";
            case "operator_,"   -> "?Q";
            case "operator_()"  -> "?R";
            case "operator_~"   -> "?S";
            case "operator_^"   -> "?T";
            case "operator_|"   -> "?U";
            case "operator_&&"  -> "?V";
            case "operator_||"  -> "?W";
            case "operator_*="  -> "?X";
            case "operator_+="  -> "?Y";
            case "operator_-="  -> "?Z";
            case "operator_/="  -> "?_0";
            case "operator_%="  -> "?_1";
            case "operator_>>=" -> "?_2";
            case "operator_<<=" -> "?_3";
            case "operator_&="  -> "?_4";
            case "operator_|="  -> "?_5";
            case "operator_^="  -> "?_6";
            case "`scalar_deleting_destructor'" -> "?_G";
            default ->
                parts.get(0).equals(      clsName) ? "?0" :
                parts.get(0).equals("~" + clsName) ? "?1" :
                retType != null && // Feeble attempt at user-defined conversions
                parts.get(0).equals(
                    "operator_" +
                    retType.getName()
                           .replace(" ", "")
                ) ? "?B" :
                parts.get(0);
        });
    }

    // Apply any backreferences and combine together. Special names (the
    // "?..." codes) are emitted as-is with no '@' terminator and must NOT
    // enter the dictionary: they occupy no backreference slot, so adding
    // them would shift the index of every name that follows (e.g. the
    // class in a copy constructor's argument must be backreference 0).
    final StringBuilder mangled = new StringBuilder();
    for (final String part : parts) {
        if (part.startsWith("?")) mangled.append(part);
        else mangled.append(backref(part, dict).orElse(part + "@"));
    }
    return mangled.append('@').toString();
}
private static <T> Optional<String> backref(
    final T x,
    final List<T> dict
) {
    /* Produce a backreference string if x is found in dict
       A backreference is a single digit, so only the first ten distinct
       items can ever be referenced. If x is absent it is recorded in dict
       while a slot is still free, and an empty Optional is returned so the
       caller spells x out in full. Once all ten slots are taken, further
       new items are spelled out every time and dict is left unchanged.

       x:    item to look up
       dict: items seen so far, in order of first appearance; updated in place
       Returns the digit "0".."9" for a known item, otherwise empty.
    */
    final int maxRefs = 10;
    final int ref = dict.indexOf(x);

    if (ref >= 0 && ref < maxRefs) return Optional.of(Integer.toString(ref));
    if (ref < 0 && dict.size() < maxRefs) dict.add(x);
    return Optional.empty();
}
private String mangleFnAttrs(
    final Function f,
    final List<String> name
) {
    /* Produce the visibility/linkage code that follows a function's name
       f:    function being mangled
       name: the function's name components, unqualified name first
       Methods (__thiscall) get "U" if they look virtual and "Q" otherwise,
       followed by "A" for a non-const method. Everything else gets "S" if
       isStatic() recognizes the name and "Y" if not.
    */
    final boolean thiscall = f.getCallingConventionName().equals("__thiscall");
    if (thiscall) {
        return isVirtual(f) ? "UA" : "QA";
    }
    return isStatic(name) ? "S" : "Y";
}
private boolean isVirtual(final Function f) {
    /* Attempt to determine whether a method is virtual
       We essentially try to figure out if any references are from a vtable
       by checking if they lie in non-executable memory, or from a scalar
       deleting destructor.

       f: method to examine
       Returns true as soon as one reference to f's entry point qualifies.
    */
    for (final Reference ref : getReferencesTo(f.getEntryPoint())) {
        final Address addr = ref.getFromAddress();

        // A source address outside every memory block tells us nothing, so
        // it is not counted as vtable evidence (and no longer dereferenced)
        final boolean fromData = Optional.ofNullable(getMemoryBlock(addr))
            .map(block -> !block.isExecute())
            .orElse(false);

        // The referencing function may carry either its readable name or,
        // if this script already processed it, its mangled name
        final boolean fromDeletingDtor = Optional.ofNullable(getFunctionContaining(addr))
            .map(caller -> caller.getName(false))
            .map(n -> n.equals("`scalar_deleting_destructor'") ||
                      n.startsWith("??_G"))
            .orElse(false);

        if (fromData || fromDeletingDtor) return true;
    }
    return false;
}
private String mangleType(final DataType t) throws Exception {
/* Mangle a data type in a function name */
return switch(t) {
case Enum e -> "W4" + e.getName() + "@@";
case IntegerDataType x -> "H";
case VoidDataType x -> "X";
default -> throw new Exception(
"Unhandled data type \"" + t.toString() + "\""
private static boolean isStatic(final List<String> name) {
    /* Decide from a function's name alone whether it is static
       name: name components, unqualified name first
       Only the allocation operators are recognized, and only when they
       have at least one enclosing scope; anything else is reported as
       non-static.
    */
    if (name.size() <= 1) return false;

    final String leaf = name.get(0);
    final String[] implicitlyStatic = {
        "operator_new",    "operator_new[]",
        "operator_delete", "operator_delete[]"
    };
    for (final String op : implicitlyStatic) {
        if (op.equals(leaf)) return true;
    }
    return false;
}
private static String mangleFnType(
    final FunctionSignature f,
    final List<String> dict
) throws Exception {
    /* Mangle the type portion of a function signature
       f:    signature to mangle
       dict: name dictionary for backreferences, updated in place
       Emits, in order: calling convention, return type, arguments and a
       closing "Z". The name and visibility/linkage are not included. The
       pieces are produced in this order because each may add to dict.
    */
    final StringBuilder out = new StringBuilder();
    out.append(mangleCallC(f));
    out.append(mangleType(f.getReturnType(), dict));
    out.append(mangleArgs(f, dict));
    return out.append("Z").toString();
}
private static String mangleCallC(final FunctionSignature f) throws Exception {
    /* Map a signature's calling convention to its one-letter code
       f: signature whose calling convention name is inspected
       Throws if the convention is not one of the four recognized names.
    */
    final String convention = f.getCallingConventionName();
    switch (convention) {
        case "__cdecl":
            return "A";
        case "__thiscall":
            return "E";
        case "__fastcall":
            return "I";
        case "__stdcall":
            return "G";
        default:
            throw new Exception(
                f.getName() +
                "(): Need to specify calling convention"
            );
    }
}
private String mangleArgs(final DataType[] args) throws Exception {
private static String mangleType(
final DataType t,
final List<String> dict
) throws Exception {
/* Mangle a data type in a function name
All types are assumed to have no CV qualifiers.
*/
return switch(t) {
case SignedCharDataType _ -> "C";
case UnsignedCharDataType _ -> "E";
case CharDataType _ -> "D"; // Must come after its child types
case ShortDataType _ -> "F";
case UnsignedShortDataType _ -> "G";
case IntegerDataType _ -> "H";
case UnsignedIntegerDataType _ -> "I";
case LongDataType _ -> "J";
case UnsignedLongDataType _ -> "K";
case FloatDataType _ -> "M";
case DoubleDataType _ -> "N";
case LongDoubleDataType _ -> "O";
case Pointer p -> "P" +
(p.getDataType() instanceof FunctionSignature ? "6" : "A") +
mangleType(p.getDataType(), dict);
case Union u -> "T" + mangleIdentifier(u.getName(), false, null, dict);
case Structure s -> "U" + mangleIdentifier(s.getName(), false, null, dict);
case Enum e -> "W4" + mangleIdentifier(e.getName(), false, null, dict);
case VoidDataType _ -> "X";
case LongLongDataType _ -> "_J";
case UnsignedLongLongDataType _ -> "_K";
case BooleanDataType _ -> "_N";
case WideCharDataType _ -> "_W";
case FunctionSignature f -> mangleFnType(f, dict);
case TypeDef d -> mangleType(d.getBaseDataType(), dict);
default -> throw new Exception ("Unknown type \"" + t.getName() + "\"");
};
}
private static String mangleArgs(
    final FunctionSignature f,
    final List<String> dict
) throws Exception {
    /* Mangle the arguments for a function
       f:    signature whose arguments are mangled
       dict: name dictionary for backreferences, updated in place
       Returns "X" for an empty argument list, otherwise the encoded
       arguments followed by '@'.

       Repeated argument types are replaced by a one-digit backreference
       into a per-argument-list table. Only types whose encoding is longer
       than one character enter that table (a backreference to "H" would
       save nothing, so f(int, int) is "HH@", not "H0@"), and the table
       holds at most ten types.
    */
    final ParameterDefinition[] params = f.getArguments();
    if (params.length == 0) return "X";

    final int maxRefs = 10;
    final List<DataType> argDict = new ArrayList<>();

    // A plain loop rather than stream().map(): mangleType() throws a
    // checked exception, which cannot propagate out of a lambda
    final StringBuilder mangled = new StringBuilder();
    for (final ParameterDefinition param : params) {
        // Typedefs are mangled as their base type, so look them up as such
        final DataType raw = param.getDataType();
        final DataType type = raw instanceof TypeDef td ? td.getBaseDataType() : raw;

        final int ref = argDict.indexOf(type);
        if (ref >= 0) { // Always a single digit since the table is capped
            mangled.append(ref);
            continue;
        }

        // Only mangle when actually needed; mangling may add names to dict
        final String encoded = mangleType(type, dict);
        mangled.append(encoded);
        if (encoded.length() > 1 && argDict.size() < maxRefs) argDict.add(type);
    }
    return mangled.append('@').toString();
}
private static String mangleData(final Symbol s) throws Exception {
    /* Produce the mangled name for a data symbol
       Not implemented yet: always throws, naming the symbol by its fully
       qualified name.
    */
    final String qualified = s.getName(true);
    throw new Exception("TODO: data symbol \"" + qualified + "\"");
}
private static void makeGlobal(final Symbol s) throws Exception {
    /* Move a symbol into the global namespace
       s: symbol to move; left untouched if it is already global
       The symbol is moved in one step through its owning program's global
       namespace rather than hopping up one parent at a time, so a name
       clash in some intermediate namespace cannot abort the move.
    */
    if (!s.isGlobal()) s.setNamespace(s.getProgram().getGlobalNamespace());
}
}