JSRF-Decompilation/ghidra/ghidra_scripts/MSVC7Mangle.java
KeybadeBlox 576a60d331 Mostly finish function name mangling
Still need to do function pointers.
2026-02-07 22:08:02 -05:00

249 lines
9.1 KiB
Java

// Applies Visual C++ 7.0 name mangling to the symbols within the selected
// address range (or the whole program if nothing is selected).
//
// @category Symbol
import ghidra.app.script.GhidraScript;
import ghidra.program.model.address.Address;
import ghidra.program.model.listing.Data;
import ghidra.program.model.listing.Function;
import ghidra.program.model.data.BooleanDataType;
import ghidra.program.model.data.CharDataType;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.DoubleDataType;
import ghidra.program.model.data.Enum;
import ghidra.program.model.data.FloatDataType;
import ghidra.program.model.data.FunctionDefinition;
import ghidra.program.model.data.IntegerDataType;
import ghidra.program.model.data.LongDataType;
import ghidra.program.model.data.LongDoubleDataType;
import ghidra.program.model.data.LongLongDataType;
import ghidra.program.model.data.ParameterDefinition;
import ghidra.program.model.data.Pointer;
import ghidra.program.model.data.ShortDataType;
import ghidra.program.model.data.SignedCharDataType;
import ghidra.program.model.data.Structure;
import ghidra.program.model.data.TypeDef;
import ghidra.program.model.data.Union;
import ghidra.program.model.data.UnsignedCharDataType;
import ghidra.program.model.data.UnsignedIntegerDataType;
import ghidra.program.model.data.UnsignedLongDataType;
import ghidra.program.model.data.UnsignedLongLongDataType;
import ghidra.program.model.data.UnsignedShortDataType;
import ghidra.program.model.data.VoidDataType;
import ghidra.program.model.data.WideCharDataType;
import ghidra.program.model.symbol.Namespace;
import ghidra.program.model.symbol.Reference;
import ghidra.program.model.symbol.SourceType;
import ghidra.program.model.symbol.Symbol;
import ghidra.program.model.symbol.SymbolIterator;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
public class MSVC7Mangle extends GhidraScript {
    /* Each of the two backreference tables (names and argument types) is
       addressed with a single digit 0-9, so only the first ten distinct
       entries can ever be referenced
    */
    private static final int MAX_BACKREFS = 10;

    @Override
    public void run() throws Exception {
        /* Mangle every primary symbol in the current selection
           Functions are renamed; data symbols are handed to mangleData().
           Stops early if the user cancels the script.
        */
        final SymbolIterator iter = currentProgram.getSymbolTable()
            .getPrimarySymbolIterator(currentSelection, true);

        while (iter.hasNext() && !monitor.isCancelled()) {
            final Symbol s = iter.next();

            switch (s.getObject()) {
                case Function f -> mangleFn(f);
                case Data _     -> mangleData(s);
                // A pattern switch throws NullPointerException on a null
                // selector unless null is matched explicitly
                case null, default -> {}
            }
        }
    }

    private void mangleFn(final Function f) throws Exception {
        /* Set the function's name to its mangled version
           The function is also moved into the global namespace, since the
           mangled name already encodes the full scope.
        */
        if (f.getName().startsWith("?")) return;  // Already mangled

        f.setName(mangledFnName(f), SourceType.USER_DEFINED);
        makeGlobal(f);
    }

    private String mangledFnName(final Function f) throws Exception {
        /* Produce the mangled name for a function
           Constructors, destructors and scalar deleting destructors of
           __thiscall methods get their special name codes (?0, ?1, ?_G);
           everything else is mangled as
           ?<name>@<attributes><return type><arguments>Z.
        */
        final List<String> dict = new ArrayList<>();  // Name backreferences
        final String qualified = f.getName(true);
        final String[] parts = qualified.split("::");
        final DataType[] args = Arrays.stream(f.getSignature(true)
                                               .getArguments())
                                      .map(ParameterDefinition::getDataType)
                                      .toArray(DataType[]::new);

        // Special methods overriding first part of name
        if (
            "__thiscall".equals(f.getCallingConventionName()) &&
            parts.length >= 2
        ) {
            final String unqualified = parts[parts.length - 1];
            final String clsName = parts[parts.length - 2];
            // The special name code replaces the method's own name, so only
            // the enclosing scope goes through identifier mangling.  This
            // keeps the method name out of the backreference dictionary
            // (mangling X::X as a whole would turn the class name into a
            // backreference to the constructor name and lose it).
            final String scope = String.join(
                "::",
                Arrays.copyOf(parts, parts.length - 1)
            );

            if (unqualified.equals(clsName))  // Constructor
                return "??0" + mangleIdentifier(scope, dict) + "@" +
                       "QAE@" + mangleArgs(args, dict) + "Z";
            else if (unqualified.equals("~" + clsName))  // Destructor
                return "??1" + mangleIdentifier(scope, dict) + "@" +
                       (isVirtual(f) ? "UAE" : "QAE") + "@XZ";
            else if (unqualified.equals("`scalar_deleting_destructor'"))
                return "??_G" + mangleIdentifier(scope, dict) + "@" +
                       "UAEPAXI@Z";
        }

        // Evaluation order matters: name, then return type, then arguments
        // all feed the same backreference dictionary
        return "?" + mangleIdentifier(qualified, dict) + "@" + fnAttrs(f) +
               mangleReturnType(f.getReturnType(), dict) +
               mangleArgs(args, dict) + "Z";
    }

    private static void mangleData(final Symbol s) throws Exception {
        /* Set the data symbol's name to its mangled version */
        throw new Exception("TODO: data symbol \"" + s.getName(true) + "\"");
    }

    private static String mangleIdentifier(
        final String       ident,
        final List<String> dict
    ) {
        /* Mangle a fully qualified identifier
           Identifiers like X::Y::Z are mangled with names in reverse order
           each terminated by '@', e.g. Z@Y@X@.  The '@' that terminates the
           whole identifier is NOT produced here; callers append it.

           Previously encountered names are kept in dict to turn repeated
           names into backreferences, e.g. X::Y::X would become X@Y@0 (if
           starting with an empty dictionary).  dict is updated in place.
        */
        final String[] parts = ident.split("::");
        final StringBuilder mangled = new StringBuilder();

        // Innermost name comes first in the mangled form
        for (int i = parts.length - 1; i >= 0; i--) {
            final String part = parts[i];
            mangled.append(backref(part, dict).orElse(part + "@"));
        }

        return mangled.toString();
    }

    private static <T> Optional<String> backref(
        final T       x,
        final List<T> dict
    ) {
        /* Produce a backreference string if x is found in dict
           If x is not found it is recorded in dict (as long as there is
           still room) and an empty Optional is returned, meaning the caller
           must spell x out in full.
        */
        final int idx = dict.indexOf(x);

        if (idx >= 0)
            // Entries past the tenth cannot be expressed as a single digit
            return idx < MAX_BACKREFS ?
                Optional.of(Integer.toString(idx)) :
                Optional.empty();

        if (dict.size() < MAX_BACKREFS) dict.add(x);

        return Optional.empty();
    }

    private String fnAttrs(final Function f) throws Exception {
        /* Produce attribute characters for the given function
           Certain assumptions must be made about e.g. visibility, which is
           integrated into these attributes but fully erased from the final
           binary.  Everything is assumed to be public, and static methods
           and const-ness are not considered.

           Throws if the calling convention is unsupported or unspecified.
        */
        return switch (f.getCallingConventionName()) {
            case "__cdecl"    -> "YA";
            case "__thiscall" -> isVirtual(f) ? "UAE" : "QAE";
            case "__fastcall" -> throw new Exception("TODO: __fastcall");
            case "__stdcall"  -> throw new Exception("TODO: __stdcall");
            default           -> throw new Exception(
                                     f.getName(true) +
                                     "(): Need to specify calling convention"
                                 );
        };
    }

    private boolean isVirtual(final Function f) {
        /* Attempt to determine whether a method is virtual
           We essentially try to figure out if any references are from a
           vtable by checking if they lie in non-executable memory, or from
           a scalar deleting destructor.
        */
        for (final Reference ref : getReferencesTo(f.getEntryPoint())) {
            final Address from = ref.getFromAddress();

            // Check the memory block first: a reference from data (the
            // vtable case) has no containing function to ask for a name
            final var block = getMemoryBlock(from);
            if (block != null && !block.isExecute()) return true;

            final Function caller = getFunctionContaining(from);
            if (caller == null) continue;

            final String callerName = caller.getName(false);
            if (
                callerName.equals("`scalar_deleting_destructor'") ||
                callerName.startsWith("??_G")  // From mangled name
            ) return true;
        }

        return false;
    }

    private static String mangleReturnType(
        final DataType     ret,
        final List<String> dict
    ) {
        /* Mangle a function's return type
           Identical to mangleType(), except that structs, unions and enums
           returned by value carry a "?A" prefix (an explicit, empty
           CV-qualifier) in the return position, e.g. ?f@@YA?AUS@@XZ.
        */
        final DataType resolved =
            ret instanceof TypeDef def ? def.getBaseDataType() : ret;
        final boolean isTag = resolved instanceof Structure ||
                              resolved instanceof Union     ||
                              resolved instanceof Enum;

        return (isTag ? "?A" : "") + mangleType(resolved, dict);
    }

    private static String mangleType(
        final DataType     t,
        final List<String> dict
    ) {
        /* Mangle a data type in a function name
           All types are assumed to have no CV qualifiers.  Names of
           structs, unions and enums go through the name backreference
           dictionary, which is updated in place.  Unsupported types yield
           a conspicuous "!!...!!" placeholder rather than an error.
        */
        return switch (t) {
            case SignedCharDataType _       -> "C";
            case UnsignedCharDataType _     -> "E";
            case CharDataType _             -> "D";  // Must come after its child types
            case ShortDataType _            -> "F";
            case UnsignedShortDataType _    -> "G";
            case IntegerDataType _          -> "H";
            case UnsignedIntegerDataType _  -> "I";
            case LongDataType _             -> "J";
            case UnsignedLongDataType _     -> "K";
            case FloatDataType _            -> "M";
            case DoubleDataType _           -> "N";
            case LongDoubleDataType _       -> "O";
            case Pointer p -> {
                // NOTE(review): presumably an untyped pointer reports no
                // pointee; treat it as void * instead of crashing on null
                final DataType pointee = p.getDataType();
                yield "PA" + (pointee == null ? "X" : mangleType(pointee, dict));
            }
            case Union u     -> "T" + mangleIdentifier(u.getName(), dict) + "@";
            case Structure s -> "U" + mangleIdentifier(s.getName(), dict) + "@";
            case Enum e      -> "W4" + mangleIdentifier(e.getName(), dict) + "@";
            case VoidDataType _             -> "X";
            case LongLongDataType _         -> "_J";
            case UnsignedLongLongDataType _ -> "_K";
            case BooleanDataType _          -> "_N";
            case WideCharDataType _         -> "_W";
            case FunctionDefinition f -> "!!TODO " + f.getPrototypeString(true) + "!!";
            case TypeDef d   -> mangleType(d.getBaseDataType(), dict);
            default          -> "!!UNKNOWN " + t.getName() + "!!";
        };
    }

    private static String mangleArgs(
        final DataType[]   args,
        final List<String> dict
    ) {
        /* Mangle the arguments for a function
           An empty argument list is "X" (void) with no terminator;
           otherwise the mangled types are concatenated and terminated by
           '@'.  A repeated argument type becomes a single-digit
           backreference, but only types whose encoding is longer than one
           character are recorded, so f(int, int) is "HH@" and not "H0@".
        */
        if (args.length == 0) return "X";

        final List<DataType> argDict = new ArrayList<>();
        final StringBuilder mangled = new StringBuilder();

        for (final DataType arg : args) {
            // argDict never grows past MAX_BACKREFS, so any hit is a
            // valid single digit
            final int idx = argDict.indexOf(arg);
            if (idx >= 0) {
                mangled.append(idx);
                continue;
            }

            final String encoded = mangleType(arg, dict);
            if (encoded.length() > 1 && argDict.size() < MAX_BACKREFS)
                argDict.add(arg);
            mangled.append(encoded);
        }

        return mangled.append("@").toString();
    }

    private static void makeGlobal(final Namespace ns) throws Exception {
        /* Move into the global namespace
           Does nothing if ns already sits directly in the global namespace.
        */
        final Namespace parent = ns.getParentNamespace();
        if (parent == null || parent.isGlobal()) return;

        // Walk up to the global namespace and reparent once, rather than
        // moving ns one level at a time
        Namespace global = parent;
        while (!global.isGlobal()) {
            global = global.getParentNamespace();
            if (global == null) return;  // No global ancestor to move to
        }

        ns.setParentNamespace(global);
    }
}