Mostly finish function name mangling

Still need to do function pointers.
This commit is contained in:
KeybadeBlox 2026-02-07 22:08:02 -05:00
parent 76e39fdd27
commit 576a60d331

View file

@ -1,25 +1,49 @@
// Applies Visual C++ 7.0 name mangling to the symbols within the selected // Applies Visual C++ 7.0 name mangling to the symbols within the selected
// address range (or the whole program if nothing is selected). // address range (or the whole program if nothing is selected).
// //
// Be aware that the mangling implementation is only partial.
//
// @category Symbol // @category Symbol
import ghidra.app.script.GhidraScript; import ghidra.app.script.GhidraScript;
import ghidra.program.model.address.Address;
import ghidra.program.model.listing.Data; import ghidra.program.model.listing.Data;
import ghidra.program.model.listing.Function; import ghidra.program.model.listing.Function;
import ghidra.program.model.data.BooleanDataType;
import ghidra.program.model.data.CharDataType;
import ghidra.program.model.data.DataType; import ghidra.program.model.data.DataType;
import ghidra.program.model.data.DoubleDataType;
import ghidra.program.model.data.Enum; import ghidra.program.model.data.Enum;
import ghidra.program.model.data.FloatDataType;
import ghidra.program.model.data.FunctionDefinition;
import ghidra.program.model.data.IntegerDataType; import ghidra.program.model.data.IntegerDataType;
import ghidra.program.model.data.LongDataType;
import ghidra.program.model.data.LongDoubleDataType;
import ghidra.program.model.data.LongLongDataType;
import ghidra.program.model.data.ParameterDefinition;
import ghidra.program.model.data.Pointer;
import ghidra.program.model.data.ShortDataType;
import ghidra.program.model.data.SignedCharDataType;
import ghidra.program.model.data.Structure;
import ghidra.program.model.data.TypeDef;
import ghidra.program.model.data.Union;
import ghidra.program.model.data.UnsignedCharDataType;
import ghidra.program.model.data.UnsignedIntegerDataType;
import ghidra.program.model.data.UnsignedLongDataType;
import ghidra.program.model.data.UnsignedLongLongDataType;
import ghidra.program.model.data.UnsignedShortDataType;
import ghidra.program.model.data.VoidDataType; import ghidra.program.model.data.VoidDataType;
import ghidra.program.model.data.WideCharDataType;
import ghidra.program.model.symbol.Namespace;
import ghidra.program.model.symbol.Reference; import ghidra.program.model.symbol.Reference;
import ghidra.program.model.symbol.SourceType; import ghidra.program.model.symbol.SourceType;
import ghidra.program.model.symbol.Symbol; import ghidra.program.model.symbol.Symbol;
import ghidra.program.model.symbol.SymbolIterator; import ghidra.program.model.symbol.SymbolIterator;
import java.util.Arrays; import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
public class MSVC7Mangle extends GhidraScript{ public class MSVC7Mangle extends GhidraScript{
@ -32,80 +56,194 @@ public class MSVC7Mangle extends GhidraScript{
final Symbol s = iter.next(); final Symbol s = iter.next();
switch (s.getObject()) { switch (s.getObject()) {
case Function f -> demangleFn(f); case Function f -> mangleFn(f);
case Data d -> demangleData(s); case Data _ -> mangleData(s);
default -> {} default -> {}
} }
} }
} }
private void demangleFn(final Function f) throws Exception { private void mangleFn(final Function f) throws Exception {
// Gather everything needed for mangling /* Set the function's name to its mangled version */
final List<String> name = Arrays.asList(f.getName(true) if (f.getName().charAt(0) == '?') return; // Already mangled
.split("::")); else {
Collections.reverse(name); f.setName(mangledFnName(f), SourceType.USER_DEFINED);
final String callc = f.getCallingConventionName(); makeGlobal(f);
final DataType ret = f.getReturnType(); }
final DataType[] args = Arrays.stream(f.getSignature(true)
.getArguments())
.map(x -> x.getDataType())
.toArray(DataType[]::new);
// Construct mangled name
final String mangled =
"?" + String.join("@", name) + "@@" +
switch (callc) {
case "__cdecl" -> "YA";
case "__thiscall" -> isVirtual(f) ? "UAE" :
"QAE";
case "__fastcall" -> ""; // TODO
default -> throw new Exception(
"Need to specify calling convention"
);
} +
mangleType(ret) +
mangleArgs(args) +
"Z";
f.setName(mangled, SourceType.USER_DEFINED);
} }
private void demangleData(final Symbol s) { private String mangledFnName(final Function f) throws Exception {
// TODO /* Produce the mangled name for a function */
printf("TODO: data symbol \"%s\"\n", s.getName(true)); final ArrayList<String> dict = new ArrayList<>();
final String name = mangleIdentifier(f.getName(true), dict) + "@";
final List<String> nameParts = Arrays.asList(f.getName(true).split("::"));
Collections.reverse(nameParts);
final DataType ret = f.getReturnType();
final DataType[] args = Arrays.stream(f.getSignature(true)
.getArguments())
.map(ParameterDefinition::getDataType)
.toArray(DataType[]::new);
// Special methods overriding first part of name
if (
f.getCallingConventionName().equals("__thiscall") &&
nameParts.size() >= 2
) {
final String unqualified = nameParts.get(0);
final String clsName = nameParts.get(1);
if (unqualified.equals(clsName)) // Constructor
return "?" + name.replaceFirst(unqualified + "@", "?0") +
"QAE@PA" + mangleArgs(args, dict) + "Z";
else if (unqualified.equals("~" + clsName)) // Destructor
return "?" + name.replaceFirst(unqualified + "@", "?1") +
(isVirtual(f) ? "UAE" : "QAE") + "@XZ";
else if (unqualified.equals("`scalar_deleting_destructor'")) // What it says
return "?" + name.replaceFirst(unqualified + "@", "?_G") +
"UAEPAXI@Z";
}
return "?" + name + fnAttrs(f) + mangleType(ret, dict) +
mangleArgs(args, dict) + "Z";
} }
private boolean isVirtual(final Function f) { private static void mangleData(final Symbol s) throws Exception {
/* Attempt to determine whether a method is virtual /* Set the data symbol's name to its mangled version */
We essentially try to figure out if any references are from a vtable. throw new Exception("TODO: data symbol \"" + s.getName(true) + "\"");
}
private static String mangleIdentifier(
final String ident,
final List<String> dict
) {
/* Mangle a fully qualified identifier
Identifiers like X::Y::Z are mangled with names in reverse order each
terminated by '@', and the whole identifier is terminated by another
'@', e.g. Z@Y@X@@. Previously encountered names are kept in a
dictionary to turn repeated names into backreferences, e.g. X::Y::X
would become X@Y@0@ (if starting with an empty dictionary).
*/ */
final Reference[] refs = getReferencesTo(f.getEntryPoint()); // Break up names into their mangled order
final List<String> parts = Arrays.asList(ident.split("::"));
Collections.reverse(parts);
// TODO // Apply any backreferences and combine together
return parts.stream()
return false; .map(s -> backref(s, dict).orElse(s + "@"))
.reduce("", String::concat);
} }
private String mangleType(final DataType t) throws Exception { private static <T> Optional<String> backref(
/* Mangle a data type in a function name */ final T x,
return switch(t) { final List<T> dict
case Enum e -> "W4" + e.getName() + "@@"; ) {
case IntegerDataType x -> "H"; /* Produce a backreference string if x is found in dict */
case VoidDataType x -> "X"; switch (Integer.valueOf(dict.indexOf(x))) {
default -> throw new Exception( case -1:
"Unhandled data type \"" + t.toString() + "\"" dict.add(x);
return Optional.empty();
case Integer ref:
return Optional.of(ref.toString());
}
}
private String fnAttrs(final Function f) throws Exception {
/* Produce attribute characters for the given function
Certain assumptions must be made about e.g. visibility, which is
integrated into these attributes but fully erased from the final
binary. Everything is assumed to be public, and static methods and
const-ness are not considered.
*/
return switch (f.getCallingConventionName()) {
case "__cdecl" -> "YA";
case "__thiscall" -> isVirtual(f) ? "UAE" : "QAE";
case "__fastcall" -> throw new Exception("TODO: __fastcall");
case "__stdcall" -> throw new Exception("TODO: __stdcall");
default -> throw new Exception(
f.getName(true) +
"(): Need to specify calling convention"
); );
}; };
} }
private String mangleArgs(final DataType[] args) throws Exception { private boolean isVirtual(final Function f) {
/* Mangle the arguments for a function */ /* Attempt to determine whether a method is virtual
if (args.length == 0) return "X"; We essentially try to figure out if any references are from a vtable
else { by checking if they lie in non-executable memory, or from a scalar
String encoded = ""; deleting destructor.
for (int i = 0; i < args.length; i++) */
encoded += mangleType(args[i]); final Reference[] refs = getReferencesTo(f.getEntryPoint());
return encoded + "@"; for (int i = 0; i < refs.length; i++) {
final Address addr = refs[i].getFromAddress();
final String caller = getFunctionContaining(addr).getName(false);
if (
!getMemoryBlock(addr).isExecute() ||
caller.equals("`scalar_deleting_destructor'") ||
caller.startsWith("??_G") // From mangled name
) return true;
} }
return false;
}
private static String mangleType(
    final DataType t,
    final List<String> dict
) {
    /* Mangle a data type in a function name
       All types are assumed to have no CV qualifiers. Named types
       (unions, structures, enums) go through the name backreference
       dictionary dict. Typedefs are mangled as their base type. Function
       definitions and unrecognized types are not supported yet and yield
       a conspicuous "!!...!!" placeholder instead of a valid encoding.
    */
    return switch(t) {
        case SignedCharDataType _       -> "C";
        case UnsignedCharDataType _     -> "E";
        case CharDataType _             -> "D";  // Must come after its child types
        case ShortDataType _            -> "F";
        case UnsignedShortDataType _    -> "G";
        case IntegerDataType _          -> "H";
        case UnsignedIntegerDataType _  -> "I";
        case LongDataType _             -> "J";
        case UnsignedLongDataType _     -> "K";
        case FloatDataType _            -> "M";
        case DoubleDataType _           -> "N";
        case LongDoubleDataType _       -> "O";
        // A pointer with no referent type is encoded as a void pointer;
        // passing null on to the switch would throw
        case Pointer p                  -> "PA" + (
            p.getDataType() == null ? "X"
                                    : mangleType(p.getDataType(), dict)
        );
        case Union u                    -> "T" + mangleIdentifier(u.getName(), dict) + "@";
        case Structure s                -> "U" + mangleIdentifier(s.getName(), dict) + "@";
        case Enum e                     -> "W4" + mangleIdentifier(e.getName(), dict) + "@";
        case VoidDataType _             -> "X";
        case LongLongDataType _         -> "_J";
        case UnsignedLongLongDataType _ -> "_K";
        case BooleanDataType _          -> "_N";
        case WideCharDataType _         -> "_W";
        case FunctionDefinition f       -> "!!TODO " + f.getPrototypeString(true) + "!!";
        case TypeDef d                  -> mangleType(d.getBaseDataType(), dict);
        default                         -> "!!UNKNOWN " + t.getName() + "!!";
    };
}
private static String mangleArgs(
    final DataType[] args,
    final List<String> dict
) {
    /* Mangle the arguments for a function
       An empty argument list is encoded as "X". Otherwise each argument
       type is encoded in order and the list is terminated by '@'.
       Repeated argument types are replaced by a backreference into a
       per-function argument table, but only types whose encoding is
       longer than one character take part in that table: single-letter
       types are always written out, e.g. (int, int) is "HH@".
    */
    if (args.length == 0) return "X";

    final List<DataType> argDict = new ArrayList<>();
    final StringBuilder mangled = new StringBuilder();

    for (final DataType arg : args) {
        final String code = mangleType(arg, dict);
        mangled.append(
            code.length() > 1 ? backref(arg, argDict).orElse(code)
                              : code
        );
    }

    return mangled.append('@').toString();
}
private static void makeGlobal(final Namespace ns) throws Exception {
/* Move into the global namespace */
// I cannot for the life of me find a more convenient way of
// doing this
while (!ns.getParentNamespace().isGlobal())
ns.setParentNamespace(ns.getParentNamespace()
.getParentNamespace());
} }
} }