Finish function demangling

Data demangling should be much easier.
This commit is contained in:
KeybadeBlox 2026-02-10 00:06:29 -05:00
parent f193fef3ce
commit febf9fc172

View file

@ -1,19 +1,29 @@
// Applies Visual C++ 7.0 name mangling to the symbols within the selected
// address range (or the whole program if nothing is selected).
//
// The implementation is missing a few obscure corners but is mostly complete.
// Keep in mind that certain qualities that aren't visible to Ghidra, like
// visibility or CV qualifiers, will always be assumed to be their most
// permissive form (public, non-const, etc.).
//
// Special symbol names like "operator new" or "scalar deleting destructor"
// are given unique mangling. To properly mangle these, name them as they
// appear in objdiff, replacing spaces with underscores, e.g. "operator_new"
// and "`scalar_deleting_destructor'" (notice the ` and ').
//
// @category Symbol
import ghidra.app.script.GhidraScript;
import ghidra.program.model.address.Address;
import ghidra.program.model.listing.Data;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.FunctionSignature;
import ghidra.program.model.data.BooleanDataType;
import ghidra.program.model.data.CharDataType;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.DoubleDataType;
import ghidra.program.model.data.Enum;
import ghidra.program.model.data.FloatDataType;
import ghidra.program.model.data.FunctionDefinition;
import ghidra.program.model.data.IntegerDataType;
import ghidra.program.model.data.LongDataType;
import ghidra.program.model.data.LongDoubleDataType;
@ -55,67 +65,54 @@ public class MSVC7Mangle extends GhidraScript{
while (iter.hasNext() && !monitor.isCancelled()) {
final Symbol s = iter.next();
switch (s.getObject()) {
case Function f -> mangleFn(f);
// Skip if already mangled
if (s.getName().charAt(0) == '?') return;
// Get mangled name
final String mangled = switch (s.getObject()) {
case Function f -> mangleFn (f);
case Data _ -> mangleData(s);
default -> {}
default -> null;
};
// Apply new name
if (mangled != null) {
s.setName(mangled, SourceType.USER_DEFINED);
makeGlobal(s);
}
}
}
private void mangleFn(final Function f) throws Exception {
private String mangleFn(final Function f) throws Exception {
/* Set the function's name to its mangled version */
if (f.getName().charAt(0) == '?') return; // Already mangled
else {
f.setName(mangledFnName(f), SourceType.USER_DEFINED);
makeGlobal(f);
}
}
private String mangledFnName(final Function f) throws Exception {
/* Produce the mangled name for a function */
final ArrayList<String> dict = new ArrayList<>();
final String name = mangleIdentifier(f.getName(true), dict) + "@";
final List<String> nameParts = Arrays.asList(f.getName(true).split("::"));
Collections.reverse(nameParts);
final DataType ret = f.getReturnType();
final DataType[] args = Arrays.stream(f.getSignature(true)
.getArguments())
.map(ParameterDefinition::getDataType)
.toArray(DataType[]::new);
// Special methods overriding first part of name
if (
f.getCallingConventionName().equals("__thiscall") &&
nameParts.size() >= 2
) {
final String unqualified = nameParts.get(0);
final boolean isMethod = f.getCallingConventionName().equals("__thiscall") &&
nameParts.size() >= 2;
final String name = mangleIdentifier(f.getName(true), isMethod, f.getReturnType(), dict);
// Special methods with unique formats
if (isMethod) {
final String clsName = nameParts.get(1);
if (unqualified.equals(clsName)) // Constructor
return "?" + name.replaceFirst(unqualified + "@", "?0") +
"QAE@PA" + mangleArgs(args, dict) + "Z";
else if (unqualified.equals("~" + clsName)) // Destructor
return "?" + name.replaceFirst(unqualified + "@", "?1") +
(isVirtual(f) ? "UAE" : "QAE") + "@XZ";
else if (unqualified.equals("`scalar_deleting_destructor'")) // What it says
return "?" + name.replaceFirst(unqualified + "@", "?_G") +
"UAEPAXI@Z";
if (unqualified.equals( clsName)) { // Constructor
return "?" + name + "QAE@" + mangleArgs(f.getSignature(true), dict) + "Z";
} else if (unqualified.equals("~" + clsName)) { // Destructor
return "?" + name + (isVirtual(f) ? "UAE" : "QAE") + "@XZ";
}
}
return "?" + name + fnAttrs(f) + mangleType(ret, dict) +
mangleArgs(args, dict) + "Z";
}
private static void mangleData(final Symbol s) throws Exception {
/* Set the data symbol's name to its mangled version */
throw new Exception("TODO: data symbol \"" + s.getName(true) + "\"");
return "?" + name + mangleFnAttrs(f, nameParts) +
mangleFnType(f.getSignature(true), dict);
}
private static String mangleIdentifier(
final String ident,
final boolean isMethod,
final DataType retType, // Function return type, nullable
final List<String> dict
) {
/* Mangle a fully qualified identifier
@ -129,10 +126,76 @@ public class MSVC7Mangle extends GhidraScript{
final List<String> parts = Arrays.asList(ident.split("::"));
Collections.reverse(parts);
// Non-method special names
parts.set(0, switch (parts.get(0)) {
case "operator_new" -> "?2";
case "operator_delete" -> "?3";
case "operator_new[]" -> "?_U";
case "operator_delete[]" -> "?_V";
default -> parts.get(0);
});
// Method special names
if (isMethod) {
final String clsName = parts.get(1);
parts.set(0, switch (parts.get(0)) {
// Definitely some cases missing
case "operator_=" -> "?4";
case "operator_>>" -> "?5";
case "operator_<<" -> "?6";
case "operator_!" -> "?7";
case "operator_==" -> "?8";
case "operator_!=" -> "?9";
case "operator_[]" -> "?A";
case "operator_->" -> "?C";
case "operator_*" -> "?D";
case "operator_++" -> "?E";
case "operator_--" -> "?F";
case "operator_-" -> "?G";
case "operator_+" -> "?H";
case "operator_&" -> "?I";
case "operator_->*" -> "?J";
case "operator_/" -> "?K";
case "operator_%" -> "?L";
case "operator_<" -> "?M";
case "operator_<=" -> "?N";
case "operator_>" -> "?O";
case "operator_>=" -> "?P";
case "operator_," -> "?Q";
case "operator_()" -> "?R";
case "operator_~" -> "?S";
case "operator_^" -> "?T";
case "operator_|" -> "?U";
case "operator_&&" -> "?V";
case "operator_||" -> "?W";
case "operator_*=" -> "?X";
case "operator_+=" -> "?Y";
case "operator_-=" -> "?Z";
case "operator_/=" -> "?_0";
case "operator_%=" -> "?_1";
case "operator_>>=" -> "?_2";
case "operator_<<=" -> "?_3";
case "operator_&=" -> "?_4";
case "operator_|=" -> "?_5";
case "operator_^=" -> "?_6";
case "`scalar_deleting_destructor'" -> "?_G";
default ->
parts.get(0).equals( clsName) ? "?0" :
parts.get(0).equals("~" + clsName) ? "?1" :
retType != null && // Feeble attempt at user-defined conversions
parts.get(0).equals(
"operator_" +
retType.getName()
.replace(" ", "")
) ? "?B" :
parts.get(0);
});
}
// Apply any backreferences and combine together
return parts.stream()
.map(s -> backref(s, dict).orElse(s + "@"))
.reduce("", String::concat);
.map(s -> backref(s, dict).orElse(s + (s.charAt(0) == '?' ? "" : "@")))
.reduce("", String::concat) + "@";
}
private static <T> Optional<String> backref(
@ -149,22 +212,14 @@ public class MSVC7Mangle extends GhidraScript{
}
}
private String fnAttrs(final Function f) throws Exception {
/* Produce attribute characters for the given function
Certain assumptions must be made about e.g. visibility, which is
integrated into these attributes but fully erased from the final
binary. Everything is assumed to be public, and static methods and
const-ness are not considered.
*/
private String mangleFnAttrs(
final Function f,
final List<String> name
) {
/* Produce a string for a function's visibility and linkage */
return switch (f.getCallingConventionName()) {
case "__cdecl" -> "YA";
case "__thiscall" -> isVirtual(f) ? "UAE" : "QAE";
case "__fastcall" -> throw new Exception("TODO: __fastcall");
case "__stdcall" -> throw new Exception("TODO: __stdcall");
default -> throw new Exception(
f.getName(true) +
"(): Need to specify calling convention"
);
case "__thiscall" -> isVirtual(f) ? "UA" : "QA"; // "A" for non-const method
default -> isStatic(name) ? "S" : "Y";
};
}
@ -177,22 +232,60 @@ public class MSVC7Mangle extends GhidraScript{
final Reference[] refs = getReferencesTo(f.getEntryPoint());
for (int i = 0; i < refs.length; i++) {
final Address addr = refs[i].getFromAddress();
final String caller = getFunctionContaining(addr).getName(false);
final Optional<String> caller = Optional.ofNullable(getFunctionContaining(addr))
.map(x -> x.getName(false));
if (
!getMemoryBlock(addr).isExecute() ||
caller.equals("`scalar_deleting_destructor'") ||
caller.startsWith("??_G") // From mangled name
caller.map(x -> x.equals("`scalar_deleting_destructor'"))
.orElse(false) ||
caller.map(x -> x.startsWith("??_G")) // From mangled name
.orElse(false)
) return true;
}
return false;
}
private static boolean isStatic(final List<String> name) {
    /* Decide from a function's name components whether it is static
       Functions are treated as non-static by default. The exception is
       a name with more than one component whose first component is one
       of the allocation/deallocation operators listed below.
    */
    // A single-component (or empty) name is never reported as static
    if (name.size() <= 1) return false;
    final String first = name.get(0);
    final String[] staticOperators = {
        "operator_new",    "operator_new[]",
        "operator_delete", "operator_delete[]"
    };
    for (final String op : staticOperators) {
        if (op.equals(first)) return true;
    }
    return false;
}
private static String mangleFnType(
    final FunctionSignature f,
    final List<String> dict
) throws Exception {
    /* Mangle the parts of f other than its name and visibility/linkage
       The result is, in order: the calling convention code, the mangled
       return type, the mangled arguments, and a closing "Z". dict is
       handed on to the type and argument manglers.
    */
    // Evaluated in output order, since the later steps share dict
    final String callConv = mangleCallC(f);
    final String retType  = mangleType(f.getReturnType(), dict);
    final String argTypes = mangleArgs(f, dict);
    return callConv + retType + argTypes + "Z";
}
private static String mangleCallC(final FunctionSignature f) throws Exception {
    /* Produce the one-letter code for a function's calling convention
       Only __cdecl, __thiscall, __fastcall and __stdcall are recognized;
       any other calling convention name raises an Exception that names
       the function.
    */
    final String conv = f.getCallingConventionName();
    switch (conv) {
        case "__cdecl":
            return "A";
        case "__thiscall":
            return "E";
        case "__fastcall":
            return "I";
        case "__stdcall":
            return "G";
        default:
            throw new Exception(
                f.getName() +
                "(): Need to specify calling convention"
            );
    }
}
private static String mangleType(
final DataType t,
final List<String> dict
) {
) throws Exception {
/* Mangle a data type in a function name
All types are assumed to have no CV qualifiers.
*/
@ -209,41 +302,65 @@ public class MSVC7Mangle extends GhidraScript{
case FloatDataType _ -> "M";
case DoubleDataType _ -> "N";
case LongDoubleDataType _ -> "O";
case Pointer p -> "PA" + mangleType(p.getDataType(), dict);
case Union u -> "T" + mangleIdentifier(u.getName(), dict) + "@";
case Structure s -> "U" + mangleIdentifier(s.getName(), dict) + "@";
case Enum e -> "W4" + mangleIdentifier(e.getName(), dict) + "@";
case Pointer p -> "P" +
(p.getDataType() instanceof FunctionSignature ? "6" : "A") +
mangleType(p.getDataType(), dict);
case Union u -> "T" + mangleIdentifier(u.getName(), false, null, dict);
case Structure s -> "U" + mangleIdentifier(s.getName(), false, null, dict);
case Enum e -> "W4" + mangleIdentifier(e.getName(), false, null, dict);
case VoidDataType _ -> "X";
case LongLongDataType _ -> "_J";
case UnsignedLongLongDataType _ -> "_K";
case BooleanDataType _ -> "_N";
case WideCharDataType _ -> "_W";
case FunctionDefinition f -> "!!TODO " + f.getPrototypeString(true) + "!!";
case FunctionSignature f -> mangleFnType(f, dict);
case TypeDef d -> mangleType(d.getBaseDataType(), dict);
default -> "!!UNKNOWN " + t.getName() + "!!";
default -> throw new Exception ("Unknown type \"" + t.getName() + "\"");
};
}
private static String mangleArgs(
final DataType[] args,
final FunctionSignature f,
final List<String> dict
) {
) throws Exception {
/* Mangle the arguments for a function */
final DataType[] args = Arrays.stream(f.getArguments())
.map(ParameterDefinition::getDataType)
.toArray(DataType[]::new);
final ArrayList<DataType> argDict = new ArrayList<>();
return args.length == 0 ?
"X" :
Arrays.stream(args)
.map(a -> backref(a, argDict).orElse(mangleType(a, dict)))
.reduce("", String::concat) + "@";
if (args.length == 0) return "X";
else {
// I try to be more expression-oriented, but not being
// able to throw in lambdas, not having an error sum
// type, and not having applicative functors would
// mean that using .stream().map().reduce() would
// require me to write stuff like
// (s1, s2) -> s1.flatMap(s -> s2.map(s + s2))
// (i.e. substituting applicative for monad + functor)
// while also having much worse UX for errors
//
// It turns out that academic-sounding stuff everyone
// freaks out at is actually useful (and Optional still
// helped us out here)
String mangled = "";
for (int i = 0; i < args.length; i++)
mangled += backref(args[i], argDict).orElse(mangleType(args[i], dict));
return mangled + "@";
}
}
private static void makeGlobal(final Namespace ns) throws Exception {
private static String mangleData(final Symbol s) throws Exception {
    /* Produce the mangled name for a data symbol
       Not implemented yet: this always throws an Exception whose message
       contains the symbol's name as returned by getName(true).
    */
    final String symbolName = s.getName(true);
    throw new Exception("TODO: data symbol \"" + symbolName + "\"");
}
private static void makeGlobal(final Symbol s) throws Exception {
/* Move into the global namespace */
// I cannot for the life of me find a more convenient way of
// doing this
while (!ns.getParentNamespace().isGlobal())
ns.setParentNamespace(ns.getParentNamespace()
while (!s.isGlobal()) s.setNamespace(s.getParentNamespace()
.getParentNamespace());
}
}