diff --git a/ghidra/ghidra_scripts/MSVC7Mangle.java b/ghidra/ghidra_scripts/MSVC7Mangle.java index 0675d24..5c3e6b2 100644 --- a/ghidra/ghidra_scripts/MSVC7Mangle.java +++ b/ghidra/ghidra_scripts/MSVC7Mangle.java @@ -1,19 +1,29 @@ // Applies Visual C++ 7.0 name mangling to the symbols within the selected // address range (or the whole program if nothing is selected). // +// The implementation is missing a few obscure corners but mostly complete. +// Keep in mind that certain qualities that aren't visible to Ghidra, like +// visibility or CV qualifiers, will always be assumed to be their most +// permissive form (public, non-const, etc.). +// +// Special symbol names like "operator new" or "scalar deleting destructor" +// are given unique mangling. To properly demangle these, name them as they +// appear in objdiff, replacing spaces with underscores, e.g. "operator_new" +// and "`scalar_deleting_destructor'" (notice the ` and '). +// // @category Symbol import ghidra.app.script.GhidraScript; import ghidra.program.model.address.Address; import ghidra.program.model.listing.Data; import ghidra.program.model.listing.Function; +import ghidra.program.model.listing.FunctionSignature; import ghidra.program.model.data.BooleanDataType; import ghidra.program.model.data.CharDataType; import ghidra.program.model.data.DataType; import ghidra.program.model.data.DoubleDataType; import ghidra.program.model.data.Enum; import ghidra.program.model.data.FloatDataType; -import ghidra.program.model.data.FunctionDefinition; import ghidra.program.model.data.IntegerDataType; import ghidra.program.model.data.LongDataType; import ghidra.program.model.data.LongDoubleDataType; @@ -55,67 +65,54 @@ public class MSVC7Mangle extends GhidraScript{ while (iter.hasNext() && !monitor.isCancelled()) { final Symbol s = iter.next(); - switch (s.getObject()) { - case Function f -> mangleFn(f); + // Skip if already mangled + if (s.getName().charAt(0) == '?') return; + + // Get mangled name + final String 
mangled = switch (s.getObject()) { + case Function f -> mangleFn (f); case Data _ -> mangleData(s); - default -> {} + default -> null; + }; + + // Apply new name + if (mangled != null) { + s.setName(mangled, SourceType.USER_DEFINED); + makeGlobal(s); } } } - private void mangleFn(final Function f) throws Exception { + private String mangleFn(final Function f) throws Exception { /* Set the function's name to its mangled version */ - if (f.getName().charAt(0) == '?') return; // Already mangled - else { - f.setName(mangledFnName(f), SourceType.USER_DEFINED); - makeGlobal(f); - } - } - - private String mangledFnName(final Function f) throws Exception { - /* Produce the mangled name for a function */ final ArrayList dict = new ArrayList<>(); - final String name = mangleIdentifier(f.getName(true), dict) + "@"; - final List nameParts = Arrays.asList(f.getName(true).split("::")); + final List nameParts = Arrays.asList(f.getName(true).split("::")); Collections.reverse(nameParts); + final String unqualified = nameParts.get(0); + final boolean isMethod = f.getCallingConventionName().equals("__thiscall") && + nameParts.size() >= 2; + final String name = mangleIdentifier(f.getName(true), isMethod, f.getReturnType(), dict); - final DataType ret = f.getReturnType(); - final DataType[] args = Arrays.stream(f.getSignature(true) - .getArguments()) - .map(ParameterDefinition::getDataType) - .toArray(DataType[]::new); + // Special methods with unique formats + if (isMethod) { + final String clsName = nameParts.get(1); - // Special methods overriding first part of name - if ( - f.getCallingConventionName().equals("__thiscall") && - nameParts.size() >= 2 - ) { - final String unqualified = nameParts.get(0); - final String clsName = nameParts.get(1); - - if (unqualified.equals(clsName)) // Constructor - return "?" + name.replaceFirst(unqualified + "@", "?0") + - "QAE@PA" + mangleArgs(args, dict) + "Z"; - else if (unqualified.equals("~" + clsName)) // Destructor - return "?" 
+ name.replaceFirst(unqualified + "@", "?1") + - (isVirtual(f) ? "UAE" : "QAE") + "@XZ"; - else if (unqualified.equals("`scalar_deleting_destructor'")) // What it says - return "?" + name.replaceFirst(unqualified + "@", "?_G") + - "UAEPAXI@Z"; + if (unqualified.equals( clsName)) { // Constructor + return "?" + name + "QAE@" + mangleArgs(f.getSignature(true), dict) + "Z"; + } else if (unqualified.equals("~" + clsName)) { // Destructor + return "?" + name + (isVirtual(f) ? "UAE" : "QAE") + "@XZ"; + } } - return "?" + name + fnAttrs(f) + mangleType(ret, dict) + - mangleArgs(args, dict) + "Z"; - } - - private static void mangleData(final Symbol s) throws Exception { - /* Set the data symbol's name to its mangled version */ - throw new Exception("TODO: data symbol \"" + s.getName(true) + "\""); + return "?" + name + mangleFnAttrs(f, nameParts) + + mangleFnType(f.getSignature(true), dict); } private static String mangleIdentifier( final String ident, + final boolean isMethod, + final DataType retType, // Function return type, nullable final List dict ) { /* Mangle a fully qualified identifier @@ -129,10 +126,76 @@ public class MSVC7Mangle extends GhidraScript{ final List parts = Arrays.asList(ident.split("::")); Collections.reverse(parts); + // Non-method special names + parts.set(0, switch (parts.get(0)) { + case "operator_new" -> "?2"; + case "operator_delete" -> "?3"; + case "operator_new[]" -> "?_U"; + case "operator_delete[]" -> "?_V"; + default -> parts.get(0); + }); + + // Method special names + if (isMethod) { + final String clsName = parts.get(1); + parts.set(0, switch (parts.get(0)) { + // Definitely some cases missing + case "operator_=" -> "?4"; + case "operator_>>" -> "?5"; + case "operator_<<" -> "?6"; + case "operator_!" 
-> "?7"; + case "operator_==" -> "?8"; + case "operator_!=" -> "?9"; + case "operator_[]" -> "?A"; + case "operator_->" -> "?C"; + case "operator_*" -> "?D"; + case "operator_++" -> "?E"; + case "operator_--" -> "?F"; + case "operator_-" -> "?G"; + case "operator_+" -> "?H"; + case "operator_&" -> "?I"; + case "operator_->*" -> "?J"; + case "operator_/" -> "?K"; + case "operator_%" -> "?L"; + case "operator_<" -> "?M"; + case "operator_<=" -> "?N"; + case "operator_>" -> "?O"; + case "operator_>=" -> "?P"; + case "operator_," -> "?Q"; + case "operator_()" -> "?R"; + case "operator_~" -> "?S"; + case "operator_^" -> "?T"; + case "operator_|" -> "?U"; + case "operator_&&" -> "?V"; + case "operator_||" -> "?W"; + case "operator_*=" -> "?X"; + case "operator_+=" -> "?Y"; + case "operator_-=" -> "?Z"; + case "operator_/=" -> "?_0"; + case "operator_%=" -> "?_1"; + case "operator_>>=" -> "?_2"; + case "operator_<<=" -> "?_3"; + case "operator_&=" -> "?_4"; + case "operator_|=" -> "?_5"; + case "operator_^=" -> "?_6"; + case "`scalar_deleting_destructor'" -> "?_G"; + default -> + parts.get(0).equals( clsName) ? "?0" : + parts.get(0).equals("~" + clsName) ? "?1" : + retType != null && // Feeble attempt at user-defined conversions + parts.get(0).equals( + "operator_" + + retType.getName() + .replace(" ", "") + ) ? "?B" : + parts.get(0); + }); + } + // Apply any backreferences and combine together return parts.stream() - .map(s -> backref(s, dict).orElse(s + "@")) - .reduce("", String::concat); + .map(s -> backref(s, dict).orElse(s + (s.charAt(0) == '?' ? "" : "@"))) + .reduce("", String::concat) + "@"; } private static Optional backref( @@ -149,22 +212,14 @@ public class MSVC7Mangle extends GhidraScript{ } } - private String fnAttrs(final Function f) throws Exception { - /* Produce attribute characters for the given function - Certain assumptions must be made about e.g. visibility, which is - integrated into these attributes but fully erased from the final - binary. 
Everything is assumed to be public, and static methods and - const-ness are not considered. - */ + private String mangleFnAttrs( + final Function f, + final List name + ) { + /* Produce a string for a function's visibility and linkage */ return switch (f.getCallingConventionName()) { - case "__cdecl" -> "YA"; - case "__thiscall" -> isVirtual(f) ? "UAE" : "QAE"; - case "__fastcall" -> throw new Exception("TODO: __fastcall"); - case "__stdcall" -> throw new Exception("TODO: __stdcall"); - default -> throw new Exception( - f.getName(true) + - "(): Need to specify calling convention" - ); + case "__thiscall" -> isVirtual(f) ? "UA" : "QA"; // "A" for non-const method + default -> isStatic(name) ? "S" : "Y"; }; } @@ -176,23 +231,61 @@ public class MSVC7Mangle extends GhidraScript{ */ final Reference[] refs = getReferencesTo(f.getEntryPoint()); for (int i = 0; i < refs.length; i++) { - final Address addr = refs[i].getFromAddress(); - final String caller = getFunctionContaining(addr).getName(false); + final Address addr = refs[i].getFromAddress(); + final Optional caller = Optional.ofNullable(getFunctionContaining(addr)) + .map(x -> x.getName(false)); if ( - !getMemoryBlock(addr).isExecute() || - caller.equals("`scalar_deleting_destructor'") || - caller.startsWith("??_G") // From mangled name + !getMemoryBlock(addr).isExecute() || + caller.map(x -> x.equals("`scalar_deleting_destructor'")) + .orElse(false) || + caller.map(x -> x.startsWith("??_G")) // From mangled name + .orElse(false) ) return true; } return false; } + private static boolean isStatic(final List name) { + /* Determines whether a function is static from its name + Everything is normally assumed non-static, but certain methods are + automatically made static. 
+ */ + return name.size() > 1 && Arrays.asList( + "operator_new" , "operator_new[]", + "operator_delete", "operator_delete[]" + ).contains(name.get(0)); + } + + private static String mangleFnType( + final FunctionSignature f, + final List dict + ) throws Exception { + /* Mangle everything in f but its name and visibility/linkage */ + + return mangleCallC(f) + mangleType(f.getReturnType(), dict) + + mangleArgs(f, dict) + "Z"; + } + + private static String mangleCallC(final FunctionSignature f) throws Exception { + /* Produce a string for a function's calling convention */ + return switch (f.getCallingConventionName()) { + case "__cdecl" -> "A"; + case "__thiscall" -> "E"; + case "__fastcall" -> "I"; + case "__stdcall" -> "G"; + default -> throw new Exception( + f.getName() + + "(): Need to specify calling convention" + ); + }; + } + private static String mangleType( final DataType t, final List dict - ) { + ) throws Exception { /* Mangle a data type in a function name All types are assumed to have no CV qualifiers. */ @@ -209,41 +302,65 @@ public class MSVC7Mangle extends GhidraScript{ case FloatDataType _ -> "M"; case DoubleDataType _ -> "N"; case LongDoubleDataType _ -> "O"; - case Pointer p -> "PA" + mangleType(p.getDataType(), dict); - case Union u -> "T" + mangleIdentifier(u.getName(), dict) + "@"; - case Structure s -> "U" + mangleIdentifier(s.getName(), dict) + "@"; - case Enum e -> "W4" + mangleIdentifier(e.getName(), dict) + "@"; + case Pointer p -> "P" + + (p.getDataType() instanceof FunctionSignature ?
"6" : "A") + + mangleType(p.getDataType(), dict); - case Union u -> "T" + mangleIdentifier(u.getName(), dict) + "@"; - case Structure s -> "U" + mangleIdentifier(s.getName(), dict) + "@"; - case Enum e -> "W4" + mangleIdentifier(e.getName(), dict) + "@"; + case Union u -> "T" + mangleIdentifier(u.getName(), false, null, dict); + case Structure s -> "U" + mangleIdentifier(s.getName(), false, null, dict); + case Enum e -> "W4" + mangleIdentifier(e.getName(), false, null, dict); case VoidDataType _ -> "X"; case LongLongDataType _ -> "_J"; case UnsignedLongLongDataType _ -> "_K"; case BooleanDataType _ -> "_N"; case WideCharDataType _ -> "_W"; - case FunctionDefinition f -> "!!TODO " + f.getPrototypeString(true) + "!!"; + case FunctionSignature f -> mangleFnType(f, dict); case TypeDef d -> mangleType(d.getBaseDataType(), dict); - default -> "!!UNKNOWN " + t.getName() + "!!"; + default -> throw new Exception ("Unknown type \"" + t.getName() + "\""); }; } private static String mangleArgs( - final DataType[] args, - final List dict - ) { + final FunctionSignature f, + final List dict + ) throws Exception { /* Mangle the arguments for a function */ + final DataType[] args = Arrays.stream(f.getArguments()) + .map(ParameterDefinition::getDataType) + .toArray(DataType[]::new); + final ArrayList argDict = new ArrayList<>(); - return args.length == 0 ? - "X" : - Arrays.stream(args) - .map(a -> backref(a, argDict).orElse(mangleType(a, dict))) - .reduce("", String::concat) + "@"; + if (args.length == 0) return "X"; + else { + // I try to be more expression-oriented, but not being + // able to throw in lambdas, not having an error sum + // type, and not having applicative functors would + // mean that using .stream().map().reduce() would + // require me to write stuff like + // (s1, s2) -> s1.flatMap(s -> s2.map(s + s2)) + // (i.e.
substituting applicative for monad + functor) + // while also having much worse UX for errors + // + // It turns out that academic-sounding stuff everyone + // freaks out at is actually useful (and Optional still + // helped us out here) + String mangled = ""; + for (int i = 0; i < args.length; i++) + mangled += backref(args[i], argDict).orElse(mangleType(args[i], dict)); + return mangled + "@"; + } } - private static void makeGlobal(final Namespace ns) throws Exception { + private static String mangleData(final Symbol s) throws Exception { + /* Produce the mangled name for a data symbol (not yet implemented; always throws) */ + throw new Exception("TODO: data symbol \"" + s.getName(true) + "\""); + } + + private static void makeGlobal(final Symbol s) throws Exception { /* Move into the global namespace */ // I cannot for the life of me find a more convenient way of // doing this - while (!ns.getParentNamespace().isGlobal()) - ns.setParentNamespace(ns.getParentNamespace() - .getParentNamespace()); + while (!s.isGlobal()) s.setNamespace(s.getParentNamespace() + .getParentNamespace()); } }