From e9b0c3c6bcaee5e89114a1099b555f7a9245f262 Mon Sep 17 00:00:00 2001 From: KeybadeBlox Date: Wed, 11 Feb 2026 00:46:33 -0500 Subject: [PATCH] Complete mangler script Now to hook it up to the delinker script. (I totally accidentally said "demangling" in the last commit) --- ghidra/ghidra_scripts/MSVC7Mangle.java | 178 +++++++++++++++++++++++-- 1 file changed, 164 insertions(+), 14 deletions(-) diff --git a/ghidra/ghidra_scripts/MSVC7Mangle.java b/ghidra/ghidra_scripts/MSVC7Mangle.java index 5c3e6b2..ff13d16 100644 --- a/ghidra/ghidra_scripts/MSVC7Mangle.java +++ b/ghidra/ghidra_scripts/MSVC7Mangle.java @@ -1,7 +1,7 @@ // Applies Visual C++ 7.0 name mangling to the symbols within the selected // address range (or the whole program if nothing is selected). // -// The implementation is missing a few obscure corners but mostly complete. +// The implementation is missing a few obscure corners but pretty complete. // Keep in mind that certain qualities that aren't visible to Ghidra, like // visibility or CV qualifiers, will always be assumed to be their most // permissive form (public, non-const, etc.). 
@@ -18,9 +18,11 @@ import ghidra.program.model.address.Address; import ghidra.program.model.listing.Data; import ghidra.program.model.listing.Function; import ghidra.program.model.listing.FunctionSignature; +import ghidra.program.model.data.Array; import ghidra.program.model.data.BooleanDataType; import ghidra.program.model.data.CharDataType; import ghidra.program.model.data.DataType; +import ghidra.program.model.data.DefaultDataType; import ghidra.program.model.data.DoubleDataType; import ghidra.program.model.data.Enum; import ghidra.program.model.data.FloatDataType; @@ -32,7 +34,9 @@ import ghidra.program.model.data.ParameterDefinition; import ghidra.program.model.data.Pointer; import ghidra.program.model.data.ShortDataType; import ghidra.program.model.data.SignedCharDataType; +import ghidra.program.model.data.StringDataInstance; import ghidra.program.model.data.Structure; +import ghidra.program.model.data.TerminatedUnicodeDataType; import ghidra.program.model.data.TypeDef; import ghidra.program.model.data.Union; import ghidra.program.model.data.UnsignedCharDataType; @@ -54,6 +58,8 @@ import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.zip.CRC32; public class MSVC7Mangle extends GhidraScript{ @@ -71,7 +77,7 @@ public class MSVC7Mangle extends GhidraScript{ // Get mangled name final String mangled = switch (s.getObject()) { case Function f -> mangleFn (f); - case Data _ -> mangleData(s); + case Data d -> mangleData(d, s.getName(true)); default -> null; }; @@ -84,19 +90,22 @@ public class MSVC7Mangle extends GhidraScript{ } private String mangleFn(final Function f) throws Exception { - /* Set the function's name to its mangled version */ + /* Generate a mangled name for a function */ + // Special case for main() + if (f.getName(true).equals("main")) return "_main"; + final ArrayList dict = new ArrayList<>(); final List nameParts = 
Arrays.asList(f.getName(true).split("::")); Collections.reverse(nameParts); - final String unqualified = nameParts.get(0); final boolean isMethod = f.getCallingConventionName().equals("__thiscall") && nameParts.size() >= 2; final String name = mangleIdentifier(f.getName(true), isMethod, f.getReturnType(), dict); // Special methods with unique formats if (isMethod) { - final String clsName = nameParts.get(1); + final String unqualified = nameParts.get(0); + final String clsName = nameParts.get(1); if (unqualified.equals( clsName)) { // Constructor return "?" + name + "QAE@" + mangleArgs(f.getSignature(true), dict) + "Z"; @@ -120,16 +129,21 @@ public class MSVC7Mangle extends GhidraScript{ terminated by '@', and the whole identifier is terminated by another '@', e.g. Z@Y@X@@. Previously encountered names are kept in a dictionary to turn repeated names into backreferences, e.g. X::Y::X - would become X@Y@0@ (if starting with an empty dictionary). + would become X@Y@0@ (if starting with an empty dictionary). Some + special symbols like constructors and operators also get special case + names. 
*/ // Break up names into their mangled order final List parts = Arrays.asList(ident.split("::")); Collections.reverse(parts); // Non-method special names + // (definitely some cases missing from special names, but + // they're probably not too likely to encounter in Ghidra) parts.set(0, switch (parts.get(0)) { case "operator_new" -> "?2"; case "operator_delete" -> "?3"; + case "`vftable'" -> "?_7"; case "operator_new[]" -> "?_U"; case "operator_delete[]" -> "?_V"; default -> parts.get(0); @@ -139,7 +153,6 @@ public class MSVC7Mangle extends GhidraScript{ if (isMethod) { final String clsName = parts.get(1); parts.set(0, switch (parts.get(0)) { - // Definitely some cases missing case "operator_=" -> "?4"; case "operator_>>" -> "?5"; case "operator_<<" -> "?6"; @@ -193,8 +206,9 @@ public class MSVC7Mangle extends GhidraScript{ } // Apply any backreferences and combine together + // (special names don't get a @ terminator) return parts.stream() - .map(s -> backref(s, dict).orElse(s + (s.charAt(0) == '?' ? "" : "@"))) + .map(s -> backref(s, dict).orElse(s + (s.charAt(0) == '?' ? "" : "@"))) .reduce("", String::concat) + "@"; } @@ -218,8 +232,8 @@ public class MSVC7Mangle extends GhidraScript{ ) { /* Produce a string for a function's visibility and linkage */ return switch (f.getCallingConventionName()) { - case "__thiscall" -> isVirtual(f) ? "UA" : "QA"; // "A" for non-const method - default -> isStatic(name) ? "S" : "Y"; + case "__thiscall" -> isVirtual(f) ? "UA" : "QA"; // "A" for non-const method + default -> isStatic(name) ? "S" : "Y" ; }; } @@ -289,6 +303,12 @@ public class MSVC7Mangle extends GhidraScript{ /* Mangle a data type in a function name All types are assumed to have no CV qualifiers. */ + if (t == null) throw new Exception ( + "A data type was reported as null. Ensure that all " + + "data types in the demangled code/data have been " + + "defined." 
+ ); + return switch(t) { case SignedCharDataType _ -> "C"; case UnsignedCharDataType _ -> "E"; @@ -313,12 +333,56 @@ public class MSVC7Mangle extends GhidraScript{ case UnsignedLongLongDataType _ -> "_K"; case BooleanDataType _ -> "_N"; case WideCharDataType _ -> "_W"; + case Array a -> "PA" + mangleArrDims(a) + mangleType(arrType(a), dict); case FunctionSignature f -> mangleFnType(f, dict); case TypeDef d -> mangleType(d.getBaseDataType(), dict); - default -> throw new Exception ("Unknown type \"" + t.getName() + "\""); + case DefaultDataType _ -> throw new Exception ("Encountered data marked \"undefined\". All data types must be defined."); + default -> throw new Exception ("Unknown type \"" + t.getClass().getName() + "\""); }; } + private static String mangleArrDims(final Array a) { + /* Produce a mangled string describing the dimensions of an array + Format is Y + # of dimensions + dimension 1 + dimension 2 + ... + The outermost dimension decays to a pointer, so it's not included and + 1D arrays produce an empty dimension string. + */ + final List dims = new ArrayList<>(); + DataType t = a.getDataType(); + while (t instanceof Array a_) { + dims.add(a_.getNumElements()); + t = a_.getDataType(); + } + + return dims.size() == 0 ? "" : + "Y" + mangleNum(dims.size()) + + dims.stream() + .map(MSVC7Mangle::mangleNum) + .reduce("", String::concat); + } + + private static String mangleNum(final int n) { + /* Encode a numeric value into mangled form + Basically, values in the range 1-10 are converted to 0-9, and all other + numbers are encoded in hex using A, B, C... as 0, 1, 2..., terminated + by a @. + */ + return 0 < n && n <= 10 ? String.valueOf(n-1) : + Integer.toHexString(n) + .chars() + .mapToObj(c -> (char)c) + .map(c -> '0' <= c && c <= '9' ? c + 17 : + 'a' <= c && c <= 'f' ? 
c - 22 : '#') + .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append) + .toString() + "@"; + } + + private static DataType arrType(final Array a) { + /* Get the scalar type of a (possibly multidimensional) array */ + final DataType t = a.getDataType(); + return t instanceof Array a_ ? arrType(a_) : t; + } + private static String mangleArgs( final FunctionSignature f, final List dict @@ -347,13 +411,99 @@ public class MSVC7Mangle extends GhidraScript{ String mangled = ""; for (int i = 0; i < args.length; i++) mangled += backref(args[i], argDict).orElse(mangleType(args[i], dict)); - return mangled + "@"; + return mangled + (f.hasVarArgs() ? "Z" : "@"); } } - private static String mangleData(final Symbol s) throws Exception { + private String mangleData( + final Data d, + final String name + ) throws Exception { - /* Set the data symbol's name to its mangled version */ - throw new Exception("TODO: data symbol \"" + s.getName(true) + "\""); + /* Generate a mangled name for a data symbol (string constant, vtable or other data) */ + // String constants + if (StringDataInstance.isString(d)) + return mangleString( + d.getBytes(), + d.getDataType() instanceof TerminatedUnicodeDataType + ); + + // Other data + final ArrayList dict = new ArrayList<>(); + final String ident = mangleIdentifier(name, false, null, dict); + + // vtable + if (ident.startsWith("?_7")) return "?" + ident + "6B@"; + + return "?" + ident + "3" + mangleType(d.getDataType(), dict) + + "A"; + } + + private static String mangleString( + final byte[] s, + final boolean wide + ) { + /* Produce a mangled symbol name for a string from its raw bytes; wide strings (2-byte characters) are emitted high byte first and capped at 64 bytes instead of 32 */ + // Make copy terminated at first null character (2 bytes if wide) + // because Ghidra sometimes creates strings with trailing nulls + final int w = wide ? 2 : 1; + final byte[] bytes = Arrays.copyOf(s, + IntStream.range(0, s.length / w) + .filter(i -> s[i*w] == '\0' && s[i*w + w-1] == '\0') + .findFirst() + .orElse(s.length/w - 1) * w + w + ); + + return "??_C@_" + (wide ?
"1" : "0") + mangleNum(bytes.length) + mangleNum(jamcrc(bytes)) + IntStream.range(0, Math.min(bytes.length, wide ? 64 : 32)) + .map(i -> Byte.toUnsignedInt(bytes[wide ? i ^ 1 : i])) + .mapToObj(MSVC7Mangle::mangleStrChar) + .reduce("", String::concat) + "@"; + } + + private static String mangleStrChar(final int c) { + /* Mangle a single byte (0-255) of a string; ?6 and ?7 stand for newline and tab */ + return switch (c) { + case ',' -> "?0"; + case '/' -> "?1"; + case '\\' -> "?2"; + case ':' -> "?3"; + case '.' -> "?4"; + case ' ' -> "?5"; + case '\n' -> "?6"; + case '\t' -> "?7"; + case '\'' -> "?8"; + case '-' -> "?9"; + default -> + (('A' + 0x80) <= c && c <= ('P' + 0x80)) || + (('a' + 0x80) <= c && c <= ('p' + 0x80)) ? "?" + String.valueOf((char)(c - 0x80)) : + ( '0' <= c && c <= '9' ) || + ( 'A' <= c && c <= 'Z' ) || + ( 'a' <= c && c <= 'z' ) || + c == '_' ? String.valueOf((char) c ) : + "?" + escapeStrChar(c); + }; + } + + private static String escapeStrChar(final int c) { + /* Produce an escaped character for a string literal of the form $XX */ + // Number without 0 padding + final String num = Integer.toHexString(c) + .chars() + .mapToObj(c_ -> (char)c_) + .map(c_ -> '0' <= c_ && c_ <= '9' ? c_ + 17 : + 'a' <= c_ && c_ <= 'f' ? c_ - 22 : '#') + .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append) + .toString(); + + return "$" + (num.length() == 1 ? "A" : "") + num; + } + + private static int jamcrc(final byte[] buf) { + /* Calculate a JAMCRC checksum (inverted CRC32) */ + final CRC32 crc = new CRC32(); + crc.update(buf); + return (int)crc.getValue() ^ 0xFFFFFFFF; } private static void makeGlobal(final Symbol s) throws Exception {