// Applies Visual C++ 7.0 name mangling to the symbols within the selected // address range (or the whole program if nothing is selected). // // The implementation is missing a few obscure corners but pretty complete. // Keep in mind that certain qualities that aren't visible to Ghidra, like // visibility or CV qualifiers, will always be assumed to be their most // permissive form (public, non-const, etc.). // // Special symbol names like "operator new" or "scalar deleting destructor" // are given unique mangling. To properly mangle these, name them as they // appear in objdiff, replacing spaces with underscores, e.g. "operator_new" // and "`scalar_deleting_destructor'" (notice the ` and '). // // This script can be called in headless mode with the address ranges to mangle // as arguments, e.g. 0x1234-0x5678. Any symbols referenced by functions being // mangled will also be mangled in this mode (so that the references are // correct if the mangling is done in preparation for exporting functions). // // @category Symbol import ghidra.app.script.GhidraScript; import ghidra.program.flatapi.FlatProgramAPI; import ghidra.program.model.address.Address; import ghidra.program.model.address.AddressSet; import ghidra.program.model.data.Array; import ghidra.program.model.data.BooleanDataType; import ghidra.program.model.data.CharDataType; import ghidra.program.model.data.DataType; import ghidra.program.model.data.DefaultDataType; import ghidra.program.model.data.DoubleDataType; import ghidra.program.model.data.Enum; import ghidra.program.model.data.FloatDataType; import ghidra.program.model.data.IntegerDataType; import ghidra.program.model.data.LongDataType; import ghidra.program.model.data.LongDoubleDataType; import ghidra.program.model.data.LongLongDataType; import ghidra.program.model.data.ParameterDefinition; import ghidra.program.model.data.Pointer; import ghidra.program.model.data.ShortDataType; import ghidra.program.model.data.SignedCharDataType; import ghidra.program.model.data.StringDataInstance; import ghidra.program.model.data.Structure; import ghidra.program.model.data.TerminatedUnicodeDataType; import ghidra.program.model.data.TypeDef; import ghidra.program.model.data.Undefined; import ghidra.program.model.data.Union; import ghidra.program.model.data.UnsignedCharDataType; import ghidra.program.model.data.UnsignedIntegerDataType; import ghidra.program.model.data.UnsignedLongDataType; import ghidra.program.model.data.UnsignedLongLongDataType; import ghidra.program.model.data.UnsignedShortDataType; import ghidra.program.model.data.VoidDataType; import ghidra.program.model.data.WideCharDataType; import ghidra.program.model.listing.Data; import ghidra.program.model.listing.Function; import ghidra.program.model.listing.FunctionSignature; import ghidra.program.model.listing.Instruction; import ghidra.program.model.symbol.Namespace; import ghidra.program.model.symbol.Reference; import ghidra.program.model.symbol.SourceType; import ghidra.program.model.symbol.Symbol; import ghidra.program.model.symbol.SymbolIterator; import java.util.Arrays; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.zip.CRC32; public class MSVC7Mangle extends GhidraScript{ @Override public void run() throws Exception { // Get selected ranges from arguments if invoked headless if (isRunningHeadless()) { final AddressSet addr = new AddressSet(); Arrays.stream(getScriptArgs()).forEach(arg -> { final String[] range = arg.split("-"); addr.add(toAddr(range[0]), toAddr(range[1])); }); setCurrentSelection(addr); } final SymbolIterator iter = currentProgram.getSymbolTable() .getPrimarySymbolIterator(currentSelection, true); while (iter.hasNext() && !monitor.isCancelled()) { final Symbol s = iter.next(); mangle(s); if ( isRunningHeadless() && s.getObject() instanceof Function f ) { // Also mangle everything referenced inside f for ( Instruction ins = getFirstInstruction(f); ins != null && f.getBody().contains(ins.getAddress()); ins = ins.getNext() ) { final Reference[] refs = ins.getReferencesFrom(); for (int i = 0; i < refs.length; i++) { final Symbol symbol = getSymbolAt(refs[i].getToAddress()); if ( // Guard against spurious references to nonexisting things symbol != null && symbol.getObject() != null ) mangle(symbol); } } } } } private void mangle(final Symbol s) throws Exception { /* Set the given symbol's name to its mangled version */ // Skip if already mangled; skip jump tables final String name = s.getName(true); if ( name.charAt(0) == '?' || name.startsWith("switchD_") ) return; // Get mangled name final String mangled = switch (s.getObject()) { case Function f -> mangleFn (f); case Data d -> mangleData(d, name); default -> null; }; // Apply new name if (mangled != null) { s.setName(mangled, SourceType.USER_DEFINED); makeGlobal(s); if (s.getObject() instanceof Function f) { // Also apply to target function if f is thunk final Function thunked = f.getThunkedFunction(true); if (thunked != null) { final Symbol ts = thunked.getSymbol(); ts.setName(mangled, SourceType.USER_DEFINED); makeGlobal(ts); } } } } private String mangleFn(final Function f) throws Exception { /* Generate a mangled name for a function */ final String nameRaw = f.getName(true); // Special case for main() if (nameRaw.equals("main")) return "_main"; // Special symbols like intrinsics aren't mangled if (nameRaw.startsWith("__")) return nameRaw; final ArrayList dict = new ArrayList<>(); final List nameParts = Arrays.asList(nameRaw.split("::")); Collections.reverse(nameParts); final boolean isMethod = f.getCallingConventionName().equals("__thiscall") && nameParts.size() >= 2; final String name = mangleIdentifier(nameRaw, isMethod, f.getReturnType(), dict); // Special methods with unique formats if (isMethod) { final String unqualified = nameParts.get(0); final String clsName = nameParts.get(1); if (unqualified.equals( clsName)) { // Constructor return "?" + name + "QAE@" + mangleArgs(f.getSignature(true), dict, nameRaw + "()") + "Z"; } else if (unqualified.equals("~" + clsName)) { // Destructor return "?" + name + (isVirtual(f) ? "UAE" : "QAE") + "@XZ"; } } return "?" + name + mangleFnAttrs(f, nameParts) + mangleFnType(f.getSignature(true), dict, nameRaw + "()"); } private static String mangleIdentifier( final String ident, final boolean isMethod, final DataType retType, // Function return type, nullable final List dict ) { /* Mangle a fully qualified identifier Identifiers like X::Y::Z are mangled with names in reverse order each terminated by '@', and the whole identifier is terminated by another '@', e.g. Z@Y@X@@. Previously encountered names are kept in a dictionary to turn repeated names into backreferences, e.g. X::Y::X would become X@Y@0@ (if starting with an empty dictionary). Some special symbols like constructors and operators also get special case names. */ // Break up names into their mangled order final List parts = Arrays.asList(ident.split("::")); Collections.reverse(parts); // Non-method special names // (definitely some cases missing from special names, but // they're probably not too likely to encounter in Ghidra) parts.set(0, switch (parts.get(0)) { case "operator_new" -> "?2"; case "operator_delete" -> "?3"; case "`vftable'" -> "?_7"; case "operator_new[]" -> "?_U"; case "operator_delete[]" -> "?_V"; default -> parts.get(0); }); // Method special names if (isMethod) { final String clsName = parts.get(1); parts.set(0, switch (parts.get(0)) { case "operator_=" -> "?4"; case "operator_>>" -> "?5"; case "operator_<<" -> "?6"; case "operator_!" -> "?7"; case "operator_==" -> "?8"; case "operator_!=" -> "?9"; case "operator_[]" -> "?A"; case "operator_->" -> "?C"; case "operator_*" -> "?D"; case "operator_++" -> "?E"; case "operator_--" -> "?F"; case "operator_-" -> "?G"; case "operator_+" -> "?H"; case "operator_&" -> "?I"; case "operator_->*" -> "?J"; case "operator_/" -> "?K"; case "operator_%" -> "?L"; case "operator_<" -> "?M"; case "operator_<=" -> "?N"; case "operator_>" -> "?O"; case "operator_>=" -> "?P"; case "operator_," -> "?Q"; case "operator_()" -> "?R"; case "operator_~" -> "?S"; case "operator_^" -> "?T"; case "operator_|" -> "?U"; case "operator_&&" -> "?V"; case "operator_||" -> "?W"; case "operator_*=" -> "?X"; case "operator_+=" -> "?Y"; case "operator_-=" -> "?Z"; case "operator_/=" -> "?_0"; case "operator_%=" -> "?_1"; case "operator_>>=" -> "?_2"; case "operator_<<=" -> "?_3"; case "operator_&=" -> "?_4"; case "operator_|=" -> "?_5"; case "operator_^=" -> "?_6"; case "`scalar_deleting_destructor'" -> "?_G"; default -> parts.get(0).equals( clsName) ? "?0" : parts.get(0).equals("~" + clsName) ? "?1" : retType != null && // Feeble attempt at user-defined conversions parts.get(0).equals( "operator_" + retType.getName() .replace(" ", "") ) ? "?B" : parts.get(0); }); } // Apply any backreferences and combine together // (special names don't get a @ terminator) return parts.stream() .map(s -> backref(s, dict).orElse(s + (s.charAt(0) == '?' ? "" : "@"))) .reduce("", String::concat) + "@"; } private static Optional backref( final T x, final List dict ) { /* Produce a backreference string if x is found in dict */ if (x instanceof String s && s.startsWith("?")) return Optional.empty(); // No matching special names else switch (Integer.valueOf(dict.indexOf(x))) { case -1: dict.add(x); return Optional.empty(); case Integer ref: return Optional.of(ref.toString()); } } private String mangleFnAttrs( final Function f, final List name ) { /* Produce a string for a function's visibility and linkage */ return switch (f.getCallingConventionName()) { case "__thiscall" -> isVirtual(f) ? "UA" : "QA"; // "A" for non-const method default -> isStatic(name) ? "S" : "Y" ; }; } private boolean isVirtual(final Function f) { /* Determine whether a method is virtual We essentially check whether any references are from a vtable or a scalar deleting destructor. */ final Reference[] refs = getReferencesTo(f.getEntryPoint()); for (int i = 0; i < refs.length; i++) { final Data data = getDataContaining (refs[i].getFromAddress()); final Function func = getFunctionContaining(refs[i].getFromAddress()); if (data != null) { final Symbol s = getSymbolAt(data.getRoot() .getAddress()); if (s != null) { final String name = s.getName(false); if ( name.equals("`vftable'") || name.startsWith("??_7") ) return true; } } else if (func != null) { final String name = func.getName(false); if ( name.equals("`scalar_deleting_destructor'") || name.startsWith("??_G") ) return true; } } return false; } private static boolean isStatic(final List name) { /* Determines whether a function is static from its name Everything is normally assumed non-static, but certain methods are automatically made static. */ return name.size() > 1 && Arrays.asList( "operator_new" , "operator_new[]", "operator_delete", "operator_delete[]" ).contains(name.get(0)); } private static String mangleFnType( final FunctionSignature f, final List dict, final String loc ) throws Exception { /* Mangle everything in f but its name and visibility/linkage */ return mangleCallC(f) + mangleType(f.getReturnType(), dict, loc) + mangleArgs(f, dict, loc) + "Z"; } private static String mangleCallC(final FunctionSignature f) throws Exception { /* Produce a string for a function's calling convention */ return switch (f.getCallingConventionName()) { case "__cdecl" -> "A"; case "__thiscall" -> "E"; case "__fastcall" -> "I"; case "__stdcall" -> "G"; default -> throw new Exception( f.getName() + "(): Need to specify calling convention" ); }; } private static String mangleType( final DataType t, final List dict, final String loc ) throws Exception { /* Mangle a data type in a function name All types are assumed to have no CV qualifiers. */ if (t == null) throw new Exception ( "A data type at " + loc + " was reported as null. " + "Ensure that all data types in the code/data to " + "mangle have been defined." ); return switch(t) { case SignedCharDataType _ -> "C"; case UnsignedCharDataType _ -> "E"; case CharDataType _ -> "D"; // Must come after its child types case ShortDataType _ -> "F"; case UnsignedShortDataType _ -> "G"; case IntegerDataType _ -> "H"; case UnsignedIntegerDataType _ -> "I"; case LongDataType _ -> "J"; case UnsignedLongDataType _ -> "K"; case FloatDataType _ -> "M"; case DoubleDataType _ -> "N"; case LongDoubleDataType _ -> "O"; case Pointer p -> "P" + (p.getDataType() instanceof FunctionSignature ? "6" : "A") + mangleType(p.getDataType(), dict, loc); case Union u -> "T" + mangleIdentifier(u.getName(), false, null, dict); case Structure s -> "U" + mangleIdentifier(s.getName(), false, null, dict); case Enum e -> "W4" + mangleIdentifier(e.getName(), false, null, dict); case VoidDataType _ -> "X"; case LongLongDataType _ -> "_J"; case UnsignedLongLongDataType _ -> "_K"; case BooleanDataType _ -> "_N"; case WideCharDataType _ -> "_W"; case Array a -> "PA" + mangleArrDims(a) + mangleType(arrType(a), dict, loc); case FunctionSignature f -> mangleFnType(f, dict, "function typedef \"" + f.getName() + "\""); case TypeDef d -> mangleType(d.getBaseDataType(), dict, "typedef \"" + d.getName() + "\""); case DefaultDataType _ -> throw new Exception ("Encountered data marked \"undefined\" at " + loc + ". Ensure that all data types in the code/data to mangle have been defined."); case Undefined _ -> throw new Exception ("Encountered data marked \"undefined\" at " + loc + ". Ensure that all data types in the code/data to mangle have been defined."); default -> throw new Exception ("Unknown type \"" + t.getClass().getName() + "\" at " + loc); }; } private static String mangleArrDims(final Array a) { /* Produce a mangled string describing the dimensions of an array Format is Y + # of dimensions + dimension 1 + dimension 2 + ... The outermost dimension decays to a pointer, so it's not included and 1D arrays produce an empty dimension string. */ final List dims = new ArrayList<>(); DataType t = a.getDataType(); while (t instanceof Array a_) { dims.add(a_.getNumElements()); t = a_.getDataType(); } return dims.size() == 0 ? "" : "Y" + mangleNum(dims.size()) + dims.stream() .map(MSVC7Mangle::mangleNum) .reduce("", String::concat); } private static String mangleNum(final int n) { /* Encode a numeric value into mangled form Basically, values in the range 1-10 are converted to 0-9, and all other numbers are encoded in hex using A, B, C... as 0, 1, 2..., terminated by a @. */ return 0 < n && n <= 10 ? String.valueOf(n-1) : Integer.toHexString(n) .chars() .mapToObj(c -> (char)c) .map(c -> '0' <= c && c <= '9' ? c + 17 : 'a' <= c && c <= 'f' ? c - 22 : '#') .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append) .toString() + "@"; } private static DataType arrType(final Array a) { /* Get the scalar type of a (possibly multidimensional) array */ final DataType t = a.getDataType(); return t instanceof Array a_ ? arrType(a_) : t; } private static String mangleArgs( final FunctionSignature f, final List dict, final String loc ) throws Exception { /* Mangle the arguments for a function */ final DataType[] args = Arrays.stream(f.getArguments()) .map(ParameterDefinition::getDataType) .toArray(DataType[]::new); final ArrayList argDict = new ArrayList<>(); if (args.length == 0) return "X"; else { // I try to be more expression-oriented, but not being // able to throw in lambdas, not having an error sum // type, and not having applicative functors would // means that using .stream().map().reduce() would // require me to write stuff like // (s1, s2) -> s1.flatMap(s -> s2.map(s + s2)) // (i.e. substituting applicative for monad + functor) // while also having much worse UX for errors // // It turns out that academic-sounding stuff everyone // freaks out at is actually useful (and Optional still // helped us out here) String mangledArgs = ""; for (int i = 0; i < args.length; i++) { final String mangledArg = mangleType(args[i], dict, loc); mangledArgs += mangledArg.length() == 1 ? mangledArg : backref(args[i], argDict).orElse(mangledArg); } return mangledArgs + (f.hasVarArgs() ? "Z" : "@"); } } private String mangleData( final Data d, final String name ) throws Exception { /* Set the data symbol's name to its mangled version */ // String constants if (StringDataInstance.isString(d)) return mangleString( d.getBytes(), d.getDataType() instanceof TerminatedUnicodeDataType ); // Other data final ArrayList dict = new ArrayList<>(); final String ident = mangleIdentifier(name, false, null, dict); // vtable if (ident.startsWith("?_7")) return "?" + ident + "6B@"; return "?" + ident + "3" + mangleType(d.getDataType(), dict, "0x" + d.getAddress().toString()) + "A"; } private static String mangleString( final byte[] s, final boolean wide ) { /* Produce a mangled symbol name for a string */ // Make copy terminated at first null byte because Ghidra // sometimes creates strings with trailing nulls final byte[] bytes = Arrays.copyOf( s, IntStream.range(0, s.length) .filter(i -> s[i] == '\0') .findFirst() .orElse(s.length-1) + 1 ); return "??_C@_" + (wide ? "1" : "0") + mangleNum(bytes.length) + mangleNum(jamcrc(bytes)) + IntStream.range(0, Math.min(bytes.length, 32)) .map(i -> Byte.toUnsignedInt(bytes[i])) .mapToObj(MSVC7Mangle::mangleStrChar) .reduce("", String::concat) + "@"; } private static String mangleStrChar(final int c) { /* Mangle a byte from a non-wide string */ return switch (c) { case ',' -> "?0"; case '/' -> "?1"; case '\\' -> "?2"; case ':' -> "?3"; case '.' -> "?4"; case ' ' -> "?5"; case '\u0011' -> "?6"; case '\u0010' -> "?7"; case '\'' -> "?8"; case '-' -> "?9"; default -> (('A' + 0x80) <= c && c <= ('P' + 0x80)) || (('a' + 0x80) <= c && c <= ('p' + 0x80)) ? "?" + String.valueOf((char)(c - 0x80)) : ( '0' <= c && c <= '9' ) || ( 'A' <= c && c <= 'Z' ) || ( 'a' <= c && c <= 'z' ) || c == '_' ? String.valueOf((char) c ) : "?" + escapeStrChar(c); }; } private static String escapeStrChar(final int c) { /* Produce an escaped character for a string literal of the form $XX */ // Number without 0 padding final String num = Integer.toHexString(c) .chars() .mapToObj(c_ -> (char)c_) .map(c_ -> '0' <= c_ && c_ <= '9' ? c_ + 17 : 'a' <= c_ && c_ <= 'f' ? c_ - 22 : '#') .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append) .toString(); return "$" + (num.length() == 1 ? "A" : "") + num; } private static int jamcrc(final byte[] buf) { /* Calculate a JAMCRC checksum (inverted CRC32) */ final CRC32 crc = new CRC32(); crc.update(buf); return (int)crc.getValue() ^ 0xFFFFFFFF; } private static void makeGlobal(final Symbol s) throws Exception { /* Move into the global namespace */ // I cannot for the life of me find a more convenient way of // doing this while (!s.isGlobal()) s.setNamespace(s.getParentNamespace() .getParentNamespace()); } }