JSRF-Decompilation/ghidra/ghidra_scripts/MSVC7Mangle.java
KeybadeBlox 3c4f0e72b8 Miscellaneous Ghidra script tweaks
No behavioural changes.
2026-02-18 18:18:24 -05:00

657 lines
24 KiB
Java

// Applies Visual C++ 7.0 name mangling to the symbols within the selected
// address range (or the whole program if nothing is selected).
//
// The implementation is missing a few obscure corners but is otherwise pretty complete.
// Keep in mind that certain qualities that aren't visible to Ghidra, like
// visibility or CV qualifiers, will always be assumed to be their most
// permissive form (public, non-const, etc.).
//
// Special symbol names like "operator new" or "scalar deleting destructor"
// are given unique mangling. To properly mangle these, name them as they
// appear in objdiff, replacing spaces with underscores, e.g. "operator_new"
// and "`scalar_deleting_destructor'" (notice the ` and ').
//
// MSVC also applies minor name mangling to C symbols. This can be enabled for
// a given symbol by placing it in a top-level namespace named extern_"C".
//
// This script can be called in headless mode with the address ranges to mangle
// as arguments, e.g. 0x1234-0x5678. Any symbols referenced by functions being
// mangled will also be mangled in this mode (so that the references are
// correct if the mangling is done in preparation for exporting functions).
//
// @category Symbol
import ghidra.app.script.GhidraScript;
import ghidra.program.flatapi.FlatProgramAPI;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSet;
import ghidra.program.model.data.Array;
import ghidra.program.model.data.BooleanDataType;
import ghidra.program.model.data.CharDataType;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.DefaultDataType;
import ghidra.program.model.data.DoubleDataType;
import ghidra.program.model.data.Enum;
import ghidra.program.model.data.FloatDataType;
import ghidra.program.model.data.IntegerDataType;
import ghidra.program.model.data.LongDataType;
import ghidra.program.model.data.LongDoubleDataType;
import ghidra.program.model.data.LongLongDataType;
import ghidra.program.model.data.ParameterDefinition;
import ghidra.program.model.data.Pointer;
import ghidra.program.model.data.ShortDataType;
import ghidra.program.model.data.SignedCharDataType;
import ghidra.program.model.data.StringDataInstance;
import ghidra.program.model.data.Structure;
import ghidra.program.model.data.TerminatedUnicodeDataType;
import ghidra.program.model.data.TypeDef;
import ghidra.program.model.data.Undefined;
import ghidra.program.model.data.Union;
import ghidra.program.model.data.UnsignedCharDataType;
import ghidra.program.model.data.UnsignedIntegerDataType;
import ghidra.program.model.data.UnsignedLongDataType;
import ghidra.program.model.data.UnsignedLongLongDataType;
import ghidra.program.model.data.UnsignedShortDataType;
import ghidra.program.model.data.VoidDataType;
import ghidra.program.model.data.WideCharDataType;
import ghidra.program.model.listing.Data;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.FunctionSignature;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.symbol.Namespace;
import ghidra.program.model.symbol.Reference;
import ghidra.program.model.symbol.SourceType;
import ghidra.program.model.symbol.Symbol;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.zip.CRC32;
public class MSVC7Mangle extends GhidraScript {
@Override
public void run() throws Exception {
	/* Mangle every primary symbol in the current selection
	When running headless, the selection is first built from the script
	arguments (each of the form START-END), and the symbols referenced
	inside each mangled function are mangled as well.
	Throws IllegalArgumentException if a headless argument is not of the
	form START-END.
	*/
	final boolean headless = isRunningHeadless();

	// Get selected ranges from arguments if invoked headless
	if (headless) {
		final AddressSet addr = new AddressSet();
		for (final String arg : getScriptArgs()) {
			final String[] range = arg.split("-");
			// Name the offending argument instead of failing on
			// the range[1] lookup below
			if (range.length < 2) throw new IllegalArgumentException(
				"Invalid address range \"" + arg +
				"\": expected START-END, e.g. 0x1234-0x5678"
			);
			addr.add(toAddr(range[0]), toAddr(range[1]));
		}
		setCurrentSelection(addr);
	}

	for (final Symbol s : currentProgram.getSymbolTable()
	                                    .getPrimarySymbolIterator(currentSelection, true)) {
		mangle(s);

		// Also mangle everything referenced inside functions if
		// headless
		if (headless && s.getObject() instanceof Function f)
			mangleRefs(f);
	}
}
private void mangle(final Symbol s) throws Exception {
	/* Set the given symbol's name to its mangled version
	Symbols whose qualified name already starts with '?' (already mangled)
	or with "switchD_" (jump tables) are left alone, as are symbols that
	are attached to neither a Function nor a Data object (including
	symbols with no object at all). Renamed symbols are moved to the
	global namespace.
	*/
	// Skip if already mangled; skip jump tables
	// (startsWith rather than charAt(0) so an empty name cannot throw)
	final String name = s.getName(true);
	if (name.startsWith("?") || name.startsWith("switchD_")) return;

	// Get mangled name. getObject() may be null (mangleRefs() guards
	// against the same thing); a pattern switch on null would throw, so
	// dispatch with instanceof and treat null like any other
	// unsupported object
	final Object target = s.getObject();
	final String mangled;
	if      (target instanceof Function f) mangled = mangleFn  (f);
	else if (target instanceof Data     d) mangled = mangleData(d, name);
	else return;
	if (mangled == null) return;

	// Apply new name
	s.setName(mangled, SourceType.USER_DEFINED);
	s.setNamespace(currentProgram.getGlobalNamespace());

	// Also apply to target function if the symbol is a thunk
	if (s.getObject() instanceof Function f) {
		final Function thunked = f.getThunkedFunction(true);
		if (thunked != null) {
			final Symbol ts = thunked.getSymbol();
			ts.setName(mangled, SourceType.USER_DEFINED);
			ts.setNamespace(currentProgram.getGlobalNamespace());
		}
	}
}
private String mangleFn(final Function f) throws Exception {
	/* Generate a mangled name for a function
	main() and anything inside the extern_"C" namespace get C name
	mangling; everything else gets C++ name mangling.
	(Some other things get C mangling too, but use extern "C" for those
	rather than having them all listed here.)
	*/
	final String nameRaw = f.getName(true);

	// Compare contents with equals(): == on Strings compares object
	// identity and is not a reliable test against a runtime value
	final boolean cLinkage = nameRaw.equals("main") ||
	                         nameRaw.startsWith("extern_\"C\"::");

	return cLinkage ? mangleCFn(f) : mangleCppFn(f);
}
private static String mangleCFn(final Function f) throws Exception {
	/* Build the decorated name of a C function
	The decoration depends only on the calling convention: __cdecl gets a
	leading underscore, __stdcall a leading underscore plus an argument
	size suffix, and __fastcall a leading @ plus an argument size suffix.
	__thiscall and unrecognized conventions are rejected.
	*/
	final String convention = f.getCallingConventionName();
	switch (convention) {
		case "__cdecl":
			return "_" + f.getName(false);
		case "__stdcall":
			return "_" + f.getName(false) + argSize(f);
		case "__fastcall":
			return "@" + f.getName(false) + argSize(f);
		case "__thiscall":
			throw new Exception(
				f.getName() + "(): __thiscall not allowed for C symbols"
			);
		default:
			throw new Exception(
				f.getName() + "(): Need to specify calling convention"
			);
	}
}
private static String argSize(final Function f) {
	/* Produce the argument size suffix for a C function
	The format is "@123" where "123" is however many bytes the arguments
	occupy. Each argument is counted in whole four-byte units: its length
	is rounded up to the next multiple of four, with a minimum of four
	(so a reported length of zero or less still counts as four).
	*/
	final int total = Arrays.stream(f.getSignature(true).getArguments())
		.map(ParameterDefinition::getDataType)
		// (len + 3) & ~3 rounds up to a multiple of 4, so e.g. a
		// 6-byte argument counts as 8 rather than 6
		.mapToInt(t -> Math.max((t.getLength() + 3) & ~3, 4))
		.sum();

	return "@" + total;
}
private String mangleCppFn(final Function f) throws Exception {
	/* Build the mangled name of a C++ function
	A function counts as a method when it is __thiscall and its name has at
	least one qualifier. Constructors and destructors (recognized by
	comparing the unqualified name against the enclosing name) have their
	own fixed layouts; everything else is identifier + attributes + type.
	*/
	final String qualified = f.getName(true);
	final String loc       = qualified + "()";
	final List<String> dict = new ArrayList<>();

	// Name components, innermost first
	final List<String> nameParts = Arrays.asList(qualified.split("::"));
	Collections.reverse(nameParts);

	final boolean isMethod =
		f.getCallingConventionName().equals("__thiscall") &&
		nameParts.size() >= 2;

	final StringBuilder out = new StringBuilder("?");
	out.append(mangleIdentifier(qualified, isMethod, f.getReturnType(), dict));

	if (isMethod) {
		final String self  = nameParts.get(0);
		final String owner = nameParts.get(1);

		// Constructor
		if (self.equals(owner)) {
			out.append("QAE@");
			out.append(mangleArgs(f.getSignature(true), dict, loc));
			return out.append("Z").toString();
		}

		// Destructor
		if (self.equals("~" + owner)) {
			out.append(isVirtual(f) ? "UAE" : "QAE");
			return out.append("@XZ").toString();
		}
	}

	out.append(mangleFnAttrs(f, nameParts));
	out.append(mangleFnType(f.getSignature(true), dict, loc));
	return out.toString();
}
private static String mangleIdentifier(
	final String       ident,
	final boolean      isMethod,
	final DataType     retType,  // Function return type, nullable
	final List<String> dict
) {
	/* Build the mangled form of a fully qualified identifier
	The "::"-separated components are emitted innermost first, each
	followed by '@', and the whole identifier is closed by one more '@',
	so X::Y::Z becomes Z@Y@X@@. Components already present in dict are
	replaced by their index (a backreference) and new ones are recorded
	in dict. The innermost component may be replaced by a special code
	starting with '?' (operators, constructors, destructors, ...); such
	codes get no '@' of their own and are never backreferenced.
	*/
	final List<String> parts = Arrays.asList(ident.split("::"));
	Collections.reverse(parts);

	// Special names that apply whether or not this is a method
	// (certainly incomplete, but the missing ones are unlikely to show
	// up in Ghidra)
	final String raw = parts.get(0);
	final String plain = switch (raw) {
		case "operator_new"      -> "?2";
		case "operator_delete"   -> "?3";
		case "`vftable'"         -> "?_7";
		case "operator_new[]"    -> "?_U";
		case "operator_delete[]" -> "?_V";
		default                  -> raw;
	};

	// Special names that only apply to methods
	final String head;
	if (isMethod) {
		final String clsName = parts.get(1);
		head = switch (plain) {
			case "operator_="   -> "?4";
			case "operator_>>"  -> "?5";
			case "operator_<<"  -> "?6";
			case "operator_!"   -> "?7";
			case "operator_=="  -> "?8";
			case "operator_!="  -> "?9";
			case "operator_[]"  -> "?A";
			case "operator_->"  -> "?C";
			case "operator_*"   -> "?D";
			case "operator_++"  -> "?E";
			case "operator_--"  -> "?F";
			case "operator_-"   -> "?G";
			case "operator_+"   -> "?H";
			case "operator_&"   -> "?I";
			case "operator_->*" -> "?J";
			case "operator_/"   -> "?K";
			case "operator_%"   -> "?L";
			case "operator_<"   -> "?M";
			case "operator_<="  -> "?N";
			case "operator_>"   -> "?O";
			case "operator_>="  -> "?P";
			case "operator_,"   -> "?Q";
			case "operator_()"  -> "?R";
			case "operator_~"   -> "?S";
			case "operator_^"   -> "?T";
			case "operator_|"   -> "?U";
			case "operator_&&"  -> "?V";
			case "operator_||"  -> "?W";
			case "operator_*="  -> "?X";
			case "operator_+="  -> "?Y";
			case "operator_-="  -> "?Z";
			case "operator_/="  -> "?_0";
			case "operator_%="  -> "?_1";
			case "operator_>>=" -> "?_2";
			case "operator_<<=" -> "?_3";
			case "operator_&="  -> "?_4";
			case "operator_|="  -> "?_5";
			case "operator_^="  -> "?_6";
			case "`scalar_deleting_destructor'" -> "?_G";
			default -> {
				// Constructor
				if (plain.equals(clsName)) yield "?0";
				// Destructor
				if (plain.equals("~" + clsName)) yield "?1";
				// Rough detection of user-defined conversions:
				// "operator_" + return type name, spaces removed
				if (
					retType != null &&
					plain.equals(
						"operator_" +
						retType.getName().replace(" ", "")
					)
				) yield "?B";
				yield plain;
			}
		};
	} else {
		head = plain;
	}
	parts.set(0, head);

	// Emit each component as a backreference where possible, otherwise
	// spelled out (special names don't get a @ terminator)
	final StringBuilder out = new StringBuilder();
	for (final String part : parts) {
		final Optional<String> ref = backref(part, dict);
		final String spelled = part + (part.charAt(0) == '?' ? "" : "@");
		out.append(ref.orElse(spelled));
	}
	return out.append("@").toString();
}
private static <T> Optional<String> backref(
	final T       x,
	final List<T> dict
) {
	/* Produce a backreference string if x is found in dict
	Returns the index of x in dict as a one-digit string when x is one of
	the first ten entries, and an empty Optional otherwise. An x that is
	not in dict yet is appended to it, but only while dict holds fewer
	than ten entries: a backreference is a single digit 0-9, so an
	eleventh entry could never be referred to and must keep being spelled
	out. Strings starting with '?' (special names) are never matched and
	never recorded.
	*/
	final int maxEntries = 10;

	if (x instanceof String s && s.startsWith("?"))
		return Optional.empty();  // No matching special names

	final int index = dict.indexOf(x);
	if (0 <= index && index < maxEntries)
		return Optional.of(String.valueOf(index));

	// Not referable: remember it if there is still room
	if (index < 0 && dict.size() < maxEntries) dict.add(x);
	return Optional.empty();
}
private String mangleFnAttrs(
	final Function     f,
	final List<String> name
) {
	/* Build the visibility/linkage part of a function's mangled name
	__thiscall functions are treated as public methods ("U" when virtual,
	"Q" otherwise, followed by "A" for non-const); anything else is "S"
	when its name marks it as static and "Y" otherwise.
	*/
	final String convention = f.getCallingConventionName();

	if (convention.equals("__thiscall")) {
		if (isVirtual(f)) return "UA";
		return "QA";
	}

	if (isStatic(name)) return "S";
	return "Y";
}
private boolean isVirtual(final Function f) {
	/* Decide whether a method is virtual from the references to it
	A method is reported virtual if any reference to its entry point comes
	from data whose root symbol is a vtable, or (when the reference is not
	inside data) from a function that is a scalar deleting destructor.
	Both the unmangled and the mangled spellings are recognized.
	*/
	for (final Reference ref : getReferencesTo(f.getEntryPoint())) {
		final Address  from       = ref.getFromAddress();
		final Data     fromData   = getDataContaining(from);
		final Function fromFunc   = getFunctionContaining(from);

		if (fromData != null) {
			// Referenced from data: is that data a vtable?
			final Symbol root = getSymbolAt(
				fromData.getRoot().getAddress()
			);
			if (root == null) continue;

			final String label = root.getName(false);
			if (label.equals("`vftable'") || label.startsWith("??_7"))
				return true;
		} else if (fromFunc != null) {
			// Referenced from code: is it a scalar deleting
			// destructor?
			final String label = fromFunc.getName(false);
			if (
				label.equals("`scalar_deleting_destructor'") ||
				label.startsWith("??_G")
			) return true;
		}
	}

	return false;
}
private static boolean isStatic(final List<String> name) {
	/* Decide from its name whether a function is static
	name holds the components of the qualified name, innermost first.
	Only the new/delete operators (scalar and array forms) with at least
	one qualifier are reported as static; everything else is assumed
	non-static.
	*/
	if (name.size() <= 1) return false;

	final String unqualified = name.get(0);
	return "operator_new"     .equals(unqualified) ||
	       "operator_new[]"   .equals(unqualified) ||
	       "operator_delete"  .equals(unqualified) ||
	       "operator_delete[]".equals(unqualified);
}
private static String mangleFnType(
	final FunctionSignature f,
	final List<String>      dict,
	final String            loc
) throws Exception {
	/* Mangle the type of a function: everything except its name and its
	visibility/linkage
	The result is calling convention + return type + arguments + "Z".
	The pieces are built in that order because the return type and the
	arguments both read and extend dict.
	*/
	final StringBuilder out = new StringBuilder();
	out.append(mangleCallC(f));
	out.append(mangleType(f.getReturnType(), dict, loc));
	out.append(mangleArgs(f, dict, loc));
	out.append("Z");
	return out.toString();
}
private static String mangleCallC(final FunctionSignature f) throws Exception {
	/* Map a function's calling convention to its one-letter code
	Throws if the convention is not one of the four handled here.
	*/
	final String convention = f.getCallingConventionName();
	switch (convention) {
		case "__cdecl":    return "A";
		case "__thiscall": return "E";
		case "__fastcall": return "I";
		case "__stdcall":  return "G";
		default:
			throw new Exception(
				f.getName() + "(): Need to specify calling convention"
			);
	}
}
private static String mangleType(
final DataType t,
final List<String> dict,
final String loc
) throws Exception {
/* Mangle a data type in a function name
All types are assumed to have no CV qualifiers.
*/
if (t == null) throw new Exception (
"A data type at " + loc + " was reported as null. " +
"Ensure that all data types in the code/data to " +
"mangle have been defined."
);
return switch(t) {
case SignedCharDataType _ -> "C";
case UnsignedCharDataType _ -> "E";
case CharDataType _ -> "D"; // Must come after its child types
case ShortDataType _ -> "F";
case UnsignedShortDataType _ -> "G";
case IntegerDataType _ -> "H";
case UnsignedIntegerDataType _ -> "I";
case LongDataType _ -> "J";
case UnsignedLongDataType _ -> "K";
case FloatDataType _ -> "M";
case DoubleDataType _ -> "N";
case LongDoubleDataType _ -> "O";
case Pointer p -> "P" +
(p.getDataType() instanceof FunctionSignature ? "6" : "A") +
mangleType(p.getDataType(), dict, loc);
case Union u -> "T" + mangleIdentifier(u.getName(), false, null, dict);
case Structure s -> "U" + mangleIdentifier(s.getName(), false, null, dict);
case Enum e -> "W4" + mangleIdentifier(e.getName(), false, null, dict);
case VoidDataType _ -> "X";
case LongLongDataType _ -> "_J";
case UnsignedLongLongDataType _ -> "_K";
case BooleanDataType _ -> "_N";
case WideCharDataType _ -> "_W";
case Array a -> "PA" + mangleArrDims(a) + mangleType(arrType(a), dict, loc);
case FunctionSignature f -> mangleFnType(f, dict, "function typedef \"" + f.getName() + "\"");
case TypeDef d -> mangleType(d.getBaseDataType(), dict, "typedef \"" + d.getName() + "\"");
case DefaultDataType _ -> throw new Exception ("Encountered data marked \"undefined\" at " + loc + ". Ensure that all data types in the code/data to mangle have been defined.");
case Undefined _ -> throw new Exception ("Encountered data marked \"undefined\" at " + loc + ". Ensure that all data types in the code/data to mangle have been defined.");
default -> throw new Exception ("Unknown type \"" + t.getClass().getName() + "\" at " + loc);
};
}
private static String mangleArrDims(final Array a) {
	/* Build the mangled description of an array's inner dimensions
	The result is "Y" + number of inner dimensions + each inner dimension
	in turn. The outermost dimension is skipped (it decays to a pointer),
	so an array whose elements are not arrays yields an empty string.
	*/
	final StringBuilder sizes = new StringBuilder();
	int count = 0;

	// Walk down through the nested element types for as long as they
	// are arrays themselves
	for (
		DataType inner = a.getDataType();
		inner instanceof Array sub;
		inner = sub.getDataType()
	) {
		sizes.append(mangleNum(sub.getNumElements()));
		count++;
	}

	if (count == 0) return "";
	return "Y" + mangleNum(count) + sizes;
}
private static String mangleNum(final int n) {
	/* Encode a number in mangled form
	1 through 10 become the single digits 0 through 9. Any other value is
	written as its hexadecimal digits (as produced by
	Integer.toHexString(), so negative values appear as their 32-bit
	two's complement pattern) using the letters A-P for the digits 0-15,
	followed by a terminating @.
	*/
	if (1 <= n && n <= 10) return String.valueOf(n - 1);

	final StringBuilder out = new StringBuilder();
	for (final char digit : Integer.toHexString(n).toCharArray()) {
		// 'A' + digit value: 0-9 map to A-J, a-f map to K-P
		out.append((char)('A' + Character.digit(digit, 16)));
	}
	return out.append("@").toString();
}
private static DataType arrType(final Array a) {
	/* Find the innermost element type of a (possibly nested) array */
	DataType element = a.getDataType();
	while (element instanceof Array nested) {
		element = nested.getDataType();
	}
	return element;
}
private static String mangleArgs(
	final FunctionSignature f,
	final List<String>      dict,
	final String            loc
) throws Exception {
	/* Mangle the argument list of a function
	An empty list is "X". Otherwise each argument's type is mangled in
	turn; a type whose mangling is longer than one character is replaced
	by a backreference when the same type already appeared earlier in
	this list. The list is closed by "Z" for varargs functions and by "@"
	otherwise.
	*/
	final ParameterDefinition[] params = f.getArguments();
	final DataType[] types = new DataType[params.length];
	for (int i = 0; i < params.length; i++) {
		types[i] = params[i].getDataType();
	}

	if (types.length == 0) return "X";

	// A plain loop rather than a stream pipeline: mangleType() throws a
	// checked exception, which lambdas cannot propagate
	final List<DataType> seen = new ArrayList<>();
	final StringBuilder out = new StringBuilder();
	for (final DataType type : types) {
		// Always mangle first so dict is updated and errors surface
		// even when the result ends up replaced by a backreference
		final String mangled = mangleType(type, dict, loc);

		if (mangled.length() == 1) {
			// One-character types are never backreferenced
			out.append(mangled);
		} else {
			out.append(backref(type, seen).orElse(mangled));
		}
	}

	out.append(f.hasVarArgs() ? "Z" : "@");
	return out.toString();
}
private String mangleData(
	final Data   d,
	final String name
) throws Exception {
	/* Build the mangled name for a data symbol
	Strings are named after their contents, data inside the extern_"C"
	namespace gets its unqualified name with a leading underscore,
	vtables get a fixed suffix, and any other data is mangled as
	identifier + "3" + type + "A".
	*/
	// String constants
	if (StringDataInstance.isString(d)) {
		final byte[]  contents = d.getBytes();
		final boolean wide     = d.getDataType() instanceof TerminatedUnicodeDataType;
		return mangleString(contents, wide);
	}

	// C data: last name component with an underscore in front
	if (name.startsWith("extern_\"C\"::")) {
		final String[] pieces = name.split("::");
		final String   last   = pieces[pieces.length - 1];
		return "_" + last;
	}

	final List<String> dict = new ArrayList<>();
	final StringBuilder out = new StringBuilder("?");
	final String ident = mangleIdentifier(name, false, null, dict);
	out.append(ident);

	// vtable
	if (ident.startsWith("?_7")) return out.append("6B@").toString();

	// Everything else
	final String loc = "0x" + d.getAddress().toString();
	out.append("3");
	out.append(mangleType(d.getDataType(), dict, loc));
	out.append("A");
	return out.toString();
}
private static String mangleString(
	final byte[]  s,
	final boolean wide
) {
	/* Produce a mangled symbol name for a string
	s holds the raw bytes of the string and wide tells whether it is made
	of 16-bit characters. The name is "??_C@_" + "1" (wide) or "0"
	(narrow) + length in bytes + JAMCRC checksum + encoded bytes + "@".
	Length and checksum cover everything up to and including the first
	terminator; only the first 32 bytes of a narrow string (64 of a wide
	one, i.e. 32 characters) are encoded into the name.
	*/
	// Make copy terminated at the first terminator because Ghidra
	// sometimes creates strings with trailing nulls. The search runs
	// in whole characters: in a wide string a lone zero byte is just
	// half of an ordinary character, not the end of the string.
	final int unit = wide ? 2 : 1;
	int end = s.length;  // No terminator found: keep everything
	for (int i = 0; i + unit <= s.length; i += unit) {
		if (s[i] == 0 && (!wide || s[i + 1] == 0)) {
			end = i + unit;
			break;
		}
	}
	final byte[] bytes = Arrays.copyOf(s, end);

	// Wide characters are encoded high byte first. This assumes s
	// stores them low byte first (little-endian) -- TODO confirm for
	// big-endian programs. i ^ 1 swaps the two bytes of each pair; a
	// stray unpaired final byte is left where it is.
	final int shown = Math.min(bytes.length, wide ? 64 : 32);
	final StringBuilder text = new StringBuilder();
	for (int i = 0; i < shown; i++) {
		final int src = wide && (i ^ 1) < bytes.length ? i ^ 1 : i;
		text.append(mangleStrChar(Byte.toUnsignedInt(bytes[src])));
	}

	return "??_C@_" + (wide ? "1" : "0") +
	       mangleNum(bytes.length) + mangleNum(jamcrc(bytes)) +
	       text + "@";
}
private static String mangleStrChar(final int c) {
	/* Mangle one byte (0-255) of a string literal
	There are five encodings:
	  - digits, ASCII letters, '_' and '$' stand for themselves;
	  - ten punctuation/whitespace characters become ?0 through ?9;
	  - a letter with the high bit set (0xC1-0xDA, 0xE1-0xFA) becomes '?'
	    followed by that letter without the high bit;
	  - anything else becomes "?$XX" (see escapeStrChar()).
	*/
	return switch (c) {
		case ','  -> "?0";
		case '/'  -> "?1";
		case '\\' -> "?2";
		case ':'  -> "?3";
		case '.'  -> "?4";
		case ' '  -> "?5";
		case '\n' -> "?6";  // Line feed, 0x0A
		case '\t' -> "?7";  // Horizontal tab, 0x09
		case '\'' -> "?8";
		case '-'  -> "?9";
		default   -> {
			final boolean plain =
				('0' <= c && c <= '9') ||
				('A' <= c && c <= 'Z') ||
				('a' <= c && c <= 'z') ||
				c == '_' || c == '$';
			if (plain) yield String.valueOf((char)c);

			// Letter with the high bit set
			final int low = c - 0x80;
			final boolean highLetter =
				('A' <= low && low <= 'Z') ||
				('a' <= low && low <= 'z');
			if (highLetter) yield "?" + (char)low;

			yield "?" + escapeStrChar(c);
		}
	};
}
private static String escapeStrChar(final int c) {
	/* Produce an escaped character for a string literal of the form $XX
	XX is the value of c in hexadecimal, written with the letters A-P for
	the digits 0-15 and padded to at least two digits.
	*/
	final String hex = Integer.toHexString(c);
	final StringBuilder out = new StringBuilder("$");

	// Pad single-digit values with a leading zero ("A")
	if (hex.length() == 1) out.append('A');

	for (final char digit : hex.toCharArray()) {
		out.append((char)('A' + Character.digit(digit, 16)));
	}
	return out.toString();
}
private static int jamcrc(final byte[] buf) {
	/* Compute the JAMCRC checksum of buf
	JAMCRC is the ordinary CRC32 with every bit of the result flipped.
	*/
	final CRC32 crc32 = new CRC32();
	crc32.update(buf, 0, buf.length);

	final int plain = (int)crc32.getValue();
	return ~plain;
}
private void mangleRefs(final Function f) throws Exception {
	/* Mangle every symbol referenced from the body of a function
	Instructions are visited from the first instruction of f until one
	falls outside f's body. Each reference's target symbol is mangled,
	except for spurious references, which are removed instead.
	*/
	Instruction ins = getFirstInstruction(f);
	while (ins != null && f.getBody().contains(ins.getAddress())) {
		for (final Reference ref : ins.getReferencesFrom()) {
			final Symbol target = getSymbolAt(ref.getToAddress());

			// A reference is spurious when it leads to nothing at
			// all, or to undefined data without having been
			// created by the user
			boolean spurious = target == null ||
			                   target.getObject() == null;
			if (!spurious && target.getObject() instanceof Data d) {
				final DataType base = d.getBaseDataType();
				final boolean undefined =
					base instanceof Undefined ||
					base instanceof DefaultDataType;
				spurious = undefined &&
				           ref.getSource() != SourceType.USER_DEFINED;
			}

			if (spurious) {
				removeReference(ref);
				continue;
			}

			mangle(target);
		}

		ins = ins.getNext();
	}
}
}