JSRF-Decompilation/ghidra/ghidra_scripts/MSVC7Mangle.java
KeybadeBlox ccd2cd37a5 Defend against spurious references during mangling
Ghidra sometimes identifies random numbers as pointers, which trips up
our mangler script when it encounters them as they generally have no
type information.  We now use heuristics to ignore such references.
2026-02-13 23:43:35 -05:00

615 lines
22 KiB
Java

// Applies Visual C++ 7.0 name mangling to the symbols within the selected
// address range (or the whole program if nothing is selected).
//
// The implementation is missing a few obscure corners but is otherwise pretty
// complete.
// Keep in mind that certain qualities that aren't visible to Ghidra, like
// visibility or CV qualifiers, will always be assumed to be their most
// permissive form (public, non-const, etc.).
//
// Special symbol names like "operator new" or "scalar deleting destructor"
// are given unique mangling. To properly mangle these, name them as they
// appear in objdiff, replacing spaces with underscores, e.g. "operator_new"
// and "`scalar_deleting_destructor'" (notice the ` and ').
//
// This script can be called in headless mode with the address ranges to mangle
// as arguments, e.g. 0x1234-0x5678. Any symbols referenced by functions being
// mangled will also be mangled in this mode (so that the references are
// correct if the mangling is done in preparation for exporting functions).
//
// @category Symbol
import ghidra.app.script.GhidraScript;
import ghidra.program.flatapi.FlatProgramAPI;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSet;
import ghidra.program.model.data.Array;
import ghidra.program.model.data.BooleanDataType;
import ghidra.program.model.data.CharDataType;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.DefaultDataType;
import ghidra.program.model.data.DoubleDataType;
import ghidra.program.model.data.Enum;
import ghidra.program.model.data.FloatDataType;
import ghidra.program.model.data.IntegerDataType;
import ghidra.program.model.data.LongDataType;
import ghidra.program.model.data.LongDoubleDataType;
import ghidra.program.model.data.LongLongDataType;
import ghidra.program.model.data.ParameterDefinition;
import ghidra.program.model.data.Pointer;
import ghidra.program.model.data.ShortDataType;
import ghidra.program.model.data.SignedCharDataType;
import ghidra.program.model.data.StringDataInstance;
import ghidra.program.model.data.Structure;
import ghidra.program.model.data.TerminatedUnicodeDataType;
import ghidra.program.model.data.TypeDef;
import ghidra.program.model.data.Undefined;
import ghidra.program.model.data.Union;
import ghidra.program.model.data.UnsignedCharDataType;
import ghidra.program.model.data.UnsignedIntegerDataType;
import ghidra.program.model.data.UnsignedLongDataType;
import ghidra.program.model.data.UnsignedLongLongDataType;
import ghidra.program.model.data.UnsignedShortDataType;
import ghidra.program.model.data.VoidDataType;
import ghidra.program.model.data.WideCharDataType;
import ghidra.program.model.listing.Data;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.FunctionSignature;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.symbol.Namespace;
import ghidra.program.model.symbol.Reference;
import ghidra.program.model.symbol.SourceType;
import ghidra.program.model.symbol.Symbol;
import ghidra.program.model.symbol.SymbolIterator;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.zip.CRC32;
public class MSVC7Mangle extends GhidraScript{
@Override
public void run() throws Exception {
    /* Script entry point: mangle every primary symbol in the selection
       When running headless, the selection is first built from the script
       arguments, each of which must look like START-END (e.g.
       0x1234-0x5678).  In headless mode, everything referenced from the
       body of each mangled function is mangled as well.
       Throws IllegalArgumentException if a headless argument is not a
       well-formed address range.
    */
    final boolean headless = isRunningHeadless();

    // Get selected ranges from arguments if invoked headless
    if (headless) {
        final AddressSet addr = new AddressSet();
        for (final String arg : getScriptArgs()) {
            final String[] range = arg.split("-");
            if (range.length != 2) throw new IllegalArgumentException(
                "Invalid address range \"" + arg +
                "\": expected START-END, e.g. 0x1234-0x5678"
            );

            // Reject unparsable bounds here so the failure names the
            // offending argument
            final Address start = toAddr(range[0]);
            final Address end   = toAddr(range[1]);
            if (start == null || end == null) throw new IllegalArgumentException(
                "Invalid address range \"" + arg +
                "\": could not parse address"
            );

            addr.add(start, end);
        }
        setCurrentSelection(addr);
    }

    final SymbolIterator iter = currentProgram.getSymbolTable()
        .getPrimarySymbolIterator(currentSelection, true);
    while (iter.hasNext() && !monitor.isCancelled()) {
        final Symbol s = iter.next();
        mangle(s);

        // Also mangle everything referenced inside functions
        // if headless
        if (headless && s.getObject() instanceof Function f) {
            mangleRefs(f);
        }
    }
}
private void mangle(final Symbol s) throws Exception {
    /* Set the given symbol's name to its mangled version
       Symbols whose name already starts with '?' (already mangled) or
       with "switchD_" (jump tables) are left alone, as are symbols that
       are backed by neither a function nor data (including symbols with
       no backing object at all).  Renamed symbols are moved to the global
       namespace.
    */
    // Skip if already mangled; skip jump tables
    final String name = s.getName(true);
    if (name.startsWith("?") || name.startsWith("switchD_")) return;

    // Get mangled name; "case null" keeps a symbol with no backing
    // object from blowing up the pattern switch
    final Object target = s.getObject();
    final String mangled = switch (target) {
        case Function f    -> mangleFn(f);
        case Data d        -> mangleData(d, name);
        case null, default -> null;
    };
    if (mangled == null) return;

    // Apply new name
    s.setName(mangled, SourceType.USER_DEFINED);
    s.setNamespace(currentProgram.getGlobalNamespace());

    // Also apply to target function if the symbol is a thunk
    if (target instanceof Function f) {
        final Function thunked = f.getThunkedFunction(true);
        if (thunked != null) {
            final Symbol ts = thunked.getSymbol();
            ts.setName(mangled, SourceType.USER_DEFINED);
            ts.setNamespace(currentProgram.getGlobalNamespace());
        }
    }
}
private String mangleFn(final Function f) throws Exception {
    /* Build the mangled name for a function
       "main" becomes "_main" and names starting with "__" are returned
       untouched.  A function counts as a method when it is __thiscall and
       its name has at least one qualifier; constructors and destructors
       of such methods get their own fixed layouts.
    */
    final String qualified = f.getName(true);

    // Special case for main()
    if (qualified.equals("main")) return "_main";

    // Special symbols like intrinsics aren't mangled
    if (qualified.startsWith("__")) return qualified;

    final ArrayList<String> dict = new ArrayList<>();

    // Name components, innermost first
    final List<String> reversed = Arrays.asList(qualified.split("::"));
    Collections.reverse(reversed);

    final boolean isMethod =
        f.getCallingConventionName().equals("__thiscall") &&
        reversed.size() >= 2;

    final String prefix =
        "?" + mangleIdentifier(qualified, isMethod, f.getReturnType(), dict);
    final String loc = qualified + "()";

    // Special methods with unique formats
    if (isMethod) {
        final String self = reversed.get(0);
        final String cls  = reversed.get(1);

        if (self.equals(cls)) {
            // Constructor
            return prefix + "QAE@" + mangleArgs(f.getSignature(true), dict, loc) + "Z";
        }
        if (self.equals("~" + cls)) {
            // Destructor
            return prefix + (isVirtual(f) ? "UAE" : "QAE") + "@XZ";
        }
    }

    final String attrs = mangleFnAttrs(f, reversed);
    return prefix + attrs + mangleFnType(f.getSignature(true), dict, loc);
}
private static String mangleIdentifier(
    final String       ident,
    final boolean      isMethod,
    final DataType     retType, // Function return type, nullable
    final List<String> dict
) {
    /* Mangle a fully qualified identifier
       An identifier such as X::Y::Z is emitted innermost name first, each
       name followed by '@', with one more '@' closing the identifier:
       Z@Y@X@@.  Names seen before are looked up in dict and replaced by a
       backreference, so X::Y::X becomes X@Y@0@ when dict starts out empty.
       Constructors, destructors, operators and a few other special symbols
       are replaced by their dedicated '?'-prefixed codes.
    */
    // Break up names into their mangled order
    final List<String> names = Arrays.asList(ident.split("::"));
    Collections.reverse(names);

    // Non-method special names
    // (definitely some cases missing from special names, but
    // they're probably not too likely to encounter in Ghidra)
    final String raw = names.get(0);
    final String free = switch (raw) {
        case "operator_new"      -> "?2";
        case "operator_delete"   -> "?3";
        case "`vftable'"         -> "?_7";
        case "operator_new[]"    -> "?_U";
        case "operator_delete[]" -> "?_V";
        default                  -> raw;
    };
    names.set(0, free);

    // Method special names
    if (isMethod) {
        final String cls     = names.get(1);
        final String current = names.get(0);
        final String member = switch (current) {
            case "operator_="   -> "?4";
            case "operator_>>"  -> "?5";
            case "operator_<<"  -> "?6";
            case "operator_!"   -> "?7";
            case "operator_=="  -> "?8";
            case "operator_!="  -> "?9";
            case "operator_[]"  -> "?A";
            case "operator_->"  -> "?C";
            case "operator_*"   -> "?D";
            case "operator_++"  -> "?E";
            case "operator_--"  -> "?F";
            case "operator_-"   -> "?G";
            case "operator_+"   -> "?H";
            case "operator_&"   -> "?I";
            case "operator_->*" -> "?J";
            case "operator_/"   -> "?K";
            case "operator_%"   -> "?L";
            case "operator_<"   -> "?M";
            case "operator_<="  -> "?N";
            case "operator_>"   -> "?O";
            case "operator_>="  -> "?P";
            case "operator_,"   -> "?Q";
            case "operator_()"  -> "?R";
            case "operator_~"   -> "?S";
            case "operator_^"   -> "?T";
            case "operator_|"   -> "?U";
            case "operator_&&"  -> "?V";
            case "operator_||"  -> "?W";
            case "operator_*="  -> "?X";
            case "operator_+="  -> "?Y";
            case "operator_-="  -> "?Z";
            case "operator_/="  -> "?_0";
            case "operator_%="  -> "?_1";
            case "operator_>>=" -> "?_2";
            case "operator_<<=" -> "?_3";
            case "operator_&="  -> "?_4";
            case "operator_|="  -> "?_5";
            case "operator_^="  -> "?_6";
            case "`scalar_deleting_destructor'" -> "?_G";
            default -> {
                // Constructor
                if (current.equals(cls)) yield "?0";
                // Destructor
                if (current.equals("~" + cls)) yield "?1";
                // Feeble attempt at user-defined conversions
                if (
                    retType != null &&
                    current.equals("operator_" + retType.getName().replace(" ", ""))
                ) yield "?B";
                yield current;
            }
        };
        names.set(0, member);
    }

    // Apply any backreferences and combine together
    // (special names don't get a @ terminator)
    final StringBuilder out = new StringBuilder();
    for (final String n : names) {
        final Optional<String> ref = backref(n, dict);
        final String literal = n + (n.charAt(0) == '?' ? "" : "@");
        out.append(ref.orElse(literal));
    }
    return out.append("@").toString();
}
private static <T> Optional<String> backref(
    final T       x,
    final List<T> dict
) {
    /* Produce a backreference string if x is found in dict
       Returns the index of x in dict as a single decimal digit when x has
       been seen before.  Otherwise returns empty and remembers x in dict,
       as long as dict has room: backreference tables hold at most 10
       entries, since a reference is exactly one digit (0-9).  Strings
       starting with '?' (special names) are never stored or matched.
    */
    final int maxEntries = 10;

    // No matching special names
    if (x instanceof String s && s.startsWith("?")) return Optional.empty();

    final int index = dict.indexOf(x);
    if (index < 0) {
        // First sighting: remember it only while the table has room
        if (dict.size() < maxEntries) dict.add(x);
        return Optional.empty();
    }

    // Entries past the tenth slot (only possible if the caller handed us
    // an oversized list) cannot be expressed as a single digit
    return index < maxEntries
        ? Optional.of(Integer.toString(index))
        : Optional.empty();
}
private String mangleFnAttrs(
    final Function     f,
    final List<String> name
) {
    /* Produce a string for a function's visibility and linkage
       __thiscall functions yield "UA" when virtual and "QA" otherwise
       (the "A" marks a non-const method); everything else yields "S" when
       its name marks it as static and "Y" otherwise.
    */
    if (f.getCallingConventionName().equals("__thiscall")) {
        return isVirtual(f) ? "UA" : "QA";
    }
    return isStatic(name) ? "S" : "Y";
}
private boolean isVirtual(final Function f) {
    /* Determine whether a method is virtual
       A method is treated as virtual when something referring to its
       entry point is either data rooted at a vtable symbol or code inside
       a scalar deleting destructor (under either the plain or the mangled
       name).
    */
    for (final Reference ref : getReferencesTo(f.getEntryPoint())) {
        final Address  from   = ref.getFromAddress();
        final Data     data   = getDataContaining(from);
        final Function caller = getFunctionContaining(from);

        if (data != null) {
            // Referenced from data: check the outermost containing datum
            final Symbol root = getSymbolAt(data.getRoot().getAddress());
            if (root == null) continue;

            final String rootName = root.getName(false);
            if (rootName.equals("`vftable'") || rootName.startsWith("??_7")) {
                return true;
            }
        } else if (caller != null) {
            // Referenced from code: check the containing function
            final String callerName = caller.getName(false);
            if (
                callerName.equals("`scalar_deleting_destructor'") ||
                callerName.startsWith("??_G")
            ) {
                return true;
            }
        }
    }
    return false;
}
private static boolean isStatic(final List<String> name) {
    /* Determines whether a function is static from its name
       name holds the components of the qualified name, innermost first.
       Only qualified allocation/deallocation operators count as static;
       everything else is assumed non-static.
    */
    if (name.size() <= 1) return false;

    final String unqualified = name.get(0);
    return "operator_new".equals(unqualified)
        || "operator_new[]".equals(unqualified)
        || "operator_delete".equals(unqualified)
        || "operator_delete[]".equals(unqualified);
}
private static String mangleFnType(
    final FunctionSignature f,
    final List<String>      dict,
    final String            loc
) throws Exception {
    /* Mangle everything in f but its name and visibility/linkage
       Output is calling convention, return type, arguments, then a
       closing "Z".  loc is only used to describe where a problem was
       found in error messages.
    */
    final String callConv = mangleCallC(f);
    final String ret      = mangleType(f.getReturnType(), dict, loc);
    final String args     = mangleArgs(f, dict, loc);
    return callConv + ret + args + "Z";
}
private static String mangleCallC(final FunctionSignature f) throws Exception {
    /* Produce a string for a function's calling convention
       Throws if the convention is not one of the four handled here, which
       typically means none has been assigned to the function yet.
    */
    final String convention = f.getCallingConventionName();

    if (convention.equals("__cdecl"))    return "A";
    if (convention.equals("__thiscall")) return "E";
    if (convention.equals("__fastcall")) return "I";
    if (convention.equals("__stdcall"))  return "G";

    throw new Exception(
        f.getName() +
        "(): Need to specify calling convention"
    );
}
private static String mangleType(
final DataType t,
final List<String> dict,
final String loc
) throws Exception {
/* Mangle a data type in a function name
All types are assumed to have no CV qualifiers.
*/
if (t == null) throw new Exception (
"A data type at " + loc + " was reported as null. " +
"Ensure that all data types in the code/data to " +
"mangle have been defined."
);
return switch(t) {
case SignedCharDataType _ -> "C";
case UnsignedCharDataType _ -> "E";
case CharDataType _ -> "D"; // Must come after its child types
case ShortDataType _ -> "F";
case UnsignedShortDataType _ -> "G";
case IntegerDataType _ -> "H";
case UnsignedIntegerDataType _ -> "I";
case LongDataType _ -> "J";
case UnsignedLongDataType _ -> "K";
case FloatDataType _ -> "M";
case DoubleDataType _ -> "N";
case LongDoubleDataType _ -> "O";
case Pointer p -> "P" +
(p.getDataType() instanceof FunctionSignature ? "6" : "A") +
mangleType(p.getDataType(), dict, loc);
case Union u -> "T" + mangleIdentifier(u.getName(), false, null, dict);
case Structure s -> "U" + mangleIdentifier(s.getName(), false, null, dict);
case Enum e -> "W4" + mangleIdentifier(e.getName(), false, null, dict);
case VoidDataType _ -> "X";
case LongLongDataType _ -> "_J";
case UnsignedLongLongDataType _ -> "_K";
case BooleanDataType _ -> "_N";
case WideCharDataType _ -> "_W";
case Array a -> "PA" + mangleArrDims(a) + mangleType(arrType(a), dict, loc);
case FunctionSignature f -> mangleFnType(f, dict, "function typedef \"" + f.getName() + "\"");
case TypeDef d -> mangleType(d.getBaseDataType(), dict, "typedef \"" + d.getName() + "\"");
case DefaultDataType _ -> throw new Exception ("Encountered data marked \"undefined\" at " + loc + ". Ensure that all data types in the code/data to mangle have been defined.");
case Undefined _ -> throw new Exception ("Encountered data marked \"undefined\" at " + loc + ". Ensure that all data types in the code/data to mangle have been defined.");
default -> throw new Exception ("Unknown type \"" + t.getClass().getName() + "\" at " + loc);
};
}
private static String mangleArrDims(final Array a) {
    /* Produce a mangled string describing the dimensions of an array
       Format is Y + # of dimensions + dimension 1 + dimension 2 + ...
       The outermost dimension decays to a pointer, so it is skipped; a
       1D array therefore produces an empty string.
    */
    final StringBuilder sizes = new StringBuilder();
    int count = 0;

    // Walk inwards through the nested array types, skipping a itself
    for (
        DataType inner = a.getDataType();
        inner instanceof Array sub;
        inner = sub.getDataType()
    ) {
        sizes.append(mangleNum(sub.getNumElements()));
        count++;
    }

    if (count == 0) return "";
    return "Y" + mangleNum(count) + sizes;
}
private static String mangleNum(final int n) {
    /* Encode a numeric value into mangled form
       Values 1-10 become the single digits 0-9.  Any other value is
       written as its hex digits (as given by Integer.toHexString, so
       negative values use their unsigned 32-bit form) with each digit
       0-15 replaced by the letters A-P, followed by a '@'.
    */
    if (0 < n && n <= 10) return String.valueOf(n - 1);

    final StringBuilder out = new StringBuilder();
    for (final char digit : Integer.toHexString(n).toCharArray()) {
        out.append((char) ('A' + Character.digit(digit, 16)));
    }
    return out.append('@').toString();
}
private static DataType arrType(final Array a) {
    /* Get the scalar type of a (possibly multidimensional) array
       Peels off nested array types until a non-array element type is
       reached.
    */
    DataType elem = a.getDataType();
    while (elem instanceof Array nested) {
        elem = nested.getDataType();
    }
    return elem;
}
private static String mangleArgs(
    final FunctionSignature f,
    final List<String>      dict,
    final String            loc
) throws Exception {
    /* Mangle the arguments for a function
       An empty argument list is "X".  Otherwise each argument type is
       mangled in order and the list is closed with "Z" for varargs
       functions or "@" for everything else.  Argument types whose
       mangling is longer than one character are tracked in their own
       backreference table, separate from the name dictionary.
    */
    final ParameterDefinition[] params = f.getArguments();
    if (params.length == 0) return "X";

    // A plain loop rather than a stream pipeline: mangleType() throws a
    // checked exception, which lambdas cannot propagate
    final List<DataType> seen = new ArrayList<>();
    final StringBuilder  out  = new StringBuilder();
    for (final ParameterDefinition param : params) {
        final DataType type    = param.getDataType();
        final String   mangled = mangleType(type, dict, loc);

        if (mangled.length() == 1) {
            // Single-character types are never backreferenced
            out.append(mangled);
        } else {
            out.append(backref(type, seen).orElse(mangled));
        }
    }

    return out.append(f.hasVarArgs() ? "Z" : "@").toString();
}
private String mangleData(
    final Data   d,
    final String name
) throws Exception {
    /* Produce the mangled name for a data symbol
       Strings are named after their contents, vtables get a fixed
       suffix, and all other data is mangled from its name and data type.
    */
    // String constants
    if (StringDataInstance.isString(d)) {
        final byte[]  raw  = d.getBytes();
        final boolean wide = d.getDataType() instanceof TerminatedUnicodeDataType;
        return mangleString(raw, wide);
    }

    // Other data
    final ArrayList<String> dict   = new ArrayList<>();
    final String            ident  = mangleIdentifier(name, false, null, dict);
    final String            prefix = "?" + ident;

    // vtable
    if (ident.startsWith("?_7")) return prefix + "6B@";

    final String loc = "0x" + d.getAddress().toString();
    return prefix + "3" + mangleType(d.getDataType(), dict, loc) + "A";
}
private static String mangleString(
    final byte[]  s,
    final boolean wide
) {
    /* Produce a mangled symbol name for a string
       s holds the raw bytes of the string and wide tells whether it is
       made of 16-bit characters.  The name consists of the "??_C@_"
       prefix, "1" for wide or "0" for narrow strings, the byte length
       (including terminator), the JAMCRC of those bytes, and finally the
       mangled form of the leading bytes: up to 32 bytes for narrow
       strings or 64 bytes (32 characters) for wide ones.
    */
    final int width = wide ? 2 : 1;

    // Make copy terminated at the first null character because Ghidra
    // sometimes creates strings with trailing nulls.  For wide strings
    // the terminator is a whole zero 16-bit unit; a lone zero byte is
    // just the high half of an ordinary character.
    int length = s.length;
    for (int i = 0; i + width <= s.length; i += width) {
        if (s[i] == 0 && (!wide || s[i + 1] == 0)) {
            length = i + width;
            break;
        }
    }
    final byte[] bytes = Arrays.copyOf(s, length);

    // Wide characters are emitted high byte first.  Assumes the bytes
    // are stored little-endian, so the two bytes of each character are
    // swapped -- TODO confirm for big-endian programs.
    final int shown = Math.min(bytes.length, wide ? 64 : 32);
    final StringBuilder chars = new StringBuilder();
    for (int i = 0; i < shown; i++) {
        final int src = wide && (i ^ 1) < bytes.length ? i ^ 1 : i;
        chars.append(mangleStrChar(Byte.toUnsignedInt(bytes[src])));
    }

    return "??_C@_" + (wide ? "1" : "0") +
        mangleNum(bytes.length) + mangleNum(jamcrc(bytes)) +
        chars + "@";
}
private static String mangleStrChar(final int c) {
    /* Mangle one byte (0-255) of a string literal
       There are five encodings:
         - letters, digits, '_' and '$' are emitted as they are
         - ?0-?9 for the characters , / \ : . space newline tab ' -
         - ?A-?Z and ?a-?z for a letter with the high bit set
         - ?$XX (two letters A-P, one per hex digit) for everything else
    */
    return switch (c) {
        case ','  -> "?0";
        case '/'  -> "?1";
        case '\\' -> "?2";
        case ':'  -> "?3";
        case '.'  -> "?4";
        case ' '  -> "?5";
        case '\n' -> "?6";
        case '\t' -> "?7";
        case '\'' -> "?8";
        case '-'  -> "?9";
        default   -> {
            final boolean plain =
                ('0' <= c && c <= '9') ||
                ('A' <= c && c <= 'Z') ||
                ('a' <= c && c <= 'z') ||
                c == '_' || c == '$';
            if (plain) yield String.valueOf((char) c);

            // Letter with the high bit set: emit the letter itself
            final int low = c & 0x7F;
            final boolean highLetter =
                0x80 <= c && c <= 0xFF &&
                (('A' <= low && low <= 'Z') || ('a' <= low && low <= 'z'));
            if (highLetter) yield "?" + (char) low;

            yield "?" + escapeStrChar(c);
        }
    };
}
private static String escapeStrChar(final int c) {
    /* Produce an escaped character for a string literal of the form $XX
       Each hex digit of c is written as a letter A-P (0 -> A, 15 -> P),
       left-padded with "A" so that single-digit values still get two
       letters.
    */
    final String hex = Integer.toHexString(c);
    final StringBuilder out = new StringBuilder("$");
    if (hex.length() == 1) out.append('A');
    for (final char digit : hex.toCharArray()) {
        out.append((char) ('A' + Character.digit(digit, 16)));
    }
    return out.toString();
}
private static int jamcrc(final byte[] buf) {
    /* Calculate a JAMCRC checksum (inverted CRC32)
       Computes the standard CRC32 of buf and flips every bit of the
       32-bit result.
    */
    final CRC32 checksum = new CRC32();
    checksum.update(buf, 0, buf.length);
    return ~(int) checksum.getValue();
}
private void mangleRefs(final Function f) throws Exception {
    /* Mangle all symbols referenced in the body of a function
       Every instruction inside the function's body is visited, including
       those in later chunks when the body is not one contiguous range.
       References that look spurious (no symbol, no backing object, or
       untyped data behind a reference that wasn't user-defined) are
       removed instead of mangled.
    */
    // Iterate over the body's address set rather than walking getNext()
    // from the first instruction, which would stop at the first gap in a
    // non-contiguous body
    for (final Instruction ins : currentProgram.getListing().getInstructions(f.getBody(), true)) {
        for (final Reference ref : ins.getReferencesFrom()) {
            final Symbol symbol = getSymbolAt(ref.getToAddress());
            final Object target = symbol == null ? null : symbol.getObject();

            // Ghidra marks untyped data with one of these two types
            final boolean untypedData =
                target instanceof Data d &&
                (
                    d.getBaseDataType() instanceof Undefined ||
                    d.getBaseDataType() instanceof DefaultDataType
                );

            // Guard against spurious references to nonexisting things
            if (
                target == null ||
                (untypedData && ref.getSource() != SourceType.USER_DEFINED)
            ) {
                removeReference(ref);
                continue;
            }

            mangle(symbol);
        }
    }
}
}