Complete mangler script

Now to hook it up to the delinker script.

(I totally accidentally said "demangling" in the last commit)
This commit is contained in:
KeybadeBlox 2026-02-11 00:46:33 -05:00
parent febf9fc172
commit e9b0c3c6bc

View file

@ -1,7 +1,7 @@
// Applies Visual C++ 7.0 name mangling to the symbols within the selected
// address range (or the whole program if nothing is selected).
//
// The implementation is missing a few obscure corners but mostly complete.
// The implementation is missing a few obscure corners but pretty complete.
// Keep in mind that certain qualities that aren't visible to Ghidra, like
// visibility or CV qualifiers, will always be assumed to be their most
// permissive form (public, non-const, etc.).
@ -18,9 +18,11 @@ import ghidra.program.model.address.Address;
import ghidra.program.model.listing.Data;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.FunctionSignature;
import ghidra.program.model.data.Array;
import ghidra.program.model.data.BooleanDataType;
import ghidra.program.model.data.CharDataType;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.DefaultDataType;
import ghidra.program.model.data.DoubleDataType;
import ghidra.program.model.data.Enum;
import ghidra.program.model.data.FloatDataType;
@ -32,7 +34,9 @@ import ghidra.program.model.data.ParameterDefinition;
import ghidra.program.model.data.Pointer;
import ghidra.program.model.data.ShortDataType;
import ghidra.program.model.data.SignedCharDataType;
import ghidra.program.model.data.StringDataInstance;
import ghidra.program.model.data.Structure;
import ghidra.program.model.data.TerminatedUnicodeDataType;
import ghidra.program.model.data.TypeDef;
import ghidra.program.model.data.Union;
import ghidra.program.model.data.UnsignedCharDataType;
@ -54,6 +58,8 @@ import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.zip.CRC32;
public class MSVC7Mangle extends GhidraScript{
@ -71,7 +77,7 @@ public class MSVC7Mangle extends GhidraScript{
// Get mangled name
final String mangled = switch (s.getObject()) {
case Function f -> mangleFn (f);
case Data _ -> mangleData(s);
case Data d -> mangleData(d, s.getName(true));
default -> null;
};
@ -84,18 +90,21 @@ public class MSVC7Mangle extends GhidraScript{
}
private String mangleFn(final Function f) throws Exception {
/* Set the function's name to its mangled version */
/* Generate a mangled name for a function */
// Special case for main()
if (f.getName(true).equals("main")) return "_main";
final ArrayList<String> dict = new ArrayList<>();
final List<String> nameParts = Arrays.asList(f.getName(true).split("::"));
Collections.reverse(nameParts);
final String unqualified = nameParts.get(0);
final boolean isMethod = f.getCallingConventionName().equals("__thiscall") &&
nameParts.size() >= 2;
final String name = mangleIdentifier(f.getName(true), isMethod, f.getReturnType(), dict);
// Special methods with unique formats
if (isMethod) {
final String unqualified = nameParts.get(0);
final String clsName = nameParts.get(1);
if (unqualified.equals( clsName)) { // Constructor
@ -120,16 +129,21 @@ public class MSVC7Mangle extends GhidraScript{
terminated by '@', and the whole identifier is terminated by another
'@', e.g. Z@Y@X@@. Previously encountered names are kept in a
dictionary to turn repeated names into backreferences, e.g. X::Y::X
would become X@Y@0@ (if starting with an empty dictionary).
would become X@Y@0@ (if starting with an empty dictionary). Some
special symbols like constructors and operators also get special case
names.
*/
// Break up names into their mangled order
final List<String> parts = Arrays.asList(ident.split("::"));
Collections.reverse(parts);
// Non-method special names
// (some special names are definitely missing here, but they're
// unlikely to be encountered in Ghidra)
parts.set(0, switch (parts.get(0)) {
case "operator_new" -> "?2";
case "operator_delete" -> "?3";
case "`vftable'" -> "?_7";
case "operator_new[]" -> "?_U";
case "operator_delete[]" -> "?_V";
default -> parts.get(0);
@ -139,7 +153,6 @@ public class MSVC7Mangle extends GhidraScript{
if (isMethod) {
final String clsName = parts.get(1);
parts.set(0, switch (parts.get(0)) {
// Definitely some cases missing
case "operator_=" -> "?4";
case "operator_>>" -> "?5";
case "operator_<<" -> "?6";
@ -193,6 +206,7 @@ public class MSVC7Mangle extends GhidraScript{
}
// Apply any backreferences and combine together
// (special names don't get a @ terminator)
return parts.stream()
.map(s -> backref(s, dict).orElse(s + (s.charAt(0) == '?' ? "" : "@")))
.reduce("", String::concat) + "@";
@ -219,7 +233,7 @@ public class MSVC7Mangle extends GhidraScript{
/* Produce a string for a function's visibility and linkage */
return switch (f.getCallingConventionName()) {
case "__thiscall" -> isVirtual(f) ? "UA" : "QA"; // "A" for non-const method
default -> isStatic(name) ? "S" : "Y";
default -> isStatic(name) ? "S" : "Y" ;
};
}
@ -289,6 +303,12 @@ public class MSVC7Mangle extends GhidraScript{
/* Mangle a data type in a function name
All types are assumed to have no CV qualifiers.
*/
if (t == null) throw new Exception (
"A data type was reported as null. Ensure that all " +
"data types in the demangled code/data have been " +
"defined."
);
return switch(t) {
case SignedCharDataType _ -> "C";
case UnsignedCharDataType _ -> "E";
@ -313,12 +333,56 @@ public class MSVC7Mangle extends GhidraScript{
case UnsignedLongLongDataType _ -> "_K";
case BooleanDataType _ -> "_N";
case WideCharDataType _ -> "_W";
case Array a -> "PA" + mangleArrDims(a) + mangleType(arrType(a), dict);
case FunctionSignature f -> mangleFnType(f, dict);
case TypeDef d -> mangleType(d.getBaseDataType(), dict);
default -> throw new Exception ("Unknown type \"" + t.getName() + "\"");
case DefaultDataType _ -> throw new Exception ("Encountered data marked \"undefined\". All data types must be defined.");
default -> throw new Exception ("Unknown type \"" + t.getClass().getName() + "\"");
};
}
private static String mangleArrDims(final Array a) {
    /* Build the mangled dimension list of an array type
    The result is "Y", the mangled count of listed dimensions, then each
    listed dimension in mangled form, outermost first. The dimension of
    `a` itself is never listed (it decays to a pointer), so an array whose
    element type is not itself an array yields an empty string.
    */
    final StringBuilder listed = new StringBuilder();
    int count = 0;

    // Walk inward through the nested array types, skipping `a` itself
    for (DataType elem = a.getDataType(); elem instanceof Array sub; elem = sub.getDataType()) {
        listed.append(MSVC7Mangle.mangleNum(sub.getNumElements()));
        count++;
    }

    if (count == 0) {
        return "";
    }
    return "Y" + MSVC7Mangle.mangleNum(count) + listed;
}
private static String mangleNum(final int n) {
    /* Encode a number in mangled form
    1 through 10 become the single digits 0 through 9. Every other value
    is written as its hexadecimal digits with A-P standing in for 0-f,
    followed by a terminating @. Integer.toHexString() is used for the
    digits, so negative values come out as their unsigned 32-bit form.
    */
    if (1 <= n && n <= 10) {
        return String.valueOf(n - 1);
    }

    final String hex = Integer.toHexString(n);
    final StringBuilder out = new StringBuilder(hex.length() + 1);
    for (int i = 0; i < hex.length(); i++) {
        final char digit = hex.charAt(i);
        if ('0' <= digit && digit <= '9') {
            out.append((char) (digit + 17));    // '0'..'9' -> 'A'..'J'
        } else if ('a' <= digit && digit <= 'f') {
            out.append((char) (digit - 22));    // 'a'..'f' -> 'K'..'P'
        } else {
            out.append('#');                    // not produced by toHexString()
        }
    }
    return out.append('@').toString();
}
private static DataType arrType(final Array a) {
    /* Find the innermost element type of an array
    Nested array types are unwrapped until a non-array type is reached, so
    a multidimensional array yields the type of its individual elements.
    */
    DataType elem = a.getDataType();
    while (elem instanceof Array inner) {
        elem = inner.getDataType();
    }
    return elem;
}
private static String mangleArgs(
final FunctionSignature f,
final List<String> dict
@ -347,13 +411,99 @@ public class MSVC7Mangle extends GhidraScript{
String mangled = "";
for (int i = 0; i < args.length; i++)
mangled += backref(args[i], argDict).orElse(mangleType(args[i], dict));
return mangled + "@";
return mangled + (f.hasVarArgs() ? "Z" : "@");
}
}
private static String mangleData(final Symbol s) throws Exception {
private String mangleData(
final Data d,
final String name
) throws Exception {
/* Generate a mangled name for a data symbol
d is the data item being named and name is the "::"-separated name to
mangle (the caller passes the symbol's namespace-qualified name).
String constants are named from their contents by mangleString();
everything else is named from the identifier and, except for vtables,
the data type. Throws if the identifier or the data type can't be
mangled.
*/
// NOTE(review): this throw looks like the old placeholder body left
// over from before the implementation below was written -- confirm it is
// not present in the real file, since it would make the rest unreachable
throw new Exception("TODO: data symbol \"" + s.getName(true) + "\"");
// String constants
// (the flag passed to mangleString() marks the string as wide when the
// data type is a terminated Unicode string)
if (StringDataInstance.isString(d))
return mangleString(
d.getBytes(),
d.getDataType() instanceof TerminatedUnicodeDataType
);
// Other data
// (no method context and no return type apply to a data symbol)
final ArrayList<String> dict = new ArrayList<>();
final String ident = mangleIdentifier(name, false, null, dict);
// vtable
// (mangleIdentifier() turns `vftable' into the special name ?_7; these
// get a fixed suffix instead of a type)
if (ident.startsWith("?_7")) return "?" + ident + "6B@";
// Ordinary data: identifier, "3", the mangled type, then "A"
// (presumably the storage class code and the "no CV qualifiers" marker
// -- confirm against the mangling scheme)
return "?" + ident + "3" + mangleType(d.getDataType(), dict) +
"A";
}
private static String mangleString(
    final byte[] s,
    final boolean wide
) {
    /* Produce a mangled symbol name for a string constant
    s holds the raw bytes of the string as they sit in memory and wide
    says whether it is made of 2-byte characters. The name is made of:
      ??_C@_  the string constant prefix
      0 or 1  narrow or wide
      the length in bytes, terminator included
      the JAMCRC of those bytes
      the first 32 characters (32 bytes narrow, 64 bytes wide), each byte
      mangled on its own, with the bytes of a wide character emitted high
      byte first
      @       terminator
    */

    // Cut the data off after its first null character, because Ghidra
    // sometimes creates strings with trailing nulls. The scan moves a
    // whole character at a time: in a wide string the high byte of every
    // ASCII character is zero, so a byte-wise scan would stop inside the
    // first character.
    final int unit = wide ? 2 : 1;
    int len = s.length;    // no terminator found: keep everything
    for (int i = 0; i + unit <= s.length; i += unit) {
        if (s[i] == 0 && (!wide || s[i + 1] == 0)) {
            len = i + unit;
            break;
        }
    }
    final byte[] bytes = Arrays.copyOf(s, len);

    // Only the first 32 characters make it into the name
    final int limit = Math.min(bytes.length, wide ? 64 : 32);
    final StringBuilder encoded = new StringBuilder();
    for (int i = 0; i < limit; i++) {
        // Swap each pair of bytes of a wide string so that the high byte
        // comes out first (a stray trailing byte is left where it is)
        final int src = wide && (i ^ 1) < bytes.length ? i ^ 1 : i;
        encoded.append(MSVC7Mangle.mangleStrChar(Byte.toUnsignedInt(bytes[src])));
    }

    // The length and checksum always describe the bytes in memory order
    return "??_C@_" + (wide ? "1" : "0") +
        mangleNum(bytes.length) + mangleNum(jamcrc(bytes)) +
        encoded + "@";
}
private static String mangleStrChar(final int c) {
    /* Mangle one byte of a string constant
    c is the byte as an unsigned value (0-255). There are five forms:
      - digits, ASCII letters, _ and $ stand for themselves
      - ?0 to ?9 for the characters , / \ : . space newline tab ' -
      - ?a to ?z for bytes 0xE1-0xFA (a lowercase letter with the top bit set)
      - ?A to ?Z for bytes 0xC1-0xDA (an uppercase letter with the top bit set)
      - ?$XX for everything else (see escapeStrChar())
    */
    return switch (c) {
        case ','  -> "?0";
        case '/'  -> "?1";
        case '\\' -> "?2";
        case ':'  -> "?3";
        case '.'  -> "?4";
        case ' '  -> "?5";
        case '\n' -> "?6";
        case '\t' -> "?7";
        case '\'' -> "?8";
        case '-'  -> "?9";
        default -> {
            // Letters with the top bit set are written as ? + the letter
            final int low = c & 0x7F;
            final boolean lowIsLetter =
                ('A' <= low && low <= 'Z') || ('a' <= low && low <= 'z');
            if (0x80 <= c && c <= 0xFF && lowIsLetter) {
                yield "?" + (char) low;
            }

            // Characters that are allowed to appear as themselves
            final boolean literal =
                ('0' <= c && c <= '9') ||
                ('A' <= c && c <= 'Z') ||
                ('a' <= c && c <= 'z') ||
                c == '_' || c == '$';
            yield literal ? String.valueOf((char) c) : "?" + escapeStrChar(c);
        }
    };
}

private static String escapeStrChar(final int c) {
    /* Produce an escaped character for a string literal of the form $XX
    XX is the byte's two hexadecimal digits, high digit first, written
    with A-P in place of 0-f. Only the low 8 bits of c are used.
    */
    final char high = (char) ('A' + ((c >> 4) & 0xF));
    final char low  = (char) ('A' + (c & 0xF));
    return "$" + high + low;
}
private static int jamcrc(final byte[] buf) {
    /* Compute the JAMCRC checksum of a buffer
    This is the standard CRC-32 of the bytes with every bit of the result
    flipped, returned as a (possibly negative) 32-bit int.
    */
    final CRC32 checksum = new CRC32();
    checksum.update(buf, 0, buf.length);
    final int crc32 = (int) checksum.getValue();
    return ~crc32;
}
private static void makeGlobal(final Symbol s) throws Exception {