Skip to content

Instantly share code, notes, and snippets.

@CynicRus
Created April 14, 2025 20:34
Show Gist options
  • Select an option

  • Save CynicRus/155a3499cf8ed188a71a32bb542558f4 to your computer and use it in GitHub Desktop.

Select an option

Save CynicRus/155a3499cf8ed188a71a32bb542558f4 to your computer and use it in GitHub Desktop.
watcom_rtti_parser
# IDA (>= 9) script for parsing the RTTI emitted by a circa-1999 release of Watcom C++
# Reconstructs the class structures
# Author: CynicRus, 2025.
import idautils
import idaapi
import idc
import ida_bytes
import ida_name
import ida_funcs
import ida_typeinf
import ida_segment
def is_valid_code_ea(ea):
    """Return a truthy value when *ea* belongs to a function or is flagged as code.

    Any exception raised by the IDA API calls is swallowed and reported as
    ``False``.
    """
    try:
        if ida_funcs.get_func(ea) is not None:
            return True
        return idc.is_code(idc.get_full_flags(ea))
    except Exception:
        return False
def get_string(ea):
    """Read a NUL-terminated string starting at *ea*.

    Returns the decoded text (possibly empty), or ``None`` if a byte outside
    the 0x20..0x7F range is met before the terminator or if reading raises.
    """
    chars = []
    try:
        cursor = ea
        while True:
            code = ida_bytes.get_byte(cursor)
            if code == 0:
                # Terminator reached: everything collected so far is the string.
                return "".join(chars)
            if not 0x20 <= code <= 0x7F:
                return None
            chars.append(chr(code))
            cursor += 1
    except Exception:
        return None
def create_struc(name, is_union=False):
    """Create an empty struct (or union) in the local type library.

    If a type called *name* already exists, ``_1``, ``_2``, ... is appended
    until an unused name is found.

    Args:
        name: Requested type name.
        is_union: Create a union instead of a struct.

    Returns:
        The new ``tinfo_t`` on success, or ``None`` when creating or saving
        the type failed (either by exception or by a non-OK result code).
    """
    print(f"Создание структуры: {name}")
    base_name = name
    suffix = 0
    til = ida_typeinf.get_idati()
    udt_kind = ida_typeinf.BTF_UNION if is_union else ida_typeinf.BTF_STRUCT
    while True:
        tif = ida_typeinf.tinfo_t()
        if not tif.get_named_type(til, name):
            try:
                tif.create_udt(ida_typeinf.udt_type_data_t(), udt_kind)
                code = tif.set_named_type(til, name, 0)
            except Exception as e:
                print(f"Ошибка при создании {name}: {str(e)}")
                return None
            # set_named_type reports failure through its result code, not an
            # exception, so it has to be checked before claiming success.
            if code != ida_typeinf.TERR_OK:
                print(f"Ошибка при создании {name}: код {code}")
                return None
            print(f"Структура {name} создана")
            return tif
        # Name is taken: try the next numeric suffix.
        suffix += 1
        name = f"{base_name}_{suffix}"
def get_member_by_name(tif, name):
    """Return True if *tif* is a UDT that already has a member called *name*."""
    if tif.is_udt():
        probe = ida_typeinf.udm_t()
        probe.name = name
        # find_udm yields -1 when no member matches the probe.
        return tif.find_udm(probe, ida_typeinf.STRMEM_NAME) != -1
    return False
def add_struc_member(tif, name, offset, type_name, comment=None, size=4):
    """Append a member to the UDT *tif*.

    Args:
        tif: ``tinfo_t`` of the struct/union to extend.
        name: Member name; if a member with this name exists nothing is added.
        offset: Member offset in bytes.
        type_name: One of ``"dword"``, ``"float"``, ``"int"``, ``"ptr"``,
            ``"string"``.
        comment: Optional member comment; stored on the member and logged.
        size: Element count of the char array for ``"string"``; every other
            type is laid out as 4 bytes.

    Returns:
        True if the member exists afterwards (added now or already present),
        False on an unknown type, a non-OK ``add_udm`` code or any exception.
    """
    try:
        if get_member_by_name(tif, name):
            print(f"Поле {name} уже существует в структуре, пропуск")
            return True
        field_tif = ida_typeinf.tinfo_t()
        if type_name == "dword":
            field_tif.create_simple_type(ida_typeinf.BTF_UINT32)
        elif type_name == "float":
            field_tif.create_simple_type(ida_typeinf.BTF_FLOAT)
        elif type_name == "int":
            field_tif.create_simple_type(ida_typeinf.BTF_INT)
        elif type_name == "ptr":
            field_tif.create_ptr(ida_typeinf.tinfo_t(ida_typeinf.BTF_VOID))
        elif type_name == "string":
            field_tif.create_array(ida_typeinf.tinfo_t(ida_typeinf.BTF_CHAR), size)
        else:
            print(f"Неизвестный тип {type_name} для поля {name}")
            return False
        nbytes = size if type_name == "string" else 4
        udm = ida_typeinf.udm_t()
        udm.name = name
        udm.type = field_tif
        # udm_t offsets and sizes are expressed in bits.
        udm.offset = offset * 8
        udm.size = nbytes * 8
        if comment:
            # Attach the comment to the member itself instead of only logging it.
            udm.cmt = comment
        result = tif.add_udm(udm, 0, 1, -1)
        if result != ida_typeinf.TERR_OK:
            print(f"Ошибка при добавлении поля {name}: код {result}")
            return False
        if comment:
            print(f"Комментарий для поля {name}: {comment}")
        print(f"Добавлено поле {name} на смещение {hex(offset)}")
        return True
    except Exception as e:
        print(f"Ошибка при добавлении поля {name}: {str(e)}")
        return False
def is_simple_structure(size, ctor_ea, dtor_ea):
    """Decide whether a class looks like a plain data structure.

    A class qualifies when its size is at most 0x20 and a multiple of 4, and
    either it lacks a valid constructor/destructor pair or neither of those
    functions contains an instruction line with both ``mov`` and ``offset``.
    """
    if size > 0x20 or size % 4 != 0:
        return False

    has_both = (ctor_ea and dtor_ea
                and is_valid_code_ea(ctor_ea) and is_valid_code_ea(dtor_ea))
    if not has_both:
        return True

    for func_ea in (ctor_ea, dtor_ea):
        func_end = idc.get_func_attr(func_ea, idc.FUNCATTR_END)
        if func_end == idc.BADADDR:
            continue
        cursor = func_ea
        while cursor < func_end:
            line = idc.generate_disasm_line(cursor, 0)
            # Storing an address constant is treated as non-trivial initialisation.
            if 'mov' in line and 'offset' in line:
                return False
            cursor = idc.next_head(cursor)
            if cursor == idc.BADADDR:
                break
    return True
def find_rtti_structures(seg_start, seg_end):
    """Scan ``[seg_start, seg_end)`` in 4-byte steps for RTTI records.

    A record is accepted when it starts with the bytes ``00 01 02 00``, the
    dword at +0x14 points to a printable string that starts with ``$`` and
    ends with ``$$``, and the 16-bit value at +0x10 is a non-zero size.

    Returns:
        A list of dicts with the keys ``ea``, ``name``, ``name_ptr``, ``ctor``,
        ``copy_ctor``, ``dtor``, ``size``, ``base_class_ea`` (always ``None``
        here), ``vtable_ea`` and ``has_virtual_functions``.
    """
    signature = (0, 1, 2, 0)
    records = []
    # The last candidate must leave 0x28 bytes before the end of the segment.
    for ea in range(seg_start, seg_end - 0x28 + 1, 4):
        try:
            if any(ida_bytes.get_byte(ea + i) != expected
                   for i, expected in enumerate(signature)):
                continue

            name_ptr = ida_bytes.get_dword(ea + 0x14)
            class_name = get_string(name_ptr) if name_ptr else None
            if not (class_name and class_name.startswith('$') and class_name.endswith('$$')):
                continue

            ctor_ea = ida_bytes.get_dword(ea + 4)
            copy_ctor_ea = ida_bytes.get_dword(ea + 8)
            dtor_ea = ida_bytes.get_dword(ea + 0xC)

            # The size is stored as a 16-bit little-endian value.
            size = int.from_bytes(ida_bytes.get_bytes(ea + 0x10, 2), byteorder='little')
            if size <= 0 or size > 0x10000:
                print(f"Пропущен RTTI на {hex(ea)}: нереальный размер {hex(size)}")
                continue

            has_virtual_functions = ida_bytes.get_byte(ea + 0x11) != 0

            # A data reference to this record from the same segment is taken
            # as the vtable that points back at it.
            vtable_ea = None
            for xref in idautils.XrefsTo(ea):
                candidate_ea = xref.frm
                if not seg_start <= candidate_ea <= seg_end:
                    continue
                ptr = ida_bytes.get_dword(candidate_ea)
                if ptr == ea or is_valid_code_ea(ptr):
                    vtable_ea = candidate_ea
                    has_virtual_functions = True
                    break

            records.append({
                'ea': ea,
                'name': class_name,
                'name_ptr': name_ptr,
                'ctor': ctor_ea if is_valid_code_ea(ctor_ea) else None,
                'copy_ctor': copy_ctor_ea if is_valid_code_ea(copy_ctor_ea) else None,
                'dtor': dtor_ea if is_valid_code_ea(dtor_ea) else None,
                'size': size,
                'base_class_ea': None,
                'vtable_ea': vtable_ea,
                'has_virtual_functions': has_virtual_functions
            })
            print(f"RTTI по {hex(ea)}: {class_name}, размер {hex(size)}, vtable: {hex(vtable_ea) if vtable_ea else 'None'}, виртуальные функции: {has_virtual_functions}")
        except Exception as e:
            print(f"Ошибка при анализе RTTI на {hex(ea)}: {str(e)}")
    return records
def _parse_disasm_int(text):
    """Parse a number as printed in disassembly: ``14Ch``, ``0x14C`` or ``332``.

    Raises ValueError for anything that is not a plain numeric literal
    (register names such as ``ah``, scaled indexes, symbolic names).
    """
    token = text.strip().lower()
    digits = token.lstrip('-')
    # Numeric literals always start with a digit; this keeps 8-bit register
    # names like "ah"/"ch" from being read as hex numbers.
    if not digits[:1].isdigit():
        raise ValueError(f"not a numeric literal: {text!r}")
    if token.endswith('h'):
        return int(token[:-1], 16)
    if digits.startswith('0x'):
        return int(token, 16)
    return int(token)


def analyze_constructor(ctor_ea, is_copy_ctor=False, is_dtor=False, rtti_size=None):
    """Scan a constructor-like function for stores into the object.

    Every ``mov`` whose destination is ``[eax|ebx|ecx|edx (+/- disp)]`` and
    whose source is an immediate or ``offset`` operand is recorded as a field
    at that displacement, with a type guessed from the stored value. ``mov
    byte ptr [edi(+disp)]`` stores seen after a ``mov byte ptr ..., offset``
    instruction are recorded as 5-byte string fields.

    Args:
        ctor_ea: Start address of the function to scan.
        is_copy_ctor: The function is a copy constructor (disables vtable
            detection).
        is_dtor: The function is a destructor (disables vtable detection).
        rtti_size: Object size; displacements at or beyond it are ignored.
            ``None`` means no upper bound.

    Returns:
        ``(vtable_ea, fields, max_offset)`` where *fields* maps a byte offset
        to ``{'type', 'value'[, 'size']}``. The tuple is ``(None, {}, 0)``
        when *ctor_ea* is empty or not code.
    """
    vtable_ea = None
    fields = {}
    max_offset = 0
    if not ctor_ea or not is_valid_code_ea(ctor_ea):
        # Same 3-tuple shape as the normal return so callers can always unpack.
        return vtable_ea, fields, max_offset
    ea = ctor_ea
    end_ea = idc.get_func_attr(ctor_ea, idc.FUNCATTR_END)
    if end_ea == idc.BADADDR:
        # No function bounds known: scan a fixed window instead.
        end_ea = ea + 0x500
    base_regs = ['eax', 'ebx', 'ecx', 'edx']
    vector_context = False
    string_context = False
    while ea < end_ea:
        inst = idc.generate_disasm_line(ea, 0)
        if not inst:
            ea = idc.next_head(ea)
            continue
        print(f"Инструкция на {hex(ea)}: {inst}")
        if 'lea' in inst and any(f'[{reg}+' in inst for reg in base_regs):
            vector_context = True
        if 'mov' in inst and 'byte ptr' in inst and 'offset' in inst:
            string_context = True
        if 'mov' in inst and any(f'[{reg}' in inst for reg in base_regs):
            try:
                for reg in base_regs:
                    if f'[{reg}' in inst:
                        offset_str = None
                        if f'[{reg}+' in inst:
                            offset_str = inst.split(f'[{reg}+')[1].split(']')[0]
                        elif f'[{reg}-' in inst:
                            offset_str = '-' + inst.split(f'[{reg}-')[1].split(']')[0]
                        elif f'[{reg}]' in inst:
                            offset_str = '0'
                        if offset_str and offset_str != 'ds':
                            # Handles the "14Ch" form; non-numeric displacements
                            # raise ValueError and are skipped by the handler below.
                            offset = _parse_disasm_int(offset_str)
                            if offset < 0 or (rtti_size and offset >= rtti_size):
                                continue
                            max_offset = max(max_offset, offset)
                            value = inst.split(', ')[1].strip() if ', ' in inst else 'unknown'
                            # Only immediates / address constants describe a field value.
                            if any(x in value.lower() for x in ['eax', 'ebx', 'ecx', 'edx', '[', 'esp', 'ebp', 'unknown']):
                                continue
                            field_type = 'dword'
                            if 'movss' in inst or 'fld' in inst or 'fstp' in inst or 'xmm' in inst.lower():
                                field_type = 'float'
                            elif 'offset' in value:
                                target_ea = idc.get_name_ea_simple(value.split('offset ')[1].split(' ')[0])
                                if target_ea != idc.BADADDR:
                                    target_name = idc.get_name(target_ea)
                                    # NOTE(review): 0x5997B4 and the 0x14C threshold below look
                                    # specific to one analysed binary - confirm before reuse.
                                    if 'class_' in target_name or 'struct_' in target_name or target_ea in [0x5997B4]:
                                        field_type = 'ptr'
                                        if not is_copy_ctor and not is_dtor and offset >= 0x14C:
                                            vtable_ea = target_ea
                                            fields[0] = {'type': 'ptr', 'value': f'vtable_0x{target_ea:X}'}
                                            print(f"Обнаружена vtable по {hex(vtable_ea)}: {inst}")
                                    else:
                                        field_type = 'dword'
                            elif value.replace('.', '', 1).replace('-', '', 1).isdigit():
                                field_type = 'float' if '.' in value else 'int'
                            elif value in ['0', '1', '2', '3', '4', '5', '0.0', '1.0'] or value.endswith('.0'):
                                field_type = 'float' if vector_context else 'int'
                            elif value.lower().endswith('h'):
                                try:
                                    num = _parse_disasm_int(value)
                                except ValueError:
                                    field_type = 'dword'
                                else:
                                    if 0 <= num <= 1000:
                                        field_type = 'int'
                                    elif num in [0x3F800000, 0x40000000, 0x3F19999A]:
                                        # Bit patterns of common float constants.
                                        field_type = 'float'
                                    else:
                                        field_type = 'dword'
                            fields[offset] = {'type': field_type, 'value': value}
                            print(f"Поле на {hex(offset)}: тип={field_type}, значение={value}")
            except (ValueError, IndexError):
                pass
        # String handling
        if string_context and 'mov' in inst and 'byte ptr' in inst:
            try:
                for reg in ['edi']:
                    if f'[{reg}' in inst:
                        offset_str = None
                        if f'[{reg}+' in inst:
                            offset_str = inst.split(f'[{reg}+')[1].split(']')[0]
                        elif f'[{reg}]' in inst:
                            offset_str = '0'
                        if offset_str:
                            offset = _parse_disasm_int(offset_str)
                            # rtti_size may be None (no bound); comparing against it would raise.
                            if rtti_size is None or offset < rtti_size:
                                fields[offset] = {'type': 'string', 'value': 'none', 'size': 5}
                                max_offset = max(max_offset, offset + 5)
                                print(f"Поле на {hex(offset)}: тип=string, значение=none")
            except (ValueError, IndexError):
                pass
        ea = idc.next_head(ea)
        if ea == idc.BADADDR:
            break
    return vtable_ea, fields, max_offset
def _collect_vtable_methods(table_ea, rtti_ea, limit=None):
    """Return the code pointers stored in the candidate vtable at *table_ea*.

    The table is accepted only if its first dword equals *rtti_ea* or is a
    code address. When it equals *rtti_ea*, the first 8 bytes are skipped.
    Reading stops at the first null / non-code pointer, at a function whose
    name contains ``type_info``, or after *limit* entries (``None`` = no cap).
    """
    first_ptr = ida_bytes.get_dword(table_ea)
    if first_ptr != rtti_ea and not is_valid_code_ea(first_ptr):
        return []
    slot_ea = table_ea + 8 if first_ptr == rtti_ea else table_ea
    collected = []
    while limit is None or len(collected) < limit:
        ptr = ida_bytes.get_dword(slot_ea)
        if not ptr or not is_valid_code_ea(ptr):
            break
        if 'type_info' in idc.get_func_name(ptr).lower():
            break
        collected.append(ptr)
        slot_ea += 4
    return collected


def find_vtable_for_class(rtti_ea, ctor_ea, seg_start, seg_end, processed_eas, size, vtable_ea=None, has_virtual_functions=False):
    """Locate the vtable of a class and list its method addresses.

    Three sources are tried in order: the supplied *vtable_ea*, the vtable
    reported by ``analyze_constructor`` for *ctor_ea*, and data references to
    *rtti_ea* inside ``[seg_start, seg_end]`` not yet in *processed_eas*.
    The first source yielding at least one method wins and its address is
    added to *processed_eas*.

    Returns:
        ``(vtable_ea, methods)``, or ``(None, [])`` when nothing was found or
        the class has no virtual functions and is at most 0x20 bytes.
    """
    if not has_virtual_functions and size <= 0x20:
        print(f"Класс без виртуальных функций или малая структура, vtable не ищется")
        return None, []

    # 1) vtable address supplied by the caller (walked without an entry cap).
    if vtable_ea:
        try:
            methods = _collect_vtable_methods(vtable_ea, rtti_ea)
            if methods:
                processed_eas.add(vtable_ea)
                print(f"vtable найдена по {hex(vtable_ea)}, методов: {len(methods)}")
                return vtable_ea, methods
        except Exception as e:
            print(f"Ошибка при анализе vtable по {hex(vtable_ea)}: {str(e)}")
        vtable_ea = None

    # 2) vtable address discovered inside the constructor.
    try:
        if ctor_ea:
            vtable_ea, _, _ = analyze_constructor(ctor_ea, rtti_size=size)
            if vtable_ea:
                methods = _collect_vtable_methods(vtable_ea, rtti_ea, 20)
                if methods:
                    processed_eas.add(vtable_ea)
                    print(f"vtable найдена по {hex(vtable_ea)}, методов: {len(methods)}")
                    return vtable_ea, methods
    except Exception as e:
        print(f"Ошибка при анализе конструктора для vtable: {str(e)}")

    # 3) any unprocessed reference to the RTTI record within the segment.
    try:
        for xref in idautils.XrefsTo(rtti_ea):
            candidate_ea = xref.frm
            if not seg_start <= candidate_ea <= seg_end or candidate_ea in processed_eas:
                continue
            methods = _collect_vtable_methods(candidate_ea, rtti_ea, 20)
            if methods:
                processed_eas.add(candidate_ea)
                print(f"vtable найдена через xref по {hex(candidate_ea)}, методов: {len(methods)}")
                return candidate_ea, methods
    except Exception as e:
        print(f"Ошибка при анализе xrefs для vtable: {str(e)}")

    print(f"vtable не найдена для RTTI {hex(rtti_ea)}")
    return None, []
def create_class_structure(class_name, vtable_ea, methods, fields, rtti_ea, rtti_size, base_class_name, max_offset):
    """Build ``class_<name>`` (and, if possible, ``vtable_<name>``) types.

    Layout, in order: an optional ``base`` pointer, an optional ``vtable``
    pointer, the fields recovered from the constructors, then 4-byte stub
    fields for every still-unused slot up to the effective size.

    Args:
        class_name: Raw RTTI class name; ``$`` is stripped and spaces become
            underscores.
        vtable_ea: Address of the vtable, or a falsy value.
        methods: Method addresses found in the vtable.
        fields: Mapping of byte offset to ``{'type', 'value'[, 'size']}``.
        rtti_ea: Address of the RTTI record, compared with the first vtable
            dword to detect the 8-byte header.
        rtti_size: Object size taken from the RTTI record.
        base_class_name: Name of the base class, or a falsy value.
        max_offset: Highest offset seen in the constructors; 0 if unknown.
    """
    clean_name = class_name.replace('$', '').replace(' ', '_')
    class_struct_name = f"class_{clean_name}"
    vtable_struct_name = f"vtable_{clean_name}"

    class_tif = create_struc(class_struct_name)
    if not class_tif:
        print(f"Не удалось создать структуру {class_struct_name}")
        return

    cursor = 0          # next free byte offset for the leading pointers / stubs
    taken = set()       # offsets that already carry a member

    if base_class_name:
        if add_struc_member(class_tif, "base", cursor, "ptr", f"Base class {base_class_name}"):
            taken.add(cursor)
            print(f"Добавлен базовый класс {base_class_name} на смещение {hex(cursor)}")
            cursor += 4
        else:
            print(f"Не удалось добавить базовый класс {base_class_name}")

    if vtable_ea and methods:
        vtable_tif = create_struc(vtable_struct_name)
        if vtable_tif:
            # A table whose first dword points back at the RTTI record carries
            # an 8-byte header before the method slots.
            has_header = ida_bytes.get_dword(vtable_ea) == rtti_ea
            if has_header:
                first_slot = 8
                add_struc_member(vtable_tif, "type_info", 0, "ptr", "Pointer to __TI")
                add_struc_member(vtable_tif, "this_offset", 4, "dword", "This pointer offset")
            else:
                first_slot = 0
            for index, method_ea in enumerate(methods):
                slot_offset = first_slot + index * 4
                method_name = idc.get_func_name(method_ea) or f"method_{index:04X}"
                add_struc_member(vtable_tif, f"method_{index}", slot_offset, "ptr", f"-> {method_name}")
                print(f"Добавлен метод {method_name} на смещение {hex(slot_offset)} в {vtable_struct_name}")
            idc.set_name(vtable_ea, vtable_struct_name, ida_name.SN_CHECK)
            if add_struc_member(class_tif, "vtable", cursor, "ptr", f"vtable* {vtable_struct_name}"):
                taken.add(cursor)
                print(f"Добавлена vtable на смещение {hex(cursor)}")
                cursor += 4
            else:
                print(f"Не удалось добавить vtable в {class_struct_name}")

    print(f"Добавление полей из fields: {[(hex(k), v) for k, v in sorted(fields.items())]}")

    # Prefer the smaller size implied by the constructor over the RTTI size.
    if max_offset > 0:
        effective_size = min(rtti_size, max_offset + 4)
    else:
        effective_size = rtti_size
    if effective_size > 0x3000:
        effective_size = max_offset + 4
        print(f"Ограничен размер до {hex(effective_size)} из-за больших смещений")

    for field_offset in sorted(fields):
        if field_offset < cursor or field_offset >= effective_size or field_offset in taken:
            continue
        info = fields[field_offset]
        field_name = f"field_{field_offset:04X}"
        added = add_struc_member(class_tif, field_name, field_offset, info['type'],
                                 f"Value: {info['value']}", info.get('size', 4))
        if added:
            taken.add(field_offset)
            print(f"Добавлено поле {field_name} на смещение {hex(field_offset)}")
        else:
            print(f"Не удалось добавить поле {field_name} на смещение {hex(field_offset)}")

    # Fill the remaining 4-byte slots with stubs, only up to effective_size.
    stub_type = "float" if rtti_size <= 0x20 else "dword"
    for slot in range(cursor, effective_size, 4):
        if slot in taken:
            continue
        field_name = f"field_{slot:04X}"
        if add_struc_member(class_tif, field_name, slot, stub_type, "Unknown field"):
            taken.add(slot)
            print(f"Добавлена заглушка {field_name} на смещение {hex(slot)}")
        else:
            print(f"Не удалось добавить заглушку {field_name} на смещение {hex(slot)}")

    print(f"Создана структура {class_struct_name}, размер {hex(effective_size)}")
def rename_functions(class_name, ctor_ea, copy_ctor_ea, dtor_ea):
    """Name the special member functions of a class after the class.

    Each address that is set and points to code is renamed to
    ``<clean name>::ctor``, ``::copy_ctor`` or ``::dtor`` respectively, where
    the clean name is *class_name* without ``$`` and with spaces replaced by
    underscores.
    """
    clean_name = class_name.replace('$', '').replace(' ', '_')
    targets = (
        (ctor_ea, "::ctor"),
        (copy_ctor_ea, "::copy_ctor"),
        (dtor_ea, "::dtor"),
    )
    for func_ea, suffix in targets:
        if func_ea and is_valid_code_ea(func_ea):
            ida_name.set_name(func_ea, f"{clean_name}{suffix}", ida_name.SN_CHECK)
def main():
    """Find every RTTI record in the readable segments and rebuild its class.

    For each record the constructor, copy constructor and destructor are
    renamed, the vtable is located, the three functions are scanned for field
    stores, and the class/vtable types are created. A failure while handling
    one class is logged and does not stop the remaining classes.
    """
    segments = []
    seg = ida_segment.get_first_seg()
    while seg:
        if seg.perm & ida_segment.SEGPERM_READ:
            segments.append((seg.start_ea, seg.end_ea, seg.name))
        seg = ida_segment.get_next_seg(seg.start_ea)

    rtti_structs = []
    # Bounds of the segment each RTTI record was found in, keyed by record
    # address, so the vtable search is confined to that record's own segment
    # rather than whichever segment happened to be scanned last.
    segment_bounds = {}
    processed_eas = set()
    for seg_start, seg_end, _seg_name in segments:
        for found in find_rtti_structures(seg_start, seg_end):
            rtti_structs.append(found)
            segment_bounds[found['ea']] = (seg_start, seg_end)

    for rtti in rtti_structs:
        class_name = rtti['name']
        rtti_ea = rtti['ea']
        ctor_ea = rtti['ctor']
        copy_ctor_ea = rtti['copy_ctor']
        dtor_ea = rtti['dtor']
        size = rtti['size']
        base_class_ea = rtti['base_class_ea']
        vtable_ea = rtti.get('vtable_ea')
        has_virtual_functions = rtti.get('has_virtual_functions', False)
        seg_start, seg_end = segment_bounds[rtti_ea]
        print(f"\nОбработка {class_name} (RTTI: {hex(rtti_ea)}, размер: {hex(size)})")
        try:
            is_simple = is_simple_structure(size, ctor_ea, dtor_ea)
            rename_functions(class_name, ctor_ea, copy_ctor_ea, dtor_ea)
            fields = {}
            max_offset = 0
            vtable_ea, methods = find_vtable_for_class(rtti_ea, ctor_ea, seg_start, seg_end, processed_eas, size, vtable_ea, has_virtual_functions)
            for ea, is_copy, is_dtor in [(ctor_ea, False, False), (copy_ctor_ea, True, False), (dtor_ea, False, True)]:
                if ea:
                    try:
                        _, ctor_fields, ctor_max_offset = analyze_constructor(ea, is_copy, is_dtor, size)
                        for offset, info in ctor_fields.items():
                            # A more specific type from a later function replaces a plain dword.
                            if offset not in fields or fields[offset]['type'] == 'dword':
                                fields[offset] = info
                                print(f"Обновлено поле на {hex(offset)}: тип={info['type']}, значение={info['value']}")
                        max_offset = max(max_offset, ctor_max_offset)
                    except Exception as e:
                        print(f"Ошибка при анализе функции {hex(ea)}: {str(e)}")
            if is_simple and not vtable_ea:
                # Simple structures get a float placeholder in every unknown 4-byte slot.
                offset = 0
                while offset < size:
                    if offset not in fields:
                        fields[offset] = {'type': 'float', 'value': '0.0'}
                        print(f"Добавлено поле-заглушка на {hex(offset)}: тип=float")
                    offset += 4
            base_class_name = None
            if base_class_ea:
                for other_rtti in rtti_structs:
                    if other_rtti['ea'] == base_class_ea:
                        base_class_name = other_rtti['name']
                        print(f"Базовый класс для {class_name}: {base_class_name}")
                        break
            create_class_structure(class_name, vtable_ea, methods, fields, rtti_ea, size, base_class_name, max_offset)
        except Exception as e:
            print(f"Ошибка при обработке {class_name} (RTTI: {hex(rtti_ea)}): {str(e)}")


# Run the analysis when the file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment