goldberg_emulator/detours/detours.cpp
2020-05-20 18:40:59 -04:00

2549 lines
84 KiB
C++

//////////////////////////////////////////////////////////////////////////////
//
// Core Detours Functionality (detours.cpp of detours.lib)
//
// Microsoft Research Detours Package, Version 4.0.1
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
//#define DETOUR_DEBUG 1
#define DETOURS_INTERNAL
#include "detours.h"
#if DETOURS_VERSION != 0x4c0c1 // 0xMAJORcMINORcPATCH
#error detours.h version mismatch
#endif
#define NOTHROW
//////////////////////////////////////////////////////////////////////////////
//
// One entry of a trampoline's instruction alignment array (see the rAlign
// members of _DETOUR_TRAMPOLINE). Two offsets are packed into a single byte:
// obTarget uses 3 bits and obTrampoline uses 5 bits.
// NOTE(review): presumably these are the byte offsets of the same instruction
// boundary in the target code and in the trampoline copy -- confirm against
// the code that fills rAlign.
struct _DETOUR_ALIGN
{
BYTE obTarget : 3;
BYTE obTrampoline : 5;
};
// The bit-fields must pack into exactly one byte.
C_ASSERT(sizeof(_DETOUR_ALIGN) == 1);
//////////////////////////////////////////////////////////////////////////////
//
// Region reserved for system DLLs, which cannot be used for trampolines.
//
// Inclusive bounds of the address range that the region allocator steps over
// when searching for free memory for trampolines.
static PVOID s_pSystemRegionLowerBound = (PVOID)(ULONG_PTR)0x70000000;
static PVOID s_pSystemRegionUpperBound = (PVOID)(ULONG_PTR)0x80000000;
//////////////////////////////////////////////////////////////////////////////
//
// Reports whether pbAddress lies inside the import address table (IAT) of the
// PE image that contains pbCode.
//
// pbCode    - any address inside the module to inspect; its allocation base is
//             treated as the start of the PE image.
// pbAddress - the address to test against the image's IAT data directory.
//
// Returns true only when the allocation base carries valid DOS and NT
// signatures and pbAddress falls in [IAT start, IAT start + IAT size).
// Returns false for anything else, including when the memory query fails or
// when reading the headers raises an access violation.
static bool detour_is_imported(PBYTE pbCode, PBYTE pbAddress)
{
    MEMORY_BASIC_INFORMATION mbi;
    ZeroMemory(&mbi, sizeof(mbi));

    // VirtualQuery returns 0 on failure and does not fill in mbi; without this
    // check the header probe below would dereference uninitialized stack data.
    if (VirtualQuery((PVOID)pbCode, &mbi, sizeof(mbi)) == 0 ||
        mbi.AllocationBase == NULL) {
        return false;
    }

    __try {
        PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)mbi.AllocationBase;
        if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
            return false;
        }

        PIMAGE_NT_HEADERS pNtHeader = (PIMAGE_NT_HEADERS)((PBYTE)pDosHeader +
                                                          pDosHeader->e_lfanew);
        if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) {
            return false;
        }

        // Half-open range covered by the IAT data directory.
        PBYTE pbIatBegin = (PBYTE)pDosHeader +
            pNtHeader->OptionalHeader
            .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].VirtualAddress;
        PBYTE pbIatEnd = pbIatBegin +
            pNtHeader->OptionalHeader
            .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].Size;

        if (pbAddress >= pbIatBegin && pbAddress < pbIatEnd) {
            return true;
        }
    }
#pragma prefast(suppress:28940, "A bad pointer means this probably isn't a PE header.")
    __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
             EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
        // Unreadable headers: the allocation is not a usable PE image.
        return false;
    }
    return false;
}
// Returns the lower search bound for `address`: address - 0x7ff80000, or
// 0x80000 when address is not greater than 0x7ff80000 (so the subtraction can
// never wrap).
inline ULONG_PTR detour_2gb_below(ULONG_PTR address)
{
    const ULONG_PTR reach = (ULONG_PTR)0x7ff80000;

    if (address > reach) {
        return address - reach;
    }
    return 0x80000;
}
// Returns the upper search bound for `address`: address + 0x7ff80000, clamped
// to a fixed ceiling near the top of the address space when the addition
// would otherwise get too close to (or past) the end of the pointer range.
inline ULONG_PTR detour_2gb_above(ULONG_PTR address)
{
#if defined(DETOURS_64BIT)
    const ULONG_PTR threshold = (ULONG_PTR)0xffffffff80000000;
    const ULONG_PTR ceiling = (ULONG_PTR)0xfffffffffff80000;
#else
    const ULONG_PTR threshold = (ULONG_PTR)0x80000000;
    const ULONG_PTR ceiling = (ULONG_PTR)0xfff80000;
#endif

    if (address < threshold) {
        return address + 0x7ff80000;
    }
    return ceiling;
}
///////////////////////////////////////////////////////////////////////// X86.
//
#ifdef DETOURS_X86
// X86 trampoline record. Holds the instructions moved out of the target
// function (followed by a jump to the rest of the target), a copy of the
// original target bytes, and the pointers that tie target and detour together.
// The layout is fixed at 72 bytes (see the C_ASSERT below).
struct _DETOUR_TRAMPOLINE
{
BYTE rbCode[30]; // target code + jmp to pbRemain
BYTE cbCode; // size of moved target code.
BYTE cbCodeBreak; // padding to make debugging easier.
BYTE rbRestore[22]; // original target code.
BYTE cbRestore; // size of original target code.
BYTE cbRestoreBreak; // padding to make debugging easier.
_DETOUR_ALIGN rAlign[8]; // instruction alignment array.
PBYTE pbRemain; // first instruction after moved code. [free list]
PBYTE pbDetour; // first instruction of detour function.
};
// Guards the layout above against accidental size changes.
C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 72);
// SIZE_OF_JMP: byte length of the `jmp +imm32` instruction (0xE9 opcode plus a
// 4-byte displacement) emitted by detour_gen_jmp_immediate.
enum {
SIZE_OF_JMP = 5
};
// Writes a 5-byte relative jump (0xE9 + imm32) at pbCode that transfers
// control to pbJmpVal. Returns the address just past the written instruction.
inline PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE pbJmpVal)
{
    // The displacement is measured from the end of the instruction.
    PBYTE pbAfter = pbCode + 5;

    pbCode[0] = 0xE9; // jmp +imm32
    *(INT32 *)(pbCode + 1) = (INT32)(pbJmpVal - pbAfter);
    return pbAfter;
}
// Writes a 6-byte indirect jump (0xFF 0x25 + imm32) at pbCode whose operand is
// the address of the pointer slot ppbJmpVal. Returns the address just past the
// written instruction.
inline PBYTE detour_gen_jmp_indirect(PBYTE pbCode, PBYTE *ppbJmpVal)
{
    pbCode[0] = 0xff; // jmp [+imm32]
    pbCode[1] = 0x25;
    // On X86 the operand is the absolute address of the slot.
    *(INT32 *)(pbCode + 2) = (INT32)((PBYTE)ppbJmpVal);
    return pbCode + 6;
}
// Fills [pbCode, pbLimit) with 0xcc (breakpoint) bytes. Returns the first
// address that was not written: pbLimit when anything was filled, otherwise
// pbCode unchanged.
inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit)
{
    for (; pbCode < pbLimit; ++pbCode) {
        *pbCode = 0xcc; // brk
    }
    return pbCode;
}
// Follows the jump thunks that may sit in front of a function and returns the
// address of the code they lead to.
//
// pbCode    - address to start from; NULL yields NULL.
// ppGlobals - optional out parameter; when non-NULL it is always set to NULL
//             on this architecture.
//
// Thunks are followed in this order:
//   1. `jmp [imm32]` whose operand lies in the module's IAT
//      (per detour_is_imported): continue at the pointer stored there.
//   2. `jmp +imm8` (a patch jump): continue at its destination, and then
//      follow either one more IAT `jmp [imm32]` or one `jmp +imm32`.
// Returns pbCode unchanged when none of these patterns match.
inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
{
if (pbCode == NULL) {
return NULL;
}
if (ppGlobals != NULL) {
*ppGlobals = NULL;
}
// First, skip over the import vector if there is one.
if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [imm32]
// Looks like an import alias jump, then get the code it points to.
// On X86 the operand is the absolute address of the pointer slot.
PBYTE pbTarget = *(UNALIGNED PBYTE *)&pbCode[2];
if (detour_is_imported(pbCode, pbTarget)) {
PBYTE pbNew = *(UNALIGNED PBYTE *)pbTarget;
DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
pbCode = pbNew;
}
}
// Then, skip over a patch jump
if (pbCode[0] == 0xeb) { // jmp +imm8
// Destination = end of the 2-byte instruction + signed 8-bit displacement.
PBYTE pbNew = pbCode + 2 + *(CHAR *)&pbCode[1];
DETOUR_TRACE(("%p->%p: skipped over short jump.\n", pbCode, pbNew));
pbCode = pbNew;
// After the short jump, skip over the import vector if there is one.
if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [imm32]
// Looks like an import alias jump, then get the code it points to.
PBYTE pbTarget = *(UNALIGNED PBYTE *)&pbCode[2];
if (detour_is_imported(pbCode, pbTarget)) {
pbNew = *(UNALIGNED PBYTE *)pbTarget;
DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
pbCode = pbNew;
}
}
// Finally, skip over a long jump if it is the target of the patch jump.
else if (pbCode[0] == 0xe9) { // jmp +imm32
// Destination = end of the 5-byte instruction + signed 32-bit displacement.
pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1];
DETOUR_TRACE(("%p->%p: skipped over long jump.\n", pbCode, pbNew));
pbCode = pbNew;
}
}
return pbCode;
}
// Computes the address window in which a trampoline for pbCode may be placed.
//
// The window starts as [detour_2gb_below(pbCode), detour_2gb_above(pbCode)].
// When pbCode begins with `jmp +imm32`, the window is narrowed so that the
// jump's destination also stays in range. Results go to *ppLower / *ppUpper.
inline void detour_find_jmp_bounds(PBYTE pbCode,
                                   PDETOUR_TRAMPOLINE *ppLower,
                                   PDETOUR_TRAMPOLINE *ppUpper)
{
    // We have to place trampolines within +/- 2GB of code.
    ULONG_PTR lo = detour_2gb_below((ULONG_PTR)pbCode);
    ULONG_PTR hi = detour_2gb_above((ULONG_PTR)pbCode);
    DETOUR_TRACE(("[%p..%p..%p]\n", lo, pbCode, hi));

    // And, within +/- 2GB of relative jmp targets.
    if (pbCode[0] == 0xe9) { // jmp +imm32
        INT32 nDisplacement = *(UNALIGNED INT32 *)&pbCode[1];
        PBYTE pbDest = pbCode + 5 + nDisplacement;

        if (pbDest >= pbCode) {
            // Destination is at or above the code: raise the lower bound.
            lo = detour_2gb_below((ULONG_PTR)pbDest);
        }
        else {
            // Destination is below the code: pull the upper bound down.
            hi = detour_2gb_above((ULONG_PTR)pbDest);
        }
        DETOUR_TRACE(("[%p..%p..%p] +imm32\n", lo, pbCode, hi));
    }

    *ppLower = (PDETOUR_TRAMPOLINE)lo;
    *ppUpper = (PDETOUR_TRAMPOLINE)hi;
}
// Returns TRUE when the bytes at pbCode match one of the patterns this file
// treats as ending a function (unconditional jumps, returns, breakpoint),
// FALSE otherwise. Reads at most three bytes.
inline BOOL detour_does_code_end_function(PBYTE pbCode)
{
    switch (pbCode[0]) {
      case 0xeb:    // jmp +imm8
      case 0xe9:    // jmp +imm32
      case 0xe0:    // historically labelled "jmp eax".
                    // NOTE(review): in the Intel opcode map a bare 0xE0 is
                    // LOOPNE and `jmp eax` is FF E0 -- confirm the intent.
      case 0xc2:    // ret imm16
      case 0xc3:    // ret
      case 0xcc:    // brk
        return TRUE;

      case 0xf3:    // rep ret
        return (pbCode[1] == 0xc3) ? TRUE : FALSE;

      case 0xff:    // jmp [+imm32]
        return (pbCode[1] == 0x25) ? TRUE : FALSE;

      case 0x26:    // jmp es:
      case 0x2e:    // jmp cs:
      case 0x36:    // jmp ss:
      case 0x3e:    // jmp ds:
      case 0x64:    // jmp fs:
      case 0x65:    // jmp gs:
        // Segment prefix followed by jmp [+imm32].
        return (pbCode[1] == 0xff && pbCode[2] == 0x25) ? TRUE : FALSE;

      default:
        return FALSE;
    }
}
// Recognizes padding at pbCode. Returns the length in bytes of the filler
// instruction found there (one of the 1- to 11-byte NOP encodings, or a single
// int 3), or 0 when the bytes are not a recognized filler.
inline ULONG detour_is_code_filler(PBYTE pbCode)
{
    // Checked top to bottom; each row is compared byte by byte and abandoned
    // at the first mismatch, so no byte past a mismatch is ever read.
    static const struct {
        BYTE cb;        // length of the pattern and value returned on a match.
        BYTE rb[11];    // pattern bytes (only the first cb are meaningful).
    } s_rFillers[] = {
        // 1-byte through 11-byte NOPs.
        {  1, { 0x90 } },
        {  2, { 0x66, 0x90 } },
        {  3, { 0x0F, 0x1F, 0x00 } },
        {  4, { 0x0F, 0x1F, 0x40, 0x00 } },
        {  5, { 0x0F, 0x1F, 0x44, 0x00, 0x00 } },
        {  6, { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 } },
        {  7, { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 } },
        {  8, { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        {  9, { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        { 10, { 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        { 11, { 0x66, 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        // int 3.
        {  1, { 0xcc } },
    };

    for (ULONG iFiller = 0;
         iFiller < sizeof(s_rFillers) / sizeof(s_rFillers[0]);
         iFiller++) {
        const ULONG cb = s_rFillers[iFiller].cb;
        ULONG ib = 0;
        while (ib < cb && pbCode[ib] == s_rFillers[iFiller].rb[ib]) {
            ib++;
        }
        if (ib == cb) {
            return cb;
        }
    }
    return 0;
}
#endif // DETOURS_X86
///////////////////////////////////////////////////////////////////////// X64.
//
#ifdef DETOURS_X64
// X64 trampoline record. Holds the instructions moved out of the target
// function (followed by a jump to the rest of the target), a copy of the
// original target bytes, the pointers that tie target and detour together,
// and room for an indirect jump through pbDetour (rbCodeIn).
// The layout is fixed at 96 bytes (see the C_ASSERT below).
struct _DETOUR_TRAMPOLINE
{
// An X64 instruction can be 15 bytes long.
// In practice 11 seems to be the limit.
BYTE rbCode[30]; // target code + jmp to pbRemain.
BYTE cbCode; // size of moved target code.
BYTE cbCodeBreak; // padding to make debugging easier.
BYTE rbRestore[30]; // original target code.
BYTE cbRestore; // size of original target code.
BYTE cbRestoreBreak; // padding to make debugging easier.
_DETOUR_ALIGN rAlign[8]; // instruction alignment array.
PBYTE pbRemain; // first instruction after moved code. [free list]
PBYTE pbDetour; // first instruction of detour function.
BYTE rbCodeIn[8]; // jmp [pbDetour]
};
// Guards the layout above against accidental size changes.
C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 96);
// SIZE_OF_JMP: byte length of the `jmp +imm32` instruction (0xE9 opcode plus a
// 4-byte displacement) emitted by detour_gen_jmp_immediate.
enum {
SIZE_OF_JMP = 5
};
// Emits `jmp +imm32` (0xE9 followed by a 32-bit displacement) at pbCode so
// that execution continues at pbJmpVal. Returns pbCode advanced past the five
// bytes that were written.
inline PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE pbJmpVal)
{
    PBYTE pbOpcode = pbCode;
    PBYTE pbOperand = pbCode + 1;
    PBYTE pbEnd = pbCode + 5;   // displacement is relative to this address.

    *pbOpcode = 0xE9; // jmp +imm32
    *(INT32 *)pbOperand = (INT32)(pbJmpVal - pbEnd);
    return pbEnd;
}
// Emits `jmp [+imm32]` (0xFF 0x25 followed by a 32-bit displacement) at pbCode
// that jumps through the pointer slot ppbJmpVal. Returns pbCode advanced past
// the six bytes that were written.
inline PBYTE detour_gen_jmp_indirect(PBYTE pbCode, PBYTE *ppbJmpVal)
{
    // On X64 the operand is the distance from the end of the instruction to
    // the pointer slot.
    PBYTE pbEnd = pbCode + 6;

    pbCode[0] = 0xff; // jmp [+imm32]
    pbCode[1] = 0x25;
    *(INT32 *)(pbCode + 2) = (INT32)((PBYTE)ppbJmpVal - pbEnd);
    return pbEnd;
}
// Pads [pbCode, pbLimit) with 0xcc (breakpoint) bytes and returns the first
// address that was not written.
inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit)
{
    PBYTE pbCursor = pbCode;

    for (; pbCursor < pbLimit; pbCursor++) {
        *pbCursor = 0xcc; // brk
    }
    return pbCursor;
}
// Follows the jump thunks that may sit in front of a function and returns the
// address of the code they lead to.
//
// pbCode    - address to start from; NULL yields NULL.
// ppGlobals - optional out parameter; when non-NULL it is always set to NULL
//             on this architecture.
//
// Thunks are followed in this order:
//   1. `jmp [+imm32]` whose pointer slot lies in the module's IAT
//      (per detour_is_imported): continue at the pointer stored there.
//   2. `jmp +imm8` (a patch jump): continue at its destination, and then
//      follow either one more IAT `jmp [+imm32]` or one `jmp +imm32`.
// Returns pbCode unchanged when none of these patterns match.
inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
{
if (pbCode == NULL) {
return NULL;
}
if (ppGlobals != NULL) {
*ppGlobals = NULL;
}
// First, skip over the import vector if there is one.
if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [+imm32]
// Looks like an import alias jump, then get the code it points to.
// On X64 the operand is relative to the end of the 6-byte instruction.
PBYTE pbTarget = pbCode + 6 + *(UNALIGNED INT32 *)&pbCode[2];
if (detour_is_imported(pbCode, pbTarget)) {
PBYTE pbNew = *(UNALIGNED PBYTE *)pbTarget;
DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
pbCode = pbNew;
}
}
// Then, skip over a patch jump
if (pbCode[0] == 0xeb) { // jmp +imm8
// Destination = end of the 2-byte instruction + signed 8-bit displacement.
PBYTE pbNew = pbCode + 2 + *(CHAR *)&pbCode[1];
DETOUR_TRACE(("%p->%p: skipped over short jump.\n", pbCode, pbNew));
pbCode = pbNew;
// After the short jump, skip over the import vector if there is one.
if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [+imm32]
// Looks like an import alias jump, then get the code it points to.
PBYTE pbTarget = pbCode + 6 + *(UNALIGNED INT32 *)&pbCode[2];
if (detour_is_imported(pbCode, pbTarget)) {
pbNew = *(UNALIGNED PBYTE *)pbTarget;
DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
pbCode = pbNew;
}
}
// Finally, skip over a long jump if it is the target of the patch jump.
else if (pbCode[0] == 0xe9) { // jmp +imm32
// Destination = end of the 5-byte instruction + signed 32-bit displacement.
pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1];
DETOUR_TRACE(("%p->%p: skipped over long jump.\n", pbCode, pbNew));
pbCode = pbNew;
}
}
return pbCode;
}
// Computes the address window in which a trampoline for pbCode may be placed.
//
// The window starts as [detour_2gb_below(pbCode), detour_2gb_above(pbCode)].
// When pbCode begins with `jmp [+imm32]` the window is narrowed so the jump's
// pointer slot stays in range; otherwise, when it begins with `jmp +imm32`,
// the window is narrowed so the jump's destination stays in range.
// Results go to *ppLower / *ppUpper.
inline void detour_find_jmp_bounds(PBYTE pbCode,
                                   PDETOUR_TRAMPOLINE *ppLower,
                                   PDETOUR_TRAMPOLINE *ppUpper)
{
    // We have to place trampolines within +/- 2GB of code.
    ULONG_PTR lo = detour_2gb_below((ULONG_PTR)pbCode);
    ULONG_PTR hi = detour_2gb_above((ULONG_PTR)pbCode);
    DETOUR_TRACE(("[%p..%p..%p]\n", lo, pbCode, hi));

    if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [+imm32]
        // And, within +/- 2GB of relative jmp vectors.
        PBYTE pbVector = pbCode + 6 + *(UNALIGNED INT32 *)&pbCode[2];

        if (pbVector >= pbCode) {
            lo = detour_2gb_below((ULONG_PTR)pbVector);
        }
        else {
            hi = detour_2gb_above((ULONG_PTR)pbVector);
        }
        DETOUR_TRACE(("[%p..%p..%p] [+imm32]\n", lo, pbCode, hi));
    }
    else if (pbCode[0] == 0xe9) { // jmp +imm32
        // And, within +/- 2GB of relative jmp targets.
        PBYTE pbDest = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1];

        if (pbDest >= pbCode) {
            lo = detour_2gb_below((ULONG_PTR)pbDest);
        }
        else {
            hi = detour_2gb_above((ULONG_PTR)pbDest);
        }
        DETOUR_TRACE(("[%p..%p..%p] +imm32\n", lo, pbCode, hi));
    }

    *ppLower = (PDETOUR_TRAMPOLINE)lo;
    *ppUpper = (PDETOUR_TRAMPOLINE)hi;
}
// Returns TRUE when the bytes at pbCode match one of the patterns this file
// treats as ending a function (unconditional jumps, returns, breakpoint),
// FALSE otherwise. Reads at most three bytes.
inline BOOL detour_does_code_end_function(PBYTE pbCode)
{
    const BYTE bFirst = pbCode[0];

    switch (bFirst) {
      case 0xeb:    // jmp +imm8
      case 0xe9:    // jmp +imm32
      case 0xe0:    // historically labelled "jmp eax".
                    // NOTE(review): in the Intel opcode map a bare 0xE0 is
                    // LOOPNE and `jmp rax` is FF E0 -- confirm the intent.
      case 0xc2:    // ret imm16
      case 0xc3:    // ret
      case 0xcc:    // brk
        return TRUE;

      case 0xf3:    // rep ret
        return (pbCode[1] == 0xc3) ? TRUE : FALSE;

      case 0xff:    // jmp [+imm32]
        return (pbCode[1] == 0x25) ? TRUE : FALSE;

      case 0x26:    // jmp es:
      case 0x2e:    // jmp cs:
      case 0x36:    // jmp ss:
      case 0x3e:    // jmp ds:
      case 0x64:    // jmp fs:
      case 0x65:    // jmp gs:
        // Segment prefix followed by jmp [+imm32].
        return (pbCode[1] == 0xff && pbCode[2] == 0x25) ? TRUE : FALSE;

      default:
        return FALSE;
    }
}
// Recognizes padding at pbCode. Returns the length in bytes of the filler
// instruction found there (one of the 1- to 11-byte NOP encodings, or a single
// int 3), or 0 when the bytes are not a recognized filler.
inline ULONG detour_is_code_filler(PBYTE pbCode)
{
    // Patterns are tried in declaration order. Each one is compared byte by
    // byte and dropped at the first mismatch, so no byte past a mismatch is
    // ever read.
    static const struct {
        BYTE cb;        // length of the pattern and value returned on a match.
        BYTE rb[11];    // pattern bytes (only the first cb are meaningful).
    } s_rPatterns[] = {
        // 1-byte through 11-byte NOPs.
        {  1, { 0x90 } },
        {  2, { 0x66, 0x90 } },
        {  3, { 0x0F, 0x1F, 0x00 } },
        {  4, { 0x0F, 0x1F, 0x40, 0x00 } },
        {  5, { 0x0F, 0x1F, 0x44, 0x00, 0x00 } },
        {  6, { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 } },
        {  7, { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 } },
        {  8, { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        {  9, { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        { 10, { 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        { 11, { 0x66, 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 } },
        // int 3.
        {  1, { 0xcc } },
    };
    const ULONG cPatterns = sizeof(s_rPatterns) / sizeof(s_rPatterns[0]);

    for (ULONG iPattern = 0; iPattern < cPatterns; iPattern++) {
        const ULONG cbPattern = s_rPatterns[iPattern].cb;
        ULONG cbMatched = 0;

        while (cbMatched < cbPattern &&
               pbCode[cbMatched] == s_rPatterns[iPattern].rb[cbMatched]) {
            cbMatched++;
        }
        if (cbMatched == cbPattern) {
            return cbPattern;
        }
    }
    return 0;
}
#endif // DETOURS_X64
//////////////////////////////////////////////////////////////////////// IA64.
//
#ifdef DETOURS_IA64
// IA64 trampoline record. Unlike the other architectures it contains code for
// both directions: the outgoing path (moved target bundle plus a branch back
// into the target) and the incoming path (a small frame that saves gp, calls
// the detour and restores gp). The size is fixed by the C_ASSERTs below.
struct _DETOUR_TRAMPOLINE
{
// On the IA64, a trampoline is used for both incoming and outgoing calls.
//
// The trampoline contains the following bundles for the outgoing call:
// movl gp=target_gp;
// <relocated target bundle>
// brl target_code;
//
// The trampoline contains the following bundles for the incoming call:
// alloc r41=ar.pfs, b, 0, 8, 0
// mov r40=rp
//
// adds r50=0, r39
// adds r49=0, r38
// adds r48=0, r37 ;;
//
// adds r47=0, r36
// adds r46=0, r35
// adds r45=0, r34
//
// adds r44=0, r33
// adds r43=0, r32
// adds r42=0, gp ;;
//
// movl gp=ffffffff`ffffffff ;;
//
// brl.call.sptk.few rp=disas!TestCodes+20e0 (00000000`00404ea0) ;;
//
// adds gp=0, r42
// mov rp=r40, +0 ;;
// mov.i ar.pfs=r41
//
// br.ret.sptk.many rp ;;
//
// This way, we only have to relocate a single bundle.
//
// The complicated incoming trampoline is required because we have to
// create an additional stack frame so that we save and restore the gp.
// We must do this because gp is a caller-saved register, but not saved
// if the caller thinks the target is in the same DLL, which changes
// when we insert a detour.
//
DETOUR_IA64_BUNDLE bMovlTargetGp; // Bundle which sets target GP
BYTE rbCode[sizeof(DETOUR_IA64_BUNDLE)]; // moved bundle.
DETOUR_IA64_BUNDLE bBrlRemainEip; // Brl to pbRemain
// This must be adjacent to bBranchIslands.
// Each instruction in the moved bundle could be a IP-relative chk or branch or call.
// Any such instructions are changed to point to a brl in bBranchIslands.
// This must be adjacent to bBrlRemainEip -- see "pbPool".
DETOUR_IA64_BUNDLE bBranchIslands[DETOUR_IA64_INSTRUCTIONS_PER_BUNDLE];
// Target of brl inserted in target function
DETOUR_IA64_BUNDLE bAllocFrame; // alloc frame
DETOUR_IA64_BUNDLE bSave37to39; // save r37, r38, r39.
DETOUR_IA64_BUNDLE bSave34to36; // save r34, r35, r36.
DETOUR_IA64_BUNDLE bSaveGPto33; // save gp, r32, r33.
DETOUR_IA64_BUNDLE bMovlDetourGp; // set detour GP.
DETOUR_IA64_BUNDLE bCallDetour; // call detour.
DETOUR_IA64_BUNDLE bPopFrameGp; // pop frame and restore gp.
DETOUR_IA64_BUNDLE bReturn; // return to caller.
PLABEL_DESCRIPTOR pldTrampoline;
BYTE rbRestore[sizeof(DETOUR_IA64_BUNDLE)]; // original target bundle.
BYTE cbRestore; // size of original target code.
BYTE cbCode; // size of moved target code.
_DETOUR_ALIGN rAlign[14]; // instruction alignment array.
PBYTE pbRemain; // first instruction after moved code. [free list]
PBYTE pbDetour; // first instruction of detour function.
PPLABEL_DESCRIPTOR ppldDetour; // [pbDetour,gpDetour]
PPLABEL_DESCRIPTOR ppldTarget; // [pbTarget,gpDetour]
};
// A bundle is 16 bytes; the trampoline is 256 bytes plus one 16-byte branch
// island per instruction slot in a bundle.
C_ASSERT(sizeof(DETOUR_IA64_BUNDLE) == 16);
C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 256 + DETOUR_IA64_INSTRUCTIONS_PER_BUNDLE * 16);
// SIZE_OF_JMP: on IA64 the jump occupies exactly one instruction bundle.
enum {
SIZE_OF_JMP = sizeof(DETOUR_IA64_BUNDLE)
};
// Resolves an IA64 function pointer to its code address and global pointer,
// following a local import jump stub when one is present.
//
// pPointer  - pointer to a PLABEL_DESCRIPTOR (entry point + global pointer);
//             may be NULL.
// ppGlobals - optional out parameter that receives the global pointer that
//             goes with the returned code address (NULL when pPointer is NULL).
//
// Returns the entry point read from the descriptor, or, when the code at that
// entry point is the three-bundle import jump described below and its slot is
// in the module's IAT, the entry point of the descriptor stored in that slot.
// Returns NULL when pPointer or its entry point is NULL.
inline PBYTE detour_skip_jmp(PBYTE pPointer, PVOID *ppGlobals)
{
PBYTE pGlobals = NULL;
PBYTE pbCode = NULL;
if (pPointer != NULL) {
PPLABEL_DESCRIPTOR ppld = (PPLABEL_DESCRIPTOR)pPointer;
pbCode = (PBYTE)ppld->EntryPoint;
pGlobals = (PBYTE)ppld->GlobalPointer;
}
if (ppGlobals != NULL) {
*ppGlobals = pGlobals;
}
if (pbCode == NULL) {
return NULL;
}
DETOUR_IA64_BUNDLE *pb = (DETOUR_IA64_BUNDLE *)pbCode;
// IA64 Local Import Jumps look like:
// addl r2=ffffffff`ffe021c0, gp ;;
// ld8 r2=[r2]
// nop.i 0 ;;
//
// ld8 r3=[r2], 8 ;;
// ld8 gp=[r2]
// mov b6=r3, +0
//
// nop.m 0
// nop.i 0
// br.cond.sptk.few b6
//
// 002024000200100b
// The first word is compared with the addl immediate bits masked out; the
// remaining five words must match exactly.
if ((pb[0].wide[0] & 0xfffffc000603ffff) == 0x002024000200100b &&
pb[0].wide[1] == 0x0004000000203008 &&
pb[1].wide[0] == 0x001014180420180a &&
pb[1].wide[1] == 0x07000830c0203008 &&
pb[2].wide[0] == 0x0000000100000010 &&
pb[2].wide[1] == 0x0080006000000200) {
// Reassemble the gp-relative offset from the three immediate fields.
ULONG64 offset =
((pb[0].wide[0] & 0x0000000001fc0000) >> 18) | // imm7b
((pb[0].wide[0] & 0x000001ff00000000) >> 25) | // imm9d
((pb[0].wide[0] & 0x00000000f8000000) >> 11); // imm5c
if (pb[0].wide[0] & 0x0000020000000000) { // sign
// Sign-extend the 21-bit magnitude assembled above.
offset |= 0xffffffffffe00000;
}
PBYTE pbTarget = pGlobals + offset;
DETOUR_TRACE(("%p: potential import jump, target=%p\n", pb, pbTarget));
if (detour_is_imported(pbCode, pbTarget) && *(PBYTE*)pbTarget != NULL) {
DETOUR_TRACE(("%p: is import jump, label=%p\n", pb, *(PBYTE *)pbTarget));
// The IAT slot holds a pointer to the imported function's descriptor.
PPLABEL_DESCRIPTOR ppld = (PPLABEL_DESCRIPTOR)*(PBYTE *)pbTarget;
pbCode = (PBYTE)ppld->EntryPoint;
pGlobals = (PBYTE)ppld->GlobalPointer;
if (ppGlobals != NULL) {
*ppGlobals = pGlobals;
}
}
}
return pbCode;
}
// Reports the address window in which a trampoline may be placed. On IA64 the
// window is a fixed range that does not depend on pbCode.
inline void detour_find_jmp_bounds(PBYTE pbCode,
                                   PDETOUR_TRAMPOLINE *ppLower,
                                   PDETOUR_TRAMPOLINE *ppUpper)
{
    const ULONG_PTR lo = (ULONG_PTR)0x0000000000080000;
    const ULONG_PTR hi = (ULONG_PTR)0xfffffffffff80000;

    (void)pbCode; // unused on this architecture.

    *ppLower = (PDETOUR_TRAMPOLINE)lo;
    *ppUpper = (PDETOUR_TRAMPOLINE)hi;
}
// Stub kept for parity with the other architectures: on IA64 this routine is
// not needed, so it never reports an end of function.
inline BOOL detour_does_code_end_function(PBYTE pbCode)
{
    const BOOL fEndsFunction = FALSE;

    (void)pbCode; // unused on this architecture.
    return fEndsFunction;
}
// Stub kept for parity with the other architectures: on IA64 this routine is
// not needed, so it never reports filler (always returns a length of 0).
inline ULONG detour_is_code_filler(PBYTE pbCode)
{
    const ULONG cbFiller = 0;

    (void)pbCode; // unused on this architecture.
    return cbFiller;
}
#endif // DETOURS_IA64
#ifdef DETOURS_ARM
// ARM (Thumb-2) trampoline record. Holds the instructions moved out of the
// target function (followed by a jump to the rest of the target), a copy of
// the original target bytes, and the pointers that tie target and detour
// together. The layout is fixed at 104 bytes (see the C_ASSERT below).
struct _DETOUR_TRAMPOLINE
{
// A Thumb-2 instruction can be 2 or 4 bytes long.
BYTE rbCode[62]; // target code + jmp to pbRemain
BYTE cbCode; // size of moved target code.
BYTE cbCodeBreak; // padding to make debugging easier.
BYTE rbRestore[22]; // original target code.
BYTE cbRestore; // size of original target code.
BYTE cbRestoreBreak; // padding to make debugging easier.
_DETOUR_ALIGN rAlign[8]; // instruction alignment array.
PBYTE pbRemain; // first instruction after moved code. [free list]
PBYTE pbDetour; // first instruction of detour function.
};
// Guards the layout above against accidental size changes.
C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 104);
// SIZE_OF_JMP: byte length reserved for a jump on ARM.
// NOTE(review): presumably the 4-byte `ldr pc, [pc, #n]` plus its 4-byte
// inline literal as written by detour_gen_jmp_immediate -- confirm.
enum {
SIZE_OF_JMP = 8
};
// Rounds pValue down to the nearest multiple of 4 by clearing its two low
// address bits.
inline PBYTE align4(PBYTE pValue)
{
    ULONG address = (ULONG)pValue;

    address &= ~(ULONG)3u;
    return (PBYTE)address;
}
// Reads one Thumb instruction at pbCode. A first halfword below 0xe800 is
// returned on its own; otherwise the second halfword is read as well and the
// result is (first << 16) | second.
inline ULONG fetch_thumb_opcode(PBYTE pbCode)
{
    UINT16 *pwCode = (UINT16 *)pbCode;
    ULONG first = pwCode[0];

    if (first < 0xe800) {
        return first;
    }
    return (first << 16) | pwCode[1];
}
// Writes one Thumb instruction at pbCode and advances pbCode past it.
// Opcodes of 0x10000 or more are written as two halfwords, high halfword
// first; smaller opcodes are written as a single halfword.
inline void write_thumb_opcode(PBYTE &pbCode, ULONG Opcode)
{
    UINT16 *pwCode = (UINT16 *)pbCode;

    if (Opcode >= 0x10000) {
        *pwCode++ = (UINT16)(Opcode >> 16);
    }
    *pwCode++ = (UINT16)Opcode;

    pbCode = (PBYTE)pwCode;
}
// Writes a jump to pbJmpVal at pbCode as `ldr pc, [pc, #n]` plus a 4-byte
// literal holding the destination.
//
// pbCode   - where the instruction is written.
// ppPool   - optional literal pool cursor. When non-NULL, the pool pointer is
//            moved down by 4 and the literal is stored there. When NULL, the
//            literal is stored inline right after the instruction, at the next
//            4-byte aligned address.
// pbJmpVal - jump destination (converted with DETOURS_PBYTE_TO_PFUNC).
//
// Returns the address after everything written at pbCode: just past the
// instruction when a pool is used, or past the inline literal otherwise.
PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE *ppPool, PBYTE pbJmpVal)
{
PBYTE pbLiteral;
if (ppPool != NULL) {
// Take the next 4-byte slot from the pool, which grows downward.
*ppPool = *ppPool - 4;
pbLiteral = *ppPool;
}
else {
// Inline literal: first 4-byte aligned address at or after pbCode + 4.
pbLiteral = align4(pbCode + 6);
}
*((PBYTE*&)pbLiteral) = DETOURS_PBYTE_TO_PFUNC(pbJmpVal);
// The load offset is measured from the 4-byte aligned value of pbCode + 4.
LONG delta = pbLiteral - align4(pbCode + 4);
write_thumb_opcode(pbCode, 0xf8dff000 | delta); // LDR PC,[PC+n]
if (ppPool == NULL) {
// If the instruction ended on a 2-byte boundary, pad with one BREAK so
// that pbCode lands on the inline literal.
if (((ULONG)pbCode & 2) != 0) {
write_thumb_opcode(pbCode, 0xdefe); // BREAK
}
// Step over the inline literal.
pbCode += 4;
}
return pbCode;
}
// Pads from pbCode up to pbLimit with 0xdefe (the BREAK opcode used elsewhere
// in this file), two bytes at a time. Returns the address after the last
// opcode written, or pbCode unchanged when it is already at or past pbLimit.
inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit)
{
    for (;;) {
        if (pbCode >= pbLimit) {
            break;
        }
        write_thumb_opcode(pbCode, 0xdefe);
    }
    return pbCode;
}
// Follows an import jump stub, if one is present, and returns the address of
// the code it leads to.
//
// pbCode    - function pointer to start from; NULL yields NULL. It is first
//             converted with DETOURS_PFUNC_TO_PBYTE.
// ppGlobals - optional out parameter; when non-NULL it is always set to NULL
//             on this architecture.
//
// The stub recognized is `movw r12,#lo ; movt r12,#hi ; ldr pc,[r12]` where
// the address built in r12 lies in the module's IAT (per detour_is_imported).
// In that case the pointer stored in the IAT slot is returned (after
// DETOURS_PFUNC_TO_PBYTE); otherwise the converted pbCode is returned.
inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
{
if (pbCode == NULL) {
return NULL;
}
if (ppGlobals != NULL) {
*ppGlobals = NULL;
}
// Skip over the import jump if there is one.
pbCode = (PBYTE)DETOURS_PFUNC_TO_PBYTE(pbCode);
ULONG Opcode = fetch_thumb_opcode(pbCode);
if ((Opcode & 0xfbf08f00) == 0xf2400c00) { // movw r12,#xxxx
ULONG Opcode2 = fetch_thumb_opcode(pbCode+4);
if ((Opcode2 & 0xfbf08f00) == 0xf2c00c00) { // movt r12,#xxxx
ULONG Opcode3 = fetch_thumb_opcode(pbCode+8);
if (Opcode3 == 0xf8dcf000) { // ldr pc,[r12]
// Reassemble the 32-bit address from the scattered immediate fields:
// the high 16 bits come from the movt, the low 16 bits from the movw.
PBYTE pbTarget = (PBYTE)(((Opcode2 << 12) & 0xf7000000) |
((Opcode2 << 1) & 0x08000000) |
((Opcode2 << 16) & 0x00ff0000) |
((Opcode >> 4) & 0x0000f700) |
((Opcode >> 15) & 0x00000800) |
((Opcode >> 0) & 0x000000ff));
if (detour_is_imported(pbCode, pbTarget)) {
PBYTE pbNew = *(PBYTE *)pbTarget;
pbNew = DETOURS_PFUNC_TO_PBYTE(pbNew);
DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
return pbNew;
}
}
}
}
return pbCode;
}
// Computes the address window in which a trampoline for pbCode may be placed:
// [detour_2gb_below(pbCode), detour_2gb_above(pbCode)]. Results go to
// *ppLower / *ppUpper.
inline void detour_find_jmp_bounds(PBYTE pbCode,
                                   PDETOUR_TRAMPOLINE *ppLower,
                                   PDETOUR_TRAMPOLINE *ppUpper)
{
    const ULONG_PTR origin = (ULONG_PTR)pbCode;

    // We have to place trampolines within +/- 2GB of code.
    ULONG_PTR lo = detour_2gb_below(origin);
    ULONG_PTR hi = detour_2gb_above(origin);
    DETOUR_TRACE(("[%p..%p..%p]\n", lo, pbCode, hi));

    *ppLower = (PDETOUR_TRAMPOLINE)lo;
    *ppUpper = (PDETOUR_TRAMPOLINE)hi;
}
// Returns TRUE when the Thumb instruction at pbCode is one this file treats as
// ending a function: `bx <reg>`, `b <imm20>`, or either encoding of
// `pop {...,pc}`. Returns FALSE otherwise.
//
// This is a pure query. The `pop {...,pc}` cases used to call __debugbreak()
// before returning, which raises a breakpoint exception in the host process
// whenever such an instruction is examined; that leftover debugging aid has
// been removed and the return values are unchanged.
inline BOOL detour_does_code_end_function(PBYTE pbCode)
{
    ULONG Opcode = fetch_thumb_opcode(pbCode);

    if ((Opcode & 0xffffff87) == 0x4700 ||          // bx <reg>
        (Opcode & 0xf800d000) == 0xf0009000) {      // b <imm20>
        return TRUE;
    }
    if ((Opcode & 0xffff8000) == 0xe8bd8000) {      // pop {...,pc} (32-bit form)
        return TRUE;
    }
    if ((Opcode & 0xffffff00) == 0x0000bd00) {      // pop {...,pc} (16-bit form)
        return TRUE;
    }
    return FALSE;
}
// Recognizes padding at pbCode. Returns 2 when the two bytes there are a nop
// (00 bf) or zero-filled padding (00 00), and 0 otherwise.
inline ULONG detour_is_code_filler(PBYTE pbCode)
{
    // Both filler forms start with a zero byte; the second byte is only read
    // when the first one matches.
    if (pbCode[0] != 0x00) {
        return 0;
    }

    switch (pbCode[1]) {
      case 0xbf:    // nop.
      case 0x00:    // zero-filled padding.
        return 2;
      default:
        return 0;
    }
}
#endif // DETOURS_ARM
#ifdef DETOURS_ARM64
// ARM64 trampoline record. Holds the instructions moved out of the target
// function (possibly expanded, followed by a jump to the rest of the target),
// a copy of the original target bytes, and the pointers that tie target and
// detour together. The layout is fixed at 184 bytes (see the C_ASSERT below).
struct _DETOUR_TRAMPOLINE
{
// An ARM64 instruction is 4 bytes long.
//
// The overwrite is always composed of 3 instructions (12 bytes) which perform an indirect jump
// using _DETOUR_TRAMPOLINE::pbDetour as the address holding the target location.
//
// Copied instructions can expand.
//
// The scheme using MovImmediate can cause an instruction
// to grow as much as 6 times.
// That would be Bcc or Tbz with a large address space:
// 4 instructions to form immediate
// inverted tbz/bcc
// br
//
// An expansion of 4 is not uncommon -- bl/blr and small address space:
// 3 instructions to form immediate
// br or brl
//
// A theoretical maximum for rbCode is therefore 4*4*6 + 16 = 112 (another 16 for jmp to pbRemain).
//
// With literals, the maximum expansion is 5, including the literals: 4*4*5 + 16 = 96.
//
// The number is rounded up to 128. m_rbScratchDst should match this.
//
BYTE rbCode[128]; // target code + jmp to pbRemain
BYTE cbCode; // size of moved target code.
BYTE cbCodeBreak[3]; // padding to make debugging easier.
BYTE rbRestore[24]; // original target code.
BYTE cbRestore; // size of original target code.
BYTE cbRestoreBreak[3]; // padding to make debugging easier.
_DETOUR_ALIGN rAlign[8]; // instruction alignment array.
PBYTE pbRemain; // first instruction after moved code. [free list]
PBYTE pbDetour; // first instruction of detour function.
};
// Guards the layout above against accidental size changes.
C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 184);
// SIZE_OF_JMP: byte length of the three 4-byte instructions (adrp / ldr / br)
// written by detour_gen_jmp_indirect.
enum {
SIZE_OF_JMP = 12
};
// Reads the 32-bit instruction word stored at pbCode.
inline ULONG fetch_opcode(PBYTE pbCode)
{
    const ULONG *pdwCode = (const ULONG *)pbCode;

    return pdwCode[0];
}
// Stores the 32-bit instruction word Opcode at pbCode and advances pbCode by
// four bytes.
inline void write_opcode(PBYTE &pbCode, ULONG Opcode)
{
    ULONG *pdwCode = (ULONG *)pbCode;

    *pdwCode = Opcode;
    pbCode = (PBYTE)(pdwCode + 1);
}
// Bit-field overlay for the three-instruction indirect jump written by
// detour_gen_jmp_indirect (adrp / ldr / br). Each member covers one 32-bit
// instruction word, with fields listed from the least significant bit upward.
struct ARM64_INDIRECT_JMP {
// First word: the adrp instruction.
struct {
ULONG Rd : 5;
ULONG immhi : 19;
ULONG iop : 5;
ULONG immlo : 2;
ULONG op : 1;
} ardp;
// Second word: the ldr instruction.
struct {
ULONG Rt : 5;
ULONG Rn : 5;
ULONG imm : 12;
ULONG opc : 2;
ULONG iop1 : 2;
ULONG V : 1;
ULONG iop2 : 3;
ULONG size : 2;
} ldr;
// Third word: the br instruction, stored as a complete opcode.
ULONG br;
};
// Warning 4201 is disabled around the anonymous struct inside the union.
#pragma warning(push)
#pragma warning(disable:4201)
// Splits a 64-bit page delta into the adrp immediate fields: the low 12 bits
// are skipped (pad), the next 2 bits are immlo and the following 19 bits are
// immhi. `value` gives access to the same bits as one signed integer.
union ARM64_INDIRECT_IMM {
struct {
ULONG64 pad : 12;
ULONG64 adrp_immlo : 2;
ULONG64 adrp_immhi : 19;
};
LONG64 value;
};
#pragma warning(pop)
// Writes a three-instruction indirect jump at pbCode that loads the 64-bit
// destination from the slot pbJmpVal and branches to it through x17.
//
// pbCode   - where the 12 bytes of code are written.
// pbJmpVal - address of the 64-bit slot holding the jump destination.
//
// Returns the address just past the three instructions.
PBYTE detour_gen_jmp_indirect(BYTE *pbCode, ULONG64 *pbJmpVal)
{
// adrp x17, [jmpval]
// ldr x17, [x17, jmpval]
// br x17
struct ARM64_INDIRECT_JMP *pIndJmp;
union ARM64_INDIRECT_IMM jmpIndAddr;
// Distance between the 4KB page of the slot and the 4KB page of the code.
jmpIndAddr.value = (((LONG64)pbJmpVal) & 0xFFFFFFFFFFFFF000) -
(((LONG64)pbCode) & 0xFFFFFFFFFFFFF000);
pIndJmp = (struct ARM64_INDIRECT_JMP *)pbCode;
pbCode = (BYTE *)(pIndJmp + 1);
// adrp x17, <page of pbJmpVal>
pIndJmp->ardp.Rd = 17;
pIndJmp->ardp.immhi = jmpIndAddr.adrp_immhi;
pIndJmp->ardp.iop = 0x10;
pIndJmp->ardp.immlo = jmpIndAddr.adrp_immlo;
pIndJmp->ardp.op = 1;
// ldr x17, [x17, #offset]; the offset within the page is stored divided by 8.
pIndJmp->ldr.Rt = 17;
pIndJmp->ldr.Rn = 17;
pIndJmp->ldr.imm = (((ULONG64)pbJmpVal) & 0xFFF) / 8;
pIndJmp->ldr.opc = 1;
pIndJmp->ldr.iop1 = 1;
pIndJmp->ldr.V = 0;
pIndJmp->ldr.iop2 = 7;
pIndJmp->ldr.size = 3;
// br x17
pIndJmp->br = 0xD61F0220;
return pbCode;
}
// Writes a jump to pbJmpVal at pbCode as `ldr x17, <literal>` + `br x17`, with
// the 8-byte destination stored in a literal slot.
//
// pbCode   - where the two instructions are written.
// ppPool   - optional literal pool cursor. When non-NULL, the pool pointer is
//            moved down by 8 and the literal is stored there. When NULL, the
//            literal is stored inline, right after the two instructions.
// pbJmpVal - jump destination.
//
// Returns the address after everything written at pbCode: just past the two
// instructions when a pool is used, or past the inline literal otherwise.
PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE *ppPool, PBYTE pbJmpVal)
{
    const BOOL fInlineLiteral = (ppPool == NULL);
    PBYTE pbSlot;

    if (fInlineLiteral) {
        pbSlot = pbCode + 8;
    }
    else {
        *ppPool -= 8;
        pbSlot = *ppPool;
    }

    // Store the destination first, then the code that loads it.
    *(PBYTE *)pbSlot = pbJmpVal;

    LONG delta = (LONG)(pbSlot - pbCode);
    write_opcode(pbCode, 0x58000011 | ((delta / 4) << 5)); // LDR X17,[PC+n]
    write_opcode(pbCode, 0xd61f0000 | (17 << 5));          // BR X17

    if (fInlineLiteral) {
        // Step over the inline literal.
        pbCode += 8;
    }
    return pbCode;
}
// Pads from pbCode up to pbLimit with the 32-bit opcode
// 0xd4100000 | (0xf000 << 5), four bytes at a time. Returns the address after
// the last opcode written, or pbCode unchanged when it is already at or past
// pbLimit.
inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit)
{
    for (;;) {
        if (pbCode >= pbLimit) {
            break;
        }
        write_opcode(pbCode, 0xd4100000 | (0xf000 << 5));
    }
    return pbCode;
}
// Sign-extends the low `bits` bits of `value` to a signed 64-bit integer.
//
// value - holds the field to extend in its low `bits` bits.
// bits  - width of the field. For 1..63, bit (bits - 1) is taken as the sign:
//         when it is set, every bit from `bits` upward is set in the result.
//         For 0 or for 64 and above there is nothing to extend and `value` is
//         returned as is.
//
// All shifting is done on unsigned values with counts below 64. The earlier
// form left-shifted a negative signed value and shifted by 64 when bits was 0.
inline INT64 detour_sign_extend(UINT64 value, UINT bits)
{
    if (bits == 0 || bits >= 64) {
        return (INT64)value;
    }

    const UINT64 signBit = (UINT64)1 << (bits - 1);
    if ((value & signBit) == 0) {
        return (INT64)value;
    }

    // Set every bit above the field.
    const UINT64 highOnes = ~(UINT64)0 << bits;
    return (INT64)(value | highOnes);
}
// Follows an import jump stub, if one is present, and returns the address of
// the code it leads to.
//
// pbCode    - address to start from; NULL yields NULL.
// ppGlobals - optional out parameter; when non-NULL it is always set to NULL
//             on this architecture.
//
// The stub recognized is `adrp x16, page ; ldr x16, [x16, off] ; br x16` where
// the computed slot address lies in the module's IAT (per detour_is_imported).
// In that case the pointer stored in the slot is returned; otherwise pbCode is
// returned unchanged.
inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
{
if (pbCode == NULL) {
return NULL;
}
if (ppGlobals != NULL) {
*ppGlobals = NULL;
}
// Skip over the import jump if there is one.
// (The cast below has no effect; pbCode is already a PBYTE.)
pbCode = (PBYTE)pbCode;
ULONG Opcode = fetch_opcode(pbCode);
if ((Opcode & 0x9f00001f) == 0x90000010) { // adrp x16, IAT
ULONG Opcode2 = fetch_opcode(pbCode + 4);
if ((Opcode2 & 0xffe003ff) == 0xf9400210) { // ldr x16, [x16, IAT]
ULONG Opcode3 = fetch_opcode(pbCode + 8);
if (Opcode3 == 0xd61f0200) { // br x16
/* https://static.docs.arm.com/ddi0487/bb/DDI0487B_b_armv8_arm.pdf
The ADRP instruction shifts a signed, 21-bit immediate left by 12 bits, adds it to the value of the program counter with
the bottom 12 bits cleared to zero, and then writes the result to a general-purpose register. This permits the
calculation of the address at a 4KB aligned memory region. In conjunction with an ADD (immediate) instruction, or
a Load/Store instruction with a 12-bit immediate offset, this allows for the calculation of, or access to, any address
within +/- 4GB of the current PC.
PC-rel. addressing
This section describes the encoding of the PC-rel. addressing instruction class. The encodings in this section are
decoded from Data Processing -- Immediate on page C4-226.
Add/subtract (immediate)
This section describes the encoding of the Add/subtract (immediate) instruction class. The encodings in this section
are decoded from Data Processing -- Immediate on page C4-226.
Decode fields
Instruction page
op
0 ADR
1 ADRP
C6.2.10 ADRP
Form PC-relative address to 4KB page adds an immediate value that is shifted left by 12 bits, to the PC value to
form a PC-relative address, with the bottom 12 bits masked out, and writes the result to the destination register.
ADRP <Xd>, <label>
imm = SignExtend(immhi:immlo:Zeros(12), 64);
31 30 29 28 27 26 25 24 23 5 4 0
1 immlo 1 0 0 0 0 immhi Rd
9 0
Rd is hardcoded as 0x10 above.
Immediate is 21 signed bits split into 2 bits and 19 bits, and is scaled by 4K.
*/
// Rebuild the signed 21-bit page count (immhi:immlo) and scale it by 4KB.
UINT64 const pageLow2 = (Opcode >> 29) & 3;
UINT64 const pageHigh19 = (Opcode >> 5) & ~(~0ui64 << 19);
INT64 const page = detour_sign_extend((pageHigh19 << 2) | pageLow2, 21) << 12;
/* https://static.docs.arm.com/ddi0487/bb/DDI0487B_b_armv8_arm.pdf
C6.2.101 LDR (immediate)
Load Register (immediate) loads a word or doubleword from memory and writes it to a register. The address that is
used for the load is calculated from a base register and an immediate offset.
The Unsigned offset variant scales the immediate offset value by the size of the value accessed before adding it
to the base register value.
Unsigned offset
64-bit variant Applies when size == 11.
31 30 29 28 27 26 25 24 23 22 21 10 9 5 4 0
1 x 1 1 1 0 0 1 0 1 imm12 Rn Rt
F 9 4 200 10
That is, two low 5 bit fields are registers, hardcoded as 0x10 and 0x10 << 5 above,
then unsigned size-unscaled (8) 12-bit offset, then opcode bits 0xF94.
*/
// The 12-bit load offset is stored in units of 8 bytes.
UINT64 const offset = ((Opcode2 >> 10) & ~(~0ui64 << 12)) << 3;
// Slot address = page of the adrp instruction + page delta + offset in page.
PBYTE const pbTarget = (PBYTE)((ULONG64)pbCode & 0xfffffffffffff000ULL) + page + offset;
if (detour_is_imported(pbCode, pbTarget)) {
PBYTE pbNew = *(PBYTE *)pbTarget;
DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
return pbNew;
}
}
}
}
return pbCode;
}
// Computes the address window in which a trampoline for pbCode may be placed:
// [detour_2gb_below(pbCode), detour_2gb_above(pbCode)]. Results go to
// *ppLower / *ppUpper.
inline void detour_find_jmp_bounds(PBYTE pbCode,
                                   PDETOUR_TRAMPOLINE *ppLower,
                                   PDETOUR_TRAMPOLINE *ppUpper)
{
    // The encoding used by detour_gen_jmp_indirect actually enables a
    // displacement of +/- 4GiB. In the future, this could be changed to
    // reflect that. For now, just reuse the x86 logic which is plenty.
    const ULONG_PTR origin = (ULONG_PTR)pbCode;

    ULONG_PTR lo = detour_2gb_below(origin);
    ULONG_PTR hi = detour_2gb_above(origin);
    DETOUR_TRACE(("[%p..%p..%p]\n", lo, pbCode, hi));

    *ppLower = (PDETOUR_TRAMPOLINE)lo;
    *ppUpper = (PDETOUR_TRAMPOLINE)hi;
}
// Returns TRUE when the instruction word at pbCode matches one of the two
// patterns this file treats as ending a function, FALSE otherwise.
inline BOOL detour_does_code_end_function(PBYTE pbCode)
{
    const ULONG Opcode = fetch_opcode(pbCode);

    // Historically commented "br <reg>".
    // NOTE(review): the `br` opcodes written elsewhere in this file are
    // 0xd61f....; 0xd65f0000 looks like the `ret <reg>` encoding -- confirm
    // which instruction is intended here.
    if ((Opcode & 0xfffffc1f) == 0xd65f0000) {
        return TRUE;
    }

    // b <imm26>
    if ((Opcode & 0xfc000000) == 0x14000000) {
        return TRUE;
    }

    return FALSE;
}
// Recognizes padding at pbCode. Returns 4 when the instruction word there is a
// nop (0xd503201f) or zero-filled padding, and 0 otherwise.
inline ULONG detour_is_code_filler(PBYTE pbCode)
{
    switch (*(ULONG *)pbCode) {
      case 0xd503201f:  // nop.
      case 0x00000000:  // zero-filled padding.
        return 4;
      default:
        return 0;
    }
}
#endif // DETOURS_ARM64
//////////////////////////////////////////////// Trampoline Memory Management.
//
// Header of one block of trampoline memory. Regions are chained through pNext
// (the chain starts at s_pRegions), and each region keeps its own list of
// unused trampolines in pFree.
struct DETOUR_REGION
{
ULONG dwSignature;
DETOUR_REGION * pNext; // Next region in list of regions.
DETOUR_TRAMPOLINE * pFree; // List of free trampolines in this region.
};
typedef DETOUR_REGION * PDETOUR_REGION;
// Tag value for DETOUR_REGION::dwSignature.
const ULONG DETOUR_REGION_SIGNATURE = 'Rrtd';
// Size in bytes of one trampoline region (64KB).
const ULONG DETOUR_REGION_SIZE = 0x10000;
// One fewer than the number of trampoline-sized slots that fit in a region.
// NOTE(review): presumably the first slot is given to the DETOUR_REGION
// header -- confirm against the region allocation code.
const ULONG DETOUR_TRAMPOLINES_PER_REGION = (DETOUR_REGION_SIZE
/ sizeof(DETOUR_TRAMPOLINE)) - 1;
static PDETOUR_REGION s_pRegions = NULL; // List of all regions.
static PDETOUR_REGION s_pRegion = NULL; // Default region.
// Switches every known trampoline region to PAGE_EXECUTE_READWRITE.
// Returns NO_ERROR on success, or GetLastError() from the first
// VirtualProtect call that fails (later regions are then left untouched).
static DWORD detour_writable_trampoline_regions()
{
    PDETOUR_REGION pCurrent = s_pRegions;

    while (pCurrent != NULL) {
        DWORD dwPrevious;
        const BOOL fChanged = VirtualProtect(pCurrent, DETOUR_REGION_SIZE,
                                             PAGE_EXECUTE_READWRITE, &dwPrevious);
        if (!fChanged) {
            return GetLastError();
        }
        pCurrent = pCurrent->pNext;
    }
    return NO_ERROR;
}
static void detour_runnable_trampoline_regions()
{
HANDLE hProcess = GetCurrentProcess();
// Mark all of the regions as executable.
for (PDETOUR_REGION pRegion = s_pRegions; pRegion != NULL; pRegion = pRegion->pNext) {
DWORD dwOld;
VirtualProtect(pRegion, DETOUR_REGION_SIZE, PAGE_EXECUTE_READ, &dwOld);
FlushInstructionCache(hProcess, pRegion, DETOUR_REGION_SIZE);
}
}
// Rounds pbTry down to the nearest DETOUR_REGION_SIZE boundary.
static PBYTE detour_alloc_round_down_to_region(PBYTE pbTry)
{
    // WinXP64 returns free areas that aren't REGION aligned to 32-bit applications.
    const ULONG_PTR misalignment = ((ULONG_PTR)pbTry) & (DETOUR_REGION_SIZE - 1);

    // Subtracting zero is a no-op, so already-aligned addresses pass through.
    return pbTry - misalignment;
}
// Rounds pbTry up to the nearest DETOUR_REGION_SIZE boundary.
static PBYTE detour_alloc_round_up_to_region(PBYTE pbTry)
{
    // WinXP64 returns free areas that aren't REGION aligned to 32-bit applications.
    const ULONG_PTR misalignment = ((ULONG_PTR)pbTry) & (DETOUR_REGION_SIZE - 1);

    if (misalignment == 0) {
        return pbTry;
    }
    return pbTry + (DETOUR_REGION_SIZE - misalignment);
}
// Walks upward from pbLo (rounded up to a region boundary) towards pbHi and
// returns the first DETOUR_REGION_SIZE block that VirtualAlloc can commit as
// PAGE_EXECUTE_READWRITE, or NULL if none is found.
static PVOID detour_alloc_region_from_lo(PBYTE pbLo, PBYTE pbHi)
{
    PBYTE pbCandidate = detour_alloc_round_up_to_region(pbLo);

    DETOUR_TRACE((" Looking for free region in %p..%p from %p:\n", pbLo, pbHi, pbCandidate));

    while (pbCandidate < pbHi) {
        if (pbCandidate >= s_pSystemRegionLowerBound &&
            pbCandidate <= s_pSystemRegionUpperBound) {
            // Skip region reserved for system DLLs, but preserve address space entropy.
            pbCandidate += 0x08000000;
            continue;
        }

        MEMORY_BASIC_INFORMATION mbi;
        ZeroMemory(&mbi, sizeof(mbi));
        if (!VirtualQuery(pbCandidate, &mbi, sizeof(mbi))) {
            return NULL;
        }

        DETOUR_TRACE((" Try %p => %p..%p %6x\n",
                      pbCandidate,
                      mbi.BaseAddress,
                      (PBYTE)mbi.BaseAddress + mbi.RegionSize - 1,
                      mbi.State));

        if (mbi.State != MEM_FREE || mbi.RegionSize < DETOUR_REGION_SIZE) {
            // Not usable: jump to the first region boundary past this range.
            pbCandidate = detour_alloc_round_up_to_region((PBYTE)mbi.BaseAddress
                                                          + mbi.RegionSize);
            continue;
        }

        PVOID pvRegion = VirtualAlloc(pbCandidate,
                                      DETOUR_REGION_SIZE,
                                      MEM_COMMIT|MEM_RESERVE,
                                      PAGE_EXECUTE_READWRITE);
        if (pvRegion != NULL) {
            return pvRegion;
        }
        // Allocation failed even though the range looked free; try the next slot.
        pbCandidate += DETOUR_REGION_SIZE;
    }
    return NULL;
}
// Walks downward from just below pbHi (rounded down to a region boundary)
// towards pbLo and returns the first DETOUR_REGION_SIZE block that
// VirtualAlloc can commit as PAGE_EXECUTE_READWRITE, or NULL if none is found.
static PVOID detour_alloc_region_from_hi(PBYTE pbLo, PBYTE pbHi)
{
    PBYTE pbCandidate = detour_alloc_round_down_to_region(pbHi - DETOUR_REGION_SIZE);

    DETOUR_TRACE((" Looking for free region in %p..%p from %p:\n", pbLo, pbHi, pbCandidate));

    while (pbCandidate > pbLo) {
        DETOUR_TRACE((" Try %p\n", pbCandidate));

        if (pbCandidate >= s_pSystemRegionLowerBound &&
            pbCandidate <= s_pSystemRegionUpperBound) {
            // Skip region reserved for system DLLs, but preserve address space entropy.
            pbCandidate -= 0x08000000;
            continue;
        }

        MEMORY_BASIC_INFORMATION mbi;
        ZeroMemory(&mbi, sizeof(mbi));
        if (!VirtualQuery(pbCandidate, &mbi, sizeof(mbi))) {
            return NULL;
        }

        DETOUR_TRACE((" Try %p => %p..%p %6x\n",
                      pbCandidate,
                      mbi.BaseAddress,
                      (PBYTE)mbi.BaseAddress + mbi.RegionSize - 1,
                      mbi.State));

        if (mbi.State != MEM_FREE || mbi.RegionSize < DETOUR_REGION_SIZE) {
            // Not usable: step to the region boundary below this allocation.
            pbCandidate = detour_alloc_round_down_to_region((PBYTE)mbi.AllocationBase
                                                            - DETOUR_REGION_SIZE);
            continue;
        }

        PVOID pvRegion = VirtualAlloc(pbCandidate,
                                      DETOUR_REGION_SIZE,
                                      MEM_COMMIT|MEM_RESERVE,
                                      PAGE_EXECUTE_READWRITE);
        if (pvRegion != NULL) {
            return pvRegion;
        }
        // Allocation failed even though the range looked free; try the next slot down.
        pbCandidate -= DETOUR_REGION_SIZE;
    }
    return NULL;
}
// Tries a sequence of search windows around pbTarget, bounded by pLo/pHi, and
// returns the first freshly allocated region, or NULL if every search fails.
static PVOID detour_alloc_trampoline_allocate_new(PBYTE pbTarget,
                                                  PDETOUR_TRAMPOLINE pLo,
                                                  PDETOUR_TRAMPOLINE pHi)
{
    PVOID pvRegion = NULL;

    // NB: We must always also start the search at an offset from pbTarget
    //     in order to maintain ASLR entropy.
#if defined(DETOURS_64BIT)
    // Try looking 1GB below or lower.
    if (pbTarget > (PBYTE)0x40000000) {
        pvRegion = detour_alloc_region_from_hi((PBYTE)pLo, pbTarget - 0x40000000);
        if (pvRegion != NULL) {
            return pvRegion;
        }
    }
    // Try looking 1GB above or higher.
    if (pbTarget < (PBYTE)0xffffffff40000000) {
        pvRegion = detour_alloc_region_from_lo(pbTarget + 0x40000000, (PBYTE)pHi);
        if (pvRegion != NULL) {
            return pvRegion;
        }
    }
    // Try looking 1GB below or higher.
    if (pbTarget > (PBYTE)0x40000000) {
        pvRegion = detour_alloc_region_from_lo(pbTarget - 0x40000000, pbTarget);
        if (pvRegion != NULL) {
            return pvRegion;
        }
    }
    // Try looking 1GB above or lower.
    if (pbTarget < (PBYTE)0xffffffff40000000) {
        pvRegion = detour_alloc_region_from_hi(pbTarget, pbTarget + 0x40000000);
        if (pvRegion != NULL) {
            return pvRegion;
        }
    }
#endif

    // Try anything below.
    pvRegion = detour_alloc_region_from_hi((PBYTE)pLo, pbTarget);
    if (pvRegion != NULL) {
        return pvRegion;
    }

    // Try anything above.
    return detour_alloc_region_from_lo(pbTarget, (PBYTE)pHi);
}
// Allocates a DETOUR_REGION_SIZE block within jump range of pbTarget.
// On success returns the block and stores DETOUR_REGION_SIZE in
// *pcbAllocatedSize; on failure returns NULL and stores 0.
PVOID WINAPI DetourAllocateRegionWithinJumpBounds(_In_ LPCVOID pbTarget,
                                                  _Out_ PDWORD pcbAllocatedSize)
{
    PDETOUR_TRAMPOLINE pLower;
    PDETOUR_TRAMPOLINE pUpper;
    detour_find_jmp_bounds((PBYTE)pbTarget, &pLower, &pUpper);

    PVOID pvRegion = detour_alloc_trampoline_allocate_new((PBYTE)pbTarget, pLower, pUpper);
    if (pvRegion != NULL) {
        *pcbAllocatedSize = DETOUR_REGION_SIZE;
        return pvRegion;
    }

    DETOUR_TRACE(("Couldn't find available memory region!\n"));
    *pcbAllocatedSize = 0;
    return NULL;
}
// Hands out one trampoline slot located within jump range of pbTarget.
// Search order: the default region (s_pRegion), then every existing region,
// and finally a newly allocated region. The returned slot is filled with
// 0xcc bytes. Returns NULL if no suitable slot or region can be obtained.
static PDETOUR_TRAMPOLINE detour_alloc_trampoline(PBYTE pbTarget)
{
// We have to place trampolines within +/- 2GB of target.
PDETOUR_TRAMPOLINE pLo;
PDETOUR_TRAMPOLINE pHi;
detour_find_jmp_bounds(pbTarget, &pLo, &pHi);
PDETOUR_TRAMPOLINE pTrampoline = NULL;
// Ensure that there is a default region.
if (s_pRegion == NULL && s_pRegions != NULL) {
s_pRegion = s_pRegions;
}
// First check the default region for a valid free block.
if (s_pRegion != NULL && s_pRegion->pFree != NULL &&
s_pRegion->pFree >= pLo && s_pRegion->pFree <= pHi) {
// Shared exit path: the two searches below jump here with s_pRegion
// pointing at the region to take a slot from.
found_region:
pTrampoline = s_pRegion->pFree;
// do a last sanity check on region.
if (pTrampoline < pLo || pTrampoline > pHi) {
return NULL;
}
// Pop the slot off the free list; a free slot keeps its next link in pbRemain.
s_pRegion->pFree = (PDETOUR_TRAMPOLINE)pTrampoline->pbRemain;
memset(pTrampoline, 0xcc, sizeof(*pTrampoline));
return pTrampoline;
}
// Then check the existing regions for a valid free block.
// Note: the global s_pRegion is the loop cursor, so it is NULL afterwards
// if no region qualified.
for (s_pRegion = s_pRegions; s_pRegion != NULL; s_pRegion = s_pRegion->pNext) {
if (s_pRegion != NULL && s_pRegion->pFree != NULL &&
s_pRegion->pFree >= pLo && s_pRegion->pFree <= pHi) {
goto found_region;
}
}
// We need to allocate a new region.
// Round pbTarget down to 64KB block.
pbTarget = pbTarget - (PtrToUlong(pbTarget) & 0xffff);
PVOID pbNewlyAllocated =
detour_alloc_trampoline_allocate_new(pbTarget, pLo, pHi);
if (pbNewlyAllocated != NULL) {
// Initialize the region header and push the region onto the global list.
s_pRegion = (DETOUR_REGION*)pbNewlyAllocated;
s_pRegion->dwSignature = DETOUR_REGION_SIGNATURE;
s_pRegion->pFree = NULL;
s_pRegion->pNext = s_pRegions;
s_pRegions = s_pRegion;
DETOUR_TRACE((" Allocated region %p..%p\n\n",
s_pRegion, ((PBYTE)s_pRegion) + DETOUR_REGION_SIZE - 1));
// Build the free list from the slots that follow the region header.
// NOTE(review): the loop stops at i == 2, so slots 0 and 1 are never
// linked into the free list and are never handed out - confirm whether
// that is intended.
PBYTE pFree = NULL;
pTrampoline = ((PDETOUR_TRAMPOLINE)s_pRegion) + 1;
for (int i = DETOUR_TRAMPOLINES_PER_REGION - 1; i > 1; i--) {
pTrampoline[i].pbRemain = pFree;
pFree = (PBYTE)&pTrampoline[i];
}
s_pRegion->pFree = (PDETOUR_TRAMPOLINE)pFree;
goto found_region;
}
DETOUR_TRACE(("Couldn't find available memory region!\n"));
return NULL;
}
// Returns a trampoline slot to the free list of the region that contains it.
static void detour_free_trampoline(PDETOUR_TRAMPOLINE pTrampoline)
{
    // The owning region header sits at the 64KB boundary at or below the slot.
    const ULONG_PTR regionBase = (ULONG_PTR)pTrampoline & ~(ULONG_PTR)0xffff;
    PDETOUR_REGION pOwner = (PDETOUR_REGION)regionBase;

    // Wipe the slot, then reuse pbRemain as the free-list link and push it
    // on the front of the list.
    memset(pTrampoline, 0, sizeof(*pTrampoline));
    pTrampoline->pbRemain = (PBYTE)pOwner->pFree;
    pOwner->pFree = pTrampoline;
}
// Returns TRUE when no trampoline slot in pRegion is in use. A slot is
// treated as in use when its pbRemain is non-NULL and points outside the
// region; NULL or an in-region pointer (a free-list link) means free.
// Returns FALSE if pRegion does not carry the region signature.
static BOOL detour_is_region_empty(PDETOUR_REGION pRegion)
{
    // Stop if the region isn't a region (this would be bad).
    if (pRegion->dwSignature != DETOUR_REGION_SIGNATURE) {
        return FALSE;
    }

    PBYTE pbFirst = (PBYTE)pRegion;
    PBYTE pbEnd = pbFirst + DETOUR_REGION_SIZE;
    PDETOUR_TRAMPOLINE pSlots = ((PDETOUR_TRAMPOLINE)pRegion) + 1;

    // Stop if any of the trampolines aren't free.
    for (int i = 0; i < DETOUR_TRAMPOLINES_PER_REGION; i++) {
        PBYTE pbLink = pSlots[i].pbRemain;
        if (pbLink == NULL) {
            continue;
        }
        if (pbLink < pbFirst || pbLink >= pbEnd) {
            return FALSE;
        }
    }

    // OK, the region is empty.
    return TRUE;
}
static void detour_free_unused_trampoline_regions()
{
PDETOUR_REGION *ppRegionBase = &s_pRegions;
PDETOUR_REGION pRegion = s_pRegions;
while (pRegion != NULL) {
if (detour_is_region_empty(pRegion)) {
*ppRegionBase = pRegion->pNext;
VirtualFree(pRegion, 0, MEM_RELEASE);
s_pRegion = NULL;
}
else {
ppRegionBase = &pRegion->pNext;
}
pRegion = *ppRegionBase;
}
}
///////////////////////////////////////////////////////// Transaction Structs.
//
// Singly linked list node for a thread enlisted in the pending transaction
// via DetourUpdateThread.
struct DetourThread
{
DetourThread * pNext; // Next enlisted thread, or NULL.
HANDLE hThread; // Handle of the suspended thread; resumed on commit/abort.
};
// Singly linked list node describing one attach or detach queued in the
// pending transaction.
struct DetourOperation
{
DetourOperation * pNext; // Next queued operation, or NULL.
BOOL fIsRemove; // TRUE for a detach, FALSE for an attach.
PBYTE * ppbPointer; // Caller's function pointer, rewritten at commit.
PBYTE pbTarget; // Start of the target code that is patched or restored.
PDETOUR_TRAMPOLINE pTrampoline; // Trampoline associated with the target.
ULONG dwPerm; // Page protection to restore on the target after commit/abort.
};
// Global transaction state.
static BOOL s_fIgnoreTooSmall = FALSE; // Set by DetourSetIgnoreTooSmall.
static BOOL s_fRetainRegions = FALSE; // When TRUE, commit does not release unused regions.
static LONG s_nPendingThreadId = 0; // Thread owning pending transaction.
static LONG s_nPendingError = NO_ERROR; // First error recorded in the pending transaction.
static PVOID * s_ppPendingError = NULL; // Pointer argument of the operation that failed, if any.
static DetourThread * s_pPendingThreads = NULL; // Threads suspended for the pending transaction.
static DetourOperation * s_pPendingOperations = NULL; // Operations queued in the pending transaction.
//////////////////////////////////////////////////////////////////////////////
//
// Resolves pPointer to the code address produced by detour_skip_jmp,
// forwarding ppGlobals to it unchanged.
PVOID WINAPI DetourCodeFromPointer(_In_ PVOID pPointer,
                                   _Out_opt_ PVOID *ppGlobals)
{
    PBYTE pbEntry = (PBYTE)pPointer;
    return detour_skip_jmp(pbEntry, ppGlobals);
}
//////////////////////////////////////////////////////////// Transaction APIs.
//
// Replaces the "ignore too small" flag with fIgnore and returns the value it
// held before the call.
BOOL WINAPI DetourSetIgnoreTooSmall(_In_ BOOL fIgnore)
{
    const BOOL fOldSetting = s_fIgnoreTooSmall;

    s_fIgnoreTooSmall = fIgnore;
    return fOldSetting;
}
// Replaces the "retain regions" flag with fRetain and returns the value it
// held before the call.
BOOL WINAPI DetourSetRetainRegions(_In_ BOOL fRetain)
{
    const BOOL fOldSetting = s_fRetainRegions;

    s_fRetainRegions = fRetain;
    return fOldSetting;
}
// Sets the lower bound of the address range skipped when searching for
// trampoline memory, and returns the previous lower bound.
PVOID WINAPI DetourSetSystemRegionLowerBound(_In_ PVOID pSystemRegionLowerBound)
{
    PVOID pOldBound = s_pSystemRegionLowerBound;

    s_pSystemRegionLowerBound = pSystemRegionLowerBound;
    return pOldBound;
}
// Sets the upper bound of the address range skipped when searching for
// trampoline memory, and returns the previous upper bound.
PVOID WINAPI DetourSetSystemRegionUpperBound(_In_ PVOID pSystemRegionUpperBound)
{
    PVOID pOldBound = s_pSystemRegionUpperBound;

    s_pSystemRegionUpperBound = pSystemRegionUpperBound;
    return pOldBound;
}
// Opens a transaction owned by the calling thread.
// Returns ERROR_INVALID_OPERATION if a transaction is already pending;
// otherwise resets the pending lists, makes the trampoline regions writable
// and returns the result of that step (NO_ERROR on success).
LONG WINAPI DetourTransactionBegin()
{
    // Only one transaction is allowed at a time.
_Benign_race_begin_
    if (s_nPendingThreadId != 0) {
        return ERROR_INVALID_OPERATION;
    }
_Benign_race_end_

    // Make sure only one thread can start a transaction.
    const LONG nPreviousOwner =
        InterlockedCompareExchange(&s_nPendingThreadId, (LONG)GetCurrentThreadId(), 0);
    if (nPreviousOwner != 0) {
        return ERROR_INVALID_OPERATION;
    }

    s_ppPendingError = NULL;
    s_pPendingThreads = NULL;
    s_pPendingOperations = NULL;

    // Make sure the trampoline pages are writable.
    s_nPendingError = detour_writable_trampoline_regions();
    return s_nPendingError;
}
// Discards the pending transaction owned by the calling thread: restores
// page protections on every queued target, frees trampolines allocated for
// queued attaches, re-protects the trampoline regions and resumes the
// enlisted threads.
// Returns ERROR_INVALID_OPERATION if the caller does not own the
// transaction, NO_ERROR otherwise.
LONG WINAPI DetourTransactionAbort()
{
    if ((LONG)GetCurrentThreadId() != s_nPendingThreadId) {
        return ERROR_INVALID_OPERATION;
    }

    // Restore all of the page permissions.
    DetourOperation *pOperation = s_pPendingOperations;
    while (pOperation != NULL) {
        DetourOperation *pFollowing = pOperation->pNext;

        // We don't care if this fails, because the code is still accessible.
        DWORD dwPrevious;
        VirtualProtect(pOperation->pbTarget, pOperation->pTrampoline->cbRestore,
                       pOperation->dwPerm, &dwPrevious);

        // Only attaches allocated a trampoline within this transaction.
        if (!pOperation->fIsRemove && pOperation->pTrampoline) {
            detour_free_trampoline(pOperation->pTrampoline);
            pOperation->pTrampoline = NULL;
        }

        delete pOperation;
        pOperation = pFollowing;
    }
    s_pPendingOperations = NULL;

    // Make sure the trampoline pages are no longer writable.
    detour_runnable_trampoline_regions();

    // Resume any suspended threads.
    DetourThread *pThread = s_pPendingThreads;
    while (pThread != NULL) {
        DetourThread *pFollowing = pThread->pNext;

        // There is nothing we can do if this fails.
        ResumeThread(pThread->hThread);

        delete pThread;
        pThread = pFollowing;
    }
    s_pPendingThreads = NULL;
    s_nPendingThreadId = 0;

    return NO_ERROR;
}
// Commits the pending transaction without reporting which pointer failed.
// Returns whatever DetourTransactionCommitEx returns.
LONG WINAPI DetourTransactionCommit()
{
    const LONG nResult = DetourTransactionCommitEx(NULL);
    return nResult;
}
// Maps an offset inside the trampoline to the matching offset inside the
// target using the rAlign table. Returns 0 when no entry matches.
static BYTE detour_align_from_trampoline(PDETOUR_TRAMPOLINE pTrampoline, BYTE obTrampoline)
{
    LONG iEntry = 0;

    while (iEntry < ARRAYSIZE(pTrampoline->rAlign)) {
        if (pTrampoline->rAlign[iEntry].obTrampoline == obTrampoline) {
            return pTrampoline->rAlign[iEntry].obTarget;
        }
        iEntry++;
    }
    return 0;
}
// Maps an offset inside the target to the matching offset inside the
// trampoline using the rAlign table. Returns 0 when no entry matches.
static LONG detour_align_from_target(PDETOUR_TRAMPOLINE pTrampoline, LONG obTarget)
{
    LONG iEntry = 0;

    while (iEntry < ARRAYSIZE(pTrampoline->rAlign)) {
        if (pTrampoline->rAlign[iEntry].obTarget == obTarget) {
            return pTrampoline->rAlign[iEntry].obTrampoline;
        }
        iEntry++;
    }
    return 0;
}
// Commits the pending transaction owned by the calling thread.
//
// Steps, in order:
//   1. If an earlier call in the transaction recorded an error, abort the
//      transaction and return that error.
//   2. Apply every queued operation: a detach copies the saved bytes back
//      over the target; an attach writes the jump into the target. In both
//      cases the caller's function pointer is updated.
//   3. For every enlisted (suspended) thread whose instruction pointer lies
//      in code that was just rewritten, move the instruction pointer to the
//      equivalent location (target <-> trampoline) using the rAlign table.
//   4. Restore page protections, flush the instruction cache, free the
//      trampolines of detached functions, optionally release empty regions,
//      re-protect the trampoline regions and resume the enlisted threads.
//
// pppFailedPointer (optional) receives s_ppPendingError, i.e. the pointer
// argument recorded by the operation that failed, or NULL.
//
// Returns ERROR_INVALID_OPERATION if the caller does not own the
// transaction; otherwise the pending error code (NO_ERROR on success).
LONG WINAPI DetourTransactionCommitEx(_Out_opt_ PVOID **pppFailedPointer)
{
    if (pppFailedPointer != NULL) {
        // Used to get the last error.
        *pppFailedPointer = s_ppPendingError;
    }
    if (s_nPendingThreadId != (LONG)GetCurrentThreadId()) {
        return ERROR_INVALID_OPERATION;
    }

    // If any of the pending operations failed, then we abort the whole transaction.
    if (s_nPendingError != NO_ERROR) {
        DETOUR_BREAK();
        DetourTransactionAbort();
        return s_nPendingError;
    }

    // Common variables.
    DetourOperation *o;
    DetourThread *t;
    BOOL freed = FALSE;

    // Insert or remove each of the detours.
    for (o = s_pPendingOperations; o != NULL; o = o->pNext) {
        if (o->fIsRemove) {
            // Detach: put the original bytes back over the target.
            CopyMemory(o->pbTarget,
                       o->pTrampoline->rbRestore,
                       o->pTrampoline->cbRestore);
#ifdef DETOURS_IA64
            *o->ppbPointer = (PBYTE)o->pTrampoline->ppldTarget;
#endif // DETOURS_IA64
#ifdef DETOURS_X86
            *o->ppbPointer = o->pbTarget;
#endif // DETOURS_X86
#ifdef DETOURS_X64
            *o->ppbPointer = o->pbTarget;
#endif // DETOURS_X64
#ifdef DETOURS_ARM
            *o->ppbPointer = DETOURS_PBYTE_TO_PFUNC(o->pbTarget);
#endif // DETOURS_ARM
#ifdef DETOURS_ARM64
            *o->ppbPointer = o->pbTarget;
#endif // DETOURS_ARM64
        }
        else {
            // Attach: overwrite the start of the target with a jump and hand
            // the caller the trampoline as the new "original" pointer.
            DETOUR_TRACE(("detours: pbTramp =%p, pbRemain=%p, pbDetour=%p, cbRestore=%d\n",
                          o->pTrampoline,
                          o->pTrampoline->pbRemain,
                          o->pTrampoline->pbDetour,
                          o->pTrampoline->cbRestore));
            DETOUR_TRACE(("detours: pbTarget=%p: "
                          "%02x %02x %02x %02x "
                          "%02x %02x %02x %02x "
                          "%02x %02x %02x %02x [before]\n",
                          o->pbTarget,
                          o->pbTarget[0], o->pbTarget[1], o->pbTarget[2], o->pbTarget[3],
                          o->pbTarget[4], o->pbTarget[5], o->pbTarget[6], o->pbTarget[7],
                          o->pbTarget[8], o->pbTarget[9], o->pbTarget[10], o->pbTarget[11]));
#ifdef DETOURS_IA64
            ((DETOUR_IA64_BUNDLE*)o->pbTarget)
                ->SetBrl((UINT64)&o->pTrampoline->bAllocFrame);
            *o->ppbPointer = (PBYTE)&o->pTrampoline->pldTrampoline;
#endif // DETOURS_IA64
#ifdef DETOURS_X64
            detour_gen_jmp_indirect(o->pTrampoline->rbCodeIn, &o->pTrampoline->pbDetour);
            PBYTE pbCode = detour_gen_jmp_immediate(o->pbTarget, o->pTrampoline->rbCodeIn);
            pbCode = detour_gen_brk(pbCode, o->pTrampoline->pbRemain);
            *o->ppbPointer = o->pTrampoline->rbCode;
            UNREFERENCED_PARAMETER(pbCode);
#endif // DETOURS_X64
#ifdef DETOURS_X86
            PBYTE pbCode = detour_gen_jmp_immediate(o->pbTarget, o->pTrampoline->pbDetour);
            pbCode = detour_gen_brk(pbCode, o->pTrampoline->pbRemain);
            *o->ppbPointer = o->pTrampoline->rbCode;
            UNREFERENCED_PARAMETER(pbCode);
#endif // DETOURS_X86
#ifdef DETOURS_ARM
            PBYTE pbCode = detour_gen_jmp_immediate(o->pbTarget, NULL, o->pTrampoline->pbDetour);
            pbCode = detour_gen_brk(pbCode, o->pTrampoline->pbRemain);
            *o->ppbPointer = DETOURS_PBYTE_TO_PFUNC(o->pTrampoline->rbCode);
            UNREFERENCED_PARAMETER(pbCode);
#endif // DETOURS_ARM
#ifdef DETOURS_ARM64
            PBYTE pbCode = detour_gen_jmp_indirect(o->pbTarget, (ULONG64*)&(o->pTrampoline->pbDetour));
            pbCode = detour_gen_brk(pbCode, o->pTrampoline->pbRemain);
            *o->ppbPointer = o->pTrampoline->rbCode;
            UNREFERENCED_PARAMETER(pbCode);
#endif // DETOURS_ARM64
            DETOUR_TRACE(("detours: pbTarget=%p: "
                          "%02x %02x %02x %02x "
                          "%02x %02x %02x %02x "
                          "%02x %02x %02x %02x [after]\n",
                          o->pbTarget,
                          o->pbTarget[0], o->pbTarget[1], o->pbTarget[2], o->pbTarget[3],
                          o->pbTarget[4], o->pbTarget[5], o->pbTarget[6], o->pbTarget[7],
                          o->pbTarget[8], o->pbTarget[9], o->pbTarget[10], o->pbTarget[11]));
            DETOUR_TRACE(("detours: pbTramp =%p: "
                          "%02x %02x %02x %02x "
                          "%02x %02x %02x %02x "
                          "%02x %02x %02x %02x\n",
                          o->pTrampoline,
                          o->pTrampoline->rbCode[0], o->pTrampoline->rbCode[1],
                          o->pTrampoline->rbCode[2], o->pTrampoline->rbCode[3],
                          o->pTrampoline->rbCode[4], o->pTrampoline->rbCode[5],
                          o->pTrampoline->rbCode[6], o->pTrampoline->rbCode[7],
                          o->pTrampoline->rbCode[8], o->pTrampoline->rbCode[9],
                          o->pTrampoline->rbCode[10], o->pTrampoline->rbCode[11]));
#ifdef DETOURS_IA64
            DETOUR_TRACE(("\n"));
            DETOUR_TRACE(("detours: &pldTrampoline =%p\n",
                          &o->pTrampoline->pldTrampoline));
            DETOUR_TRACE(("detours: &bMovlTargetGp =%p [%p]\n",
                          &o->pTrampoline->bMovlTargetGp,
                          o->pTrampoline->bMovlTargetGp.GetMovlGp()));
            DETOUR_TRACE(("detours: &rbCode =%p [%p]\n",
                          &o->pTrampoline->rbCode,
                          ((DETOUR_IA64_BUNDLE&)o->pTrampoline->rbCode).GetBrlTarget()));
            DETOUR_TRACE(("detours: &bBrlRemainEip =%p [%p]\n",
                          &o->pTrampoline->bBrlRemainEip,
                          o->pTrampoline->bBrlRemainEip.GetBrlTarget()));
            DETOUR_TRACE(("detours: &bMovlDetourGp =%p [%p]\n",
                          &o->pTrampoline->bMovlDetourGp,
                          o->pTrampoline->bMovlDetourGp.GetMovlGp()));
            DETOUR_TRACE(("detours: &bBrlDetourEip =%p [%p]\n",
                          &o->pTrampoline->bCallDetour,
                          o->pTrampoline->bCallDetour.GetBrlTarget()));
            DETOUR_TRACE(("detours: pldDetour =%p [%p]\n",
                          o->pTrampoline->ppldDetour->EntryPoint,
                          o->pTrampoline->ppldDetour->GlobalPointer));
            DETOUR_TRACE(("detours: pldTarget =%p [%p]\n",
                          o->pTrampoline->ppldTarget->EntryPoint,
                          o->pTrampoline->ppldTarget->GlobalPointer));
            DETOUR_TRACE(("detours: pbRemain =%p\n",
                          o->pTrampoline->pbRemain));
            DETOUR_TRACE(("detours: pbDetour =%p\n",
                          o->pTrampoline->pbDetour));
            DETOUR_TRACE(("\n"));
#endif // DETOURS_IA64
        }
    }

    // Update any suspended threads.
    for (t = s_pPendingThreads; t != NULL; t = t->pNext) {
        CONTEXT cxt;
        cxt.ContextFlags = CONTEXT_CONTROL;

        // DETOURS_EIP names the instruction-pointer field of CONTEXT for the
        // architecture being compiled.
#undef DETOURS_EIP
#ifdef DETOURS_X86
#define DETOURS_EIP Eip
#endif // DETOURS_X86
#ifdef DETOURS_X64
#define DETOURS_EIP Rip
#endif // DETOURS_X64
#ifdef DETOURS_IA64
#define DETOURS_EIP StIIP
#endif // DETOURS_IA64
#ifdef DETOURS_ARM
#define DETOURS_EIP Pc
#endif // DETOURS_ARM
#ifdef DETOURS_ARM64
#define DETOURS_EIP Pc
#endif // DETOURS_ARM64
        typedef ULONG_PTR DETOURS_EIP_TYPE;

        if (GetThreadContext(t->hThread, &cxt)) {
            for (o = s_pPendingOperations; o != NULL; o = o->pNext) {
                if (o->fIsRemove) {
                    // The thread is executing inside a trampoline that is
                    // about to be freed: move it back into the restored
                    // target. The range must span the whole trampoline
                    // object, hence sizeof(*o->pTrampoline) and not the size
                    // of the pointer.
                    if (cxt.DETOURS_EIP >= (DETOURS_EIP_TYPE)(ULONG_PTR)o->pTrampoline &&
                        cxt.DETOURS_EIP < (DETOURS_EIP_TYPE)((ULONG_PTR)o->pTrampoline
                                                             + sizeof(*o->pTrampoline))
                       ) {
                        cxt.DETOURS_EIP = (DETOURS_EIP_TYPE)
                            ((ULONG_PTR)o->pbTarget
                             + detour_align_from_trampoline(o->pTrampoline,
                                                            (BYTE)(cxt.DETOURS_EIP
                                                                   - (DETOURS_EIP_TYPE)(ULONG_PTR)
                                                                   o->pTrampoline)));
                        SetThreadContext(t->hThread, &cxt);
                    }
                }
                else {
                    // The thread is executing inside the bytes that were just
                    // overwritten with the jump: move it to the copy of those
                    // instructions in the trampoline.
                    if (cxt.DETOURS_EIP >= (DETOURS_EIP_TYPE)(ULONG_PTR)o->pbTarget &&
                        cxt.DETOURS_EIP < (DETOURS_EIP_TYPE)((ULONG_PTR)o->pbTarget
                                                             + o->pTrampoline->cbRestore)
                       ) {
                        cxt.DETOURS_EIP = (DETOURS_EIP_TYPE)
                            ((ULONG_PTR)o->pTrampoline
                             + detour_align_from_target(o->pTrampoline,
                                                        (BYTE)(cxt.DETOURS_EIP
                                                               - (DETOURS_EIP_TYPE)(ULONG_PTR)
                                                               o->pbTarget)));
                        SetThreadContext(t->hThread, &cxt);
                    }
                }
            }
        }
#undef DETOURS_EIP
    }

    // Restore all of the page permissions and flush the icache.
    HANDLE hProcess = GetCurrentProcess();
    for (o = s_pPendingOperations; o != NULL;) {
        // We don't care if this fails, because the code is still accessible.
        DWORD dwOld;
        VirtualProtect(o->pbTarget, o->pTrampoline->cbRestore, o->dwPerm, &dwOld);
        FlushInstructionCache(hProcess, o->pbTarget, o->pTrampoline->cbRestore);

        if (o->fIsRemove && o->pTrampoline) {
            detour_free_trampoline(o->pTrampoline);
            o->pTrampoline = NULL;
            freed = true;
        }

        DetourOperation *n = o->pNext;
        delete o;
        o = n;
    }
    s_pPendingOperations = NULL;

    // Free any trampoline regions that are now unused.
    if (freed && !s_fRetainRegions) {
        detour_free_unused_trampoline_regions();
    }

    // Make sure the trampoline pages are no longer writable.
    detour_runnable_trampoline_regions();

    // Resume any suspended threads.
    for (t = s_pPendingThreads; t != NULL;) {
        // There is nothing we can do if this fails.
        ResumeThread(t->hThread);

        DetourThread *n = t->pNext;
        delete t;
        t = n;
    }
    s_pPendingThreads = NULL;
    s_nPendingThreadId = 0;

    if (pppFailedPointer != NULL) {
        *pppFailedPointer = s_ppPendingError;
    }
    return s_nPendingError;
}
// Enlists hThread in the pending transaction by suspending it and recording
// its handle so that commit/abort can resume it.
//
// Returns the pending error if one is already recorded, NO_ERROR if hThread
// is the GetCurrentThread() pseudo-handle (the request is dropped) or on
// success, ERROR_NOT_ENOUGH_MEMORY if the list node cannot be allocated, or
// GetLastError() if SuspendThread fails. Failures are recorded as the
// transaction's pending error.
LONG WINAPI DetourUpdateThread(_In_ HANDLE hThread)
{
    // If any of the pending operations failed, then we don't need to do this.
    if (s_nPendingError != NO_ERROR) {
        return s_nPendingError;
    }

    // Silently (and safely) drop any attempt to suspend our own thread.
    if (hThread == GetCurrentThread()) {
        return NO_ERROR;
    }

    DetourThread *pEntry = new NOTHROW DetourThread;

    if (pEntry != NULL && SuspendThread(hThread) != (DWORD)-1) {
        // Success: push the thread on the front of the pending list.
        pEntry->hThread = hThread;
        pEntry->pNext = s_pPendingThreads;
        s_pPendingThreads = pEntry;
        return NO_ERROR;
    }

    // Failure: work out why, release the node and poison the transaction.
    LONG error;
    if (pEntry == NULL) {
        error = ERROR_NOT_ENOUGH_MEMORY;
    }
    else {
        error = GetLastError();
        DETOUR_BREAK();
        delete pEntry;
        pEntry = NULL;
    }

    s_nPendingError = error;
    s_ppPendingError = NULL;
    DETOUR_BREAK();
    return error;
}
///////////////////////////////////////////////////////////// Transacted APIs.
//
// Queues an attach of pDetour to *ppPointer in the pending transaction.
// Equivalent to DetourAttachEx with none of the optional outputs requested;
// returns whatever DetourAttachEx returns.
LONG WINAPI DetourAttach(_Inout_ PVOID *ppPointer,
                         _In_ PVOID pDetour)
{
    const LONG nResult = DetourAttachEx(ppPointer, pDetour, NULL, NULL, NULL);
    return nResult;
}
// Queues an attach of pDetour to the function at *ppPointer in the pending
// transaction. Nothing in the target is modified here apart from its page
// protection; the jump is written when the transaction is committed.
//
// Parameters:
//   ppPointer        - address of the caller's pointer to the target function.
//   pDetour          - the detour function.
//   ppRealTrampoline - optional; receives the allocated trampoline.
//   ppRealTarget     - optional; receives the resolved target code address.
//   ppRealDetour     - optional; receives the resolved detour code address.
//   All three optional outputs are set to NULL on entry.
//
// Returns NO_ERROR when the operation was queued, otherwise one of:
//   ERROR_INVALID_PARAMETER  - pDetour is NULL.
//   ERROR_INVALID_OPERATION  - the calling thread does not own the transaction.
//   ERROR_INVALID_HANDLE     - ppPointer or *ppPointer is NULL, or the target
//                              needed too many bytes for the trampoline.
//   ERROR_NOT_ENOUGH_MEMORY  - operation record or trampoline allocation failed.
//   ERROR_INVALID_BLOCK      - the target is too small to hold the jump.
//   (other)                  - the pending error, or GetLastError() from
//                              VirtualProtect.
LONG WINAPI DetourAttachEx(_Inout_ PVOID *ppPointer,
_In_ PVOID pDetour,
_Out_opt_ PDETOUR_TRAMPOLINE *ppRealTrampoline,
_Out_opt_ PVOID *ppRealTarget,
_Out_opt_ PVOID *ppRealDetour)
{
LONG error = NO_ERROR;
// Clear the optional outputs up front so they are defined on every exit path.
if (ppRealTrampoline != NULL) {
*ppRealTrampoline = NULL;
}
if (ppRealTarget != NULL) {
*ppRealTarget = NULL;
}
if (ppRealDetour != NULL) {
*ppRealDetour = NULL;
}
if (pDetour == NULL) {
DETOUR_TRACE(("empty detour\n"));
return ERROR_INVALID_PARAMETER;
}
if (s_nPendingThreadId != (LONG)GetCurrentThreadId()) {
DETOUR_TRACE(("transaction conflict with thread id=%d\n", s_nPendingThreadId));
return ERROR_INVALID_OPERATION;
}
// If any of the pending operations failed, then we don't need to do this.
if (s_nPendingError != NO_ERROR) {
DETOUR_TRACE(("pending transaction error=%d\n", s_nPendingError));
return s_nPendingError;
}
if (ppPointer == NULL) {
DETOUR_TRACE(("ppPointer is null\n"));
return ERROR_INVALID_HANDLE;
}
// Unlike the checks above, a NULL *ppPointer is recorded as the
// transaction's pending error.
if (*ppPointer == NULL) {
error = ERROR_INVALID_HANDLE;
s_nPendingError = error;
s_ppPendingError = ppPointer;
DETOUR_TRACE(("*ppPointer is null (ppPointer=%p)\n", ppPointer));
DETOUR_BREAK();
return error;
}
PBYTE pbTarget = (PBYTE)*ppPointer;
PDETOUR_TRAMPOLINE pTrampoline = NULL;
DetourOperation *o = NULL;
// Resolve both pointers to the code addresses reported by
// DetourCodeFromPointer.
#ifdef DETOURS_IA64
PPLABEL_DESCRIPTOR ppldDetour = (PPLABEL_DESCRIPTOR)pDetour;
PPLABEL_DESCRIPTOR ppldTarget = (PPLABEL_DESCRIPTOR)pbTarget;
PVOID pDetourGlobals = NULL;
PVOID pTargetGlobals = NULL;
pDetour = (PBYTE)DetourCodeFromPointer(ppldDetour, &pDetourGlobals);
pbTarget = (PBYTE)DetourCodeFromPointer(ppldTarget, &pTargetGlobals);
DETOUR_TRACE((" ppldDetour=%p, code=%p [gp=%p]\n",
ppldDetour, pDetour, pDetourGlobals));
DETOUR_TRACE((" ppldTarget=%p, code=%p [gp=%p]\n",
ppldTarget, pbTarget, pTargetGlobals));
#else // DETOURS_IA64
pbTarget = (PBYTE)DetourCodeFromPointer(pbTarget, NULL);
pDetour = DetourCodeFromPointer(pDetour, NULL);
#endif // !DETOURS_IA64
// Don't follow a jump if its destination is the target function.
// This happens when the detour does nothing other than call the target.
// NOTE(review): 'error' is still NO_ERROR here, so both branches end up
// returning NO_ERROR without queuing an operation - confirm this is intended.
if (pDetour == (PVOID)pbTarget) {
if (s_fIgnoreTooSmall) {
goto stop;
}
else {
DETOUR_BREAK();
goto fail;
}
}
if (ppRealTarget != NULL) {
*ppRealTarget = pbTarget;
}
if (ppRealDetour != NULL) {
*ppRealDetour = pDetour;
}
o = new NOTHROW DetourOperation;
if (o == NULL) {
error = ERROR_NOT_ENOUGH_MEMORY;
// Shared failure exits, entered by goto from the rest of the function:
//   fail - records 'error' as the transaction's pending error, then cleans up.
//   stop - cleans up without touching the pending error.
// Both release the trampoline and operation record (if any), remember
// ppPointer in s_ppPendingError and return 'error'.
fail:
s_nPendingError = error;
DETOUR_BREAK();
stop:
if (pTrampoline != NULL) {
detour_free_trampoline(pTrampoline);
pTrampoline = NULL;
if (ppRealTrampoline != NULL) {
*ppRealTrampoline = NULL;
}
}
if (o != NULL) {
delete o;
o = NULL;
}
s_ppPendingError = ppPointer;
return error;
}
pTrampoline = detour_alloc_trampoline(pbTarget);
if (pTrampoline == NULL) {
error = ERROR_NOT_ENOUGH_MEMORY;
DETOUR_BREAK();
goto fail;
}
if (ppRealTrampoline != NULL) {
*ppRealTrampoline = pTrampoline;
}
DETOUR_TRACE(("detours: pbTramp=%p, pDetour=%p\n", pTrampoline, pDetour));
memset(pTrampoline->rAlign, 0, sizeof(pTrampoline->rAlign));
// Determine the number of movable target instructions.
PBYTE pbSrc = pbTarget;
PBYTE pbTrampoline = pTrampoline->rbCode;
// pbPool is passed to DetourCopyInstruction and the jump generators; it
// starts at the end of the trampoline's code area.
#ifdef DETOURS_IA64
PBYTE pbPool = (PBYTE)(&pTrampoline->bBranchIslands + 1);
#else
PBYTE pbPool = pbTrampoline + sizeof(pTrampoline->rbCode);
#endif
ULONG cbTarget = 0; // Bytes of the target consumed so far.
ULONG cbJump = SIZE_OF_JMP; // Bytes needed in the target for the jump.
ULONG nAlign = 0; // Number of rAlign entries filled in.
#ifdef DETOURS_ARM
// On ARM, we need an extra instruction when the function isn't 32-bit aligned.
// Check if the existing code is another detour (or at least a similar
// "ldr pc, [PC+0]" jump.
if ((ULONG)pbTarget & 2) {
cbJump += 2;
ULONG op = fetch_thumb_opcode(pbSrc);
if (op == 0xbf00) {
op = fetch_thumb_opcode(pbSrc + 2);
if (op == 0xf8dff000) { // LDR PC,[PC]
*((PUSHORT&)pbTrampoline)++ = *((PUSHORT&)pbSrc)++;
*((PULONG&)pbTrampoline)++ = *((PULONG&)pbSrc)++;
*((PULONG&)pbTrampoline)++ = *((PULONG&)pbSrc)++;
cbTarget = (LONG)(pbSrc - pbTarget);
// We will fall through the "while" because cbTarget is now >= cbJump.
}
}
}
else {
ULONG op = fetch_thumb_opcode(pbSrc);
if (op == 0xf8dff000) { // LDR PC,[PC]
*((PULONG&)pbTrampoline)++ = *((PULONG&)pbSrc)++;
*((PULONG&)pbTrampoline)++ = *((PULONG&)pbSrc)++;
cbTarget = (LONG)(pbSrc - pbTarget);
// We will fall through the "while" because cbTarget is now >= cbJump.
}
}
#endif
// Copy whole instructions from the target into the trampoline until enough
// bytes have been covered for the jump. Each copied instruction adds an
// rAlign entry pairing the target offset with the trampoline offset just
// past that instruction.
while (cbTarget < cbJump) {
PBYTE pbOp = pbSrc;
LONG lExtra = 0;
DETOUR_TRACE((" DetourCopyInstruction(%p,%p)\n",
pbTrampoline, pbSrc));
pbSrc = (PBYTE)
DetourCopyInstruction(pbTrampoline, (PVOID*)&pbPool, pbSrc, NULL, &lExtra);
DETOUR_TRACE((" DetourCopyInstruction() = %p (%d bytes)\n",
pbSrc, (int)(pbSrc - pbOp)));
pbTrampoline += (pbSrc - pbOp) + lExtra;
cbTarget = (LONG)(pbSrc - pbTarget);
pTrampoline->rAlign[nAlign].obTarget = cbTarget;
pTrampoline->rAlign[nAlign].obTrampoline = pbTrampoline - pTrampoline->rbCode;
nAlign++;
// Stop when the alignment table is full.
if (nAlign >= ARRAYSIZE(pTrampoline->rAlign)) {
break;
}
// Stop when the instruction just copied ends the function.
if (detour_does_code_end_function(pbOp)) {
break;
}
}
// Consume, but don't duplicate padding if it is needed and available.
while (cbTarget < cbJump) {
LONG cFiller = detour_is_code_filler(pbSrc);
if (cFiller == 0) {
break;
}
pbSrc += cFiller;
cbTarget = (LONG)(pbSrc - pbTarget);
}
#if DETOUR_DEBUG
{
DETOUR_TRACE((" detours: rAlign ["));
LONG n = 0;
for (n = 0; n < ARRAYSIZE(pTrampoline->rAlign); n++) {
if (pTrampoline->rAlign[n].obTarget == 0 &&
pTrampoline->rAlign[n].obTrampoline == 0) {
break;
}
DETOUR_TRACE((" %d/%d",
pTrampoline->rAlign[n].obTarget,
pTrampoline->rAlign[n].obTrampoline
));
}
DETOUR_TRACE((" ]\n"));
}
#endif
// NOTE(review): the copy loop above breaks once nAlign reaches
// ARRAYSIZE(rAlign), so the 'nAlign >' half of this test does not appear
// reachable from here - confirm.
if (cbTarget < cbJump || nAlign > ARRAYSIZE(pTrampoline->rAlign)) {
// Too few instructions.
error = ERROR_INVALID_BLOCK;
if (s_fIgnoreTooSmall) {
goto stop;
}
else {
DETOUR_BREAK();
goto fail;
}
}
// The copied code must not have run into the pool area.
if (pbTrampoline > pbPool) {
__debugbreak();
}
// Save the original target bytes so they can be restored later.
// NOTE(review): this copy happens before the "too many instructions" check
// below - confirm rbRestore is always large enough for cbTarget.
pTrampoline->cbCode = (BYTE)(pbTrampoline - pTrampoline->rbCode);
pTrampoline->cbRestore = (BYTE)cbTarget;
CopyMemory(pTrampoline->rbRestore, pbTarget, cbTarget);
#if !defined(DETOURS_IA64)
if (cbTarget > sizeof(pTrampoline->rbCode) - cbJump) {
// Too many instructions.
error = ERROR_INVALID_HANDLE;
DETOUR_BREAK();
goto fail;
}
#endif // !DETOURS_IA64
// pbRemain is the first target byte after the region that will be overwritten.
pTrampoline->pbRemain = pbTarget + cbTarget;
pTrampoline->pbDetour = (PBYTE)pDetour;
#ifdef DETOURS_IA64
pTrampoline->ppldDetour = ppldDetour;
pTrampoline->ppldTarget = ppldTarget;
pTrampoline->pldTrampoline.EntryPoint = (UINT64)&pTrampoline->bMovlTargetGp;
pTrampoline->pldTrampoline.GlobalPointer = (UINT64)pDetourGlobals;
((DETOUR_IA64_BUNDLE *)pTrampoline->rbCode)->SetStop();
pTrampoline->bMovlTargetGp.SetMovlGp((UINT64)pTargetGlobals);
pTrampoline->bBrlRemainEip.SetBrl((UINT64)pTrampoline->pbRemain);
// Alloc frame: alloc r41=ar.pfs,11,0,8,0; mov r40=rp
pTrampoline->bAllocFrame.wide[0] = 0x00000580164d480c;
pTrampoline->bAllocFrame.wide[1] = 0x00c4000500000200;
// save r36, r37, r38.
pTrampoline->bSave37to39.wide[0] = 0x031021004e019001;
pTrampoline->bSave37to39.wide[1] = 0x8401280600420098;
// save r34,r35,r36: adds r47=0,r36; adds r46=0,r35; adds r45=0,r34
pTrampoline->bSave34to36.wide[0] = 0x02e0210048017800;
pTrampoline->bSave34to36.wide[1] = 0x84011005a042008c;
// save gp,r32,r33" adds r44=0,r33; adds r43=0,r32; adds r42=0,gp ;;
pTrampoline->bSaveGPto33.wide[0] = 0x02b0210042016001;
pTrampoline->bSaveGPto33.wide[1] = 0x8400080540420080;
// set detour GP.
pTrampoline->bMovlDetourGp.SetMovlGp((UINT64)pDetourGlobals);
// call detour: brl.call.sptk.few rp=detour ;;
pTrampoline->bCallDetour.wide[0] = 0x0000000100000005;
pTrampoline->bCallDetour.wide[1] = 0xd000001000000000;
pTrampoline->bCallDetour.SetBrlTarget((UINT64)pDetour);
// pop frame & gp: adds gp=0,r42; mov rp=r40,+0;; mov.i ar.pfs=r41
pTrampoline->bPopFrameGp.wide[0] = 0x4000210054000802;
pTrampoline->bPopFrameGp.wide[1] = 0x00aa029000038005;
// return to caller: br.ret.sptk.many rp ;;
pTrampoline->bReturn.wide[0] = 0x0000000100000019;
pTrampoline->bReturn.wide[1] = 0x0084000880000200;
DETOUR_TRACE(("detours: &bMovlTargetGp=%p\n", &pTrampoline->bMovlTargetGp));
DETOUR_TRACE(("detours: &bMovlDetourGp=%p\n", &pTrampoline->bMovlDetourGp));
#endif // DETOURS_IA64
// Finish the trampoline: after the copied instructions, emit a jump to
// pbRemain (the rest of the target), followed by detour_gen_brk up to pbPool.
pbTrampoline = pTrampoline->rbCode + pTrampoline->cbCode;
#ifdef DETOURS_X64
pbTrampoline = detour_gen_jmp_indirect(pbTrampoline, &pTrampoline->pbRemain);
pbTrampoline = detour_gen_brk(pbTrampoline, pbPool);
#endif // DETOURS_X64
#ifdef DETOURS_X86
pbTrampoline = detour_gen_jmp_immediate(pbTrampoline, pTrampoline->pbRemain);
pbTrampoline = detour_gen_brk(pbTrampoline, pbPool);
#endif // DETOURS_X86
#ifdef DETOURS_ARM
pbTrampoline = detour_gen_jmp_immediate(pbTrampoline, &pbPool, pTrampoline->pbRemain);
pbTrampoline = detour_gen_brk(pbTrampoline, pbPool);
#endif // DETOURS_ARM
#ifdef DETOURS_ARM64
pbTrampoline = detour_gen_jmp_immediate(pbTrampoline, &pbPool, pTrampoline->pbRemain);
pbTrampoline = detour_gen_brk(pbTrampoline, pbPool);
#endif // DETOURS_ARM64
(void)pbTrampoline;
// Make the target writable now; the previous protection is saved in the
// operation record (dwPerm) and restored at commit/abort.
DWORD dwOld = 0;
if (!VirtualProtect(pbTarget, cbTarget, PAGE_EXECUTE_READWRITE, &dwOld)) {
error = GetLastError();
DETOUR_BREAK();
goto fail;
}
DETOUR_TRACE(("detours: pbTarget=%p: "
"%02x %02x %02x %02x "
"%02x %02x %02x %02x "
"%02x %02x %02x %02x\n",
pbTarget,
pbTarget[0], pbTarget[1], pbTarget[2], pbTarget[3],
pbTarget[4], pbTarget[5], pbTarget[6], pbTarget[7],
pbTarget[8], pbTarget[9], pbTarget[10], pbTarget[11]));
DETOUR_TRACE(("detours: pbTramp =%p: "
"%02x %02x %02x %02x "
"%02x %02x %02x %02x "
"%02x %02x %02x %02x\n",
pTrampoline,
pTrampoline->rbCode[0], pTrampoline->rbCode[1],
pTrampoline->rbCode[2], pTrampoline->rbCode[3],
pTrampoline->rbCode[4], pTrampoline->rbCode[5],
pTrampoline->rbCode[6], pTrampoline->rbCode[7],
pTrampoline->rbCode[8], pTrampoline->rbCode[9],
pTrampoline->rbCode[10], pTrampoline->rbCode[11]));
// Queue the attach at the head of the pending operation list.
o->fIsRemove = FALSE;
o->ppbPointer = (PBYTE*)ppPointer;
o->pTrampoline = pTrampoline;
o->pbTarget = pbTarget;
o->dwPerm = dwOld;
o->pNext = s_pPendingOperations;
s_pPendingOperations = o;
return NO_ERROR;
}
// Queue the removal of a detour as part of the transaction owned by the
// calling thread.
//
// Parameters:
//   ppPointer - address of the pointer through which the trampoline is
//               reached; *ppPointer is resolved to the DETOUR_TRAMPOLINE.
//   pDetour   - the detour function that was attached; it must match the
//               pbDetour recorded in the trampoline.
//
// Returns NO_ERROR once the removal has been queued, otherwise:
//   ERROR_INVALID_OPERATION  - the calling thread is not s_nPendingThreadId.
//   s_nPendingError          - an earlier operation already failed.
//   ERROR_INVALID_PARAMETER  - pDetour is NULL.
//   ERROR_INVALID_HANDLE     - ppPointer or *ppPointer is NULL.
//   ERROR_NOT_ENOUGH_MEMORY  - the DetourOperation could not be allocated.
//   ERROR_INVALID_BLOCK      - the trampoline's cbRestore is out of range or
//                              its pbDetour does not match pDetour.
//   GetLastError()           - VirtualProtect on the target failed.
//
// This function only changes the target's page protection and pushes a
// DetourOperation onto s_pPendingOperations; no target bytes are written here.
// NOTE(review): the queued operation is presumably applied when the
// transaction is committed -- confirm against the commit routine.
LONG WINAPI DetourDetach(_Inout_ PVOID *ppPointer,
_In_ PVOID pDetour)
{
LONG error = NO_ERROR;
// Only the thread recorded in s_nPendingThreadId may add operations.
if (s_nPendingThreadId != (LONG)GetCurrentThreadId()) {
return ERROR_INVALID_OPERATION;
}
// If any of the pending operations failed, then we don't need to do this.
if (s_nPendingError != NO_ERROR) {
return s_nPendingError;
}
// These two argument checks return immediately and do NOT record the error
// in s_nPendingError / s_ppPendingError.
if (pDetour == NULL) {
return ERROR_INVALID_PARAMETER;
}
if (ppPointer == NULL) {
return ERROR_INVALID_HANDLE;
}
// A NULL target pointer, by contrast, is recorded as the pending error
// together with the offending ppPointer.
if (*ppPointer == NULL) {
error = ERROR_INVALID_HANDLE;
s_nPendingError = error;
s_ppPendingError = ppPointer;
DETOUR_BREAK();
return error;
}
// NOTE(review): NOTHROW is defined as empty near the top of this file, so this
// is a plain `new`; whether it can actually yield NULL depends on the
// toolchain's operator new -- confirm.
DetourOperation *o = new NOTHROW DetourOperation;
if (o == NULL) {
error = ERROR_NOT_ENOUGH_MEMORY;
// The labels below sit inside this block on purpose: later code jumps back
// here with `goto`, so every error exit shares one cleanup sequence.
//   fail - records `error` in s_nPendingError, then falls through to stop.
//   stop - releases `o`, records ppPointer in s_ppPendingError and returns
//          `error` WITHOUT touching s_nPendingError.
fail:
s_nPendingError = error;
DETOUR_BREAK();
stop:
if (o != NULL) {
delete o;
o = NULL;
}
s_ppPendingError = ppPointer;
return error;
}
#ifdef DETOURS_IA64
// On IA64 both pointers are treated as PLABEL descriptors; each is resolved
// to its code address (and global pointer) before use.
PPLABEL_DESCRIPTOR ppldTrampo = (PPLABEL_DESCRIPTOR)*ppPointer;
PPLABEL_DESCRIPTOR ppldDetour = (PPLABEL_DESCRIPTOR)pDetour;
PVOID pDetourGlobals = NULL;
PVOID pTrampoGlobals = NULL;
pDetour = (PBYTE)DetourCodeFromPointer(ppldDetour, &pDetourGlobals);
PDETOUR_TRAMPOLINE pTrampoline = (PDETOUR_TRAMPOLINE)
DetourCodeFromPointer(ppldTrampo, &pTrampoGlobals);
// The remaining statements in this branch are diagnostic traces only.
DETOUR_TRACE((" ppldDetour=%p, code=%p [gp=%p]\n",
ppldDetour, pDetour, pDetourGlobals));
DETOUR_TRACE((" ppldTrampo=%p, code=%p [gp=%p]\n",
ppldTrampo, pTrampoline, pTrampoGlobals));
DETOUR_TRACE(("\n"));
DETOUR_TRACE(("detours: &pldTrampoline =%p\n",
&pTrampoline->pldTrampoline));
DETOUR_TRACE(("detours: &bMovlTargetGp =%p [%p]\n",
&pTrampoline->bMovlTargetGp,
pTrampoline->bMovlTargetGp.GetMovlGp()));
DETOUR_TRACE(("detours: &rbCode =%p [%p]\n",
&pTrampoline->rbCode,
((DETOUR_IA64_BUNDLE&)pTrampoline->rbCode).GetBrlTarget()));
DETOUR_TRACE(("detours: &bBrlRemainEip =%p [%p]\n",
&pTrampoline->bBrlRemainEip,
pTrampoline->bBrlRemainEip.GetBrlTarget()));
DETOUR_TRACE(("detours: &bMovlDetourGp =%p [%p]\n",
&pTrampoline->bMovlDetourGp,
pTrampoline->bMovlDetourGp.GetMovlGp()));
DETOUR_TRACE(("detours: &bBrlDetourEip =%p [%p]\n",
&pTrampoline->bCallDetour,
pTrampoline->bCallDetour.GetBrlTarget()));
DETOUR_TRACE(("detours: pldDetour =%p [%p]\n",
pTrampoline->ppldDetour->EntryPoint,
pTrampoline->ppldDetour->GlobalPointer));
DETOUR_TRACE(("detours: pldTarget =%p [%p]\n",
pTrampoline->ppldTarget->EntryPoint,
pTrampoline->ppldTarget->GlobalPointer));
DETOUR_TRACE(("detours: pbRemain =%p\n",
pTrampoline->pbRemain));
DETOUR_TRACE(("detours: pbDetour =%p\n",
pTrampoline->pbDetour));
DETOUR_TRACE(("\n"));
#else // !DETOURS_IA64
// Resolve both pointers to their underlying code addresses.
PDETOUR_TRAMPOLINE pTrampoline =
(PDETOUR_TRAMPOLINE)DetourCodeFromPointer(*ppPointer, NULL);
pDetour = DetourCodeFromPointer(pDetour, NULL);
#endif // !DETOURS_IA64
////////////////////////////////////// Verify that Trampoline is in place.
//
// The target address is derived from the trampoline: pbRemain minus the
// cbRestore bytes recorded in it.
LONG cbTarget = pTrampoline->cbRestore;
PBYTE pbTarget = pTrampoline->pbRemain - cbTarget;
// Reject a trampoline whose restore length is zero or larger than rbCode.
// With s_fIgnoreTooSmall set the error is returned via `stop`, i.e. without
// being recorded in s_nPendingError.
if (cbTarget == 0 || cbTarget > sizeof(pTrampoline->rbCode)) {
error = ERROR_INVALID_BLOCK;
if (s_fIgnoreTooSmall) {
goto stop;
}
else {
DETOUR_BREAK();
goto fail;
}
}
// Reject a trampoline that was not created for this detour function; the
// same s_fIgnoreTooSmall flag selects between `stop` and `fail` here.
if (pTrampoline->pbDetour != pDetour) {
error = ERROR_INVALID_BLOCK;
if (s_fIgnoreTooSmall) {
goto stop;
}
else {
DETOUR_BREAK();
goto fail;
}
}
// Make the target bytes writable now; the previous protection is captured in
// dwOld and stored in the operation record (dwPerm) below.
DWORD dwOld = 0;
if (!VirtualProtect(pbTarget, cbTarget,
PAGE_EXECUTE_READWRITE, &dwOld)) {
error = GetLastError();
DETOUR_BREAK();
goto fail;
}
// Fill in the removal record and push it onto the head of the pending list.
o->fIsRemove = TRUE;
o->ppbPointer = (PBYTE*)ppPointer;
o->pTrampoline = pTrampoline;
o->pbTarget = pbTarget;
o->dwPerm = dwOld;
o->pNext = s_pPendingOperations;
s_pPendingOperations = o;
return NO_ERROR;
}
//////////////////////////////////////////////////////////////////////////////
//
// Helpers for manipulating page protection.
//
// For reference:
// PAGE_NOACCESS 0x01
// PAGE_READONLY 0x02
// PAGE_READWRITE 0x04
// PAGE_WRITECOPY 0x08
// PAGE_EXECUTE 0x10
// PAGE_EXECUTE_READ 0x20
// PAGE_EXECUTE_READWRITE 0x40
// PAGE_EXECUTE_WRITECOPY 0x80
// PAGE_GUARD ...
// PAGE_NOCACHE ...
// PAGE_WRITECOMBINE ...
// Bit masks over the PAGE_* protection constants:
//   DETOUR_PAGE_EXECUTE_ALL    - every base protection that permits execution.
//   DETOUR_PAGE_NO_EXECUTE_ALL - every base protection that does not.
//   DETOUR_PAGE_ATTRIBUTES     - all remaining bits, i.e. anything that is not
//                                one of the eight base protections above.
#define DETOUR_PAGE_EXECUTE_ALL \
    (PAGE_EXECUTE | PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_EXECUTE_WRITECOPY)
#define DETOUR_PAGE_NO_EXECUTE_ALL \
    (PAGE_NOACCESS | PAGE_READONLY | PAGE_READWRITE | PAGE_WRITECOPY)
#define DETOUR_PAGE_ATTRIBUTES \
    (~(DETOUR_PAGE_EXECUTE_ALL | DETOUR_PAGE_NO_EXECUTE_ALL))

// The non-execute mask shifted up by four bits must equal the execute mask;
// DetourPageProtectAdjustExecute relies on this to convert between the two
// groups with a plain shift.
C_ASSERT((DETOUR_PAGE_NO_EXECUTE_ALL << 4) == DETOUR_PAGE_EXECUTE_ALL);
// Return dwNewProtect with its executability made to match dwOldProtect.
//
// If both values agree on whether execution is allowed, dwNewProtect is
// returned untouched. Otherwise the base protection bits of dwNewProtect are
// shifted by four into the other group (execute <-> non-execute) and the bits
// outside the eight base protections are carried over unchanged.
static DWORD DetourPageProtectAdjustExecute(_In_ DWORD dwOldProtect,
                                            _In_ DWORD dwNewProtect)
{
    const bool wasExecutable = (dwOldProtect & DETOUR_PAGE_EXECUTE_ALL) != 0;
    const bool wantsExecutable = (dwNewProtect & DETOUR_PAGE_EXECUTE_ALL) != 0;

    // Nothing to adjust when the executability already matches.
    if (wasExecutable == wantsExecutable) {
        return dwNewProtect;
    }

    // Bits that are not base protections pass through as-is.
    const DWORD dwModifiers = dwNewProtect & DETOUR_PAGE_ATTRIBUTES;

    if (wasExecutable) {
        // Promote the non-execute protection to its execute counterpart.
        return ((dwNewProtect & DETOUR_PAGE_NO_EXECUTE_ALL) << 4) | dwModifiers;
    }

    // Demote the execute protection to its non-execute counterpart.
    return ((dwNewProtect & DETOUR_PAGE_EXECUTE_ALL) >> 4) | dwModifiers;
}
// Apply dwNewProtect to nSize bytes starting at pAddress in hProcess while
// keeping the executability the memory currently has.
//
// Some systems do not allow the executability of a page to change, so the
// requested protection is first adjusted to match the protection that
// VirtualQueryEx reports for pAddress. Intended as a drop-in replacement for
// some uses of VirtualProtectEx; it is not needed when merely restoring a
// previously saved protection.
//
// Returns FALSE if the query fails; otherwise returns the result of
// VirtualProtectEx, which also receives pdwOldProtect.
_Success_(return != FALSE)
BOOL WINAPI DetourVirtualProtectSameExecuteEx(_In_ HANDLE hProcess,
                                              _In_ PVOID pAddress,
                                              _In_ SIZE_T nSize,
                                              _In_ DWORD dwNewProtect,
                                              _Out_ PDWORD pdwOldProtect)
{
    MEMORY_BASIC_INFORMATION regionInfo;
    ZeroMemory(&regionInfo, sizeof(regionInfo));

    // Look up the protection currently in effect at pAddress.
    const SIZE_T cbQueried = VirtualQueryEx(hProcess, pAddress,
                                            &regionInfo, sizeof(regionInfo));
    if (cbQueried == 0) {
        return FALSE;
    }

    const DWORD dwAdjusted =
        DetourPageProtectAdjustExecute(regionInfo.Protect, dwNewProtect);
    return VirtualProtectEx(hProcess, pAddress, nSize, dwAdjusted, pdwOldProtect);
}
// Current-process form of DetourVirtualProtectSameExecuteEx: forwards all
// arguments unchanged, supplying the handle from GetCurrentProcess().
_Success_(return != FALSE)
BOOL WINAPI DetourVirtualProtectSameExecute(_In_ PVOID pAddress,
                                            _In_ SIZE_T nSize,
                                            _In_ DWORD dwNewProtect,
                                            _Out_ PDWORD pdwOldProtect)
{
    const HANDLE hSelf = GetCurrentProcess();
    return DetourVirtualProtectSameExecuteEx(hSelf, pAddress, nSize,
                                             dwNewProtect, pdwOldProtect);
}
// End of File