diff --git a/include/elfhacks.h b/include/elfhacks.h
new file mode 100644
index 00000000..d1be8e2c
--- /dev/null
+++ b/include/elfhacks.h
@@ -0,0 +1,204 @@
+/**
+ * \file include/elfhacks.h
+ * \brief elfhacks application interface
+ * \author Pyry Haulos
+ * \date 2007-2008
+ */
+
+/* elfhacks.h -- Various ELF run-time hacks
+  version 0.4.1, March 9th, 2008
+
+  Copyright (C) 2007-2008 Pyry Haulos
+
+  This software is provided 'as-is', without any express or implied
+  warranty. In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Pyry Haulos
+*/
+
+#ifndef ELFHACKS_H
+#define ELFHACKS_H
+
+#include <elf.h>
+#include <link.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define __PUBLIC __attribute__ ((visibility ("default")))
+
+#ifdef __x86_64__
+# define __elf64
+#endif
+#ifdef __i386__
+# define __elf32
+#endif
+
+#ifdef __elf64
+# define ELFW_R_SYM ELF64_R_SYM
+# define ElfW_Sword Elf64_Sxword
+#else
+# ifdef __elf32
+#  define ELFW_R_SYM ELF32_R_SYM
+#  define ElfW_Sword Elf32_Sword
+# else
+#  error neither __elf32 nor __elf64 is defined
+# endif
+#endif
+
+/**
+ * \defgroup elfhacks elfhacks
+ * Elfhacks is a collection of functions that aim for retrieving
+ * or modifying program's dynamic linking information at run-time.
+ * \{
+ */
+
+/**
+ * \brief elfhacks program object
+ */
+typedef struct {
+	/** file name */
+	const char *name;
+	/** base address in memory */
+	ElfW(Addr) addr;
+	/** program headers */
+	const ElfW(Phdr) *phdr;
+	/** number of program headers */
+	ElfW(Half) phnum;
+	/** .dynamic */
+	ElfW(Dyn) *dynamic;
+	/** dynamic symbol table (DT_SYMTAB) */
+	ElfW(Sym) *symtab;
+	/** dynamic string table (DT_STRTAB) */
+	const char *strtab;
+	/** symbol hash table (DT_HASH) */
+	ElfW(Word) *hash;
+	/** symbol hash table (DT_GNU_HASH) */
+	Elf32_Word *gnu_hash;
+} eh_obj_t;
+
+/**
+ * \brief elfhacks symbol
+ */
+typedef struct {
+	/** symbol name */
+	const char *name;
+	/** corresponding ElfW(Sym) */
+	ElfW(Sym) *sym;
+	/** elfhacks object this symbol is associated to */
+	eh_obj_t *obj;
+} eh_sym_t;
+
+/**
+ * \brief elfhacks relocation
+ */
+typedef struct {
+	/** symbol this relocation is associated to */
+	eh_sym_t *sym;
+	/** corresponding ElfW(Rel) (NULL if this is Rela) */
+	ElfW(Rel) *rel;
+	/** corresponding ElfW(Rela) (NULL if this is Rel) */
+	ElfW(Rela) *rela;
+	/** elfhacks program object */
+	eh_obj_t *obj;
+} eh_rel_t;
+
+/**
+ * \brief Iterate objects callback
+ */
+typedef int (*eh_iterate_obj_callback_func)(eh_obj_t *obj, void *arg);
+/**
+ * \brief Iterate symbols callback
+ */
+typedef int (*eh_iterate_sym_callback_func)(eh_sym_t *sym, void *arg);
+/**
+ * \brief Iterate relocations callback
+ */
+typedef int (*eh_iterate_rel_callback_func)(eh_rel_t *rel, void *arg);
+
+/**
+ * \brief Initializes eh_obj_t for given soname
+ *
+ * Matching is done using fnmatch() so wildcards and other standard
+ * filename metacharacters and expressions work.
+ *
+ * If soname is NULL, this function returns the main program object.
+ * \param obj elfhacks object
+ * \param soname object's soname (see /proc/pid/maps) or NULL for main
+ * \return 0 on success otherwise a positive error code
+ */
+__PUBLIC int eh_find_obj(eh_obj_t *obj, const char *soname);
+
+/**
+ * \brief Walk through list of objects
+ * \param callback callback function
+ * \param arg argument passed to callback function
+ * \return 0 on success otherwise an error code
+ */
+__PUBLIC int eh_iterate_obj(eh_iterate_obj_callback_func callback, void *arg);
+
+/**
+ * \brief Finds symbol in object's .dynsym and retrieves its value.
+ * \param obj elfhacks program object
+ * \param name symbol to find
+ * \param to returned value
+ * \return 0 on success otherwise a positive error code
+ */
+__PUBLIC int eh_find_sym(eh_obj_t *obj, const char *name, void **to);
+
+/**
+ * \brief Walk through list of symbols in object
+ * \param obj elfhacks program object
+ * \param callback callback function
+ * \param arg argument passed to callback function
+ * \return 0 on success otherwise an error code
+ */
+__PUBLIC int eh_iterate_sym(eh_obj_t *obj, eh_iterate_sym_callback_func callback, void *arg);
+
+/**
+ * \brief Iterates through object's .rel.plt and .rela.plt and sets every
+ *        occurrence of some symbol to the specified value.
+ * \param obj elfhacks program object
+ * \param sym symbol to replace
+ * \param val new value
+ * \return 0 on success otherwise a positive error code
+ */
+__PUBLIC int eh_set_rel(eh_obj_t *obj, const char *sym, void *val);
+
+/**
+ * \brief Walk through object's .rel.plt and .rela.plt
+ * \param obj elfhacks program object
+ * \param callback callback function
+ * \param arg argument passed to callback function
+ * \return 0 on success otherwise an error code
+ */
+__PUBLIC int eh_iterate_rel(eh_obj_t *obj, eh_iterate_rel_callback_func callback, void *arg);
+
+/**
+ * \brief Destroy eh_obj_t object.
+ * \param obj elfhacks program object
+ * \return 0 on success otherwise a positive error code
+ */
+__PUBLIC int eh_destroy_obj(eh_obj_t *obj);
+
+/** \} */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ELFHACKS_H */
diff --git a/src/elfhacks.cpp b/src/elfhacks.cpp
new file mode 100644
index 00000000..48ebfe6d
--- /dev/null
+++ b/src/elfhacks.cpp
@@ -0,0 +1,780 @@
+/**
+ * \file src/elfhacks.cpp
+ * \brief various ELF run-time hacks
+ * \author Pyry Haulos
+ * \date 2007-2008
+ * For conditions of distribution and use, see copyright notice in elfhacks.h
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <elf.h>
+#include <link.h>
+#include <fnmatch.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include "elfhacks.h"
+
+/**
+ * \addtogroup elfhacks
+ *  \{
+ */
+
+/** Arguments forwarded from eh_iterate_obj() to eh_iterate_callback(). */
+struct eh_iterate_callback_args {
+	/** user callback invoked for every usable object */
+	eh_iterate_obj_callback_func callback;
+	/** opaque pointer handed through to the user callback */
+	void *arg;
+};
+
+int eh_check_addr(eh_obj_t *obj, const void *addr);
+int eh_find_callback(struct dl_phdr_info *info, size_t size, void *argptr);
+int eh_find_next_dyn(eh_obj_t *obj, ElfW_Sword tag, int i, ElfW(Dyn) **next);
+int eh_init_obj(eh_obj_t *obj);
+
+int eh_set_rela_plt(eh_obj_t *obj, int p, const char *sym, void *val);
+int eh_set_rel_plt(eh_obj_t *obj, int p, const char *sym, void *val);
+
+int eh_iterate_rela_plt(eh_obj_t *obj, int p, eh_iterate_rel_callback_func callback, void *arg);
+int eh_iterate_rel_plt(eh_obj_t *obj, int p, eh_iterate_rel_callback_func callback, void *arg);
+
+int eh_find_sym_hash(eh_obj_t *obj, const char *name, eh_sym_t *sym);
+int eh_find_sym_gnu_hash(eh_obj_t *obj, const char *name, eh_sym_t *sym);
+
+ElfW(Word) eh_hash_elf(const char *name);
+Elf32_Word eh_hash_gnu(const char *name);
+
+/**
+ * \brief dl_iterate_phdr() callback used by eh_find_obj().
+ *
+ * On entry find->name holds the fnmatch() pattern; NULL selects the object
+ * dl_iterate_phdr() reports with an empty name (the main program). On a
+ * match the name, load address and program headers are stored in the
+ * object and the walk is stopped.
+ *
+ * \param info object currently visited by dl_iterate_phdr()
+ * \param size size of *info (unused)
+ * \param argptr the eh_obj_t being filled in
+ * \return 1 once an object matched (stops the walk), 0 to keep searching
+ */
+int eh_find_callback(struct dl_phdr_info *info, size_t size, void *argptr)
+{
+	eh_obj_t *find = (eh_obj_t *) argptr;
+
+	(void) size;
+
+	if (find->name == NULL) {
+		if (strcmp(info->dlpi_name, ""))
+			return 0;
+	} else if (fnmatch(find->name, info->dlpi_name, 0))
+		return 0;
+
+	if (find->name == NULL) /* TODO readlink? */
+		find->name = "/proc/self/exe";
+	else
+		find->name = info->dlpi_name;
+	find->addr = info->dlpi_addr;
+
+	/* segment headers */
+	find->phdr = info->dlpi_phdr;
+	find->phnum = info->dlpi_phnum;
+
+	/*
+	 Stop the walk: find->name was just overwritten and must not be
+	 reused as the pattern for the remaining objects.
+	*/
+	return 1;
+}
+
+/**
+ * \brief dl_iterate_phdr() callback used by eh_iterate_obj().
+ *
+ * Builds a temporary eh_obj_t for the visited object and passes it to the
+ * user callback. Objects for which eh_init_obj() returns ENOTSUP are
+ * skipped.
+ *
+ * \param info object currently visited by dl_iterate_phdr()
+ * \param size size of *info (unused)
+ * \param argptr struct eh_iterate_callback_args holding the user callback
+ * \return 0 to continue the walk, otherwise the first non-zero code from
+ *         eh_init_obj(), the user callback or eh_destroy_obj()
+ */
+int eh_iterate_callback(struct dl_phdr_info *info, size_t size, void *argptr)
+{
+	struct eh_iterate_callback_args *args =
+		(struct eh_iterate_callback_args *) argptr;
+	eh_obj_t obj;
+	int ret, destroy_ret;
+
+	(void) size;
+
+	/* eh_init_obj needs phdr and phnum */
+	obj.phdr = info->dlpi_phdr;
+	obj.phnum = info->dlpi_phnum;
+	obj.addr = info->dlpi_addr;
+	obj.name = info->dlpi_name;
+
+	if ((ret = eh_init_obj(&obj))) {
+		if (ret == ENOTSUP) /* just skip */
+			return 0;
+		return ret;
+	}
+
+	ret = args->callback(&obj, args->arg);
+
+	/* release the object even when the callback asked to stop */
+	destroy_ret = eh_destroy_obj(&obj);
+
+	return ret ? ret : destroy_ret;
+}
+
+/**
+ * \brief Walk through list of objects (see elfhacks.h).
+ * \return 0 on success, otherwise the non-zero code that stopped the walk
+ */
+int eh_iterate_obj(eh_iterate_obj_callback_func callback, void *arg)
+{
+	struct eh_iterate_callback_args args;
+
+	args.callback = callback;
+	args.arg = arg;
+
+	/* dl_iterate_phdr() returns the last callback's return value */
+	return dl_iterate_phdr(eh_iterate_callback, &args);
+}
+
+/**
+ * \brief Initializes eh_obj_t for given soname (see elfhacks.h).
+ * \return 0 on success, EAGAIN if no loaded object matched, otherwise the
+ *         error code from eh_init_obj()
+ */
+int eh_find_obj(eh_obj_t *obj, const char *soname)
+{
+	/* This function uses glibc-specific dl_iterate_phdr().
+	   Another way could be parsing /proc/self/exe or using
+	   pmap() on Solaris or *BSD */
+	obj->phdr = NULL;
+	obj->name = soname;
+	dl_iterate_phdr(eh_find_callback, obj);
+
+	if (!obj->phdr)
+		return EAGAIN;
+
+	return eh_init_obj(obj);
+}
+
+/**
+ * \brief Checks that an address lies inside one of the object's PT_LOAD
+ *        segments.
+ * \param obj elfhacks program object (phdr, phnum and addr must be set)
+ * \param addr address to test
+ * \return 0 if the address is inside a loaded segment, EINVAL otherwise
+ */
+int eh_check_addr(eh_obj_t *obj, const void *addr)
+{
+	/*
+	 Check that given address is inside program's
+	 memory maps. PT_LOAD program headers tell us
+	 where program has been loaded into.
+	*/
+	int p;
+	for (p = 0; p < obj->phnum; p++) {
+		if (obj->phdr[p].p_type == PT_LOAD) {
+			if (((ElfW(Addr)) addr < obj->phdr[p].p_memsz + obj->phdr[p].p_vaddr + obj->addr) &&
+			    ((ElfW(Addr)) addr >= obj->phdr[p].p_vaddr + obj->addr))
+				return 0;
+		}
+	}
+
+	return EINVAL;
+}
+
+/**
+ * \brief Locates .dynamic and the symbol lookup tables of an object.
+ *
+ * Expects obj->phdr, obj->phnum and obj->addr to be set and fills in
+ * dynamic, strtab, symtab, hash and gnu_hash.
+ *
+ * \param obj elfhacks program object
+ * \return 0 on success, ENOTSUP if the object has no (or more than one)
+ *         PT_DYNAMIC, duplicated table entries, or a string/symbol table
+ *         that is not inside the object's loaded segments
+ */
+int eh_init_obj(eh_obj_t *obj)
+{
+	/*
+	 ELF spec says in section header documentation, that:
+	 "An object file may have only one dynamic section."
+
+	 Let's assume it means that object has only one PT_DYNAMIC
+	 as well.
+	*/
+	int p;
+	obj->dynamic = NULL;
+	for (p = 0; p < obj->phnum; p++) {
+		if (obj->phdr[p].p_type == PT_DYNAMIC) {
+			if (obj->dynamic)
+				return ENOTSUP;
+
+			obj->dynamic = (ElfW(Dyn) *) (obj->phdr[p].p_vaddr + obj->addr);
+		}
+	}
+
+	if (!obj->dynamic)
+		return ENOTSUP;
+
+	/*
+	 ELF spec says that program is allowed to have more than one
+	 .strtab but does not describe how string table indexes translate
+	 to multiple string tables.
+
+	 And spec says that only one SHT_HASH is allowed, does it mean that
+	 obj has only one DT_HASH?
+
+	 About .symtab it does not mention anything about if multiple
+	 symbol tables are allowed or not.
+
+	 Maybe st_shndx is the key here?
+	*/
+	obj->strtab = NULL;
+	obj->hash = NULL;
+	obj->gnu_hash = NULL;
+	obj->symtab = NULL;
+	p = 0;
+	while (obj->dynamic[p].d_tag != DT_NULL) {
+		if (obj->dynamic[p].d_tag == DT_STRTAB) {
+			if (obj->strtab)
+				return ENOTSUP;
+
+			obj->strtab = (const char *) obj->dynamic[p].d_un.d_ptr;
+		} else if (obj->dynamic[p].d_tag == DT_HASH) {
+			if (obj->hash)
+				return ENOTSUP;
+
+			obj->hash = (ElfW(Word) *) obj->dynamic[p].d_un.d_ptr;
+		} else if (obj->dynamic[p].d_tag == DT_GNU_HASH) {
+			if (obj->gnu_hash)
+				return ENOTSUP;
+
+			obj->gnu_hash = (Elf32_Word *) obj->dynamic[p].d_un.d_ptr;
+		} else if (obj->dynamic[p].d_tag == DT_SYMTAB) {
+			if (obj->symtab)
+				return ENOTSUP;
+
+			obj->symtab = (ElfW(Sym) *) obj->dynamic[p].d_un.d_ptr;
+		}
+		p++;
+	}
+
+	/* This is here to catch b0rken headers (vdso) */
+	if (eh_check_addr(obj, (const void *) obj->strtab) ||
+	    eh_check_addr(obj, (const void *) obj->symtab))
+		return ENOTSUP;
+
+	/*
+	 Validate both hash tables independently: eh_find_sym() tries
+	 DT_GNU_HASH first, so it must be checked even when DT_HASH is
+	 present as well.
+	*/
+	if (obj->hash && eh_check_addr(obj, (const void *) obj->hash))
+		obj->hash = NULL;
+
+	if (obj->gnu_hash && eh_check_addr(obj, (const void *) obj->gnu_hash))
+		obj->gnu_hash = NULL;
+
+	return 0;
+}
+
+/**
+ * \brief Finds symbol and retrieves its value (see elfhacks.h).
+ * \return 0 on success, EAGAIN if neither hash table yields the symbol
+ */
+int eh_find_sym(eh_obj_t *obj, const char *name, void **to)
+{
+	eh_sym_t sym;
+
+	/* DT_GNU_HASH is faster ;) */
+	if (obj->gnu_hash) {
+		if (!eh_find_sym_gnu_hash(obj, name, &sym)) {
+			*to = (void *) (sym.sym->st_value + obj->addr);
+			return 0;
+		}
+	}
+
+	/* maybe it is in DT_HASH or DT_GNU_HASH is not present */
+	if (obj->hash) {
+		if (!eh_find_sym_hash(obj, name, &sym)) {
+			*to = (void *) (sym.sym->st_value + obj->addr);
+			return 0;
+		}
+	}
+
+	return EAGAIN;
+}
+
+/**
+ * \brief Computes the hash used by DT_HASH tables.
+ * \param name NUL-terminated symbol name
+ * \return hash value of name
+ */
+ElfW(Word) eh_hash_elf(const char *name)
+{
+	ElfW(Word) tmp, hash = 0;
+	const unsigned char *uname = (const unsigned char *) name;
+	int c;
+
+	while ((c = *uname++) != '\0') {
+		hash = (hash << 4) + c;
+		if ((tmp = (hash & 0xf0000000)) != 0) {
+			hash ^= tmp >> 24;
+			hash ^= tmp;
+		}
+	}
+
+	return hash;
+}
+
+/**
+ * \brief Looks a symbol up through the DT_HASH table.
+ * \param obj elfhacks program object
+ * \param name symbol to find
+ * \param sym filled in on success
+ * \return 0 on success, ENOTSUP if the object has no DT_HASH, EAGAIN if
+ *         the symbol was not found
+ */
+int eh_find_sym_hash(eh_obj_t *obj, const char *name, eh_sym_t *sym)
+{
+	ElfW(Word) hash, nbucket, nchain, idx;
+	ElfW(Word) *bucket, *chain;
+	ElfW(Sym) *esym;
+
+	if (!obj->hash)
+		return ENOTSUP;
+
+	/*
+	 First item in DT_HASH is nbucket, second is nchain. They are
+	 followed by the bucket array and then the chain array.
+	*/
+	nbucket = obj->hash[0];
+	nchain = obj->hash[1];
+	if (nbucket == 0)
+		return EAGAIN;
+
+	bucket = &obj->hash[2];
+	chain = &bucket[nbucket];
+
+	hash = eh_hash_elf(name);
+	sym->sym = NULL;
+
+	/*
+	 bucket[hash % nbucket] is the first symbol table index of the
+	 chain and chain[idx] is the index that follows idx; STN_UNDEF
+	 terminates the chain. idx < nchain guards against a b0rken table.
+	*/
+	for (idx = bucket[hash % nbucket];
+	     (idx != STN_UNDEF) && (idx < nchain);
+	     idx = chain[idx]) {
+		esym = &obj->symtab[idx];
+
+		if (esym->st_name &&
+		    !strcmp(&obj->strtab[esym->st_name], name)) {
+			sym->sym = esym;
+			break;
+		}
+	}
+
+	/* symbol not found */
+	if (sym->sym == NULL)
+		return EAGAIN;
+
+	sym->obj = obj;
+	sym->name = &obj->strtab[sym->sym->st_name];
+
+	return 0;
+}
+
+/**
+ * \brief Computes the hash used by DT_GNU_HASH tables.
+ * \param name NUL-terminated symbol name
+ * \return 32-bit hash value of name
+ */
+Elf32_Word eh_hash_gnu(const char *name)
+{
+	Elf32_Word hash = 5381;
+	const unsigned char *uname = (const unsigned char *) name;
+	int c;
+
+	while ((c = *uname++) != '\0')
+		hash = (hash << 5) + hash + c;
+
+	return hash & 0xffffffff;
+}
+
+/**
+ * \brief Looks a symbol up through the DT_GNU_HASH table.
+ * \param obj elfhacks program object
+ * \param name symbol to find
+ * \param sym filled in on success
+ * \return 0 on success, ENOTSUP if the object has no DT_GNU_HASH, EAGAIN
+ *         if the symbol was not found
+ */
+int eh_find_sym_gnu_hash(eh_obj_t *obj, const char *name, eh_sym_t *sym)
+{
+	Elf32_Word *buckets, *chain_zero, *hasharr;
+	ElfW(Addr) *bitmask, bitmask_word;
+	Elf32_Word symbias, bitmask_nwords, bucket,
+	           nbuckets, bitmask_idxbits, shift;
+	Elf32_Word hash, hashbit1, hashbit2;
+	ElfW(Sym) *esym;
+
+	if (!obj->gnu_hash)
+		return ENOTSUP;
+
+	if (obj->gnu_hash[0] == 0)
+		return EAGAIN;
+
+	sym->sym = NULL;
+
+	/*
+	 Initialize our hash table stuff
+
+	 DT_GNU_HASH is:
+	 [nbuckets] [symbias] [bitmask_nwords] [shift]
+	 [bitmask_nwords * ElfW(Addr)] <- bitmask
+	 [nbuckets * Elf32_Word] <- buckets
+	 [chain words, one per symbol starting at index symbias]
+	 */
+	nbuckets = obj->gnu_hash[0];
+	symbias = obj->gnu_hash[1];
+	bitmask_nwords = obj->gnu_hash[2]; /* must be power of two */
+	bitmask_idxbits = bitmask_nwords - 1;
+	shift = obj->gnu_hash[3];
+	bitmask = (ElfW(Addr) *) &obj->gnu_hash[4];
+	buckets = &obj->gnu_hash[4 + (__ELF_NATIVE_CLASS / 32) * bitmask_nwords];
+	/* biased so that chain_zero[n] is the chain word of symbol n */
+	chain_zero = &buckets[nbuckets] - symbias;
+
+	/* hash our symbol */
+	hash = eh_hash_gnu(name);
+
+	/* pick the bitmask word and the two bits this hash maps to */
+	bitmask_word = bitmask[(hash / __ELF_NATIVE_CLASS) & bitmask_idxbits];
+	hashbit1 = hash & (__ELF_NATIVE_CLASS - 1);
+	hashbit2 = (hash >> shift) & (__ELF_NATIVE_CLASS - 1);
+
+	/* both bits must be set, otherwise the symbol cannot be in this object */
+	if (!((bitmask_word >> hashbit1) & (bitmask_word >> hashbit2) & 1))
+		return EAGAIN;
+
+	/* locate bucket */
+	bucket = buckets[hash % nbuckets];
+	if (bucket == 0)
+		return EAGAIN;
+
+	/*
+	 and find match in chain: each chain word holds the symbol's hash
+	 with bit 0 replaced by an end-of-chain flag
+	*/
+	hasharr = &chain_zero[bucket];
+	do {
+		if (((*hasharr ^ hash) >> 1) == 0) {
+			/* hash matches, but does the name? */
+			esym = &obj->symtab[hasharr - chain_zero];
+			if (esym->st_name) {
+				if (!strcmp(&obj->strtab[esym->st_name], name)) {
+					sym->sym = esym;
+					break;
+				}
+			}
+		}
+	} while ((*hasharr++ & 1u) == 0);
+
+	/* symbol not found */
+	if (sym->sym == NULL)
+		return EAGAIN;
+
+	sym->obj = obj;
+	sym->name = &obj->strtab[sym->sym->st_name];
+
+	return 0;
+}
+
+/**
+ * \brief Walk through list of symbols in object (not implemented).
+ * \return always ENOTSUP
+ */
+int eh_iterate_sym(eh_obj_t *obj, eh_iterate_sym_callback_func callback, void *arg)
+{
+	(void) obj;
+	(void) callback;
+	(void) arg;
+
+	return ENOTSUP;
+}
+
+/**
+ * \brief Finds another .dynamic entry with the given tag.
+ *
+ * Entries after index i are searched first, then the entries before it.
+ *
+ * \param obj elfhacks program object
+ * \param tag d_tag value to look for
+ * \param i index of the entry the search is relative to
+ * \param next set to the entry found, or to NULL
+ * \return 0 on success, EAGAIN if no such entry exists
+ */
+int eh_find_next_dyn(eh_obj_t *obj, ElfW_Sword tag, int i, ElfW(Dyn) **next)
+{
+	/* first from i + 1 to end, then from start to i - 1 */
+	int p;
+	*next = NULL;
+
+	p = i + 1;
+	while (obj->dynamic[p].d_tag != DT_NULL) {
+		if (obj->dynamic[p].d_tag == tag) {
+			*next = &obj->dynamic[p];
+			return 0;
+		}
+		p++;
+	}
+
+	/* test entry p, not i, so the walk also stops at an early DT_NULL */
+	p = 0;
+	while ((p < i) && (obj->dynamic[p].d_tag != DT_NULL)) {
+		if (obj->dynamic[p].d_tag == tag) {
+			*next = &obj->dynamic[p];
+			return 0;
+		}
+		p++;
+	}
+
+	return EAGAIN;
+}
+
+/**
+ * \brief Sets every DT_JMPREL Rela entry that refers to a symbol.
+ * \param obj elfhacks program object
+ * \param p index of the DT_JMPREL entry in .dynamic
+ * \param sym name of the symbol to replace
+ * \param val new value stored at the relocation target
+ * \return 0 on success, EINVAL if DT_PLTRELSZ is missing
+ */
+int eh_set_rela_plt(eh_obj_t *obj, int p, const char *sym, void *val)
+{
+	ElfW(Rela) *rela = (ElfW(Rela) *) obj->dynamic[p].d_un.d_ptr;
+	ElfW(Dyn) *relasize;
+	unsigned int i;
+
+	/* DT_PLTRELSZ contains PLT relocs size in bytes */
+	if (eh_find_next_dyn(obj, DT_PLTRELSZ, p, &relasize))
+		return EINVAL; /* b0rken elf :/ */
+
+	for (i = 0; i < relasize->d_un.d_val / sizeof(ElfW(Rela)); i++) {
+		if (!obj->symtab[ELFW_R_SYM(rela[i].r_info)].st_name)
+			continue;
+
+		if (!strcmp(&obj->strtab[obj->symtab[ELFW_R_SYM(rela[i].r_info)].st_name], sym))
+			*((void **) (rela[i].r_offset + obj->addr)) = val;
+	}
+
+	return 0;
+}
+
+/**
+ * \brief Sets every DT_JMPREL Rel entry that refers to a symbol.
+ * \param obj elfhacks program object
+ * \param p index of the DT_JMPREL entry in .dynamic
+ * \param sym name of the symbol to replace
+ * \param val new value stored at the relocation target
+ * \return 0 on success, EINVAL if DT_PLTRELSZ is missing
+ */
+int eh_set_rel_plt(eh_obj_t *obj, int p, const char *sym, void *val)
+{
+	ElfW(Rel) *rel = (ElfW(Rel) *) obj->dynamic[p].d_un.d_ptr;
+	ElfW(Dyn) *relsize;
+	unsigned int i;
+
+	/* DT_PLTRELSZ contains PLT relocs size in bytes */
+	if (eh_find_next_dyn(obj, DT_PLTRELSZ, p, &relsize))
+		return EINVAL; /* b0rken elf :/ */
+
+	for (i = 0; i < relsize->d_un.d_val / sizeof(ElfW(Rel)); i++) {
+		if (!obj->symtab[ELFW_R_SYM(rel[i].r_info)].st_name)
+			continue;
+
+		if (!strcmp(&obj->strtab[obj->symtab[ELFW_R_SYM(rel[i].r_info)].st_name], sym))
+			*((void **) (rel[i].r_offset + obj->addr)) = val;
+	}
+
+	return 0;
+}
+
+/**
+ * \brief Sets every PLT relocation of a symbol (see elfhacks.h).
+ * \return 0 on success, EINVAL if DT_PLTREL is missing or names an
+ *         unknown relocation type, otherwise the error from the
+ *         Rel/Rela worker
+ */
+int eh_set_rel(eh_obj_t *obj, const char *sym, void *val)
+{
+	/*
+	 Elf spec states that object is allowed to have multiple
+	 .rel.plt and .rela.plt tables, so we will support 'em - here.
+	*/
+	ElfW(Dyn) *pltrel;
+	int ret, p = 0;
+
+	while (obj->dynamic[p].d_tag != DT_NULL) {
+		/* DT_JMPREL contains .rel.plt or .rela.plt */
+		if (obj->dynamic[p].d_tag == DT_JMPREL) {
+			/* DT_PLTREL tells if it is Rela or Rel */
+			if (eh_find_next_dyn(obj, DT_PLTREL, p, &pltrel))
+				return EINVAL; /* b0rken elf :/ */
+
+			if (pltrel->d_un.d_val == DT_RELA) {
+				if ((ret = eh_set_rela_plt(obj, p, sym, val)))
+					return ret;
+			} else if (pltrel->d_un.d_val == DT_REL) {
+				if ((ret = eh_set_rel_plt(obj, p, sym, val)))
+					return ret;
+			} else
+				return EINVAL;
+		}
+		p++;
+	}
+
+	return 0;
+}
+
+/**
+ * \brief Calls callback for every Rela entry of a DT_JMPREL table.
+ * \param obj elfhacks program object
+ * \param p index of the DT_JMPREL entry in .dynamic
+ * \param callback callback function
+ * \param arg argument passed to callback function
+ * \return 0 on success, EINVAL if DT_PLTRELSZ is missing, otherwise the
+ *         first non-zero value returned by callback
+ */
+int eh_iterate_rela_plt(eh_obj_t *obj, int p, eh_iterate_rel_callback_func callback, void *arg)
+{
+	ElfW(Rela) *rela = (ElfW(Rela) *) obj->dynamic[p].d_un.d_ptr;
+	ElfW(Dyn) *relasize;
+	eh_rel_t rel;
+	eh_sym_t sym;
+	unsigned int i;
+	int ret;
+
+	/* rel and sym live on the stack and are reused for every entry */
+	rel.sym = &sym;
+	rel.rel = NULL;
+	rel.obj = obj;
+	sym.obj = obj;
+
+	if (eh_find_next_dyn(obj, DT_PLTRELSZ, p, &relasize))
+		return EINVAL;
+
+	for (i = 0; i < relasize->d_un.d_val / sizeof(ElfW(Rela)); i++) {
+		rel.rela = &rela[i];
+		sym.sym = &obj->symtab[ELFW_R_SYM(rel.rela->r_info)];
+		if (sym.sym->st_name)
+			sym.name = &obj->strtab[sym.sym->st_name];
+		else
+			sym.name = NULL;
+
+		if ((ret = callback(&rel, arg)))
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * \brief Calls callback for every Rel entry of a DT_JMPREL table.
+ * \param obj elfhacks program object
+ * \param p index of the DT_JMPREL entry in .dynamic
+ * \param callback callback function
+ * \param arg argument passed to callback function
+ * \return 0 on success, EINVAL if DT_PLTRELSZ is missing, otherwise the
+ *         first non-zero value returned by callback
+ */
+int eh_iterate_rel_plt(eh_obj_t *obj, int p, eh_iterate_rel_callback_func callback, void *arg)
+{
+	ElfW(Rel) *relp = (ElfW(Rel) *) obj->dynamic[p].d_un.d_ptr;
+	ElfW(Dyn) *relsize;
+	eh_rel_t rel;
+	eh_sym_t sym;
+	unsigned int i;
+	int ret;
+
+	/* rel and sym live on the stack and are reused for every entry */
+	rel.sym = &sym;
+	rel.rela = NULL;
+	rel.obj = obj;
+	sym.obj = obj;
+
+	if (eh_find_next_dyn(obj, DT_PLTRELSZ, p, &relsize))
+		return EINVAL;
+
+	for (i = 0; i < relsize->d_un.d_val / sizeof(ElfW(Rel)); i++) {
+		rel.rel = &relp[i];
+		sym.sym = &obj->symtab[ELFW_R_SYM(rel.rel->r_info)];
+		if (sym.sym->st_name)
+			sym.name = &obj->strtab[sym.sym->st_name];
+		else
+			sym.name = NULL;
+
+		if ((ret = callback(&rel, arg)))
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * \brief Walk through object's PLT relocations (see elfhacks.h).
+ * \return 0 on success, EINVAL if DT_PLTREL is missing or names an
+ *         unknown relocation type, otherwise the first non-zero value
+ *         returned by callback
+ */
+int eh_iterate_rel(eh_obj_t *obj, eh_iterate_rel_callback_func callback, void *arg)
+{
+	ElfW(Dyn) *pltrel;
+	int ret, p = 0;
+
+	while (obj->dynamic[p].d_tag != DT_NULL) {
+		if (obj->dynamic[p].d_tag == DT_JMPREL) {
+			if (eh_find_next_dyn(obj, DT_PLTREL, p, &pltrel))
+				return EINVAL;
+
+			if (pltrel->d_un.d_val == DT_RELA) {
+				if ((ret = eh_iterate_rela_plt(obj, p, callback, arg)))
+					return ret;
+			} else if (pltrel->d_un.d_val == DT_REL) {
+				if ((ret = eh_iterate_rel_plt(obj, p, callback, arg)))
+					return ret;
+			} else
+				return EINVAL;
+		}
+		p++;
+	}
+
+	return 0;
+}
+
+/**
+ * \brief Destroy eh_obj_t object (see elfhacks.h).
+ * \return always 0
+ */
+int eh_destroy_obj(eh_obj_t *obj)
+{
+	obj->phdr = NULL;
+
+	return 0;
+}
+
+/** \} */