Skip to content

Instantly share code, notes, and snippets.

@saagarjha
Last active August 4, 2025 15:33

Revisions

  1. saagarjha revised this gist Feb 22, 2025. 1 changed file with 3 additions and 2 deletions.
    5 changes: 3 additions & 2 deletions library_injector.cpp
    Original file line number Diff line number Diff line change
    @@ -122,6 +122,7 @@ std::uintptr_t rearrange_stack(task_port_t task, const std::string &library, std
    auto variable = "DYLD_INSERT_LIBRARIES=" + library;
    envp.push_back(variable);
    }
    envp.push_back("DYLD_SHARED_REGION=1");

    argvAddresses.clear();
    envpAddresses.clear();
    @@ -376,7 +377,7 @@ void inject(pid_t pid, const std::string &library) {
    }

    int main(int argc, char **argv, char **envp) {
    if (!getenv("DYLD_IN_CACHE")) {
    if (!getenv("DYLD_SHARED_REGION")) {
    uint32_t length = 0;
    std::string path;
    _NSGetExecutablePath(path.data(), &length);
    @@ -387,7 +388,7 @@ int main(int argc, char **argv, char **envp) {
    environment.push_back(*envp++);
    }
    // This happens to disable dyld-in-cache.
    environment.push_back("DYLD_IN_CACHE=0");
    environment.push_back("DYLD_SHARED_REGION=1");
    environment.push_back(nullptr);
    execve(path.c_str(), argv, const_cast<char **>(environment.data()));
    ensure(false);
  2. saagarjha revised this gist Jul 14, 2024. 1 changed file with 55 additions and 5 deletions.
    60 changes: 55 additions & 5 deletions library_injector.cpp
    Original file line number Diff line number Diff line change
    @@ -207,6 +207,7 @@ __asm__(
    ".globl _dyld_start_patch_end\n"
    ".globl _dyld_start_check_start\n"
    ".globl _dyld_start_check_end\n"
    "\n"
    "_dyld_start_patch_start:\n"
    "_dyld_start_check_start:\n"
    /* sub sp, sp, [offset & 0xfff] */ // Added dynamically
    @@ -218,12 +219,45 @@ __asm__(
    "mov fp, #0\n"
    "mov lr, #0\n"
    "_dyld_start_check_end:\n");

    #elif __x86_64__
    // A similar patch for x86_64. The initial sequence is this:
    //
    // mov rdi, rsp
    // and rsp, -16
    // mov rbp, 0
    // push 0
    // jmp start
    //
    // We can golf it down with code that is equivalent (save for xor ebp, ebp,
    // which sets flags, but in this case it doesn't adjust them from what the
    // kernel sets already, and there isn't any code that relies on its value
    // anyway).
    __asm__(
    ".intel_syntax noprefix\n"
    ".globl _dyld_start_patch_start\n"
    ".globl _dyld_start_patch_end\n"
    ".globl _dyld_start_check_start\n"
    ".globl _dyld_start_check_end\n"
    "\n"
    "_dyld_start_patch_start:\n"
    /* sub rsp, [offset] */ // Added dynamically
    "push rsp\n"
    "pop rdi\n"
    "and rsp, -16\n"
    "xor ebp, ebp\n"
    "push rbp\n"
    "_dyld_start_patch_end:\n"
    "_dyld_start_check_start:\n"
    "mov rdi, rsp\n"
    "and rsp, -16\n"
    "mov rbp, 0\n"
    "push 0\n"
    "_dyld_start_check_end:\n");
    #endif
    extern char dyld_start_patch_start;
    extern char dyld_start_patch_end;
    extern char dyld_start_check_start;
    extern char dyld_start_check_end;
    #endif

    void write_patch(task_t task, std::uintptr_t address, void *patch_start, void *patch_end) {
    ensure(vm_protect(task, address / PAGE_SIZE * PAGE_SIZE, PAGE_SIZE, false, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY) == KERN_SUCCESS);
    @@ -293,14 +327,17 @@ void inject(pid_t pid, const std::string &library) {
    if (__builtin_available(macOS 14.4, *)) {
    #if __arm64__
    auto address = arm_thread_state64_get_pc(state);

    #elif __x86_64__
    auto address = state.__rip;
    #endif
    auto expected = std::span{&dyld_start_check_start, &dyld_start_check_end};
    auto actual = std::vector(expected.begin(), expected.end());
    vm_size_t count;

    ensure(vm_read_overwrite(task, address, actual.size(), reinterpret_cast<pointer_t>(actual.data()), &count) == KERN_SUCCESS && count == expected.size());
    ensure(std::equal(expected.begin(), expected.end(), actual.begin(), actual.end()));

    #if __arm64__
    auto difference = arm_thread_state64_get_sp(state) - sp;
    auto stack_adjustment = std::array{
    // sub sp, sp, difference & 0xfff
    @@ -314,9 +351,22 @@ void inject(pid_t pid, const std::string &library) {
    static_cast<std::byte>(0x40 | ((difference >> 12) & 0xfc0) >> 6),
    std::byte{0xd1},
    };
    write_patch(task, address, stack_adjustment.begin(), stack_adjustment.end());
    write_patch(task, arm_thread_state64_get_pc(state) + stack_adjustment.size(), &dyld_start_patch_start, &dyld_start_patch_end);
    #elif __x86_64__
    auto difference = state.__rsp - sp;
    auto stack_adjustment = std::array{
    // sub rsp, difference
    std::byte{0x48},
    std::byte{0x81},
    std::byte{0xec},
    static_cast<std::byte>((difference >> 0) & 0xff),
    static_cast<std::byte>((difference >> 8) & 0xff),
    static_cast<std::byte>((difference >> 16) & 0xff),
    static_cast<std::byte>((difference >> 24) & 0xff),
    };
    #endif
    write_patch(task, address, stack_adjustment.begin(), stack_adjustment.end());
    write_patch(task, address + stack_adjustment.size(), &dyld_start_patch_start, &dyld_start_patch_end);

    } else {
    ensure(thread_set_state(*threads, flavor, reinterpret_cast<thread_state_t>(&state), count) == KERN_SUCCESS);
    }
  3. saagarjha revised this gist Jun 3, 2024. 1 changed file with 86 additions and 12 deletions.
    98 changes: 86 additions & 12 deletions library_injector.cpp
    Original file line number Diff line number Diff line change
    @@ -6,6 +6,7 @@
    #include <algorithm>
    #include <array>
    #include <bsm/libbsm.h>
    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>
    #include <cstring>
    @@ -163,10 +164,11 @@ std::uintptr_t rearrange_stack(task_port_t task, const std::string &library, std
    ensure(vm_write(task, sp, reinterpret_cast<vm_offset_t>(&loadAddress), sizeof(std::uintptr_t)) == KERN_SUCCESS);
    return sp;
    }

    __asm__(
    ".globl _patch_start\n"
    ".globl _patch_end\n"
    "_patch_start:\n"
    ".globl _amfi_flags_patch_start\n"
    ".globl _amfi_flags_patch_end\n"
    "_amfi_flags_patch_start:\n"
    #if __arm64__
    "\tmov x2, #0x5f\n"
    "\tstr x2, [x1]\n"
    @@ -178,14 +180,54 @@ __asm__(
    "\txor rax, rax\n"
    "\tret\n"
    #endif
    "_patch_end:\n");
    "_amfi_flags_patch_end:\n");

    extern char amfi_flags_patch_start;
    extern char amfi_flags_patch_end;

    #if __arm64__
    // This is a clever but incredibly lazy patch. On arm64, the first five
    // instructions of _dyld_start are as follows:
    //
    // mov x0, sp
    // and sp, x0, #~15
    // mov fp, #0
    // mov lr, #0
    // b start
    //
    // We need to bump sp down a bit due to injecting DYLD_INSERT_LIBRARIES, but
    // because of thread_set_state_allowed we can't set it directly. So we inject
    // instructions to do it in here. At process startup fp and lr happen to be set
    // to 0 by the kernel already, which gives us the space to sneak in two extra
    // instructions. (If we wanted to be slightly less lazy, we could take advantage
    // of the kernel's laziness and align sp ourselves when writing the initial
    // stack. This would let us overwrite the instruction aligning sp.)
    __asm__(
    ".globl _dyld_start_patch_start\n"
    ".globl _dyld_start_patch_end\n"
    ".globl _dyld_start_check_start\n"
    ".globl _dyld_start_check_end\n"
    "_dyld_start_patch_start:\n"
    "_dyld_start_check_start:\n"
    /* sub sp, sp, [offset & 0xfff] */ // Added dynamically
    /* sub sp, sp, [offset & ~0xfff], lsl 12 */ // Added dynamically
    "mov x0, sp\n"
    "and sp, x0, #~15\n"
    "_dyld_start_patch_end:\n"
    // Used as a sanity check
    "mov fp, #0\n"
    "mov lr, #0\n"
    "_dyld_start_check_end:\n");

    extern char patch_start;
    extern char patch_end;
    extern char dyld_start_patch_start;
    extern char dyld_start_patch_end;
    extern char dyld_start_check_start;
    extern char dyld_start_check_end;
    #endif

    void write_patch(task_t task, std::uintptr_t address) {
    void write_patch(task_t task, std::uintptr_t address, void *patch_start, void *patch_end) {
    ensure(vm_protect(task, address / PAGE_SIZE * PAGE_SIZE, PAGE_SIZE, false, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY) == KERN_SUCCESS);
    ensure(vm_write(task, address, reinterpret_cast<vm_offset_t>(&patch_start), &patch_end - &patch_start) == KERN_SUCCESS);
    ensure(vm_write(task, address, reinterpret_cast<vm_offset_t>(patch_start), reinterpret_cast<std::uintptr_t>(patch_end) - reinterpret_cast<std::uintptr_t>(patch_start)) == KERN_SUCCESS);
    ensure(vm_protect(task, address / PAGE_SIZE * PAGE_SIZE, PAGE_SIZE, false, VM_PROT_READ | VM_PROT_EXECUTE) == KERN_SUCCESS);
    }

    @@ -208,7 +250,7 @@ void patch_restrictions(task_t task, std::uintptr_t pc) {
    auto amfi_check_dyld_policy_self = std::find_if(symbols.begin(), symbols.end(), [base, command](const auto &symbol) {
    return !std::strcmp(reinterpret_cast<char *>(base + command->stroff) + symbol.n_un.n_strx, "_amfi_check_dyld_policy_self");
    });
    write_patch(task, pc + amfi_check_dyld_policy_self->n_value - _dyld_start->n_value);
    write_patch(task, pc + amfi_check_dyld_policy_self->n_value - _dyld_start->n_value, &amfi_flags_patch_start, &amfi_flags_patch_end);
    return;
    }
    location += command->cmdsize;
    @@ -237,15 +279,47 @@ void inject(pid_t pid, const std::string &library) {
    #if __arm64__
    ensure(thread_convert_thread_state(*threads, THREAD_CONVERT_THREAD_STATE_TO_SELF, flavor, reinterpret_cast<thread_state_t>(&state), count, reinterpret_cast<thread_state_t>(&state), &count) == KERN_SUCCESS);
    auto sp = rearrange_stack(task, library, arm_thread_state64_get_sp(state));
    arm_thread_state64_set_sp(state, sp);
    patch_restrictions(task, arm_thread_state64_get_pc(state));
    ensure(thread_convert_thread_state(*threads, THREAD_CONVERT_THREAD_STATE_FROM_SELF, flavor, reinterpret_cast<thread_state_t>(&state), count, reinterpret_cast<thread_state_t>(&state), &count) == KERN_SUCCESS);
    if (__builtin_available(macOS 14.4, *)) {
    } else {
    arm_thread_state64_set_sp(state, sp);
    ensure(thread_convert_thread_state(*threads, THREAD_CONVERT_THREAD_STATE_FROM_SELF, flavor, reinterpret_cast<thread_state_t>(&state), count, reinterpret_cast<thread_state_t>(&state), &count) == KERN_SUCCESS);
    }
    #elif __x86_64__
    auto sp = rearrange_stack(task, library, static_cast<std::uintptr_t>(state.__rsp));
    state.__rsp = sp;
    patch_restrictions(task, state.__rip);
    #endif
    ensure(thread_set_state(*threads, flavor, reinterpret_cast<thread_state_t>(&state), count) == KERN_SUCCESS);
    if (__builtin_available(macOS 14.4, *)) {
    #if __arm64__
    auto address = arm_thread_state64_get_pc(state);

    auto expected = std::span{&dyld_start_check_start, &dyld_start_check_end};
    auto actual = std::vector(expected.begin(), expected.end());
    vm_size_t count;

    ensure(vm_read_overwrite(task, address, actual.size(), reinterpret_cast<pointer_t>(actual.data()), &count) == KERN_SUCCESS && count == expected.size());
    ensure(std::equal(expected.begin(), expected.end(), actual.begin(), actual.end()));

    auto difference = arm_thread_state64_get_sp(state) - sp;
    auto stack_adjustment = std::array{
    // sub sp, sp, difference & 0xfff
    std::byte{0xff},
    static_cast<std::byte>(0x03 | (difference & 0x3f) << 2),
    static_cast<std::byte>(0x00 | (difference & 0xfc0) >> 6),
    std::byte{0xd1},
    // sub sp, sp, difference & ~0xfff, lsl #12
    std::byte{0xff},
    static_cast<std::byte>(0x03 | ((difference >> 12) & 0x3f) << 2),
    static_cast<std::byte>(0x40 | ((difference >> 12) & 0xfc0) >> 6),
    std::byte{0xd1},
    };
    write_patch(task, address, stack_adjustment.begin(), stack_adjustment.end());
    write_patch(task, arm_thread_state64_get_pc(state) + stack_adjustment.size(), &dyld_start_patch_start, &dyld_start_patch_end);
    #endif
    } else {
    ensure(thread_set_state(*threads, flavor, reinterpret_cast<thread_state_t>(&state), count) == KERN_SUCCESS);
    }

    mach_port_deallocate(mach_task_self(), *threads);
    vm_deallocate(mach_task_self(), (vm_address_t)threads, sizeof(*threads));
  4. saagarjha revised this gist Jun 3, 2024. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions library_injector.cpp
    Original file line number Diff line number Diff line change
    @@ -252,7 +252,7 @@ void inject(pid_t pid, const std::string &library) {
    }

    int main(int argc, char **argv, char **envp) {
    if (!getenv("DYLD_SHARED_REGION")) {
    if (!getenv("DYLD_IN_CACHE")) {
    uint32_t length = 0;
    std::string path;
    _NSGetExecutablePath(path.data(), &length);
    @@ -263,7 +263,7 @@ int main(int argc, char **argv, char **envp) {
    environment.push_back(*envp++);
    }
    // This happens to disable dyld-in-cache.
    environment.push_back("DYLD_SHARED_REGION=foobar");
    environment.push_back("DYLD_IN_CACHE=0");
    environment.push_back(nullptr);
    execve(path.c_str(), argv, const_cast<char **>(environment.data()));
    ensure(false);
  5. saagarjha revised this gist Jun 3, 2024. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions library_injector.cpp
    Original file line number Diff line number Diff line change
    @@ -246,6 +246,9 @@ void inject(pid_t pid, const std::string &library) {
    patch_restrictions(task, state.__rip);
    #endif
    ensure(thread_set_state(*threads, flavor, reinterpret_cast<thread_state_t>(&state), count) == KERN_SUCCESS);

    mach_port_deallocate(mach_task_self(), *threads);
    vm_deallocate(mach_task_self(), (vm_address_t)threads, sizeof(*threads));
    }

    int main(int argc, char **argv, char **envp) {
  6. saagarjha created this gist Sep 4, 2021.
    308 changes: 308 additions & 0 deletions library_injector.cpp
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,308 @@
    // To compile: clang++ -arch x86_64 -arch arm64 -std=c++20 library_injector.cpp -lbsm -lEndpointSecurity -o library_injector,
    // then codesign with com.apple.developer.endpoint-security.client and run the
    // program as root.

    #include <EndpointSecurity/EndpointSecurity.h>
    #include <algorithm>
    #include <array>
    #include <bsm/libbsm.h>
    #include <cstdint>
    #include <cstdlib>
    #include <cstring>
    #include <dispatch/dispatch.h>
    #include <functional>
    #include <iostream>
    #include <mach-o/dyld.h>
    #include <mach-o/dyld_images.h>
    #include <mach-o/loader.h>
    #include <mach-o/nlist.h>
    #include <mach/mach.h>
    #ifdef __arm64__
    #include <mach/arm/thread_state.h>
    #elif __x86_64__
    #include <mach/i386/thread_state.h>
    #else
    #error "Only arm64 and x86_64 are currently supported"
    #endif
    #if __has_feature(ptrauth_calls)
    #include <ptrauth.h>
    #endif
    #include <regex>
    #include <span>
    #include <stdexcept>
    #include <string>
    #include <sys/ptrace.h>
    #include <sys/sysctl.h>
    #include <unistd.h>
    #include <vector>

    // Runtime check used throughout this file. Evaluates `condition` once; when
    // it is false, throws std::runtime_error whose message names the stringified
    // condition, the source file, the line and the enclosing function.
    #define ensure(condition) \
        do { \
            if (!(condition)) { \
                std::string ensure_failure_ = "Check \"" #condition "\" failed at "; \
                ensure_failure_ += __FILE__; \
                ensure_failure_ += ":"; \
                ensure_failure_ += std::to_string(__LINE__); \
                ensure_failure_ += " in function "; \
                ensure_failure_ += __FUNCTION__; \
                throw std::runtime_error(ensure_failure_); \
            } \
        } while (0)

    // Operation selector passed to csops() and the status bit tested on its
    // result by is_cs_enforced().
    // NOTE(review): values presumably mirror the kernel's code-signing
    // definitions - confirm against the XNU headers.
    #define CS_OPS_STATUS 0
    #define CS_ENFORCEMENT 0x00001000

    // Hand-written prototype with C linkage.
    // NOTE(review): presumably declared here because none of the included headers
    // provides csops() - confirm.
    extern "C" {
    int csops(pid_t pid, unsigned int ops, void *useraddr, size_t usersize);
    };

    // Returns true when the kinfo_proc record that sysctl(KERN_PROC_PID) reports
    // for `pid` has the P_TRANSLATED bit set in its process flags.
    // Throws (via ensure) if the sysctl fails or returns a record of another size.
    auto is_translated(pid_t pid) {
        std::array<int, 4> name = {CTL_KERN, KERN_PROC, KERN_PROC_PID, pid};
        kinfo_proc proc;
        size_t size = sizeof(proc);
        ensure(!sysctl(name.data(), name.size(), &proc, &size, nullptr, 0) && size == sizeof(proc));
        const bool translated = (proc.kp_proc.p_flag & P_TRANSLATED) != 0;
        return translated;
    }

    // Returns true when the status word csops(CS_OPS_STATUS) reports for `pid`
    // has the CS_ENFORCEMENT bit set. Throws (via ensure) if csops fails.
    auto is_cs_enforced(pid_t pid) {
        int flags;
        ensure(!csops(pid, CS_OPS_STATUS, &flags, sizeof(flags)));
        const bool enforced = (flags & CS_ENFORCEMENT) != 0;
        return enforced;
    }

    // Reads one object of type T out of `task`'s address space at `address` and
    // advances `address` past it, so consecutive calls walk through memory.
    // Throws (via ensure) if the read fails or comes back short.
    template <typename T>
    T scan(task_port_t task, std::uintptr_t &address) {
        vm_size_t count;
        T t;
        ensure(vm_read_overwrite(task, address, sizeof(t), reinterpret_cast<pointer_t>(&t), &count) == KERN_SUCCESS && count == sizeof(t));
        // Step the caller's cursor over the bytes just consumed.
        address = address + sizeof(T);
        return t;
    }

    // Reads pointer-sized words from `task` starting at `base` until a zero word
    // is found. Returns the non-zero words in order; on return `base` points just
    // past the terminating zero.
    std::vector<std::uintptr_t> read_string_array(task_port_t task, std::uintptr_t &base) {
        std::vector<std::uintptr_t> pointers;
        for (;;) {
            const auto entry = scan<std::uintptr_t>(task, base);
            if (!entry) {
                // Terminator consumed (base already advanced) but not stored.
                break;
            }
            pointers.push_back(entry);
        }
        return pointers;
    }

    // Copies the NUL-terminated string located at `address` in `task` into a
    // std::string (terminator excluded), reading one character per scan() call.
    std::string read_string(task_port_t task, std::uintptr_t address) {
        std::string text;
        for (char c = scan<char>(task, address); c != '\0'; c = scan<char>(task, address)) {
            text.push_back(c);
        }
        return text;
    }

    // Rewrites the startup stack of `task` so that its environment names
    // `library` in DYLD_INSERT_LIBRARIES, and returns the new stack pointer.
    //
    // Starting at `sp` the function reads, in this order: one word (kept as
    // loadAddress), argc, then three zero-terminated pointer arrays (argv, envp,
    // apple) and the strings they point to. It then writes a fresh string block,
    // a fresh pointer table, argc and loadAddress at descending addresses below
    // the point where reading stopped. Throws (via ensure) on any failed remote
    // read or write.
    std::uintptr_t rearrange_stack(task_port_t task, const std::string &library, std::uintptr_t sp) {
        // Header words; scan() moves sp forward past each one.
        auto loadAddress = scan<std::uintptr_t>(task, sp);
        auto argc = scan<std::uintptr_t>(task, sp);

        // All three pointer arrays are read before any string is fetched.
        const auto argvPointers = read_string_array(task, sp);
        const auto envpPointers = read_string_array(task, sp);
        const auto applePointers = read_string_array(task, sp);

        const auto fetch = [task](const std::vector<std::uintptr_t> &pointers) {
            std::vector<std::string> values;
            for (const auto pointer : pointers) {
                values.push_back(read_string(task, pointer));
            }
            return values;
        };
        auto argv = fetch(argvPointers);
        auto envp = fetch(envpPointers);
        auto apple = fetch(applePointers);

        // Extend the first existing DYLD_INSERT_LIBRARIES entry, or add a new one.
        bool extended = false;
        for (auto &entry : envp) {
            if (entry.starts_with("DYLD_INSERT_LIBRARIES=")) {
                entry += ":" + library;
                extended = true;
                break;
            }
        }
        if (!extended) {
            envp.push_back("DYLD_INSERT_LIBRARIES=" + library);
        }

        // Pack every string, NUL-terminated, into one buffer and remember each
        // string's offset within it.
        std::vector<char> strings;
        const auto pack = [&strings](const std::vector<std::string> &values) {
            std::vector<std::uintptr_t> offsets;
            for (const auto &value : values) {
                offsets.push_back(strings.size());
                strings.insert(strings.end(), value.begin(), value.end());
                strings.push_back('\0');
            }
            return offsets;
        };
        const auto argvOffsets = pack(argv);
        const auto envpOffsets = pack(envp);
        const auto appleOffsets = pack(apple);

        // Place the string block below the current position, rounded down to a
        // pointer-size boundary.
        sp -= strings.size();
        sp -= sp % sizeof(std::uintptr_t);
        ensure(vm_write(task, sp, reinterpret_cast<vm_offset_t>(strings.data()), strings.size()) == KERN_SUCCESS);

        // Turn offsets into absolute addresses and lay the three arrays out back
        // to back, each followed by a zero terminator.
        std::vector<std::uintptr_t> addresses;
        for (const auto *offsets : {&argvOffsets, &envpOffsets, &appleOffsets}) {
            for (const auto offset : *offsets) {
                addresses.push_back(sp + offset);
            }
            addresses.push_back(0);
        }

        sp -= addresses.size() * sizeof(std::uintptr_t);
        ensure(vm_write(task, sp, reinterpret_cast<vm_offset_t>(addresses.data()), addresses.size() * sizeof(std::uintptr_t)) == KERN_SUCCESS);
        sp -= sizeof(std::uintptr_t);
        ensure(vm_write(task, sp, reinterpret_cast<vm_offset_t>(&argc), sizeof(std::uintptr_t)) == KERN_SUCCESS);
        sp -= sizeof(std::uintptr_t);
        ensure(vm_write(task, sp, reinterpret_cast<vm_offset_t>(&loadAddress), sizeof(std::uintptr_t)) == KERN_SUCCESS);
        return sp;
    }
    // Replacement machine code that write_patch() copies into the target task.
    // The bytes between the _patch_start and _patch_end labels are the patch;
    // patch_restrictions() places them at the target's
    // _amfi_check_dyld_policy_self.
    //
    // Both variants store 0x5f through a pointer register (x1 on arm64, rsi on
    // x86_64), zero the return register and return.
    // NOTE(review): 0x5f is presumably the set of AMFI policy flags handed back
    // to dyld, and x1/rsi presumably the function's second argument - confirm.
    __asm__(
    ".globl _patch_start\n"
    ".globl _patch_end\n"
    "_patch_start:\n"
    #if __arm64__
    "\tmov x2, #0x5f\n"
    "\tstr x2, [x1]\n"
    "\tmov x0, #0\n"
    "\tret\n"
    #elif __x86_64__
    ".intel_syntax noprefix\n"
    "\tmov QWORD PTR [rsi], 0x5f\n"
    "\txor rax, rax\n"
    "\tret\n"
    #endif
    "_patch_end:\n");

    // C++ names for the two assembly labels above; only their addresses are used
    // (see write_patch()).
    extern char patch_start;
    extern char patch_end;

    // Copies the patch bytes (patch_start up to patch_end) to `address` in
    // `task`.
    //
    // Every page the patch touches is first made writable (with VM_PROT_COPY),
    // then set to read+execute after the write. The protected range spans from
    // the page containing `address` to the page containing the last patch byte,
    // so a patch that straddles a page boundary is fully covered; when it fits
    // in one page this is exactly one PAGE_SIZE, as before.
    // Throws (via ensure) if any Mach call fails.
    void write_patch(task_t task, std::uintptr_t address) {
        const auto length = static_cast<std::uintptr_t>(&patch_end - &patch_start);
        const std::uintptr_t first_page = address / PAGE_SIZE * PAGE_SIZE;
        const std::uintptr_t last_byte = length ? address + length - 1 : address;
        const std::uintptr_t protect_size = (last_byte / PAGE_SIZE + 1) * PAGE_SIZE - first_page;

        ensure(vm_protect(task, first_page, protect_size, false, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY) == KERN_SUCCESS);
        ensure(vm_write(task, address, reinterpret_cast<vm_offset_t>(&patch_start), length) == KERN_SUCCESS);
        ensure(vm_protect(task, first_page, protect_size, false, VM_PROT_READ | VM_PROT_EXECUTE) == KERN_SUCCESS);
    }

    // Overwrites _amfi_check_dyld_policy_self in `task` with the patch bytes.
    //
    // The symbol table that is walked is the one of the dyld image loaded in
    // *this* process (task_info is queried on mach_task_self()). The target
    // address is computed as `pc` plus the distance between
    // _amfi_check_dyld_policy_self and __dyld_start in that table, so `pc` is
    // expected to be the target's __dyld_start.
    // NOTE(review): this assumes the target runs the same dyld image as this
    // process - confirm.
    //
    // Throws (via ensure) if task_info fails, if dyld has no LC_SYMTAB, or if
    // either symbol is missing from the symbol table.
    void patch_restrictions(task_t task, std::uintptr_t pc) {
        task_dyld_info_data_t dyldInfo;
        mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
        ensure(task_info(mach_task_self(), TASK_DYLD_INFO, reinterpret_cast<task_info_t>(&dyldInfo), &count) == KERN_SUCCESS);
        auto all_image_infos = reinterpret_cast<dyld_all_image_infos *>(dyldInfo.all_image_info_addr);
        const auto header = reinterpret_cast<const mach_header_64 *>(all_image_infos->dyldImageLoadAddress);
        // Load commands start immediately after the Mach-O header.
        auto location = reinterpret_cast<std::uintptr_t>(header + 1);
        auto base = reinterpret_cast<std::uintptr_t>(header);
        for (unsigned i = 0; i < header->ncmds; ++i) {
            auto command = reinterpret_cast<load_command *>(location);
            if (command->cmd == LC_SYMTAB) {
                auto symtab = reinterpret_cast<symtab_command *>(location);
                auto symbols = std::span{reinterpret_cast<nlist_64 *>(base + symtab->symoff), symtab->nsyms};
                auto names = reinterpret_cast<const char *>(base + symtab->stroff);
                const auto find_symbol = [&symbols, names](const char *wanted) {
                    return std::find_if(symbols.begin(), symbols.end(), [names, wanted](const auto &symbol) {
                        return !std::strcmp(names + symbol.n_un.n_strx, wanted);
                    });
                };
                auto _dyld_start = find_symbol("__dyld_start");
                auto amfi_check_dyld_policy_self = find_symbol("_amfi_check_dyld_policy_self");
                // Dereferencing end() would be undefined behaviour; fail loudly
                // if either symbol is absent.
                ensure(_dyld_start != symbols.end());
                ensure(amfi_check_dyld_policy_self != symbols.end());
                write_patch(task, pc + amfi_check_dyld_policy_self->n_value - _dyld_start->n_value);
                return;
            }
            location += command->cmdsize;
        }
        // No LC_SYMTAB load command was found.
        ensure(false);
    }

    void inject(pid_t pid, const std::string &library) {
    task_port_t task;
    ensure(task_for_pid(mach_task_self(), pid, &task) == KERN_SUCCESS);
    thread_act_array_t threads;
    mach_msg_type_number_t count;
    ensure(task_threads(task, &threads, &count) == KERN_SUCCESS);
    ensure(count == 1);
    #if __arm64__
    arm_thread_state64_t state;
    count = ARM_THREAD_STATE64_COUNT;
    thread_state_flavor_t flavor = ARM_THREAD_STATE64;
    #elif __x86_64__
    x86_thread_state64_t state;
    count = x86_THREAD_STATE64_COUNT;
    thread_state_flavor_t flavor = x86_THREAD_STATE64;
    #endif
    ensure(thread_get_state(*threads, flavor, reinterpret_cast<thread_state_t>(&state), &count) == KERN_SUCCESS);

    #if __arm64__
    ensure(thread_convert_thread_state(*threads, THREAD_CONVERT_THREAD_STATE_TO_SELF, flavor, reinterpret_cast<thread_state_t>(&state), count, reinterpret_cast<thread_state_t>(&state), &count) == KERN_SUCCESS);
    auto sp = rearrange_stack(task, library, arm_thread_state64_get_sp(state));
    arm_thread_state64_set_sp(state, sp);
    patch_restrictions(task, arm_thread_state64_get_pc(state));
    ensure(thread_convert_thread_state(*threads, THREAD_CONVERT_THREAD_STATE_FROM_SELF, flavor, reinterpret_cast<thread_state_t>(&state), count, reinterpret_cast<thread_state_t>(&state), &count) == KERN_SUCCESS);
    #elif __x86_64__
    auto sp = rearrange_stack(task, library, static_cast<std::uintptr_t>(state.__rsp));
    state.__rsp = sp;
    patch_restrictions(task, state.__rip);
    #endif
    ensure(thread_set_state(*threads, flavor, reinterpret_cast<thread_state_t>(&state), count) == KERN_SUCCESS);
    }

    // Entry point.
    //
    // 1. If DYLD_SHARED_REGION is not set, re-executes this program with
    //    DYLD_SHARED_REGION=foobar appended to the environment.
    // 2. Otherwise expects `<library to inject> <process paths...>`; each process
    //    path is compiled as a std::regex.
    // 3. Subscribes to ES_EVENT_TYPE_AUTH_EXEC and, for every exec whose target
    //    path matches one of the patterns and whose translation state equals
    //    ours, injects the library once and then allows the exec.
    //
    // Failures surface as std::runtime_error from ensure().
    int main(int argc, char **argv, char **envp) {
        if (!getenv("DYLD_SHARED_REGION")) {
            // First call with a zero-length buffer only asks for the required size.
            uint32_t length = 0;
            std::string path;
            _NSGetExecutablePath(path.data(), &length);
            // std::string(count, ch): the count comes first. The arguments were
            // previously swapped, giving a 48-byte buffer regardless of `length`.
            path = std::string(length, '\0');
            ensure(!_NSGetExecutablePath(path.data(), &length));
            std::vector<const char *> environment;
            while (*envp) {
                environment.push_back(*envp++);
            }
            // This happens to disable dyld-in-cache.
            environment.push_back("DYLD_SHARED_REGION=foobar");
            environment.push_back(nullptr);
            execve(path.c_str(), argv, const_cast<char **>(environment.data()));
            // execve only returns on failure.
            ensure(false);
        }

        if (argc < 3) {
            std::cerr << "Usage: " << *argv << " <library to inject> <process paths...>" << std::endl;
            std::exit(EXIT_FAILURE);
        }

        auto library = *++argv;
        std::vector<std::regex> processes;
        for (auto process : std::span(++argv, argc - 2)) {
            processes.push_back(std::regex(process));
        }

        es_client_t *client = nullptr;
        const auto result = es_new_client(&client, ^(es_client_t *client, const es_message_t *message) {
            switch (message->event_type) {
                case ES_EVENT_TYPE_AUTH_EXEC: {
                    const char *name = message->event.exec.target->executable->path.data;
                    pid_t pid = audit_token_to_pid(message->process->audit_token);
                    for (const auto &process : processes) {
                        if (std::regex_search(name, process) && is_translated(getpid()) == is_translated(pid)) {
                            if (is_cs_enforced(pid)) {
                                ensure(!ptrace(PT_ATTACHEXC, pid, nullptr, 0));
                                // Work around FB9786809
                                dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 1'000'000'000), dispatch_get_main_queue(), ^{
                                    ensure(!ptrace(PT_DETACH, pid, nullptr, 0));
                                });
                            }
                            inject(pid, library);
                            // Inject at most once per exec, even when several
                            // patterns match the same path; a second pass would
                            // attach and rewrite the stack again.
                            break;
                        }
                    }
                    es_respond_auth_result(client, message, ES_AUTH_RESULT_ALLOW, false);
                    break;
                }
                default:
                    ensure(false && "Unexpected event type!");
            }
        });
        ensure(result == ES_NEW_CLIENT_RESULT_SUCCESS);
        es_event_type_t events[] = {ES_EVENT_TYPE_AUTH_EXEC};
        ensure(es_subscribe(client, events, sizeof(events) / sizeof(*events)) == ES_RETURN_SUCCESS);
        dispatch_main();
    }