Skip to content

Instantly share code, notes, and snippets.

@leegao
Created June 7, 2025 20:35
Show Gist options
  • Save leegao/000fbc7aca632c14d366760d4c45ae07 to your computer and use it in GitHub Desktop.
Save leegao/000fbc7aca632c14d366760d4c45ae07 to your computer and use it in GitHub Desktop.
#include <string.h>
#include <jni.h>
#include <vulkan/vulkan.h>
#include <dlfcn.h>
#include <android/log.h>
#include <stdio.h>
#include <__algorithm/find_if.h>
#include <assert.h>
#include <android/log.h>
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <sys/mman.h>
#include "bcdec.h"
// Name of the JNI entry point this file defines, as an identifier and as a string literal.
// The string form is concatenated with "old_" to look up the renamed original in
// libvortekrenderer.so, so the two macros must always spell the same symbol.
#define VK_CREATE_INSTANCE_JNI Java_com_winlator_xenvironment_components_VortekRendererComponent_createVkContext
#define VK_CREATE_INSTANCE_JNI_STR "Java_com_winlator_xenvironment_components_VortekRendererComponent_createVkContext"
// Log tag passed directly to __android_log_print by the vkCreateInstance / createVkContext hooks.
#define TAG "dummyvk"
// Declared by hand instead of through a system header; used to read "debug.vk.enable".
extern "C" int __system_property_get(const char *name, char *value);
// Log tag used by the LOGI / LOGE convenience macros below.
#define LOG_TAG "VortekCache"
// printf-style logging at INFO and ERROR priority respectively.
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
// Partial layouts of objects owned by libvortekrenderer.so. Only the named fields are
// understood; the `_p` arrays are opaque padding that keeps the named fields at their offsets.
// NOTE(review): the offsets rely on the compiler's natural alignment (8-byte pointers and
// size_t) — confirm for every ABI this is built for.

// Decoder object. TASK_QUEUE() addresses the queue at byte offset 0x28 directly rather than
// through this struct.
typedef struct { char _p[0x27]; void* task_queue; } TextureDecoder;
// Not referenced by any code visible in this file.
typedef struct { char _p[0xC]; int32_t width; int32_t height; char _p2[0xC]; void* pixel_data_buffer; } TaskImageParams;
// File descriptor and byte length that my_TextureDecoder_decodeAll passes to mmap() to read
// the compressed source data.
typedef struct { int fd; char _p[0x24]; size_t length; } MmapInfo;
// Source half of a decoding task: where the compressed texture bytes live.
// my_TextureDecoder_decodeAll maps `mmap_details` and reads the data starting `offset` bytes
// into that mapping; `buffer` is not used by the code in this file.
typedef struct {
VkBuffer buffer; // 0x00: The Vulkan buffer handle.
VkDeviceSize offset; // 0x08: The offset into the device memory where the buffer is bound.
MmapInfo* mmap_details; // Follows the two 8-byte fields above: fd/length of the backing memory.
} TaskDataSource;
// Destination half of a decoding task: the image that receives the decoded pixels.
// my_TextureDecoder_decodeAll reads `format`, `width`, `height`, `memory` and `size`; the
// other fields are not used by the code in this file. Fields marked "Assumed" are guesses
// made while reverse engineering and have not been confirmed.
struct ImageObject {
VkImage handle; // 0x00: The Vulkan image handle (Assumed).
VkFormat format; // 0x08: The format of the image.
int32_t width; // 0x0c: The width of the image.
int32_t height; // 0x10: The height of the image.
int32_t layerCount; // 0x14: The number of layers in the image.
uint32_t pad_0x18;
uint32_t pad_0x1c;
VkDeviceMemory memory; // 0x20: The device memory bound to this image.
VkDeviceSize size; // 0x28: The size of the bound memory (Assumed for munmap).
}; // Size >= 0x30
// One queued unit of work: decode the compressed data described by `data_source` into the
// image described by `image_params`. Elements taken from the task queue are cast to this type.
typedef struct { TaskDataSource* data_source; ImageObject* image_params; } DecodingTask;
// ---
// Decodes a single BC1 (DXT1) compressed block into 32-bit pixels.
//
// @param srcCompressedBlock Pointer to the 8-byte compressed BC1 block: two little-endian
//                           5:6:5 endpoint colors followed by sixteen 2-bit palette indices.
//                           The block is read byte by byte, so no alignment is required.
// @param dstUncompBlock     Pointer to the top-left pixel of the destination 4x4 area. Must be
//                           suitably aligned for uint32_t stores.
// @param w                  Distance between consecutive destination rows, in pixels (pass 4
//                           for a tightly packed 64-byte output block).
//
// Every pixel is stored as the uint32_t value 0xAARRGGBB.
// NOTE(review): on a little-endian host that puts blue in the lowest-addressed byte; confirm
// this is the channel order the destination image expects.
void DecodeBC1(const void* srcCompressedBlock, void* dstUncompBlock, int w) {
    const uint8_t* src = (const uint8_t*) srcCompressedBlock;
    uint32_t* pDecompressed = (uint32_t*) dstUncompBlock;

    // Assemble the fields from individual bytes instead of casting the pointer: the block
    // may sit at any address inside the mapped source data.
    const uint16_t endpoints[2] = {
        (uint16_t) (src[0] | (src[1] << 8)),
        (uint16_t) (src[2] | (src[3] << 8)),
    };
    const uint32_t lookupTable = (uint32_t) src[4] | ((uint32_t) src[5] << 8) |
                                 ((uint32_t) src[6] << 16) | ((uint32_t) src[7] << 24);

    // Expand the 5:6:5 endpoints to 8 bits per channel. Replicating the top bits into the
    // low bits maps the full 5/6-bit range onto 0..255 (so 0x1F becomes 0xFF, not 0xF8).
    uint32_t r[4], g[4], b[4];
    for (int i = 0; i < 2; ++i) {
        const uint32_t r5 = (endpoints[i] >> 11) & 0x1Fu;
        const uint32_t g6 = (endpoints[i] >> 5) & 0x3Fu;
        const uint32_t b5 = endpoints[i] & 0x1Fu;
        r[i] = (r5 << 3) | (r5 >> 2);
        g[i] = (g6 << 2) | (g6 >> 4);
        b[i] = (b5 << 3) | (b5 >> 2);
    }

    // The ordering of the raw endpoint values selects the palette layout.
    const int fourColorMode = endpoints[0] > endpoints[1];
    if (fourColorMode) {
        // Palette entries 2 and 3 sit one third and two thirds of the way from color 0 to
        // color 1; the +1 rounds to nearest.
        r[2] = (2 * r[0] + r[1] + 1) / 3;
        g[2] = (2 * g[0] + g[1] + 1) / 3;
        b[2] = (2 * b[0] + b[1] + 1) / 3;
        r[3] = (r[0] + 2 * r[1] + 1) / 3;
        g[3] = (g[0] + 2 * g[1] + 1) / 3;
        b[3] = (b[0] + 2 * b[1] + 1) / 3;
    } else {
        // Three-color mode: entry 2 is the midpoint, entry 3 is transparent black.
        r[2] = (r[0] + r[1] + 1) / 2;
        g[2] = (g[0] + g[1] + 1) / 2;
        b[2] = (b[0] + b[1] + 1) / 2;
        r[3] = g[3] = b[3] = 0;
    }

    uint32_t colors[4];
    for (int i = 0; i < 4; ++i) {
        colors[i] = 0xFF000000u | (r[i] << 16) | (g[i] << 8) | b[i];
    }
    if (!fourColorMode) {
        colors[3] = 0; // Transparent black: alpha is cleared as well.
    }

    // Pixel i (row-major inside the block) uses bits [2i, 2i+1] of the index word.
    for (int y = 0; y < 4; ++y) {
        for (int x = 0; x < 4; ++x) {
            const int i = y * 4 + x;
            pDecompressed[x + y * w] = colors[(lookupTable >> (i * 2)) & 0x03u];
        }
    }
}
// ---
// Function pointers filled in by the createVkContext JNI hook before the decode hook can run.

// Previous contents of the patched pointer slot; my_TextureDecoder_decodeAll chains to it
// once it has drained the task queue.
static void (*original_TextureDecoder_decodeAll)(void* self);
// ArrayDeque helpers looked up by name in libvortekrenderer.so.
static int (*ArrayDeque_isEmpty)(void* deque);
static void* (*ArrayDeque_removeFirst)(void* deque);
static void (*ArrayDeque_addLast)(void* deque, void* element);

// Address of the task queue embedded in a decoder object, 0x28 bytes from its start.
// The argument is parenthesized so that an expression such as `p + 1` is evaluated in the
// caller's pointer type before being reinterpreted as a byte pointer.
#define TASK_QUEUE(self) (&((char*)(self))[0x28])
// The VkDevice stored in the first bytes (offset 0x00) of a decoder object.
#define TASK_DEVICE(self) (*(VkDevice*)&((char*)(self))[0x00])
extern "C"
void my_TextureDecoder_decodeAll(void* self) {
// original_TextureDecoder_decodeAll = (void(*)(TextureDecoder* self)) dlsym(RTLD_NEXT, "TextureDecoder_decodeAll");
if (!ArrayDeque_isEmpty(TASK_QUEUE(self)))
LOGE("In TextureDecoder_decodeAll with %p", original_TextureDecoder_decodeAll);
while (!ArrayDeque_isEmpty(TASK_QUEUE(self))) {
DecodingTask* task = (DecodingTask*) ArrayDeque_removeFirst(TASK_QUEUE(self));
LOGE(" + Task = %p (src=%p, dst=%p)", task, task->data_source, task->image_params);
LOGE(" src->fd = %d", task->data_source->mmap_details->fd);
LOGE(" src->len = %d", task->data_source->mmap_details->length);
LOGE(" src->off = %d", task->data_source->offset);
LOGE(" dst->format = %d", task->image_params->format - 131);
LOGE(" dst->width = %d", task->image_params->width);
LOGE(" dst->height = %d", task->image_params->height);
LOGE(" dst->memory = %p", (void*) task->image_params->memory);
LOGE(" dst->size = %d", task->image_params->size);
void* mappedSrcBase = mmap(
NULL,
task->data_source->mmap_details->length,
PROT_READ,
MAP_SHARED,
task->data_source->mmap_details->fd, 0);
if (mappedSrcBase == MAP_FAILED) {
LOGE("Failed to mmap %d", task->data_source->mmap_details->fd);
continue;
}
const uint8_t* compressedData = (const uint8_t*)mappedSrcBase + task->data_source->offset;
LOGE("Starting vkMapMemory at %p with device=%p, ", &vkMapMemory, TASK_DEVICE(self));
// Map the destination image's memory to get a CPU-accessible pointer
void* mappedDst = nullptr;
VkResult mapResult = vkMapMemory((VkDevice) TASK_DEVICE(self), task->image_params->memory, 0, task->image_params->size, 0, &mappedDst);
if (mapResult == VK_SUCCESS && mappedDst) {
// Determine the block size and decoding function based on the format
// The format values are adjusted by subtracting 0x83 (VK_FORMAT_BC1_RGB_UNORM_BLOCK)
uint32_t format_id = task->image_params->format - 0x83;
uint32_t block_size = 16;
if (format_id < 4) {
block_size = 8; // BC1
}
if (format_id == 8 || format_id == 9) {
block_size = 8; // BC4
}
char buffer [20001];
memset(buffer, 0, 20001);
int max = 10000;
if (task->image_params->width * task->image_params->height * block_size < max)
max = task->image_params->width * task->image_params->height * block_size;
for(int j = 0; j < max; j++)
sprintf(&buffer[2*j], "%02X", compressedData[j]);
LOGE("Mapping succeeded: src(%d)=`%s`", max, buffer);
// Loop over the image in 4x4 blocks
for (int y = 0; y < task->image_params->height; y += 4) {
for (int x = 0; x < task->image_params->width; x += 4) {
// Calculate pointer to the destination 4x4 block
void *dstPixelBlock =
(uint8_t *) mappedDst + (y * task->image_params->width * 4) + (x * 4);
switch (format_id) {
case 0: // BC1_RGB_UNORM_BLOCK
case 1: // BC1_RGB_SRGB_BLOCK
case 2: // BC1_RGBA_UNORM_BLOCK
case 3: // BC1_RGBA_SRGB_BLOCK
DecodeBC1(compressedData, dstPixelBlock, task->image_params->width);
break;
case 4: // BC2_UNORM_BLOCK
case 5: // BC2_SRGB_BLOCK
bcdec_bc2(compressedData, dstPixelBlock,
task->image_params->width * 4);
break;
case 6: // BC3_UNORM_BLOCK
case 7: // BC3_SRGB_BLOCK
bcdec_bc3(compressedData, dstPixelBlock, task->image_params->width * 4);
break;
case 8: // BC4_UNORM_BLOCK
case 9: // BC4_SNORM_BLOCK
bcdec_bc4(compressedData, dstPixelBlock,
task->image_params->width * 4, format_id == 9);
break;
case 10: // BC5_UNORM_BLOCK
case 11: // BC5_SNORM_BLOCK
bcdec_bc5(compressedData, dstPixelBlock,
task->image_params->width * 4, format_id == 11);
break;
default:
// Unknown/unsupported format, do nothing.
break;
}
// Advance the source pointer to the next block
compressedData += block_size;
}
}
}
vkUnmapMemory((VkDevice) TASK_DEVICE(self), task->image_params->memory);
munmap(mappedSrcBase, task->data_source->mmap_details->length);
}
original_TextureDecoder_decodeAll(self);
}
// Intercepts vkCreateInstance _between_ Vortek and the underlying libvulkan.so and, when the
// "debug.vk.enable" system property has a value, injects the VK_LAYER_LUNARG_api_dump layer.
//
// The create info is forwarded untouched when the property is unset or the layer is not
// installed, so a missing layer can no longer abort or fail instance creation.
// TODO: when the layer is injected it still replaces the application's own layer list;
// inherit the existing layers instead.
//
// @return The result of the real vkCreateInstance, or VK_ERROR_INITIALIZATION_FAILED when
//         the real entry point cannot be resolved.
extern "C"
VkResult my_vkCreateInstance(
        VkInstanceCreateInfo* pCreateInfo,
        const VkAllocationCallbacks* pAllocator,
        VkInstance* pInstance) {
    __android_log_print(ANDROID_LOG_ERROR, TAG, "Inside of my_vkCreateInstance.");

    // The handle is deliberately left open: the resolved function is called below.
    void* libVulkan = dlopen("libvulkan.so", RTLD_NOW);
    PFN_vkCreateInstance original_vkCreateInstance_ptr =
            libVulkan ? (PFN_vkCreateInstance) dlsym(libVulkan, "vkCreateInstance") : nullptr;
    if (!original_vkCreateInstance_ptr) {
        const char* reason = dlerror();
        __android_log_print(ANDROID_LOG_ERROR, TAG, "Could not resolve vkCreateInstance: %s",
                            reason ? reason : "unknown error");
        return VK_ERROR_INITIALIZATION_FAILED;
    }

    // The single layer this hook knows how to inject.
    static const char* layers[] = {"VK_LAYER_LUNARG_api_dump"};

    // Get the layer count using a null pointer as the last parameter, then fetch the
    // properties into a heap array (the count may legitimately be zero).
    uint32_t instance_layer_present_count = 0;
    VkLayerProperties* layer_props = nullptr;
    if (vkEnumerateInstanceLayerProperties(&instance_layer_present_count, nullptr) != VK_SUCCESS) {
        instance_layer_present_count = 0;
    }
    if (instance_layer_present_count > 0) {
        layer_props = new VkLayerProperties[instance_layer_present_count];
        const VkResult listed =
                vkEnumerateInstanceLayerProperties(&instance_layer_present_count, layer_props);
        if (listed != VK_SUCCESS && listed != VK_INCOMPLETE) {
            instance_layer_present_count = 0;
        }
    }
    for (uint32_t i = 0; i < instance_layer_present_count; ++i) {
        __android_log_print(ANDROID_LOG_ERROR, TAG, "Layer found: %s\n%s",
                            layer_props[i].layerName, layer_props[i].description);
    }

    // Make sure the selected layers are available before asking for them.
    bool layers_available = true;
    for (const char* layer : layers) {
        bool found = false;
        for (uint32_t i = 0; i < instance_layer_present_count && !found; ++i) {
            found = strcmp(layer_props[i].layerName, layer) == 0;
        }
        if (!found) {
            __android_log_print(ANDROID_LOG_ERROR, TAG, "Layer not available: %s", layer);
            layers_available = false;
        }
    }
    delete[] layer_props;

    // __system_property_get's return value is non-zero when the property has a value.
    char value[92] = {0};
    const bool enabled = __system_property_get("debug.vk.enable", value) != 0;
    if (enabled && layers_available) {
        pCreateInfo->enabledLayerCount = (uint32_t) (sizeof(layers) / sizeof(layers[0]));
        pCreateInfo->ppEnabledLayerNames = layers;
    }
    return original_vkCreateInstance_ptr(pCreateInfo, pAllocator, pInstance);
}
void* findLibraryBase(const std::string& library_name) {
std::ifstream maps_file("/proc/self/maps");
if (!maps_file.is_open()) {
std::cerr << "Error: Could not open /proc/self/maps" << std::endl;
return nullptr;
}
std::string line;
while (std::getline(maps_file, line)) {
// Check if the line contains the library name
if (line.find(library_name) != std::string::npos) {
LOGE("Map: %s", line.c_str());
// A typical line looks like:
// 7b1edc6000-7b1edc7000 r--p 00000000 103:0c 536 /path/to/lib.so
uintptr_t base_address;
// Use sscanf to parse the starting address
if (sscanf(line.c_str(), "%lx-%*lx", &base_address) == 1) {
return (void*) base_address;
}
}
}
return nullptr;
}
// Locates the slot inside Vortek's exported "vulkanWrapper" table that holds its cached
// vkCreateInstance pointer.
//
// @param libvortekrenderer dlopen() handle of libvortekrenderer.so.
// @return Address of the slot (table start + 0x18), or nullptr if the table symbol is missing.
void* find_vkCreateInstance_ptr(void* libvortekrenderer) {
    // Slot offset within the table, taken from the disassembly:
    // e388: str x0, [x23, #0x18] ; stores the vkCreateInstance symbol from libvulkan at +0x18
    const int kCreateInstanceSlotOffset = 0x18;

    char* table = static_cast<char*>(dlsym(libvortekrenderer, "vulkanWrapper"));
    if (table == nullptr) {
        __android_log_print(ANDROID_LOG_ERROR, TAG, "Could not find the Vortek cached Vulkan functions table.");
        return nullptr;
    }
    return static_cast<void*>(table + kCreateInstanceSlotOffset);
}
typedef long (*CREATE_VK_CONTEXT_FUNC)(JNIEnv* env, jobject thiz, int fd, jobject options);
extern "C"
long VK_CREATE_INSTANCE_JNI(JNIEnv* env, jobject thiz, int fd, jobject options);
JNIEXPORT long VK_CREATE_INSTANCE_JNI(JNIEnv* env, jobject thiz, int fd, jobject options){
__android_log_print(ANDROID_LOG_ERROR, TAG, "Inside of VortekRendererComponent::createVkContext.");
void* libvortekrenderer = dlopen("libvortekrenderer.so", RTLD_NOW);
ArrayDeque_isEmpty = (int (*)(void* deque)) dlsym(libvortekrenderer, "ArrayDeque_isEmpty");
ArrayDeque_removeFirst = (void* (*)(void* deque)) dlsym(libvortekrenderer, "ArrayDeque_removeFirst");
ArrayDeque_addLast = (void (*)(void* deque, void* element)) dlsym(libvortekrenderer, "ArrayDeque_addLast");
// Patch the TextureDecoder_decodeAll, which is at +0x3bb30 from the start of the image
char* base_addr = (char*) findLibraryBase("libvortekrenderer.so");
LOGE("Got base_addr = %p", base_addr);
void** got_entry = (void**) &base_addr[0x3bb30];
original_TextureDecoder_decodeAll = (void(*)(void* self)) *got_entry;
long page_size = sysconf(_SC_PAGESIZE);
if (page_size == -1) {
perror("sysconf");
return 0;
}
// 2. Calculate the page start address
// This is equivalent to (TARGET_ADDR / page_size) * page_size
void *page_start = (void *)(((uintptr_t) got_entry) & ~(page_size - 1));
mprotect(page_start, page_size, PROT_READ | PROT_WRITE);
*got_entry = (void*) my_TextureDecoder_decodeAll;
// Call the original VortekRendererComponent::createVkContext first to set up the vulkanWrapper pointers
CREATE_VK_CONTEXT_FUNC original = (CREATE_VK_CONTEXT_FUNC) dlsym(libvortekrenderer, "old_" VK_CREATE_INSTANCE_JNI_STR);
long result = original(env, thiz, fd, options);
// Calculate the actual address of the cache vkCreateInstance pointer within Vortek
PFN_vkCreateInstance* vkCreateInstance_ptr = (PFN_vkCreateInstance*) find_vkCreateInstance_ptr(libvortekrenderer);
if (*vkCreateInstance_ptr != (PFN_vkCreateInstance)&my_vkCreateInstance) {
__android_log_print(ANDROID_LOG_ERROR, TAG, "Patching from %p to %p.", vkCreateInstance_ptr, &my_vkCreateInstance);
*vkCreateInstance_ptr = (PFN_vkCreateInstance) &my_vkCreateInstance;
}
// Return the vkContext ptr result
dlclose(libvortekrenderer);
return result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment