Created
April 16, 2024 16:15
-
-
Save 5ec1cff/bb723ade25ee53bae579756a827b7b38 to your computer and use it in GitHub Desktop.
Find zip entry by offset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <utility>
#include <vector>
// Result of a lookup by file offset.
// A default-constructed value (empty name, offset == -1, size == 0) means
// that no entry was found.
struct EntryInfo {
    std::string name;   // entry name taken from the central directory
    off_t offset = -1;  // file offset where the entry's data starts; -1 if none
    size_t size = 0;    // size recorded for the entry (the lookup stores the uncompressed size)
};
// Minimal printf-based logging helpers. Every message is written to stdout
// and terminated with a newline.
//
// LOGD(fmt, ...): print a formatted message.
#define LOGD(fmt, ...) printf(fmt "\n" __VA_OPT__(,) __VA_ARGS__)
// LOGE: error messages share the LOGD sink.
#define LOGE LOGD
// PLOGE(fmt, ...): print a formatted message followed by
// " failed with <errno> <strerror(errno)>". Must be used right after the
// failing call, before anything else can overwrite errno.
#define PLOGE(fmt, ...) printf(fmt " failed with %d %s\n", __VA_ARGS__ __VA_OPT__(,) errno, strerror(errno))
// Move-only owner of a POSIX file descriptor.
// The descriptor is closed when the owner is destroyed; a negative value
// means "owns nothing".
class UniqueFd {
    using Fd = int;

public:
    UniqueFd() = default;

    // Intentionally implicit so that `UniqueFd fd = open(...);` works.
    UniqueFd(Fd fd) noexcept : fd_(fd) {}

    ~UniqueFd() {
        if (fd_ >= 0) close(fd_);
    }

    // Disallow copy: two owners would close the same descriptor twice.
    UniqueFd(const UniqueFd &) = delete;
    UniqueFd &operator=(const UniqueFd &) = delete;

    // Allow move: the source is left owning nothing.
    UniqueFd(UniqueFd &&other) noexcept : fd_(std::exchange(other.fd_, -1)) {}

    // Swap-based move assignment: the descriptor previously held by *this
    // ends up in `other` and is closed when `other` is destroyed.
    UniqueFd &operator=(UniqueFd &&other) noexcept {
        std::swap(fd_, other.fd_);
        return *this;
    }

    // Implicit cast to Fd so the object can be passed straight to POSIX calls.
    operator const Fd &() const { return fd_; }

private:
    Fd fd_ = -1;
};
// Fixed-size part of a ZIP local file header, overlaid directly on bytes
// read from the archive. The variable-length file name and extra field
// follow it, and the entry's data follows those.
struct [[gnu::packed]] LFH {
    uint32_t signature;        // expected to be 0x04034b50
    uint8_t unused1[22];       // fields this tool does not read
    uint16_t file_name_len;    // length of the file name that follows the header
    uint16_t extra_field_len;  // length of the extra field that follows the name

    // Number of bytes from the start of the header to the start of the data.
    uint32_t total_size() const {
        return sizeof(LFH) + file_name_len + extra_field_len;
    }

    // True when the bytes start with the local file header signature.
    bool valid() const {
        return signature == 0x04034b50;
    }
};
// The struct is read straight from disk, so padding would break every field.
static_assert(sizeof(LFH) == 30, "LFH must match the 30-byte on-disk local file header");
struct [[gnu::packed]] CDFH { | |
uint32_t signature; | |
uint8_t unused1[16]; | |
uint32_t comp_size; | |
uint32_t uncomp_size; | |
uint16_t file_name_len; | |
uint16_t extra_field_len; | |
uint16_t file_comment_len; | |
uint8_t unused2[8]; | |
uint32_t offset_of_local_header; | |
uint32_t total_size() const { | |
return sizeof(CDFH) + file_name_len + extra_field_len + file_comment_len; | |
} | |
std::string file_name() const { | |
auto s = std::string(file_name_len + 1, '\0'); | |
memcpy(s.data(), reinterpret_cast<const char *>(this) + sizeof(CDFH), file_name_len); | |
LOGD("file name len %d s=%s", file_name_len, s.c_str()); | |
return s; | |
} | |
bool valid() const { | |
return signature == 0x02014b50; | |
} | |
}; | |
EntryInfo findEntryByOffset(const char *path, off_t offset) { | |
UniqueFd fd = open(path, O_RDONLY | O_CLOEXEC); | |
if (fd == -1) { | |
PLOGE("failed to open"); | |
return {}; | |
} | |
auto end = lseek(fd, -2, SEEK_END); | |
if (end == -1) { | |
PLOGE("seek"); | |
return {}; | |
} | |
CDFH *cdfhs = nullptr; | |
uint8_t *real_addr = nullptr; | |
size_t real_size; | |
struct [[gnu::packed]] { | |
uint8_t unused[6]; | |
uint16_t cdfhs_count; | |
uint32_t cdfhs_size; | |
uint32_t cdfhs_off; | |
} cdfh_info; | |
for (uint16_t i = 0; i <= 65535 && (end - i) >= 22; i++) { | |
if (lseek(fd, end - i, SEEK_SET) == -1) { | |
PLOGE("lseek end-i"); | |
return {}; | |
} | |
uint16_t size; | |
if (read(fd, &size, sizeof(size)) == -1) { | |
PLOGE("failed to read"); | |
return {}; | |
} | |
if (size == i) { | |
uint32_t signature; | |
if (lseek(fd, -22, SEEK_CUR) == -1) { | |
PLOGE("lseek -22"); | |
return {}; | |
} | |
if (read(fd, &signature, sizeof(signature)) == -1) { | |
PLOGE("read sig"); | |
return {}; | |
} | |
if (signature == 0x06054b50) { | |
LOGD("found EOCD at offset -%d", i); | |
if (read(fd, &cdfh_info, sizeof(cdfh_info)) == -1) { | |
PLOGE("read cdfh info"); | |
return {}; | |
} | |
auto page_size = sysconf(_SC_PAGE_SIZE); | |
auto real_off = (cdfh_info.cdfhs_off / page_size) * page_size; | |
auto bias = cdfh_info.cdfhs_off - real_off; | |
real_size = cdfh_info.cdfhs_size + bias; | |
LOGD("cdfh count=%u size=%u off=%u real_off=%lu real_size=%zu", | |
cdfh_info.cdfhs_count, cdfh_info.cdfhs_size, cdfh_info.cdfhs_off, real_off, | |
real_size); | |
real_addr = reinterpret_cast<uint8_t *>(mmap(nullptr, real_size, PROT_READ, | |
MAP_PRIVATE, fd, real_off)); | |
if (real_addr == reinterpret_cast<uint8_t *>(-1)) { | |
PLOGE("mmap"); | |
return {}; | |
} | |
cdfhs = reinterpret_cast<CDFH *>(real_addr + bias); | |
LOGD("mmap addr %p, cdfh addr %p", real_addr, cdfhs); | |
break; | |
} | |
} | |
} | |
if (cdfhs == nullptr) { | |
LOGE("cdfhs not found"); | |
return {}; | |
} | |
auto current = cdfhs; | |
LFH lfh; | |
std::string file_name; | |
off_t file_off = -1; | |
size_t file_size = 0; | |
for (auto i = 0; i < cdfh_info.cdfhs_count && current < cdfhs + cdfh_info.cdfhs_size; i++) { | |
if (!current->valid()) { | |
LOGE("cdfh %i at %p is invalid (signature=%d)!", i, current, current->signature); | |
// pause(); | |
break; | |
} | |
if (pread(fd, &lfh, sizeof(lfh), current->offset_of_local_header) == -1) { | |
PLOGE("read lfh"); | |
break; | |
} | |
if (!lfh.valid()) { | |
LOGE("lfh %i at %u is invalid!", i, current->offset_of_local_header); | |
break; | |
} | |
auto data_off = current->offset_of_local_header + lfh.total_size(); | |
if (offset >= data_off && offset < data_off + current->comp_size) { | |
file_name = current->file_name(); | |
file_off = data_off; | |
file_size = current->uncomp_size; | |
LOGD("found at entry %i name %s offset=%lu size=%zu comp_size=%zu", i, | |
file_name.c_str(), file_off, current->comp_size, file_size); | |
break; | |
} | |
current = reinterpret_cast<CDFH *>(reinterpret_cast<uint8_t *>(current) + | |
current->total_size()); | |
} | |
if (file_off == -1) { | |
LOGE("no file found at file offset %ld", offset); | |
} | |
if (munmap(real_addr, real_size) == -1) { | |
PLOGE("munmap"); | |
} | |
LOGD("name=%s off=%ld size=%zu", file_name.c_str(), file_off, file_size); | |
return {file_name, file_off, file_size}; | |
} | |
// Command-line entry point: `<file> <off>`.
// Looks up the ZIP entry of <file> that contains file offset <off>; the
// result is reported through the log output of findEntryByOffset.
// Returns 1 on a usage error or an unparsable offset, 0 otherwise.
int main(int argc, char **argv) {
    if (argc < 3) {
        printf("usage: <file> <off>\n");
        return 1;
    }
    const char *file = argv[1];

    // Base 0 accepts decimal, 0x-prefixed hex and 0-prefixed octal.
    // strtoll (not strtol) so that large offsets survive where long is 32-bit.
    errno = 0;
    char *end = nullptr;
    const long long off = strtoll(argv[2], &end, 0);
    if (end == argv[2] || *end != '\0' || errno == ERANGE || off < 0) {
        printf("invalid offset: %s\n", argv[2]);
        return 1;
    }

    findEntryByOffset(file, static_cast<off_t>(off));
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment