Skip to content

Instantly share code, notes, and snippets.

@namandixit
Last active April 8, 2025 11:55
Show Gist options
  • Save namandixit/22d61e7e416f7e4637730d3e5ff2a479 to your computer and use it in GitHub Desktop.
Header Prime: The first header everyone should include in their C project (if they are smart, that is)
/*
* Creator: Naman Dixit
* Notice: © Copyright 2024 Naman Dixit
* License: BSD Zero Clause License
* SPDX: 0BSD (https://spdx.org/licenses/0BSD.html)
*/
#if !defined(STD_H_INCLUDE_GUARD)
// NOTE(review): the matching #define STD_H_INCLUDE_GUARD / #endif are expected
// further down the file, outside this excerpt -- confirm they exist.
/* Compiler **************************************************************************/
// Order matters: clang defines _MSC_VER / __GNUC__ when emulating those
// compilers, so __clang__ is checked inside each branch first.
#if defined(_MSC_VER)
# if defined(__clang__)
# define ENV_COMPILER_CLANG
# define ENV_COMPILER_CLANG_WITH_MSVC
# else
# define ENV_COMPILER_MSVC
# endif
#elif defined (__GNUC__)
# if defined(__clang__)
# define ENV_COMPILER_CLANG
# define ENV_COMPILER_CLANG_WITH_GCC
# else
# define ENV_COMPILER_GCC
# endif
#elif defined(__clang__)
# define ENV_COMPILER_CLANG
#else
# error Compiler not supported
#endif
/* Operating System ******************************************************************/
#if defined(_WIN32)
# define ENV_OS_WINDOWS
#elif defined(__linux__)
# define ENV_OS_LINUX
#else
# error Operating system not supported
#endif
/* CPU Architecture ******************************************************************/
#if defined(ENV_COMPILER_MSVC) || defined(ENV_COMPILER_CLANG_WITH_MSVC)
# if defined(_M_IX86)
# define ENV_ARCH_X86
# elif defined(_M_X64)
# define ENV_ARCH_X64
# endif
#elif defined(ENV_COMPILER_CLANG) || defined(ENV_COMPILER_GCC)
# if defined(__i386__)
# define ENV_ARCH_X86
# elif defined(__x86_64__)
# define ENV_ARCH_X64
# endif
#endif
// 32-bit x86 is detected above but deliberately not accepted here (commented out).
#if !defined(ENV_ARCH_X64) // && !defined(ENV_ARCH_X86)
# error Architecture not supported
#endif
/* Word Bit-width ********************************************************************/
#if defined(ENV_ARCH_X86)
# define ENV_BITWIDTH_32
#elif defined(ENV_ARCH_X64)
# define ENV_BITWIDTH_64
#else
# error "Bitwidth not supported"
#endif
/* Programming Language **************************************************************/
// ENV_LANG_C / ENV_LANG_CPP hold the standard's year as a plain integer
// (1989/1999/2011/... or 1998/2014/...) so later code can compare with <, >=.
#if defined(ENV_COMPILER_MSVC)
# if defined(__cplusplus)
# if __cplusplus == 199711L
# define ENV_LANG_CPP 1998
# elif __cplusplus == 201402L
# define ENV_LANG_CPP 2014
# elif __cplusplus == 201703L
# define ENV_LANG_CPP 2017
# elif __cplusplus == 202002L
# define ENV_LANG_CPP 2020
# else
# define ENV_LANG_CPP 2017 // A future C++, assume the last best supported
# endif
# elif defined(__STDC_VERSION__)
# if (__STDC_VERSION__ == 201112L) || (__STDC_VERSION__ == 201710L)
# define ENV_LANG_C 2011
# elif (__STDC_VERSION__ == 202311L) || (__STDC_VERSION__ == 202312L) // 202312L is with /std:clatest before official release of C23 support
# define ENV_LANG_C 2023
# else
# define ENV_LANG_C 2011 // Earliest C version for which MSVC supports __STDC_VERSION__
# endif
# elif defined(__STDC__) // All Microsoft extensions are off [/Za (Disable Language Extensions), similar to pedantic]
# define ENV_LANG_C 1989
# else // /Za (Disable Language Extensions) is not provided, but MS never properly supported C99.
# define ENV_LANG_C 1989
# endif
#elif defined(ENV_COMPILER_CLANG) || defined(ENV_COMPILER_GCC)
# if defined(__cplusplus)
# if __cplusplus == 199711L
# define ENV_LANG_CPP 1998
# elif __cplusplus == 201103L
# define ENV_LANG_CPP 2011
# elif __cplusplus == 201402L
# define ENV_LANG_CPP 2014
# elif __cplusplus == 201703L
# define ENV_LANG_CPP 2017
# elif __cplusplus == 202002L
# define ENV_LANG_CPP 2020
# elif __cplusplus == 202302L
# define ENV_LANG_CPP 2023
# else
# define ENV_LANG_CPP 2017 // A future C++, assume the last best supported
# endif
# elif defined(__STDC_VERSION__) // Using C Language >= 1994 (1989)
# if (__STDC_VERSION__ == 199409L)
# define ENV_LANG_C 1989
# elif (__STDC_VERSION__ == 199901L)
# define ENV_LANG_C 1999
# elif (__STDC_VERSION__ == 201112L) || (__STDC_VERSION__ == 201710L)
# define ENV_LANG_C 2011
# elif (__STDC_VERSION__ == 202311L)
# define ENV_LANG_C 2023
# else
# define ENV_LANG_C 1999 // At least C99 (__STDC_VERSION__ is defined, C94 is too old to fallback on)
# endif
# elif defined(__STDC__) && !defined(__STDC_VERSION__)
# define ENV_LANG_C 1989 // Since C89 doesn't require definition of __STDC_VERSION__
# endif
#endif
#if !defined(ENV_LANG_C) && !defined(ENV_LANG_CPP)
# error Language not supported
#endif
/* Endianness ************************************************************************/
#if defined(ENV_OS_WINDOWS)
# if defined(ENV_ARCH_X86) || defined(ENV_ARCH_X64)
# define ENV_ENDIAN_LITTLE
# else
# error Could not determine endianness on Windows
# endif
#elif defined(ENV_OS_LINUX)
# include <endian.h>
# if __BYTE_ORDER == __LITTLE_ENDIAN
# define ENV_ENDIAN_LITTLE
# elif __BYTE_ORDER == __BIG_ENDIAN
# define ENV_ENDIAN_BIG
# else
# error Could not determine endianness on Linux
# endif
#else
# error Can not determine endianness, unknown environment
#endif
/* Constants *************************************************************************/
// Size helpers; results are unsigned long long, so safe up to 64-bit sizes.
#define KiB(x) ( (x) * 1024ULL)
#define MiB(x) (KiB(x) * 1024ULL)
#define GiB(x) (MiB(x) * 1024ULL)
#define TiB(x) (GiB(x) * 1024ULL)
#define HUNDRED 100L
#define THOUSAND 1000L
#define MILLION 1000000L
#define BILLION 1000000000L
/* Attributes ************************************************************************/
//#define macro_gensym_uniq(prefix) macro_gensym2_(prefix, __COUNTER__) // Commented since I am going to use __COUNTER__ for profile events
#define macro_gensym_line(prefix) macro_gensym2_(prefix, __LINE__)
// NOTE(review): the two below token-paste `prefix` onto __func__ (an array,
// not a macro) and __FILE__ (a string literal); expanding either would be
// ill-formed. They appear unused -- confirm before relying on them.
#define macro_gensym_func(prefix) macro_gensym2_(prefix, __func__)
#define macro_gensym_file(prefix) macro_gensym2_(prefix, __FILE__)
// Pre-C23 (and current MSVC) C has no nullptr keyword; alias it to NULL.
#if defined(ENV_LANG_C) && ((ENV_LANG_C < 2023) || (defined(ENV_COMPILER_MSVC))) // Remove when MSVC properly supports C23
# define nullptr NULL
#endif
// static_assert: C11 has _Static_assert; older C gets an emulation via a
// negative-width bitfield (GCC/Clang) or conflicting array sizes (MSVC).
#if (defined(ENV_LANG_C) && ENV_LANG_C == 2011)
# define static_assert(...) _Static_assert(__VA_ARGS__)
#elif (defined(ENV_LANG_C) && ENV_LANG_C < 2011)
# if defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG)
# define static_assert__HELPER(expr, msg) \
(!!sizeof (struct { unsigned int macro_gensym_line(STATIC_ASSERTION__): (expr) ? 1 : -1; }))
# define static_assert(expr, msg) \
extern int (*assert_function__(void)) [static_assert__HELPER(expr, msg)]
# elif defined(ENV_COMPILER_MSVC)
# define static_assert(expr, msg) \
extern char macro_gensym_line(STATIC_ASSERTION__1__)[1]; extern char macro_gensym_line(STATIC_ASSERTION__2__)[(expr)?1:2]
# endif
#endif
// Self-documenting spellings of `static` for its different roles.
#define global_variable static
#define global_immutable static const
#define persistent_value static
#define internal_function static
#define header_function static inline
// DLL export/import: when building the DLL itself, define BUILD_DLL.
#if defined(ENV_COMPILER_MSVC) || defined(ENV_COMPILER_CLANG_WITH_MSVC)
# if defined(BUILD_DLL)
# define exported_function __declspec(dllexport)
# else
# define exported_function __declspec(dllimport)
# endif
#elif defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG)
# define exported_function __attribute__((visibility("default")))
#endif
#if defined(ENV_COMPILER_MSVC) || defined(ENV_COMPILER_CLANG_WITH_MSVC)
# define exported_data __declspec(dllexport)
#elif defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG)
# define exported_data __attribute__((visibility("default")))
#endif
#if defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG)
#define packed_data(...) __VA_ARGS__ __attribute__((__packed__))
#elif defined(ENV_COMPILER_MSVC)
#define packed_data(...) __pragma( pack(push, 1) ) __VA_ARGS__ __pragma( pack(pop))
#endif
// Prefer the standard [[fallthrough]] attribute when the compiler reports
// support; otherwise fall back to the GNU attribute or to nothing.
#if defined(__has_c_attribute)
# if __has_c_attribute(fallthrough)
# define switch_fallthrough [[fallthrough]]
# endif
#elif defined(__cplusplus) && defined(__has_cpp_attribute)
# if __has_cpp_attribute(fallthrough)
# define switch_fallthrough [[fallthrough]]
# endif
#endif
#if !defined(switch_fallthrough)
# if defined(ENV_COMPILER_GCC) && __GNUC__ >= 7
# define switch_fallthrough __attribute__ ((fallthrough))
# elif defined(ENV_COMPILER_CLANG) && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 9))
# define switch_fallthrough __attribute__ ((fallthrough))
# else
# define switch_fallthrough
# endif
#endif
/* Macros ****************************************************************************/
// Element count of a true array (not a pointer).
#define elemin(array) (sizeof(array)/sizeof((array)[0]))
// Type-casts
/* If in doubt, first attempt cast_val and then cast_mem.
* For cast_mem, the `m` should be a variable (something which is in memory) and not just an expression.
*/
#if defined(ENV_LANG_C)
# define cast_mem(T, m) (*((T*)(&(m))))
# define cast_val(T, v) ((T)(v))
# define cast_const(T, v) ((T)(v))
#elif defined(ENV_LANG_CPP)
# define cast_mem(T, m) (reinterpret_cast<T>(m))
# define cast_val(T, v) (static_cast<T>(v))
# define cast_const(T, v) (const_cast<T>(v))
#endif
// Compound Literal
#if defined(ENV_LANG_C)
# define compound_literal(T, ...) (T) __VA_ARGS__
#elif defined(ENV_LANG_CPP)
# define compound_literal(T, ...) T __VA_ARGS__
#endif
// Recover the address of the enclosing struct from a pointer to one member.
#define containerof(ptr, type, member) (cast_val(type*, cast_val(void*, (cast_mem(Byte*, ptr)) - offsetof(type, member))))
#define unused_variable(var) (void)var
// Likely/unlikely for better branch prediction
#if defined(ENV_COMPILER_MSVC)
# define likely(x) (x)
# define unlikely(x) (x)
#elif defined(ENV_COMPILER_CLANG) || defined(ENV_COMPILER_GCC)
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
#endif
// Defer-like macros
#define macro_gensym2_(prefix, suffix) macro_gensym_cat_(prefix, suffix)
#define macro_gensym_cat_(prefix, suffix) prefix ## suffix
// macro_entail(X): run the following statement/block first, then X.
// Works by jumping over a loop-wrapped `if`, executing the user's block as
// the else-branch, then looping back once to run X and break.
#define macro_entail(...) \
goto macro_gensym_line(jump_to_else); \
\
while (true) \
if (true) { \
__VA_ARGS__; \
break; \
} else macro_gensym_line(jump_to_else):
// macro_envelop(PRE, POST): run PRE, then the following block, then POST.
#define macro_envelop(cloak_arg_pre_action, cloak_arg_post_action) \
cloak_arg_pre_action; \
goto macro_gensym_line(jump_to_else); \
\
while (true) \
if (true) { \
cloak_arg_post_action; \
break; \
} else macro_gensym_line(jump_to_else):
/* Error handling macros
*
* Sample Code:
*
* T *ta = nullptr; *tb = nullptr;
* err_try(doing_stuff) {
* ta = tCreate();
* if (!ta) err_throw(doing_stuff);
* tb = tCreate();
* if (!tb) err_throw(doing_stuff);
* stuffDo(ta, tb);
* } err_catch(doing_stuff) {
* if (ta) tDelete(ta);
* if (tb) tDelete(tb);
* }
*/
#define err_try(fail) Bool fail = false; do
#define err_throw(fail) {fail=true;break;}(void)0
#define err_catch(fail) while (0); if (fail)
// Expands to a harmless declaration so macros can demand a trailing semicolon.
#define eat_semicolon() static_assert(1, "")
/* Less noisy pragmas */
// pragma_<compiler>(P) emits _Pragma(P) only when compiling with that
// compiler; on the others it degrades to eat_semicolon() so call sites can
// always end with `;`.
#if defined(ENV_COMPILER_CLANG)
# define pragma_msvc(P) eat_semicolon()
# define pragma_clang(P) _Pragma(P) eat_semicolon()
# define pragma_gcc(P) eat_semicolon()
#elif defined(ENV_COMPILER_GCC)
# define pragma_msvc(P) eat_semicolon()
# define pragma_clang(P) eat_semicolon()
# define pragma_gcc(P) _Pragma(P) eat_semicolon()
#elif defined(ENV_COMPILER_MSVC)
# define pragma_msvc(P) _Pragma(P) eat_semicolon()
# define pragma_clang(P) eat_semicolon()
# define pragma_gcc(P) eat_semicolon()
#endif
/* Compiler-specific tokens */
// with_<compiler>(P) keeps token sequence P only for that compiler.
#if defined(ENV_COMPILER_CLANG)
# define with_msvc(P)
# define with_clang(P) P
# define with_gcc(P)
#elif defined(ENV_COMPILER_GCC)
# define with_msvc(P)
# define with_clang(P)
# define with_gcc(P) P
#elif defined(ENV_COMPILER_MSVC)
# define with_msvc(P) P
# define with_clang(P)
# define with_gcc(P)
#endif
#define with_clang_gcc(P) with_clang(P) with_gcc(P)
#define with_clang_msvc(P) with_clang(P) with_msvc(P)
// C99 `T a[static n]` parameter syntax, where supported.
#if defined(ENV_LANG_C) && ENV_LANG_C >= 1999
# define static_array_size(size) with_clang_gcc(static size)
#else
# define static_array_size(size) (size)
#endif
/* Disable all warnings in headers */
#define disable_warnings() \
pragma_clang("clang diagnostic push"); \
pragma_clang("clang diagnostic ignored \"-Weverything\""); \
pragma_msvc("warning ( push , 0 )")
#define enable_warnings() \
pragma_clang("clang diagnostic pop"); \
pragma_msvc("warning ( pop )")
#define allow_padding_in_type(...) \
pragma_msvc("warning ( push )"); \
pragma_msvc("warning ( disable: 4820 )"); \
__VA_ARGS__ \
pragma_msvc("warning ( pop )");
/* Stringify contents of a macro */
#define stringify(a) stringify_helper_(a)
#define stringify_helper_(a) #a
/* Header Files **********************************************************************/
// Function-less C89 headers
#include <float.h>
#include <limits.h>
#include <stdarg.h>
#include <stddef.h>
// Function-less C99 headers
#if (defined(ENV_LANG_C) && (ENV_LANG_C >= 1999)) || (defined(ENV_LANG_CPP) && (ENV_LANG_CPP >= 2011))
# include <inttypes.h>
# include <stdbool.h>
# include <stdint.h>
#endif
// Function-less C11 headers
// stdalign
// Pre-C11: emulate alignof via the GNU __alignof__ extension and provide a
// max_align_t with the biggest alignment the compiler knows about.
#if (defined(ENV_LANG_C) && (ENV_LANG_C >= 2011)) || (defined(ENV_LANG_CPP) && (ENV_LANG_CPP >= 2011))
# include <stdalign.h>
#else
# if defined(ENV_COMPILER_MSVC)
//# define alignof(...) __alignof(__VA_ARGS__)
// NOTE(naman): MSCV doesn't define max_align_t even in C11 and higher, so it is handled below and not here.
# else
# define alignof(...) __alignof__(__VA_ARGS__)
typedef struct max_align_t {
char align_buf[__BIGGEST_ALIGNMENT__] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
} max_align_t;
# endif
#endif
// FIXME(naman): MSVC doesn't seem to define max_align_t for C11 code, so this will have to suffice for now.
// For allocation alignments, see docs.microsoft.com/en-us/cpp/c-runtime-library/reference/malloc?view=msvc-160
// Provide max_align_t on MSVC (it never defines one for C, per the FIXME above).
#if defined(ENV_COMPILER_MSVC)
# if defined(ENV_LANG_C)
// BUGFIX: this previously tested ENV_ARCH_x86 (lowercase x86). The macro is
// defined as ENV_ARCH_X86 above, so the 32-bit branch could never be taken.
# if defined(ENV_ARCH_X86)
typedef struct { double a; } max_align_t; // 8-bytes aligned (docs.microsoft.com/en-us/cpp/c-runtime-library/reference/malloc?view=msvc-160)
static_assert(alignof(max_align_t) == 8, "Alignment of max_align_t is not 8");
# elif defined(ENV_ARCH_X64)
# include <xmmintrin.h>
typedef struct { __m128 sse; } max_align_t; // 16-byte aligned (docs.microsoft.com/en-us/cpp/cpp/m128?view=msvc-160)
static_assert(alignof(max_align_t) == 16, "Alignment of max_align_t is not 16");
# endif
# elif defined(ENV_LANG_CPP)
# include <cstddef>
typedef std::max_align_t max_align_t; // reuse the standard C++ definition
# endif
#endif
#if defined(ENV_ARCH_X64) || defined(ENV_ARCH_X86)
# include <pmmintrin.h> // SSE3, 100% support as per Steam Hardware Survey
#endif
#if defined(ENV_COMPILER_CLANG) || defined(ENV_COMPILER_GCC)
// NOTE(review): __has_feature is a Clang extension; on GCC versions that do
// not define it, `__has_feature(...)` inside #if is a preprocessor parse
// error even when BUILD_INTERNAL is 0 (no short-circuit of parsing) --
// confirm the supported GCC versions, or guard with defined(__has_feature).
# if BUILD_INTERNAL && (__has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__))
disable_warnings();
# define _DISABLE_STL_ANNOTATION
# include <sanitizer/asan_interface.h>
# define ENV_SANITIZER_ADDRESS
enable_warnings();
# endif
#endif
/* Primitive Types *******************************************************************/
// Short, explicit-width aliases used throughout the rest of the header.
typedef int Sint;
typedef unsigned int Uint;
#include <stdint.h> // NOTE(review): redundant -- already included above for C99+; harmless
typedef int8_t Sint8;
typedef int16_t Sint16;
typedef int32_t Sint32;
typedef int64_t Sint64;
typedef uint8_t Uint8;
typedef uint16_t Uint16;
typedef uint32_t Uint32;
typedef uint64_t Uint64;
typedef float Float32;
typedef double Float64;
typedef size_t Size;
#if defined(ENV_OS_LINUX)
// NOTE(review): ssize_t is declared in <sys/types.h>, which is not included
// here -- presumably pulled in transitively via <endian.h>; confirm.
typedef ssize_t SSize;
#elif defined(ENV_OS_WINDOWS)
# include <basetsd.h>
typedef SSIZE_T SSize;
#endif
typedef uintptr_t Uptr;
typedef intptr_t Sptr;
typedef ptrdiff_t Dptr;
typedef unsigned char Byte;
typedef char Char;
#if defined(ENV_LANG_C)
# if ENV_LANG_C >= 1999
typedef _Bool Bool;
# else
typedef char Bool;
# endif
#elif defined(ENV_LANG_CPP)
typedef bool Bool;
#endif
// NOTE(naman): We define our own true & false because by default, they are defined as 1 & 0 and that doesn't play well with _Generic.
pragma_clang("clang diagnostic push");
pragma_clang("clang diagnostic ignored \"-Wkeyword-macro\"");
#if defined(ENV_LANG_C)
# undef true
# define true ((Bool)+1)
# undef false
# define false ((Bool)+0)
#endif
pragma_clang("clang diagnostic pop");
/* Helper Stuff **********************************************************************/
// maximum/minimum: macros in C (beware double evaluation of arguments);
// type-safe templates in C++.
#if defined(ENV_LANG_C)
# define maximum(x, y) ((x) > (y) ? (x) : (y))
# define minimum(x, y) ((x) < (y) ? (x) : (y))
#elif defined(ENV_LANG_CPP)
template<typename T>
const T& maximum (const T& a, const T& b)
{
return (a < b) ? b : a;
}
template<typename T>
const T& minimum (const T& a, const T& b)
{
return (a > b) ? b : a;
}
#endif
// On Windows, declare just the four CRT functions this header needs (and ask
// for their intrinsic forms) instead of pulling in all of <string.h>.
#if defined(ENV_OS_WINDOWS)
# if defined(ENV_LANG_CPP)
extern "C" {
# endif
void * __cdecl memset(void *, Sint, Size); _Pragma("intrinsic(memset)")
void * __cdecl memcpy(void *, const void *, Size); _Pragma("intrinsic(memcpy)")
int __cdecl memcmp(const void*, const void*, Size); _Pragma("intrinsic(memcmp)")
Size __cdecl strlen(const char*); _Pragma("intrinsic(strlen)")
# if defined(ENV_LANG_CPP)
}
# endif
#else
# include <string.h>
#endif
// Zero an lvalue in place (x must be an object, not a pointer to one).
#define memzero(x) memset(&(x), 0, sizeof(x))
// breakpoint(): traps into the debugger on internal builds; no-op otherwise.
// NOTE(review): if BUILD_INTERNAL is set on an unsupported OS/arch/compiler
// combination, breakpoint() is left undefined and its first use will fail to
// compile -- confirm that is intended.
#if defined(BUILD_INTERNAL)
# if defined(ENV_OS_WINDOWS)
# define breakpoint() __debugbreak()
# elif defined(ENV_OS_LINUX)
# if defined(ENV_ARCH_X86) || defined(ENV_ARCH_X64)
# if defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG)
# define breakpoint() __asm__ volatile("int $0x03")
# endif // !GCC && !Clang
# endif // !x86 && !x64
# endif // !window && !linux
#else
# define breakpoint() do {} while (0)
#endif
#define debugBreak() breakpoint()
#define debugAssert(...) do { if (!(__VA_ARGS__)) { debugBreak(); } } while (0)
// debugUnreachable(): traps on internal builds (the assert below always
// fails, since the global is immutably false); tells the optimizer the path
// is impossible on release builds.
#if defined(BUILD_INTERNAL)
global_immutable Bool debug_unreachable_code_executed = false;
# define debugUnreachable() debugAssert(debug_unreachable_code_executed == true)
#else
# if defined(ENV_COMPILER_MSVC)
# define debugUnreachable() __assume(false)
# elif defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG)
# define debugUnreachable() __builtin_unreachable()
# endif
#endif
/* *********************************************************************************
* Base 32 Encoding
* ****************
* Inspired by Crockford's Base 32 encoding (https://www.crockford.com/base32.html)
*
* Changes:
*
* 1. U/u is decoded to the same integer as V/v.
* When encoding, v is the right value to encode to.
*
* 2. For the extra check symbols, use - . _ ~ u, since
* these are the symbols considered unreserved in URLs
* See section 2.3 of https://www.ietf.org/rfc/rfc3986.txt
*
* (Explanation: If checksum is required, the integer
* value of the string is divided by 37 (the next prime
* after 32) and the encoded remainder is appended after
* the string. Since the value will be <= 37, five extra
* symbols are required).
*
* 3. The encoded characters are lower-case, not upper case
* since upper case looks like screaming. Also, this way,
* alphabets and numbers are a bit more distinguishable.
* And it reads like r0x0ring h4x0r 13375p34k, n00bs4uc3!
* (Obligatory: https://megatokyo.com/strip/9)
*/
// Decodes one Base-32 character to its integer value (0..31).
// Aliases per the scheme described above: O/o -> 0, I/i/L/l -> 1, and
// U/u decodes to the same value as V/v (27). Any character outside the
// alphabet is a programming error (hits debugUnreachable()); callers that
// take untrusted input should validate first (see b32VerifyInputString).
header_function
Uint8 b32DecodeChar (Char code) // Character to integer
{
switch (code) {
case 'O': case 'o':
case '0': return 0;
case 'I': case 'i':
case 'L': case 'l':
case '1': return 1;
case '2': return 2;
case '3': return 3;
case '4': return 4;
case '5': return 5;
case '6': return 6;
case '7': return 7;
case '8': return 8;
case '9': return 9;
case 'A': case 'a': return 10;
case 'B': case 'b': return 11;
case 'C': case 'c': return 12;
case 'D': case 'd': return 13;
case 'E': case 'e': return 14;
case 'F': case 'f': return 15;
case 'G': case 'g': return 16;
case 'H': case 'h': return 17;
case 'J': case 'j': return 18;
case 'K': case 'k': return 19;
case 'M': case 'm': return 20;
case 'N': case 'n': return 21;
case 'P': case 'p': return 22;
case 'Q': case 'q': return 23;
case 'R': case 'r': return 24;
case 'S': case 's': return 25;
case 'T': case 't': return 26;
case 'U': case 'u':
case 'V': case 'v': return 27;
case 'W': case 'w': return 28;
case 'X': case 'x': return 29;
case 'Y': case 'y': return 30;
case 'Z': case 'z': return 31;
default: debugUnreachable(); return 0;
}
}
// Decodes a check symbol to its integer value (0..36). The five extra
// check-only symbols map to 32..36; every other character is delegated to
// the regular Base-32 alphabet decoder. Note that in checksum position 'u'
// means 36, not the alphabet alias of v (27).
header_function
Uint8 b32DecodeChecksumChar (Char check) // Character to integer
{
    if (check == '-') { return 32; }
    if (check == '.') { return 33; }
    if (check == '_') { return 34; }
    if (check == '~') { return 35; }
    if (check == 'u') { return 36; }
    return b32DecodeChar(check);
}
// Encodes an integer value (0..31) as its canonical lowercase Base-32
// character. The alphabet skips i, l, o, u (see the design notes above);
// values outside 0..31 are a programming error (debugUnreachable()).
header_function
Char b32EncodeChar (Uint8 val) // Integer to character
{
switch (val) {
case 0: return '0'; case 1: return '1'; case 2: return '2'; case 3: return '3';
case 4: return '4'; case 5: return '5'; case 6: return '6'; case 7: return '7';
case 8: return '8'; case 9: return '9'; case 10: return 'a'; case 11: return 'b';
case 12: return 'c'; case 13: return 'd'; case 14: return 'e'; case 15: return 'f';
case 16: return 'g'; case 17: return 'h'; case 18: return 'j'; case 19: return 'k';
case 20: return 'm'; case 21: return 'n'; case 22: return 'p'; case 23: return 'q';
case 24: return 'r'; case 25: return 's'; case 26: return 't'; case 27: return 'v';
case 28: return 'w'; case 29: return 'x'; case 30: return 'y'; case 31: return 'z';
default: debugUnreachable(); return 0;
}
}
// Encodes a checksum value (0..36) as a character. Values 32..36 map to the
// five extra check symbols '-', '.', '_', '~', 'u'; everything else is
// delegated to the regular Base-32 alphabet encoder.
header_function
Char b32EncodeChecksumChar (Uint8 val) // Integer to character
{
    if ((val >= 32) && (val <= 36)) {
        return "-._~u"[val - 32];
    }
    return b32EncodeChar(val);
}
// Verifies that `str` (of `len` characters) contains only characters of the
// canonical Base-32 encode alphabet, and -- if `checksum` is non-zero --
// that the mod-37 check symbol matches. Returns true on success.
// Note: alias characters that b32DecodeChar would accept (O/o, I/i, L/l,
// U/u) are rejected here; only canonically-encoded strings verify.
header_function
Bool b32VerifyInputString (Char *str, Size len, Char checksum)
{
Uint8 mod = 0;
for (Size i = 0; i < len; i++) {
Bool valid = false;
valid |= (str[i] >= 0x30) && (str[i] <= 0x39); // 0-9
valid |= (str[i] >= 0x61) && (str[i] <= 0x68); // a-h
valid |= (str[i] == 0x6a); // j
valid |= (str[i] == 0x6b); // k
valid |= (str[i] == 0x6d); // m
valid |= (str[i] == 0x6e); // n
valid |= (str[i] >= 0x70) && (str[i] <= 0x74); // p-t
valid |= (str[i] >= 0x76) && (str[i] <= 0x7a); // v-z
valid |= (str[i] >= 0x41) && (str[i] <= 0x48); // A-H
valid |= (str[i] == 0x4a); // J
valid |= (str[i] == 0x4b); // K
valid |= (str[i] == 0x4d); // M
valid |= (str[i] == 0x4e); // N
valid |= (str[i] >= 0x50) && (str[i] <= 0x54); // P-T
valid |= (str[i] >= 0x56) && (str[i] <= 0x5a); // V-Z
if (!valid) return false;
mod = (mod + b32DecodeChar(str[i])) % 37u; // running checksum, mod 37
}
if (checksum) {
Uint8 check_car = b32DecodeChecksumChar(checksum);
Uint8 check_val = mod;
if (check_val != check_car) return false;
}
return true;
}
// Computes the check symbol for `str`: the decoded values are summed modulo
// 37 (the next prime after 32, per the design notes above) and the remainder
// is encoded with the extended checksum alphabet.
header_function
Char b32GetChecksumChar (Char *str, Size len)
{
    Uint8 acc = 0;
    for (Size idx = 0; idx < len; idx++) {
        acc = (acc + b32DecodeChar(str[idx])) % 37u;
    }
    return b32EncodeChecksumChar(acc);
}
/************************************************************************************
* Unicode Processing
*/
// TODO(naman): Go through these security guidelines for safe UTF-8 processing:
// https://security.stackexchange.com/questions/257017/what-are-best-practices-for-handling-user-unicode-in-a-web-application
/*
* First codepoint Last codepoint Byte 1 Byte 2 Byte 3 Byte 4
* U+0000 U+007F 0yyyzzzz
* U+0080 U+07FF 110xxxyy 10yyzzzz
* U+0800 U+FFFF 1110wwww 10xxxxyy 10yyzzzz
* U+010000 U+10FFFF 11110uvv 10vvwwww 10xxxxyy 10yyzzzz
*/
// UTF32 to UTF8
// Encodes the codepoint `code` as UTF-8 into `buffer` (which must hold at
// least 4 bytes). Returns the number of bytes written (1..4), or 0 when
// `buffer` is null or `code` is not a valid Unicode scalar value.
header_function
Size unicodeEncode (Uint32 code, Byte buffer[static_array_size(4)])
{
    if (buffer == nullptr) return 0;
    // BUGFIX: UTF-16 surrogates (U+D800..U+D8FF..U+DFFF) are not scalar
    // values and must never be emitted as UTF-8 (RFC 3629, section 3);
    // previously they were encoded as (invalid) 3-byte sequences.
    if ((code >= 0xD800) && (code <= 0xDFFF)) return 0;
    if (code <= 0x7F) {
        buffer[0] = cast_val(Byte, code); // 0yyyzzzz
        return 1;
    } else if (code <= 0x7FF) {
        buffer[0] = cast_val(Byte, cast_val(Byte, 0xC0) | cast_val(Byte, (code >> 6))); // 110xxxyy
        buffer[1] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, (code & 0x3F))); // 10yyzzzz
        return 2;
    } else if (code <= 0xFFFF) {
        buffer[0] = cast_val(Byte, cast_val(Byte, 0xE0) | cast_val(Byte, (code >> 12))); // 1110wwww
        buffer[1] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, ((code >> 6) & 0x3F))); // 10xxxxyy
        buffer[2] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, (code & 0x3F))); // 10yyzzzz
        return 3;
    } else if (code <= 0x10FFFF) {
        buffer[0] = cast_val(Byte, cast_val(Byte, 0xF0) | cast_val(Byte, (code >> 18))); // 11110uvv
        buffer[1] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, ((code >> 12) & 0x3F))); // 10vvwwww
        buffer[2] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, ((code >> 6) & 0x3F))); // 10xxxxyy
        buffer[3] = cast_val(Byte, cast_val(Byte, 0x80) | cast_val(Byte, (code & 0x3F))); // 10yyzzzz
        return 4;
    } else {
        return 0; // Invalid codepoint
    }
}
// UTF8 to UTF32
// Incremental UTF-8 -> UTF-32 decoder. Reads one sequence starting at
// buffer[*offset], advances *offset by each byte consumed, and stores the
// decoded codepoint in *codepoint. Returns true on success.
// - offset/codepoint may be nullptr; scratch locals are substituted.
// - A NUL byte is treated as success with *codepoint left at 0.
// - *offset is advanced even when a sequence fails validation partway, so a
//   caller resumes after the offending byte(s), not at them.
// - NOTE(review): overlong encodings and surrogate codepoints are NOT
//   rejected (only values > U+10FFFF are) -- see the security TODO above
//   before using this on untrusted input.
header_function
Bool unicodeDecode (const Byte *buffer, Size len, Size *offset, Uint32 *codepoint)
{
if (buffer == nullptr) return 0;
Size temp1 = 0; if (offset == nullptr) offset = &temp1;
Uint32 temp2 = 0; if (codepoint == nullptr) codepoint = &temp2;
Size o = *offset;
len -= o; // remaining bytes from the current position
*codepoint = 0;
if (len == 0) return false;
Uint32 b0 = buffer[o+0];
*offset += 1;
if (b0 == 0) return true; // nullptr-byte
if ((b0 & 0x80) == 0) { // 0x80 = 10000000
*codepoint = b0;
return true;
}
if (len == 1) return false;
Uint32 b1 = buffer[o+1];
*offset += 1;
if (b1 == 0) return false;
Uint32 BA1 = b1 & 0xC0; // 0xC0 = 11000000
Uint32 BB1 = b1 & 0x3F; // 0x3F = 00111111
if ((b0 & 0xE0) == 0xC0) { // 0xE0 = 11100000 , 0xC0 = 11000000
if (BA1 != 0x80) return false; // 0x80 = 10000000
Uint32 B = b0 & 0x1F; // 0x1F = 00011111
*codepoint = (B << 6) | BB1;
return true;
}
if (len == 2) return false;
Uint32 b2 = buffer[o+2];
*offset += 1;
if (b2 == 0) return false;
Uint32 BA2 = b2 & 0xC0; // 0xC0 = 11000000
Uint32 BB2 = b2 & 0x3F; // 0x3F = 00111111
if ((b0 & 0xF0) == 0xE0) { // 0xF0 = 11110000 , 0xE0 = 11100000
if (BA1 != 0x80) return false; // 0x80 = 10000000
if (BA2 != 0x80) return false; // 0x80 = 10000000
Uint32 B = b0 & 0x0F; // 0x0F = 00001111
*codepoint = (B << 12) | (BB1 << 6) | BB2;
return true;
}
if (len == 3) return false;
Uint32 b3 = buffer[o+3];
*offset += 1;
if (b3 == 0) return false;
Uint32 BA3 = b3 & 0xC0; // 0xC0 = 11000000
Uint32 BB3 = b3 & 0x3F; // 0x3F = 00111111
if ((b0 & 0xF8) == 0xF0) { // 0xF8 = 11111000 , 0xF0 = 11110000
if (BA1 != 0x80) return false; // 0x80 = 10000000
if (BA2 != 0x80) return false; // 0x80 = 10000000
if (BA3 != 0x80) return false; // 0x80 = 10000000
Uint32 B = b0 & 0x07; // 0x07 = 00000111
Uint32 result = (B << 18) | (BB1 << 12) | (BB2 << 6) | BB3;
if (result > 0x10FFFF) return false;
*codepoint = result;
return true;
}
return false;
}
/**********************************************************************
* Bitwise Delight
*/
// NOTE(naman): Bit vectors are supposed to be zero-indexed.
// NOTE(naman): Base type of bit vectors shouldn't have size > 8 bytes (to prevent shift overflow).
// Bytes needed to hold b bits (rounded up).
#define bitToBytes(b) (((b)+(CHAR_BIT-1))/(CHAR_BIT))
// Which element of the array holds `index`, and which bit within it.
#define bit_ValInBuf(array, index) ((index)/(CHAR_BIT * sizeof(*(array))))
#define bit_BitInVal(array, index) ((index)%(CHAR_BIT * sizeof(*(array))))
#define bitSet(array, index) \
((array)[bit_ValInBuf(array, index)] |= (1LLU << bit_BitInVal(array, index)))
#define bitReset(array, index) \
((array)[bit_ValInBuf(array, index)] &= ~(1LLU << bit_BitInVal(array, index)))
// BUGFIX: bitToggle previously XORed with ~(1LLU << bit), which flips every
// bit EXCEPT the target one. Toggling must XOR with the single-bit mask
// itself (cf. bitSet/bitReset above).
#define bitToggle(array, index) \
((array)[bit_ValInBuf(array, index)] ^= (1LLU << bit_BitInVal(array, index)))
#define bitTest(array, index) \
((array)[bit_ValInBuf(array, index)] & (1LLU << bit_BitInVal(array, index)))
// bitMSB*/bitLSB*: zero-based index of the most/least significant set bit,
// or 0xFF when the input is zero (the (unsigned long)-1 below truncates to
// 0xFF through the Uint8 return type).
#if defined(ENV_COMPILER_MSVC)
# include <intrin.h>
/* `_BitScanReverse(&result, x)` searches `x` from MSB to least significant bit LSB for a set bit.
* Loads `result` with the bit position of the first set bit found (zero-indexed)
* Returns 0 if the `x` is zero; nonzero otherwise.
*/
header_function
Uint8 bitMSBU32 (Uint32 x)
{
unsigned long result = 0;
Byte found = _BitScanReverse(&result, x);
result = found ? result : (unsigned long)-1;
return (Uint8)result;
}
header_function
Uint8 bitMSBU64 (Uint64 x)
{
unsigned long result = 0;
Byte found = _BitScanReverse64(&result, x);
result = found ? result : (unsigned long)-1;
return (Uint8)result;
}
/* `_BitScanForward(&result, x)` searches `x` from LSB to the MSB for a set bit.
* Loads `result` with the bit position of the first set bit found.
* Returns 0 if the `x` is zero; nonzero otherwise.
*/
header_function
Uint8 bitLSBU32 (Uint32 x)
{
unsigned long result = 0;
Byte found = _BitScanForward(&result, x);
result = found ? result : (unsigned long)-1;
return (Uint8)result;
}
header_function
Uint8 bitLSBU64 (Uint64 x)
{
unsigned long result = 0;
Byte found = _BitScanForward64(&result, x);
result = found ? result : (unsigned long)-1;
return (Uint8)result;
}
#elif defined(ENV_COMPILER_GCC) || defined(ENV_COMPILER_CLANG)
/* On GCC/Clang the bit scans are built from __builtin_clz/__builtin_ctz
 * (leading/trailing zero counts). Both builtins are undefined for a zero
 * argument, so zero is special-cased to return cast_val(Uint8, -1), i.e.
 * 0xFF -- the same "no set bit" value the MSVC path above produces. */
header_function
Uint8 bitMSBU32 (Uint32 x)
{
    // Index of the highest set bit: 31 - (leading zeros).
    if (x == 0) { return cast_val(Uint8, -1); }
    return cast_val(Uint8, 31 - __builtin_clz(x));
}
header_function
Uint8 bitMSBU64 (Uint64 x)
{
    // Index of the highest set bit: 63 - (leading zeros).
    if (x == 0) { return cast_val(Uint8, -1); }
    return cast_val(Uint8, 63 - __builtin_clzll(x));
}
header_function
Uint8 bitLSBU32 (Uint32 x)
{
    // Index of the lowest set bit == number of trailing zeros.
    if (x == 0) { return cast_val(Uint8, -1); }
    return cast_val(Uint8, __builtin_ctz(x));
}
header_function
Uint8 bitLSBU64 (Uint64 x)
{
    // Index of the lowest set bit == number of trailing zeros.
    if (x == 0) { return cast_val(Uint8, -1); }
    return cast_val(Uint8, __builtin_ctzll(x));
}
#endif
// Type-dispatched front-ends: _Generic needs C11; C++ uses plain overloads.
#if defined(ENV_LANG_C) && ENV_LANG_C >= 2011
# define bitMSB(x) _Generic((x), Uint32: bitMSBU32, Uint64: bitMSBU64)(x)
# define bitLSB(x) _Generic((x), Uint32: bitLSBU32, Uint64: bitLSBU64)(x)
#elif defined(ENV_LANG_CPP)
header_function Uint8 bitMSB (Uint32 x) { return bitMSBU32(x); }
header_function Uint8 bitMSB (Uint64 x) { return bitMSBU64(x); }
header_function Uint8 bitLSB (Uint32 x) { return bitLSBU32(x); }
header_function Uint8 bitLSB (Uint64 x) { return bitLSBU64(x); }
#endif // !LANG_C && !LANG_CPP
// bitPow2*: 2^x (undefined behaviour when x >= 64, as the shift overflows).
header_function Uint64 bitPow2U32 (Uint32 x) { Uint64 y = 1ull << x; return y; }
header_function Uint64 bitPow2U64 (Uint64 x) { Uint64 y = 1ull << x; return y; }
// bitLog2*: floor(log2(x)); 0 is special-cased to return 0.
header_function Uint8 bitLog2U32 (Uint32 x) { Uint8 y = x ? bitMSBU32(x) : 0u; return y; }
header_function Uint8 bitLog2U64 (Uint64 x) { Uint8 y = x ? bitMSBU64(x) : 0u; return y; }
// NOTE: (x & (x-1)) == 0 is also true for x == 0.
header_function Bool bitIsPowerOf2U32 (Uint32 x) { Bool b = (x & (x - 1)) == 0; return b; }
header_function Bool bitIsPowerOf2U64 (Uint64 x) { Bool b = (x & (x - 1)) == 0; return b; }
// NOTE(review): for a power of two these return x/2, otherwise x unchanged --
// confirm that is the intended "previous" semantics. Also, x <= 1 takes the
// power-of-two branch with bitLog2(x) == 0, so the shift count wraps
// ((0 - 1) as unsigned) -- undefined behaviour; verify callers never pass 0/1.
header_function Uint64 bitPrevPowerOf2U32 (Uint32 x) { Uint64 y = bitIsPowerOf2U32(x) ? (1u << (bitLog2U32(x) - 1u)) : x; return y; }
header_function Uint64 bitPrevPowerOf2U64 (Uint64 x) { Uint64 y = bitIsPowerOf2U64(x) ? (1ull << (bitLog2U64(x) - 1ull)) : x; return y; }
// Number of leading zeros (on the most significant end)
// (for x == 0, bitMSB returns 0xFF and the Uint8 conversion wraps to exactly 32/64)
header_function Uint8 bitMSZerosU32 (Uint32 x) { Uint8 clz = 32U - bitMSBU32(x) - 1U; return clz; }
header_function Uint8 bitMSZerosU64 (Uint64 x) { Uint8 clz = 64U - bitMSBU64(x) - 1U; return clz; }
// square root functions from Mark Dickinson
// https://stackoverflow.com/a/70550680/12862673
// Integer square root (floor(sqrt(x)) per the referenced answer): normalise
// x upward by an even shift, refine an estimate with successive divisions,
// correct the final off-by-one, then shift back down by half the shift.
header_function
Uint16 bitSqrtU32 (Uint32 x)
{
Sint lz = bitMSZerosU32(x | 1U) & 0x1E; // even normalisation shift (| 1U keeps x == 0 safe)
x <<= lz;
Uint32 y = 1u + (x >> 30);
y = (y << 1) + (x >> 27) / y;
y = (y << 3) + (x >> 21) / y;
y = (y << 7) + (x >> 9) / y;
y -= x < cast_val(Uint32, y) * y; // correct possible overestimate by one
return cast_val(Uint16, (y >> (lz >> 1)));
}
header_function
Uint32 bitSqrtU64 (Uint64 x)
{
Uint8 lz = cast_val(Uint8, bitMSZerosU64(x | 1ull) & 0x3Eu); // even normalisation shift
x <<= lz;
Uint64 y = 2ull + (x >> 63);
y = (y << 1) + (x >> 59) / y;
y = (y << 3) + (x >> 53) / y;
y = (y << 7) + (x >> 41) / y;
y = (y << 15) + (x >> 17) / y;
y -= x < cast_val(Uint64, y) * y; // correct possible overestimate by one
return cast_val(Uint32, (y >> (lz >> 1)));
}
// Type-dispatched front-ends for the bit helpers above (C11 _Generic / C++ overloads).
#if defined(ENV_LANG_C) && ENV_LANG_C >= 2011
# define bitPow2(x) _Generic((x), Uint32: bitPow2U32, Uint64: bitPow2U64)(x)
# define bitLog2(x) _Generic((x), Uint32: bitLog2U32, Uint64: bitLog2U64)(x)
# define bitIsPowerOf2(x) _Generic((x), Uint32: bitIsPowerOf2U32, Uint64: bitIsPowerOf2U64)(x)
# define bitPrevPowerOf2(x) _Generic((x), Uint32: bitPrevPowerOf2U32, Uint64: bitPrevPowerOf2U64)(x)
# define bitSqrt(x) _Generic((x), Uint32: bitSqrtU32, Uint64: bitSqrtU64)(x)
#elif defined(ENV_LANG_CPP)
header_function Uint64 bitPow2 (Uint32 x) { return bitPow2U32(x); }
header_function Uint64 bitPow2 (Uint64 x) { return bitPow2U64(x); }
header_function Uint8 bitLog2 (Uint32 x) { return bitLog2U32(x); }
header_function Uint8 bitLog2 (Uint64 x) { return bitLog2U64(x); }
header_function Bool bitIsPowerOf2 (Uint32 x) { return bitIsPowerOf2U32(x); }
header_function Bool bitIsPowerOf2 (Uint64 x) { return bitIsPowerOf2U64(x); }
header_function Uint64 bitPrevPowerOf2 (Uint32 x) { return bitPrevPowerOf2U32(x); }
header_function Uint64 bitPrevPowerOf2 (Uint64 x) { return bitPrevPowerOf2U64(x); }
header_function Uint16 bitSqrt (Uint32 x) { return bitSqrtU32(x); }
header_function Uint32 bitSqrt (Uint64 x) { return bitSqrtU64(x); }
#endif
/*************************************************************************************
* Serialization
*/
header_function
void srlzWrite8 (Uint8 src, Byte *dest)
{
    // A single byte has no byte order; just store it.
    *dest = src;
}
// Endianness is meaningless for one byte; both variants alias the same writer.
#define srlzWrite8BE srlzWrite8
#define srlzWrite8LE srlzWrite8
header_function
void srlzWrite16LE (Uint16 src, Byte *dest)
{
    // Store the 16-bit value least-significant byte first (little endian).
    dest[0] = cast_val(Uint8, src & 0xFFu);
    dest[1] = cast_val(Uint8, (src >> 8) & 0xFFu);
}
header_function
void srlzWrite16BE (Uint16 src, Byte *dest)
{
    // Store the 16-bit value most-significant byte first (big endian).
    dest[0] = cast_val(Uint8, (src >> 8) & 0xFFu);
    dest[1] = cast_val(Uint8, src & 0xFFu);
}
header_function
void srlzWrite32LE (Uint32 src, Byte *dest)
{
    // Emit all four bytes starting with the least significant (little endian).
    for (Size i = 0; i < sizeof(src); i++) {
        dest[i] = cast_val(Uint8, (src >> (8 * i)) & 0xFFu);
    }
}
header_function
void srlzWrite32BE (Uint32 src, Byte *dest)
{
    // Emit all four bytes starting with the most significant (big endian).
    for (Size i = 0; i < sizeof(src); i++) {
        dest[i] = cast_val(Uint8, (src >> (8 * (sizeof(src) - 1 - i))) & 0xFFu);
    }
}
header_function
void srlzWrite64LE (Uint64 src, Byte *dest)
{
    // Emit all eight bytes starting with the least significant (little endian).
    for (Size i = 0; i < sizeof(src); i++) {
        dest[i] = cast_val(Uint8, (src >> (8 * i)) & 0xFFu);
    }
}
header_function
void srlzWrite64BE (Uint64 src, Byte *dest)
{
    // Emit all eight bytes starting with the most significant (big endian).
    for (Size i = 0; i < sizeof(src); i++) {
        dest[i] = cast_val(Uint8, (src >> (8 * (sizeof(src) - 1 - i))) & 0xFFu);
    }
}
header_function
Uint8 srlzRead8 (Byte *src)
{
    // A single byte needs no endianness handling.
    return src[0];
}
header_function
Uint16 srlzRead16LE (Byte *src) {
    // Assemble from little-endian order: src[0] is the low byte.
    Uint16 lo = cast_val(Uint16, 255 & src[0]);
    Uint16 hi = cast_val(Uint16, 255 & src[1]);
    return cast_val(Uint16, (hi << 8) | lo);
}
header_function
Uint16 srlzRead16BE (Byte *src) {
    // Assemble from big-endian order: src[0] is the high byte.
    Uint16 hi = cast_val(Uint16, 255 & src[0]);
    Uint16 lo = cast_val(Uint16, 255 & src[1]);
    return cast_val(Uint16, (hi << 8) | lo);
}
header_function
Uint32 srlzRead32LE (Byte *src)
{
    // Assemble from little-endian order: src[0] is the least significant byte.
    Uint32 value = 0;
    for (Size i = 0; i < sizeof(value); i++) {
        value |= cast_val(Uint32, 255 & src[i]) << (8 * i);
    }
    return value;
}
header_function
Uint32 srlzRead32BE (Byte *src)
{
    // Assemble from big-endian order: fold bytes in, most significant first.
    Uint32 value = 0;
    for (Size i = 0; i < sizeof(value); i++) {
        value = (value << 8) | cast_val(Uint32, 255 & src[i]);
    }
    return value;
}
header_function
Uint64 srlzRead64LE (Byte *src)
{
    // Assemble from little-endian order: src[0] is the least significant byte.
    Uint64 value = 0;
    for (Size i = 0; i < sizeof(value); i++) {
        value |= cast_val(Uint64, 255 & src[i]) << (8 * i);
    }
    return value;
}
header_function
Uint64 srlzRead64BE (Byte *src)
{
    // Assemble from big-endian order: fold bytes in, most significant first.
    Uint64 value = 0;
    for (Size i = 0; i < sizeof(value); i++) {
        value = (value << 8) | cast_val(Uint64, 255 & src[i]);
    }
    return value;
}
// Type-punning unions: view signed and floating payloads as unsigned bits of
// the same width so the byte-order writers/readers above can be reused.
typedef union srlz_8 { Uint8 u8; Sint8 s8; } srlz_8;
typedef union srlz_16 { Uint16 u16; Sint16 s16; } srlz_16;
typedef union srlz_32 { Uint32 u32; Sint32 s32; Float32 f32; } srlz_32;
typedef union srlz_64 { Uint64 u64; Sint64 s64; Float64 f64; } srlz_64;
/*************************************************************************************
* Marshalling
*/
// Signature of the user transport callback used by the marshal* functions:
// it consumes (on write) or fills (on read) `size` bytes at `data`, and
// returns false to abort the marshalling.
#define MARSHAL_FUNC(_name) Bool _name (void* data, Size size, void* userdata)
header_function
Bool marshalUint8 (Uint8 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) {
    // Marshal a Uint8 through `func`. One byte has no byte order, so the
    // endianness flag exists only for API symmetry with the wider variants.
    unused_variable(big_endian);
    Byte buf[sizeof(*datum)] = {0};
    if (!write) { // read path: pull the byte, then decode
        if (!func(buf, sizeof(*datum), funcdata)) return false;
        *datum = srlzRead8(buf);
        return true;
    }
    // write path: encode, then push
    srlzWrite8(*datum, buf);
    return func(buf, sizeof(*datum), funcdata) ? true : false;
}
header_function
Bool marshalSint8 (Sint8 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) {
    // Marshal a Sint8 through `func`; endianness is irrelevant for one byte.
    unused_variable(big_endian);
    Byte buf[sizeof(*datum)] = {0};
    srlz_8 pun = {0}; // pun through the union so the exact bit pattern survives
    if (!write) { // read path
        if (!func(buf, sizeof(*datum), funcdata)) return false;
        pun.u8 = srlzRead8(buf);
        *datum = pun.s8;
        return true;
    }
    // write path
    pun.s8 = *datum;
    srlzWrite8(pun.u8, buf);
    return func(buf, sizeof(*datum), funcdata) ? true : false;
}
header_function
Bool marshalUint16 (Uint16 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) {
    // Marshal a Uint16 through `func` in the requested byte order.
    Byte buf[sizeof(*datum)] = {0};
    srlz_16 pun = {0};
    if (!write) { // read path: pull bytes, then decode
        if (!func(buf, sizeof(*datum), funcdata)) return false;
        pun.u16 = big_endian ? srlzRead16BE(buf) : srlzRead16LE(buf);
        *datum = pun.u16;
        return true;
    }
    // write path: encode into the scratch buffer, then push
    pun.u16 = *datum;
    if (big_endian) { srlzWrite16BE(pun.u16, buf); }
    else            { srlzWrite16LE(pun.u16, buf); }
    return func(buf, sizeof(*datum), funcdata) ? true : false;
}
header_function
Bool marshalSint16 (Sint16 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) {
    // Marshal a Sint16 through `func`, punning to unsigned for the byte work.
    Byte buf[sizeof(*datum)] = {0};
    srlz_16 pun = {0};
    if (!write) { // read path
        if (!func(buf, sizeof(*datum), funcdata)) return false;
        pun.u16 = big_endian ? srlzRead16BE(buf) : srlzRead16LE(buf);
        *datum = pun.s16;
        return true;
    }
    // write path
    pun.s16 = *datum;
    if (big_endian) { srlzWrite16BE(pun.u16, buf); }
    else            { srlzWrite16LE(pun.u16, buf); }
    return func(buf, sizeof(*datum), funcdata) ? true : false;
}
header_function
Bool marshalUint32 (Uint32 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) {
    // Marshal a Uint32 through `func` in the requested byte order.
    Byte buf[sizeof(*datum)] = {0};
    srlz_32 pun = {0};
    if (!write) { // read path
        if (!func(buf, sizeof(*datum), funcdata)) return false;
        pun.u32 = big_endian ? srlzRead32BE(buf) : srlzRead32LE(buf);
        *datum = pun.u32;
        return true;
    }
    // write path
    pun.u32 = *datum;
    if (big_endian) { srlzWrite32BE(pun.u32, buf); }
    else            { srlzWrite32LE(pun.u32, buf); }
    return func(buf, sizeof(*datum), funcdata) ? true : false;
}
header_function
Bool marshalSint32 (Sint32 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) {
    // Marshal a Sint32 through `func`, punning to unsigned for the byte work.
    Byte buf[sizeof(*datum)] = {0};
    srlz_32 pun = {0};
    if (!write) { // read path
        if (!func(buf, sizeof(*datum), funcdata)) return false;
        pun.u32 = big_endian ? srlzRead32BE(buf) : srlzRead32LE(buf);
        *datum = pun.s32;
        return true;
    }
    // write path
    pun.s32 = *datum;
    if (big_endian) { srlzWrite32BE(pun.u32, buf); }
    else            { srlzWrite32LE(pun.u32, buf); }
    return func(buf, sizeof(*datum), funcdata) ? true : false;
}
header_function
Bool marshalFloat32 (Float32 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) {
    // Marshal a Float32 through `func`, punning its bits to Uint32 for the byte work.
    Byte buf[sizeof(*datum)] = {0};
    srlz_32 pun = {0};
    if (!write) { // read path
        if (!func(buf, sizeof(*datum), funcdata)) return false;
        pun.u32 = big_endian ? srlzRead32BE(buf) : srlzRead32LE(buf);
        *datum = pun.f32;
        return true;
    }
    // write path
    pun.f32 = *datum;
    if (big_endian) { srlzWrite32BE(pun.u32, buf); }
    else            { srlzWrite32LE(pun.u32, buf); }
    return func(buf, sizeof(*datum), funcdata) ? true : false;
}
header_function
Bool marshalUint64 (Uint64 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) {
    // Marshal a Uint64 through `func` in the requested byte order.
    Byte buf[sizeof(*datum)] = {0};
    srlz_64 pun = {0};
    if (!write) { // read path
        if (!func(buf, sizeof(*datum), funcdata)) return false;
        pun.u64 = big_endian ? srlzRead64BE(buf) : srlzRead64LE(buf);
        *datum = pun.u64;
        return true;
    }
    // write path
    pun.u64 = *datum;
    if (big_endian) { srlzWrite64BE(pun.u64, buf); }
    else            { srlzWrite64LE(pun.u64, buf); }
    return func(buf, sizeof(*datum), funcdata) ? true : false;
}
header_function
Bool marshalSint64 (Sint64 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) {
    // Marshal a Sint64 through `func`, punning to unsigned for the byte work.
    Byte buf[sizeof(*datum)] = {0};
    srlz_64 pun = {0};
    if (!write) { // read path
        if (!func(buf, sizeof(*datum), funcdata)) return false;
        pun.u64 = big_endian ? srlzRead64BE(buf) : srlzRead64LE(buf);
        *datum = pun.s64;
        return true;
    }
    // write path
    pun.s64 = *datum;
    if (big_endian) { srlzWrite64BE(pun.u64, buf); }
    else            { srlzWrite64LE(pun.u64, buf); }
    return func(buf, sizeof(*datum), funcdata) ? true : false;
}
header_function
Bool marshalFloat64 (Float64 *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) {
    // Marshal a Float64 through `func`, punning its bits to Uint64 for the byte work.
    Byte buf[sizeof(*datum)] = {0};
    srlz_64 pun = {0};
    if (!write) { // read path
        if (!func(buf, sizeof(*datum), funcdata)) return false;
        pun.u64 = big_endian ? srlzRead64BE(buf) : srlzRead64LE(buf);
        *datum = pun.f64;
        return true;
    }
    // write path
    pun.f64 = *datum;
    if (big_endian) { srlzWrite64BE(pun.u64, buf); }
    else            { srlzWrite64LE(pun.u64, buf); }
    return func(buf, sizeof(*datum), funcdata) ? true : false;
}
/*************************************************************************************
* Free Grid
*/
/*
* row mask |‾‾‾‾‾‾‾‾‾‾side‾‾‾‾‾‾‾‾‾|
* lsb
* 1 FF FF FF FF FF ‾|
* 1 FF FF FF FF FF |
* 1 FF FF 07 00 00 side
* 0 00 00 00 00 00 |
* 0 00 00 00 00 00 _|
* msb
* 1 1 1 1 1 <- col mask
* lsb msb
*/
// NOTE(naman): Here, the "correct" thing to do would have been to
// calculate the `side` variable using `sqrt(elem_count)`. But even
// if we take the maximum possible size, the allocation size is
// 32 KiB which is around the minimum allocation we can make with
// the root TLSF allocator. So, there is no point in making a smaller
// allocation here.
// Grid dimension: the free grid is FG_SIDE x FG_SIDE masks of 64 bits each.
#define FG_SIDE 64u
// NOTE(naman): 2^18 is 64*64*64 (= 262144), and that is the maximum count we support
#define FREE_GRID_MAXIMUM_ELEMENTS (1U << 18)
typedef struct Free_Grid {
    // 64x64 grid of 64-bit masks; a set bit means "slot is free".
    // NOTE(review): the `* sizeof(Uint64)` factor makes this array 8x larger
    // than the FG_SIDE*FG_SIDE masks ever indexed (fgAllocate asserts
    // mask_array_index < 64*64) — looks like an element-count/bytes mix-up.
    // Shrinking it must be done together with guarding fgCreate's trailing
    // partial-mask write; confirm before changing.
    Uint64 masks[FG_SIDE * FG_SIDE * sizeof(Uint64)];
    Uint8 count_masks_in_row[FG_SIDE]; // per row: number of non-zero masks
    Uint8 count_masks_in_col[FG_SIDE]; // per column: number of non-zero masks
    Uint64 row_mask; // bit r set => row r contains at least one non-zero mask
    Uint64 col_mask; // bit c set => column c contains at least one non-zero mask
    Uint32 count; // total slots managed by this grid
    Uint32 allocated; // slots currently handed out
} Free_Grid;
// Initialize the grid so that exactly `elem_count` slots are marked free.
header_function
void fgCreate (Free_Grid *fg, Uint32 elem_count)
{
    debugAssert(elem_count <= FREE_GRID_MAXIMUM_ELEMENTS);
    memzero(*fg);
    fg->count = elem_count;
    Uint32 fully_marked_mask_count = elem_count / 64;
    Uint32 extra_marked_bits_count = elem_count % 64;
    if (fully_marked_mask_count) memset(fg->masks, 0xFF, fully_marked_mask_count * sizeof(Uint64));
    // BUGFIX: only touch the trailing partial mask when it exists. The old
    // unconditional write shifted by (64 - 0) = 64 — undefined behavior —
    // whenever elem_count was a multiple of 64.
    if (extra_marked_bits_count) {
        fg->masks[fully_marked_mask_count] |= ((~cast_val(Uint64, 0x0ull)) >> (64 - extra_marked_bits_count));
    }
    Uint32 total_marked_masks = fully_marked_mask_count + (extra_marked_bits_count ? 1 : 0);
    Uint32 fully_filled_rows = total_marked_masks / FG_SIDE;
    Uint8 partially_filled_row_size = total_marked_masks % FG_SIDE;
    Uint32 total_rows = fully_filled_rows + (partially_filled_row_size ? 1 : 0);
    // Guard the shifts below as well: elem_count == 0 would otherwise shift by 64 (UB).
    fg->row_mask = total_rows ? ((~cast_val(Uint64, 0x0ull)) >> (64 - total_rows)) : 0;
    Uint32 col_bits_count = fully_filled_rows ? FG_SIDE : partially_filled_row_size;
    fg->col_mask = col_bits_count ? ((~cast_val(Uint64, 0x0ull)) >> (64 - minimum(FG_SIDE, col_bits_count))) : 0;
    for (Size i = 0; i < fully_filled_rows; i++) {
        fg->count_masks_in_row[i] = FG_SIDE;
    }
    if (partially_filled_row_size) {
        pragma_msvc("warning ( push )");
        pragma_msvc("warning ( disable: 6386 )"); // Analyze: WRITE_OVERRUN (Buffer overrun while writing to 'fg->count_masks_in_row')
        fg->count_masks_in_row[fully_filled_rows] = partially_filled_row_size;
        pragma_msvc("warning ( pop )");
    }
    for (Size i = 0; i < FG_SIDE; i++) {
        fg->count_masks_in_col[i] = cast_val(Uint8, fully_filled_rows);
        if (partially_filled_row_size && (i < partially_filled_row_size)) {
            fg->count_masks_in_col[i]++;
        }
    }
    return;
}
header_function
void fgDelete (Free_Grid *fg)
{
    // The grid owns no external resources; this is a no-op kept for API
    // symmetry with fgCreate.
    unused_variable(fg);
}
header_function
Bool fgAllocate (Free_Grid *fg, Uint32 *index)
{
    // Pop a free slot from the grid into *index. Returns false when the grid
    // has no free slots left (*index is untouched in that case).
    Uint8 row_first_bit_index = bitLSBU64(fg->row_mask);
    if (row_first_bit_index == cast_val(Uint8, -1)) { // row_mask empty => nothing free anywhere
        return false;
    }
    debugAssert(row_first_bit_index < 64);
    Uint8 col_first_bit_index = bitLSBU64(fg->col_mask);
    debugAssert(col_first_bit_index != cast_val(Uint8, -1));
    // col_mask is a grid-wide column hint; the hinted column may be empty in
    // THIS row, so scan right until a non-zero mask is found.
    while (!fg->masks[(row_first_bit_index * FG_SIDE) + col_first_bit_index]) {
        col_first_bit_index++;
    }
    debugAssert(col_first_bit_index < 64);
    Uint32 mask_array_index = row_first_bit_index * FG_SIDE + col_first_bit_index;
    debugAssert(mask_array_index < (64*64));
    debugAssert(fg->masks[mask_array_index]);
    Uint8 mask_first_bit_index = bitLSBU64(fg->masks[mask_array_index]);
    debugAssert(mask_first_bit_index != cast_val(Uint8, -1));
    *index = mask_array_index * 64 + cast_val(Uint32, mask_first_bit_index);
    debugAssert(*index < fg->count);
    fg->masks[mask_array_index] &= ~(1ULL << mask_first_bit_index); // claim the slot
    if (fg->masks[mask_array_index] == 0) { // mask just went empty: update row/col bookkeeping
        fg->count_masks_in_row[row_first_bit_index]--;
        fg->count_masks_in_col[col_first_bit_index]--;
        if (fg->count_masks_in_row[row_first_bit_index] == 0) {
            fg->row_mask &= ~(1ULL << row_first_bit_index);
        }
        if (fg->count_masks_in_col[col_first_bit_index] == 0) {
            fg->col_mask &= ~(1ULL << col_first_bit_index);
        }
    }
    fg->allocated++;
    return true;
}
// Mark slot `index` as free again and restore the row/column bookkeeping.
header_function
void fgFree (Free_Grid *fg, Uint32 index)
{
    debugAssert(index < fg->count);
    // The (index+1)-based arithmetic below is equivalent to
    // mask = index / 64 and bit = index % 64.
    Uint32 fully_traversed_mask_index = (index + 1) / 64;
    Uint32 extra_untreaded_bits_count = (index + 1) % 64;
    Uint32 total_relevant_mask_count = fully_traversed_mask_index + (extra_untreaded_bits_count ? 1 : 0);
    Uint32 final_relevant_mask_index = total_relevant_mask_count - 1;
    Uint32 row_bit_index = final_relevant_mask_index / FG_SIDE;
    Uint32 col_bit_index = final_relevant_mask_index - (row_bit_index * FG_SIDE);
    Uint32 relevant_bit_index;
    if (extra_untreaded_bits_count) {
        relevant_bit_index = extra_untreaded_bits_count - 1;
    } else {
        relevant_bit_index = 63;
    }
    debugAssert((fg->masks[final_relevant_mask_index] & (1ULL << relevant_bit_index)) == 0); // double-free check
    Uint64 old_mask = fg->masks[final_relevant_mask_index];
    fg->masks[final_relevant_mask_index] |= (1ULL << relevant_bit_index);
    if (old_mask == 0) { // mask went empty -> non-empty: update row/col bookkeeping
        fg->count_masks_in_row[row_bit_index]++;
        fg->count_masks_in_col[col_bit_index]++;
        fg->row_mask |= (1ULL << row_bit_index);
        fg->col_mask |= (1ULL << col_bit_index);
    }
    // BUGFIX: keep the live-allocation counter symmetric with fgAllocate's
    // increment; previously `allocated` only ever grew.
    fg->allocated--;
}
#undef FG_SIDE // BUGFIX: was `#undef FG_SIZE` (a typo), which silently leaked the FG_SIDE macro
/**************************************************************************************
* Memory
*/
header_function
void* memoryAlignUpPtrTo (void *p, Size align)
{
    // Round `p` up to the next multiple of `align` (align must be a power of two).
    Size mask = align - 1LLU;
    Size bumped = cast_mem(Uptr, p) + mask;
    Size aligned = bumped & cast_val(Uptr, ~mask);
    return cast_mem(void *, aligned);
}
header_function
Size memoryAlignUpSizeTo (Size s, Size align)
{
    // Round `s` up to the next multiple of `align` (align must be a power of two).
    Size mask = align - 1LLU;
    return (s + mask) & ~mask;
}
header_function
void* memoryAlignDownPtrTo (void *p, Size align)
{
    // Round `p` down to the previous multiple of `align`: stepping back by
    // (align - 1) and aligning up lands exactly on the containing boundary.
    Size mask = align - 1LLU;
    Byte *base = cast_val(Byte *, p);
    return cast_val(Byte *, memoryAlignUpPtrTo(base - mask, align));
}
header_function
Size memoryAlignDownSizeTo (Size s, Size align)
{
    // Round `s` down to the previous multiple of `align` via the align-up helper.
    Size mask = align - 1LLU;
    return memoryAlignUpSizeTo(s - mask, align);
}
// Type-generic front-ends: C11 dispatches on pointer vs Size with _Generic,
// C++ uses overloading.
#if defined(ENV_LANG_C) && ENV_LANG_C >= 2011
# define memoryAlignUpTo(x, a) _Generic((x), void*: memoryAlignUpPtrTo, Size:memoryAlignUpSizeTo)(x, a)
# define memoryAlignDownTo(x, a) _Generic((x), void*: memoryAlignDownPtrTo, Size:memoryAlignDownSizeTo)(x, a)
#elif defined(ENV_LANG_CPP)
header_function void* memoryAlignUpTo (void *p, Size align) { return memoryAlignUpPtrTo(p, align); }
header_function Size memoryAlignUpTo (Size s, Size align) { return memoryAlignUpSizeTo(s, align); }
header_function void* memoryAlignDownTo (void *p, Size align) { return memoryAlignDownPtrTo(p, align); }
header_function Size memoryAlignDownTo (Size s, Size align) { return memoryAlignDownSizeTo(s, align); }
#endif
// Convenience forms using the platform's strictest scalar alignment.
# define memoryAlignUp(x) (memoryAlignUpTo(x, alignof(max_align_t)))
# define memoryAlignDown(x) (memoryAlignDownTo(x, alignof(max_align_t)))
#define aligned_sizeof(x) memoryAlignUp(sizeof(x))
typedef struct Memory_Allocator_Interface Memory_Allocator_Interface;
// Signatures for the two callbacks every allocator must supply.
#define MEMORY_ALLOCATOR_REQUEST_FUNC(name) void* name (void *userdata, Size amount)
#define MEMORY_ALLOCATOR_RESCIND_FUNC(name) void name (void *userdata, void *ptr)
// Minimal allocator vtable: a context pointer plus request (allocate) and
// rescind (free) functions.
struct Memory_Allocator_Interface {
    void *userdata;
    MEMORY_ALLOCATOR_REQUEST_FUNC((*request));
    MEMORY_ALLOCATOR_RESCIND_FUNC((*rescind));
#if defined(ENV_SANITIZER_ADDRESS)
    Bool skip_address_sanitizer; // set via memIDisableASAN to skip poisoning
#endif
};
header_function
Memory_Allocator_Interface memICreate (void *userdata, MEMORY_ALLOCATOR_REQUEST_FUNC((*request)), MEMORY_ALLOCATOR_RESCIND_FUNC((*rescind)))
{
    // Bundle the two callbacks and their context into a zero-initialized interface value.
    Memory_Allocator_Interface iface;
    memzero(iface);
    iface.userdata = userdata;
    iface.request = request;
    iface.rescind = rescind;
    return iface;
}
// NOTE(naman): Since memIRescind poisons the memory in order to prevent use after free, we might get an
// "use after poisoning" error if that memory was to be reallocated. This might happen if the miface is being
// used to wrap a libc/system allocator. Thus, in such case, this function should be called to ensure that
// we don't end up with strange crashes upon memory allocation.
header_function
void memIDisableASAN (Memory_Allocator_Interface *miface)
{
    // Turn off memIRequest/memIRescind poisoning for this interface; needed
    // when wrapping allocators that legitimately reuse rescinded memory
    // (see the NOTE above).
    unused_variable(miface); // only read when ASAN is enabled
#if defined(ENV_SANITIZER_ADDRESS)
    miface->skip_address_sanitizer = true;
#endif
}
header_function
void* memIRequest (Memory_Allocator_Interface *miface, Size size)
{
    // Allocate `size` bytes through the interface. Under ASAN, a max-aligned
    // header holding the user size is prepended and kept poisoned so stray
    // accesses to it trip the sanitizer; memIRescind reads it back.
#if defined(ENV_SANITIZER_ADDRESS)
    Size real_size = size + alignof(max_align_t); // room for the size header
    Byte *orig_ptr = cast_val(Byte*, miface->request(miface->userdata, real_size));
    Byte *ptr = orig_ptr + (real_size - size); // user region starts right after the header
    if (miface->skip_address_sanitizer == false) {
        ASAN_UNPOISON_MEMORY_REGION(orig_ptr, real_size);
    }
    Size *s = cast_val(Size*, cast_val(void*, orig_ptr));
    static_assert(sizeof(*s) <= alignof(max_align_t), "Header is too small to store size");
    *s = size; // stash the user size for memIRescind
    if (miface->skip_address_sanitizer == false) {
        ASAN_POISON_MEMORY_REGION(orig_ptr, alignof(max_align_t)); // poison just the header
    }
    return ptr;
#else
    return miface->request(miface->userdata, size);
#endif
}
header_function
void memIRescind (Memory_Allocator_Interface *miface, void *ptr)
{
    // Free a pointer obtained from memIRequest. Under ASAN, step back to the
    // hidden size header, recover the full allocation size, poison the whole
    // region (use-after-free detection), and release the original pointer.
#if defined(ENV_SANITIZER_ADDRESS)
    Byte *p = cast_val(Byte*, ptr);
    void *orig_ptr = p - alignof(max_align_t); // header sits right before the user region
    if (miface->skip_address_sanitizer == false) {
        ASAN_UNPOISON_MEMORY_REGION(orig_ptr, alignof(max_align_t)); // so the header can be read
    }
    Size *s = cast_val(Size*, orig_ptr);
    static_assert(sizeof(*s) <= alignof(max_align_t), "Header is too small to fetch size");
    Size size = *s;
    Size real_size = size + alignof(max_align_t);
    if (miface->skip_address_sanitizer == false) {
        ASAN_POISON_MEMORY_REGION(orig_ptr, real_size);
    }
    miface->rescind(miface->userdata, orig_ptr);
#else
    miface->rescind(miface->userdata, ptr);
#endif
}
/*****************************************************************************************
* Memory Push Allocator
*/
typedef struct Memory_Push_Block_ {
    struct Memory_Push_Block_ *next; // singly-linked chain of blocks
    Size cap; // payload capacity of `data`, in bytes
    Size len; // bytes of `data` already handed out
    Byte _pad[8]; // keeps `data` max_align_t-aligned (checked just below)
    Byte data[]; // payload (flexible array member)
} Memory_Push_Block_;
static_assert(offsetof(Memory_Push_Block_, data) % alignof(max_align_t) == 0, "The data member has to be properly aligned");
typedef struct Memory_Push {
    Memory_Allocator_Interface *miface; // backing allocator for the blocks
    Memory_Push_Block_ *first, *last; // block chain; `last` is where allotments happen
    Size size; // default payload size for newly acquired blocks
} Memory_Push;
header_function
Size memPush_PaddingForAlignment (void *ptr, Size offset, Size alignment)
{
    // Number of filler bytes so that (ptr + offset + padding) is a multiple
    // of `alignment`; zero when it already is.
    Uptr location = cast_mem(Uptr, ptr) + offset;
    Uptr misalignment = location % alignment;
    if (misalignment == 0) {
        return 0;
    }
    return alignment - misalignment;
}
header_function
void* memPush_AcquireMemory (Memory_Allocator_Interface *miface, Size size)
{
    // Acquire one push block with `size` bytes of payload capacity; under
    // ASAN the payload starts poisoned and is unpoisoned per allotment.
    Size total = sizeof(Memory_Push_Block_) + size;
    Memory_Push_Block_ *block = cast_val(Memory_Push_Block_ *, memIRequest(miface, total));
    memzero(*block);
    block->cap = size;
    block->len = 0;
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_POISON_MEMORY_REGION(block->data, block->cap);
#endif
    return block;
}
// Create a push (arena) allocator whose blocks come from `miface`; `size` is
// the default payload size of each block.
header_function
Memory_Push memPushCreate (Memory_Allocator_Interface *miface, Size size)
{
    Memory_Push mp;
    memzero(mp);
    mp.miface = miface;
    mp.size = size;
    // memPush_AcquireMemory already zeroes the header and sets cap/len, so the
    // redundant memzero(*mp.first) + cap re-assignment that used to follow
    // here (and which would have clobbered fields set by the helper) is gone.
    mp.first = cast_val(Memory_Push_Block_ *, memPush_AcquireMemory(miface, size));
    mp.last = mp.first;
    return mp;
}
header_function
void memPushDelete (Memory_Push *push)
{
    // Return every block to the backing allocator, then clear the handle.
    Memory_Push_Block_ *block = push->first;
    while (block != nullptr) {
        Memory_Push_Block_ *next = block->next; // grab before the block is freed
#if defined(ENV_SANITIZER_ADDRESS)
        ASAN_UNPOISON_MEMORY_REGION(block->data, block->cap);
#endif
        memIRescind(push->miface, block);
        block = next;
    }
    memzero(*push);
}
// Allot `size` bytes aligned to `alignment` from the arena, growing the block
// chain on demand.
header_function
void* memPushAllotA (Memory_Push *push, Size size, Size alignment)
{
    // BUGFIX: the old code advanced to `push->last->next` exactly once and
    // assumed the request fit there. After memPushRemitAll the chain can
    // contain recycled blocks smaller than the current request, which would
    // overflow that block. Keep walking (allocating at the tail if needed)
    // until the request actually fits.
    Size pad = memPush_PaddingForAlignment(push->last->data, push->last->len, alignment);
    while ((push->last->len + pad + size) > push->last->cap) {
        if (push->last->next == nullptr) {
            // Fresh block: big enough for the request plus worst-case padding.
            // (memPush_AcquireMemory sets the block's cap/len itself.)
            Size s = maximum(push->size, size) + alignment;
            push->last->next = cast_val(Memory_Push_Block_ *, memPush_AcquireMemory(push->miface, s));
        }
        push->last = push->last->next;
        pad = memPush_PaddingForAlignment(push->last->data, push->last->len, alignment);
    }
    push->last->len += pad;
    void *m = push->last->data + push->last->len;
    push->last->len += size;
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_UNPOISON_MEMORY_REGION(m, size);
#endif
    return m;
}
header_function
void* memPushAllot (Memory_Push *push, Size size)
{
    // Convenience wrapper: allot with the strictest scalar alignment.
    return memPushAllotA(push, size, alignof(max_align_t));
}
header_function
void memPushRemitAll (Memory_Push *push)
{
    // Reset every block to empty (capacity is kept) so the arena can be reused.
    Memory_Push_Block_ *block = push->first;
    for (; block != nullptr; block = block->next) {
        block->len = 0;
#if defined(ENV_SANITIZER_ADDRESS)
        ASAN_POISON_MEMORY_REGION(block->data, block->cap); // everything is unhanded-out again
#endif
    }
    push->last = push->first;
}
/************************************************************************************************
* Memory Slab Allocator
*/
typedef struct Memory_Slab {
    Free_Grid fg; // tracks which element slots are free
    Memory_Allocator_Interface *miface; // backing allocator for `buffer`
    void *buffer; // contiguous storage for all elements
    Size memory_size; // elem_size * elem_count, in bytes
    Size elem_size; // size of one element, in bytes
    Size elem_count; // number of element slots
} Memory_Slab;
header_function
void slabCreate (Memory_Slab *s, Memory_Allocator_Interface *miface, Uint32 elem_count, Size elem_size)
{
    // Set up a fixed-size-element slab: a free grid tracking `elem_count`
    // slots plus one contiguous buffer backing all of them.
    memzero(*s);
    s->miface = miface;
    s->elem_size = elem_size;
    s->elem_count = elem_count;
    s->memory_size = elem_size * elem_count;
    fgCreate(&s->fg, elem_count);
    s->buffer = memIRequest(miface, s->memory_size);
#if defined(ENV_SANITIZER_ADDRESS)
    // Fresh slab memory starts poisoned; slabAllocate unpoisons per element.
    ASAN_POISON_MEMORY_REGION(s->buffer, s->memory_size);
#endif
}
header_function
void slabDelete (Memory_Slab *s)
{
    // Release the backing buffer (unpoisoning it first so the allocator may
    // touch it freely) and tear down the free grid.
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_UNPOISON_MEMORY_REGION(s->buffer, s->memory_size);
#endif
    memIRescind(s->miface, s->buffer);
    fgDelete(&s->fg);
}
// Hand out one element from the slab. Exhaustion is a programming error
// (caught by the assert in debug builds).
header_function
void* slabAllocate (Memory_Slab *s)
{
    Uint32 index = 0;
    // BUGFIX: the fgAllocate call used to live INSIDE debugAssert, so the
    // allocation side effect would vanish in builds where asserts compile out.
    Bool got_slot = fgAllocate(&s->fg, &index);
    unused_variable(got_slot); // only read by the assert below in debug builds
    debugAssert(got_slot);
    void *result = cast_val(Byte *, s->buffer) + (index * s->elem_size);
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_UNPOISON_MEMORY_REGION(result, s->elem_size);
#endif
    return result;
}
header_function
void slabFree (Memory_Slab *s, void *m)
{
    // Return element `m` to the slab: re-poison it and mark its slot free.
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_POISON_MEMORY_REGION(m, s->elem_size);
#endif
    // Slot index = byte offset of `m` from the buffer start, divided by the element size.
    Uint32 index = cast_val(Uint32, (cast_val(Dptr, (cast_val(Byte *, m) - cast_val(Byte *, s->buffer)))/cast_val(Dptr, s->elem_size)));
    fgFree(&s->fg, index);
}
/************************************************************************************************
* TLSF Allocator
*/
/*
 * The following is a sample table with buffer_size = GiB(4), min_alloc = MiB(1) and increment_step = 8
 * Each row is a primary level, where the buffer is divided into units of size being consecutive powers of two (geometric progression)
 * Each column then breaks each unit into a linear sub-unit of size being a constant increment (arithmetic progression).
*
* ________________________________________________________________________________________________________________
* | | 0 1 2 3 4 5 6 7 |
* |-------|------------------------------------------------------------------------------------------------------|
* | 31 | 2G 2G+256M 2G+512M 2G+768M 3G 3G+256M 3G+512M 3G+768M |
* | 30 | 1G 1G+128M 1G+256M 1G+384M 1G+512M 1G+640M 1G+768M 1G+896M |
* | 29 | 512M 576M 640M 704M 768M 832M 896M 960M |
* | 28 | 256M 288M 320M 352M 384M 416M 448M 480M |
* | 27 | 128M 144M 160M 176M 192M 208M 224M 240M |
* | 26 | 64M 72M 80M 88M 96M 104M 112M 120M |
* | 25 | 32M 36M 40M 44M 48M 52M 56M 60M |
* | 24 | 16M 18M 20M 22M 24M 26M 28M 30M |
* | 23 | 8M 9M 10M 11M 12M 13M 14M 15M |
* | 22 | 4M 4M+512K 5M 5M+512K 6M 6M+512K 7M 7M+512K |
* | 21 | 2M 2M+256K 2M+512K 2M+768K 3M 3M+256K 3M+512K 3M+768K |
* | 20 | 1M 1M+128K 1M+256K 1M+384K 1M+512K 1M+640K 1M+768K 1M+896K |
* |--------------------------------------------------------------------------------------------------------------|
*
* Reference:
* [1] : http://www.gii.upv.es/tlsf/files/papers/tlsf_desc.pdf
* [2] : http://www.gii.upv.es/tlsf/files/papers/ecrts04_tlsf.pdf
* [3] : http://www.gii.upv.es/tlsf/files/papers/jrts2008.pdf
*/
// TODO(naman): Right now, this is 64-bytes large. Once I am sure that the allocator is bug-free (i.e., once it has been
// tested enough), rename `next_empty_slot` and `next_free` to `next` and `prev_free` to `prev`, since a slot can either
// be empty or house a free node but not both. Also, store the `allocated` bool in the top bit of `size`. That will drop the
// size of the struct to 48-bytes, reducing the metadata size by close to 25%.
typedef struct TLSF_Node {
    Uint64 offset; // block start, as an offset into the managed buffer
    Uint64 size; // block size in bytes
    /* For empty slots in the list, this points to the next empty slot.
     */
    struct TLSF_Node *next_empty_slot;
    /* For free blocks, the next member points to the next node in the chain of
     * free blocks in the same size class. The chain's head pointer is stored in
     * the free_table.
     */
    struct TLSF_Node *next_free, *prev_free;
    /* For allocated and free blocks, the following members point to the nodes
     * associated with the blocks right after and before them in the address space.
     */
    struct TLSF_Node *next_mem, *prev_mem;
    Bool allocated; // false = free (make sure it's not left true for empty slots)
    Byte _pad[7];
} TLSF_Node;
static_assert(sizeof(TLSF_Node) % 16 == 0, "TLSF Nodes need to be of the same size as alignment, so that they can be packed in the node_list, etc.");
typedef struct TLSF {
    Memory_Allocator_Interface *miface; // allocator that provided metadata_buffer
    void *metadata_buffer; // single allocation housing free_table, node_list and row_bitmaps
    Uint64 buffer_size; // size of the managed (caller-owned) buffer
    Uint64 min_alloc; // smallest allocation granularity
    TLSF_Node **free_table; // rows x columns grid of free-list heads
    TLSF_Node *node_list; // slot pool for TLSF_Node bookkeeping entries
    TLSF_Node *first_empty_slot; // head of the empty-slot free list inside node_list
    Uint64 *row_bitmaps; // Array goes bottom to top, bitmap's LSB->MSB is left to right
    // NOTE(review): name presumably meant `column_bitmap`; renaming would have
    // to touch every use, so it is left as-is here.
    Uint64 column_bitap; // Bitmap's LSB->MSB is bottom to top
    Uint8 rows, columns; // table dimensions (rows = size classes, columns = increments)
    Byte _pad[6];
} TLSF;
// Head of the free-list chain for table cell (row r, column c).
#define TLSF_HEAD_OF_FREE_LIST_(t, r, c) ((t)->free_table[(t)->columns*(r) + (c)])
// Mapping from a size to its table cell; derivation:
// sl = log2(size)
// ml = log2(min_alloc)
// il = log2(increment)
//
// r = sl - ml
// c = (size - 2^sl) / ((2*2^sl - 2^sl)/2^il)
//   = (size - 2^sl) / (2^sl*(2 - 1) / 2^il)
//   = (size - 2^sl) / (2^sl / 2^il)
//   = (size - 2^sl) / (2^(sl-il))
//   = (size/2^(sl-il)) - (2^sl-(2^(sl-il)))
//   = (size/2^(sl-il)) - 2^il
//   = (size >> (sl-il)) - 2^il
//   = (size >> (sl-il)) - (1 << il)
#define TLSF_ROW_(t, s) cast_val(Uint8, (bitLog2U64(s) - bitLog2U64((t)->min_alloc)))
#define TLSF_COL_(t, s) cast_val(Uint8, (((s) >> (bitLog2U64(s) - bitLog2U64(cast_val(Uint64, ((t)->columns))))) - (1ULL << bitLog2U64(cast_val(Uint64, ((t)->columns))))))
// NOTE(naman): All three parameters should be powers of two with value >= 2.
// buffer_size should be greater than min_alloc
// Build a TLSF allocator managing offsets into a caller-owned buffer of
// `buffer_size` bytes. All three size parameters must be powers of two with
// value >= 2, and buffer_size must be greater than min_alloc.
header_function
TLSF tlsfCreate (Memory_Allocator_Interface *miface, Uint64 buffer_size, Uint64 min_alloc, Uint8 increment_steps)
{
    TLSF *dummy = nullptr; // used only for sizeof on member types
    Uint8 buffer_size_bits = bitLog2U64(buffer_size);
    Uint8 min_alloc_bits = bitLog2U64(min_alloc);
    Uint8 row_count = cast_val(Uint8, buffer_size_bits - min_alloc_bits);
    Size table_size = row_count * increment_steps * sizeof(*dummy->free_table);
    Size maximum_allocation_count = buffer_size / min_alloc;
    Size list_size = maximum_allocation_count * sizeof(*dummy->node_list);
    TLSF tlsf;
    memzero(tlsf);
    tlsf.miface = miface;
    tlsf.buffer_size = buffer_size;
    tlsf.min_alloc = min_alloc;
    tlsf.rows = row_count;
    tlsf.columns = increment_steps;
    Size row_bitmap_size = row_count * sizeof(*dummy->row_bitmaps);
    Size total_size = table_size + list_size + row_bitmap_size;
    tlsf.metadata_buffer = memIRequest(miface, total_size);
    // BUGFIX: memIRequest gives no zeroing guarantee, yet the free-table heads
    // and row bitmaps were read (and or-ed into) before ever being written.
    // Zero all metadata up front.
    memset(tlsf.metadata_buffer, 0, total_size);
    pragma_clang("clang diagnostic push");
    pragma_clang("clang diagnostic ignored \"-Wcast-align\"");
    tlsf.free_table = cast_val(TLSF_Node **, tlsf.metadata_buffer);
    Byte *node_list_ptr = cast_mem(Byte*, tlsf.free_table) + table_size;
    tlsf.node_list = cast_mem(TLSF_Node *, node_list_ptr);
    Byte *row_bitmaps_ptr = cast_mem(Byte*, tlsf.node_list) + list_size;
    tlsf.row_bitmaps = cast_mem(Uint64 *, row_bitmaps_ptr);
    pragma_clang("clang diagnostic pop");
    tlsf.first_empty_slot = &tlsf.node_list[0];
    // Thread every slot onto the empty-slot list.
    // BUGFIX: the loop used to stop one element short, leaving the LAST node
    // (including its next_empty_slot pointer) uninitialized; exhausting the
    // empty list would then chase a garbage pointer.
    for (Size i = 0; i < maximum_allocation_count; i++) {
        memzero(tlsf.node_list[i]); // also leaves next_empty_slot null for the final node
        if ((i + 1) < maximum_allocation_count) {
            tlsf.node_list[i].next_empty_slot = &tlsf.node_list[i] + 1;
        }
    }
    { // Adding the full buffer in free_table
        TLSF_Node node;
        memzero(node);
        node.offset = 0;
        node.size = buffer_size - 1; // -1 keeps the size class inside the last table row
        Uint8 row = TLSF_ROW_(&tlsf, node.size);
        Uint8 col = TLSF_COL_(&tlsf, node.size);
        TLSF_Node *slot_for_full_buffer = tlsf.first_empty_slot;
        tlsf.first_empty_slot = slot_for_full_buffer->next_empty_slot;
        *slot_for_full_buffer = node;
        TLSF_HEAD_OF_FREE_LIST_(&tlsf, row, col) = slot_for_full_buffer;
        tlsf.column_bitap |= (1ULL << row);
        tlsf.row_bitmaps[row] |= (1ULL << col);
    }
    return tlsf;
}
header_function
void tlsfDelete (TLSF *tlsf)
{
    // All TLSF bookkeeping lives in the single metadata allocation; releasing
    // it tears the allocator down. (TLSF only tracks offsets — it never
    // allocated the managed data buffer itself.)
    memIRescind(tlsf->miface, tlsf->metadata_buffer);
}
header_function
TLSF_Node* tlsfAllocate (TLSF *tlsf, Size size_requested)
{
    // O(1) allocation: map the (rounded-up) size to a (row, col) table slot,
    // take the head of the first non-empty free list at or above that slot,
    // split off any excess as a new free node, and mark the result allocated.
    // Returns nullptr when the request cannot be satisfied.
    if (size_requested > tlsf->buffer_size) return nullptr;
    Size size = maximum(size_requested, tlsf->min_alloc);
    // The next slot for size (so that we do a loop-less good fit for real-time instead of a looped best fit)
    // sl = log2(size)
    // il = log2(increment)
    //
    // gap = size_of_each_column_in_that_row = ((2*2^sl - 2^sl)/2^il) = 2^(sl-il)
    // size_next = size + 2^(sl-il)
    // However, in case of size = 2^sl, we don't want to go to the next block. Thus, we should subtract 1 from size_next,
    // so that we end up back in the same table slot in that particular case.
    // => size_next = size + 2^(sl-il) - 1
    Size size_class = size + (1ULL << (bitLog2U64(size) - bitLog2U64(cast_val(Uint64, tlsf->columns)))) - 1;
    size_class = minimum(size_class, tlsf->buffer_size-1);
    Size row = TLSF_ROW_(tlsf, size_class); // fl in [1]
    Size col = TLSF_COL_(tlsf, size_class); // sl in [1]
    // Search this row for a free list at or to the right of col
    Uint64 row_bitmap = tlsf->row_bitmaps[row] & (0xFFFFFFFFFFFFFFFFULL << col);
    Size row_available, col_available;
    if (row_bitmap) {
        col_available = bitLSBU64(row_bitmap);
        row_available = row;
    } else {
        // Nothing here; fall over to the first larger row with any free list
        Uint64 col_bitmap = tlsf->column_bitap & (0xFFFFFFFFFFFFFFFFULL << (row + 1));
        if (col_bitmap) {
            row_available = bitLSBU64(col_bitmap);
            col_available = bitLSBU64(tlsf->row_bitmaps[row_available]);
        } else {
            return nullptr; // No free block is large enough
        }
    }
    // Get the free node of the free list and mark it used
    TLSF_Node *free_node = TLSF_HEAD_OF_FREE_LIST_(tlsf, row_available, col_available); // This will definitely succeed, so free_node != nullptr
    TLSF_HEAD_OF_FREE_LIST_(tlsf, row_available, col_available) = free_node->next_free; // Next free node in the size class, might be zero or non-zero;
    if (free_node->next_free) free_node->next_free->prev_free = nullptr;
    if (TLSF_HEAD_OF_FREE_LIST_(tlsf, row_available, col_available) == nullptr) { // but do handle zero separately.
        tlsf->row_bitmaps[row_available] &= ~(1ULL << col_available);
        if (tlsf->row_bitmaps[row_available] == 0) {
            tlsf->column_bitap &= ~(1ULL << row_available);
        }
    }
    TLSF_Node *split_node_slot = nullptr;
    if (free_node->size >= (size + tlsf->min_alloc)) { // If the acquired block is too big
        Size split_node_size = free_node->size - size;
        Uint8 row_split = TLSF_ROW_(tlsf, split_node_size);
        Uint8 col_split = TLSF_COL_(tlsf, split_node_size);
        // Find an empty slot in the list and put the split node in it
        split_node_slot = tlsf->first_empty_slot;
        tlsf->first_empty_slot = split_node_slot->next_empty_slot;
        memzero(*split_node_slot); // BUGFIX: this clear was duplicated; once is enough
        split_node_slot->offset = free_node->offset + size;
        split_node_slot->size = split_node_size;
        split_node_slot->prev_mem = free_node;
        split_node_slot->next_mem = free_node->next_mem;
        // BUGFIX: the old right neighbour must point back at the split node,
        // otherwise its prev_mem still references free_node and the memory
        // list is corrupted for later merges.
        if (split_node_slot->next_mem) split_node_slot->next_mem->prev_mem = split_node_slot;
        split_node_slot->next_free = TLSF_HEAD_OF_FREE_LIST_(tlsf, row_split, col_split);
        if (TLSF_HEAD_OF_FREE_LIST_(tlsf, row_split, col_split)) {
            TLSF_HEAD_OF_FREE_LIST_(tlsf, row_split, col_split)->prev_free = split_node_slot;
        }
        TLSF_HEAD_OF_FREE_LIST_(tlsf, row_split, col_split) = split_node_slot;
        tlsf->column_bitap |= (1ULL << row_split);
        tlsf->row_bitmaps[row_split] |= (1ULL << col_split);
    }
    // Create a node for the allocated block
    TLSF_Node free_node_copy = *free_node;
    memzero(*free_node);
    free_node->offset = free_node_copy.offset;
    free_node->size = size;
    free_node->prev_mem = free_node_copy.prev_mem;
    // BUGFIX: when no split happened, the node keeps its original right
    // neighbour; unconditionally assigning split_node_slot (nullptr) would
    // sever the memory list.
    free_node->next_mem = (split_node_slot != nullptr) ? split_node_slot : free_node_copy.next_mem;
    free_node->allocated = true;
    return free_node;
}
header_function
void tlsf_MergeLeft (TLSF *tlsf, TLSF_Node *node)
{
    // Coalesce node with its left (lower-address) neighbour if that neighbour
    // is free: node absorbs prev's range (offset moves left, size grows) and
    // prev's slot is recycled. If node itself is free, it is reinserted into
    // the free table under its new size class.
    TLSF_Node *prev = node->prev_mem;
    if (prev && !prev->allocated) {
        Size total_size = node->size + prev->size;
        Size size_old = node->size;
        // Expand the node
        node->offset = prev->offset;
        node->size = total_size;
        { // Remove prev
            Size row_prev = TLSF_ROW_(tlsf, prev->size); // fl in [1]
            Size col_prev = TLSF_COL_(tlsf, prev->size); // sl in [1]
            // Unlink prev from its free list
            if (prev->next_free) prev->next_free->prev_free = prev->prev_free;
            if (prev->prev_free) prev->prev_free->next_free = prev->next_free;
            if (prev->prev_free == nullptr) {
                // BUGFIX: prev was the head of its free list, so the head must
                // advance to its successor. (Previously the head was touched
                // only when the list emptied, leaving it dangling on prev.)
                TLSF_HEAD_OF_FREE_LIST_(tlsf, row_prev, col_prev) = prev->next_free;
                if (prev->next_free == nullptr) { // List now empty: clear bitmap bits
                    tlsf->row_bitmaps[row_prev] &= ~(1ULL << col_prev);
                    // BUGFIX: clear the first-level bit only when the whole
                    // row is empty (matches the logic in tlsfAllocate).
                    if (tlsf->row_bitmaps[row_prev] == 0) {
                        tlsf->column_bitap &= ~(1ULL << row_prev);
                    }
                }
            }
            // Remove prev from the memory list
            if (prev->prev_mem) prev->prev_mem->next_mem = node;
            node->prev_mem = prev->prev_mem;
            // Return prev's slot to the empty slot list
            prev->next_empty_slot = tlsf->first_empty_slot;
            tlsf->first_empty_slot = prev;
        }
        if (!node->allocated) { // Remove and reinsert node
            Size row_old = TLSF_ROW_(tlsf, size_old);   // fl in [1]
            Size col_old = TLSF_COL_(tlsf, size_old);   // sl in [1]
            Size row_new = TLSF_ROW_(tlsf, node->size); // fl in [1]
            Size col_new = TLSF_COL_(tlsf, node->size); // sl in [1]
            // BUGFIX: the size class changes if EITHER coordinate changes;
            // the old `&&` skipped the move when only one of them differed.
            if ((row_new != row_old) || (col_new != col_old)) {
                // Remove node from its old free list
                if (node->next_free) node->next_free->prev_free = node->prev_free;
                if (node->prev_free) node->prev_free->next_free = node->next_free;
                if (node->prev_free == nullptr) { // node was the head
                    TLSF_HEAD_OF_FREE_LIST_(tlsf, row_old, col_old) = node->next_free;
                    if (node->next_free == nullptr) {
                        tlsf->row_bitmaps[row_old] &= ~(1ULL << col_old);
                        if (tlsf->row_bitmaps[row_old] == 0) {
                            tlsf->column_bitap &= ~(1ULL << row_old);
                        }
                    }
                }
                // Add node to the free list of its new size class
                node->prev_free = nullptr; // BUGFIX: clear stale back-link
                node->next_free = TLSF_HEAD_OF_FREE_LIST_(tlsf, row_new, col_new);
                if (node->next_free) node->next_free->prev_free = node;
                TLSF_HEAD_OF_FREE_LIST_(tlsf, row_new, col_new) = node;
                // Add node to table bitmap
                tlsf->column_bitap |= (1ULL << row_new);
                tlsf->row_bitmaps[row_new] |= (1ULL << col_new);
            }
        }
    }
}
header_function
void tlsf_MergeRight (TLSF *tlsf, TLSF_Node *node)
{
    // Coalesce node with its right (higher-address) neighbour if that
    // neighbour is free: node keeps its offset, absorbs next's size, and
    // next's slot is recycled. If node itself is free, it is reinserted into
    // the free table under its new size class.
    TLSF_Node *next = node->next_mem;
    if (next && !next->allocated) {
        Size total_size = node->size + next->size;
        Size size_old = node->size;
        // Expand the node
        // Offset doesn't change
        node->size = total_size;
        { // Remove next
            Size row_next = TLSF_ROW_(tlsf, next->size); // fl in [1]
            Size col_next = TLSF_COL_(tlsf, next->size); // sl in [1]
            // Unlink next from its free list
            if (next->next_free) next->next_free->prev_free = next->prev_free;
            if (next->prev_free) next->prev_free->next_free = next->next_free;
            if (next->prev_free == nullptr) {
                // BUGFIX: next was the head of its free list, so the head must
                // advance to its successor. (Previously the head was touched
                // only when the list emptied, leaving it dangling on next.)
                TLSF_HEAD_OF_FREE_LIST_(tlsf, row_next, col_next) = next->next_free;
                if (next->next_free == nullptr) { // List now empty: clear bitmap bits
                    tlsf->row_bitmaps[row_next] &= ~(1ULL << col_next);
                    // BUGFIX: clear the first-level bit only when the whole
                    // row is empty (matches the logic in tlsfAllocate).
                    if (tlsf->row_bitmaps[row_next] == 0) {
                        tlsf->column_bitap &= ~(1ULL << row_next);
                    }
                }
            }
            // Remove next from the memory list
            if (next->next_mem) next->next_mem->prev_mem = node;
            node->next_mem = next->next_mem;
            // Return next's slot to the empty slot list
            next->next_empty_slot = tlsf->first_empty_slot;
            tlsf->first_empty_slot = next;
        }
        if (!node->allocated) { // Remove and reinsert node
            Size row_old = TLSF_ROW_(tlsf, size_old);   // fl in [1]
            Size col_old = TLSF_COL_(tlsf, size_old);   // sl in [1]
            Size row_new = TLSF_ROW_(tlsf, node->size); // fl in [1]
            Size col_new = TLSF_COL_(tlsf, node->size); // sl in [1]
            // BUGFIX: the size class changes if EITHER coordinate changes;
            // the old `&&` skipped the move when only one of them differed.
            if ((row_new != row_old) || (col_new != col_old)) {
                // Remove node from its old free list
                if (node->next_free) node->next_free->prev_free = node->prev_free;
                if (node->prev_free) node->prev_free->next_free = node->next_free;
                if (node->prev_free == nullptr) { // node was the head
                    TLSF_HEAD_OF_FREE_LIST_(tlsf, row_old, col_old) = node->next_free;
                    if (node->next_free == nullptr) {
                        tlsf->row_bitmaps[row_old] &= ~(1ULL << col_old);
                        if (tlsf->row_bitmaps[row_old] == 0) {
                            tlsf->column_bitap &= ~(1ULL << row_old);
                        }
                    }
                }
                // Add node to the free list of its new size class
                node->prev_free = nullptr; // BUGFIX: clear stale back-link
                node->next_free = TLSF_HEAD_OF_FREE_LIST_(tlsf, row_new, col_new);
                if (node->next_free) node->next_free->prev_free = node;
                TLSF_HEAD_OF_FREE_LIST_(tlsf, row_new, col_new) = node;
                // Add node to table bitmap
                tlsf->column_bitap |= (1ULL << row_new);
                tlsf->row_bitmaps[row_new] |= (1ULL << col_new);
            }
        }
    }
}
header_function
Bool tlsfAdjustLeft (TLSF *tlsf, TLSF_Node *node, Size new_size)
{
    // Resize an allocated node by moving its LEFT edge: first greedily absorb
    // any free block immediately to the left, then carve the unneeded front
    // part off as a new free "remnant" node. On success the node's offset
    // changes. Returns false for free nodes or when new_size cannot be met.
    if (!node->allocated) return false;
    new_size = maximum(new_size, tlsf->min_alloc);
    tlsf_MergeLeft(tlsf, node);
    // NOTE(review): if the merge succeeded but the check below fails, the
    // absorbed space stays attached to the node (no rollback) — confirm that
    // callers treat a false return as "unchanged size contract, still valid".
    if (node->size < new_size) return false;
    Size remnant_size = node->size - new_size;
    if (remnant_size == 0) return true;
    if (remnant_size < tlsf->min_alloc) return true; // Too small to track; keep the slack
    { // Add a remnant node
        // Get an empty slot
        // NOTE(review): first_empty_slot is assumed non-null here (as in
        // tlsfAllocate) — confirm slot exhaustion cannot occur.
        TLSF_Node *remnant = tlsf->first_empty_slot;
        tlsf->first_empty_slot = remnant->next_empty_slot;
        memzero(*remnant);
        // Size it properly: the remnant takes the front, node shifts right
        remnant->offset = node->offset;
        remnant->size = remnant_size;
        node->offset += remnant->size;
        node->size = new_size;
        // Add to memory list (remnant sits immediately before node)
        remnant->next_mem = node;
        remnant->prev_mem = node->prev_mem;
        if (node->prev_mem) node->prev_mem->next_mem = remnant;
        node->prev_mem = remnant;
        // Add to free list and set the table bitmap bits
        Size row = TLSF_ROW_(tlsf, remnant->size); // fl in [1]
        Size col = TLSF_COL_(tlsf, remnant->size); // sl in [1]
        tlsf->column_bitap |= (1ULL << row);
        tlsf->row_bitmaps[row] |= (1ULL << col);
        remnant->next_free = TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col);
        if (TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col)) TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col)->prev_free = remnant;
        TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col) = remnant;
    }
    return true;
}
header_function
Bool tlsfAdjustRight (TLSF *tlsf, TLSF_Node *node, Size new_size)
{
    // Resize an allocated node by moving its RIGHT edge: first greedily absorb
    // any free block immediately to the right, then carve the unneeded tail
    // off as a new free "remnant" node. The node's offset never changes.
    // Returns false for free nodes or when new_size cannot be met.
    if (!node->allocated) return false;
    new_size = maximum(new_size, tlsf->min_alloc);
    tlsf_MergeRight(tlsf, node);
    // NOTE(review): as with tlsfAdjustLeft, a successful merge is not rolled
    // back when the size check below fails.
    if (node->size < new_size) return false;
    Size remnant_size = node->size - new_size;
    if (remnant_size == 0) return true;
    if (remnant_size < tlsf->min_alloc) return true; // Too small to track; keep the slack
    { // Add a remnant node
        // Get an empty slot (assumed available; see tlsfAdjustLeft note)
        TLSF_Node *remnant = tlsf->first_empty_slot;
        tlsf->first_empty_slot = remnant->next_empty_slot;
        memzero(*remnant);
        // Size it properly: the remnant takes the tail
        remnant->offset = node->offset + new_size;
        remnant->size = remnant_size;
        // node->offset doesn't change
        node->size = new_size;
        // Add to memory list (remnant sits immediately after node)
        remnant->prev_mem = node;
        remnant->next_mem = node->next_mem;
        if (node->next_mem) node->next_mem->prev_mem = remnant;
        node->next_mem = remnant;
        // Add to free list and set the table bitmap bits
        Size row = TLSF_ROW_(tlsf, remnant->size); // fl in [1]
        Size col = TLSF_COL_(tlsf, remnant->size); // sl in [1]
        tlsf->column_bitap |= (1ULL << row);
        tlsf->row_bitmaps[row] |= (1ULL << col);
        remnant->next_free = TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col);
        if (TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col)) TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col)->prev_free = remnant;
        TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col) = remnant;
    }
    return true;
}
header_function
Bool tlsfFree (TLSF *tlsf, TLSF_Node *node)
{
    // Free an allocated node: coalesce with free neighbours on both sides,
    // then push the (possibly enlarged) node onto the head of the free list
    // for its size class. Returns false for a node that is already free.
    if (!node->allocated) return false; // Double-free guard
    tlsf_MergeLeft(tlsf, node);
    tlsf_MergeRight(tlsf, node);
    Size row = TLSF_ROW_(tlsf, node->size); // fl in [1]
    Size col = TLSF_COL_(tlsf, node->size); // sl in [1]
    // Head-insert into the free list and mark the table slot occupied
    node->next_free = TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col);
    if (node->next_free) node->next_free->prev_free = node;
    TLSF_HEAD_OF_FREE_LIST_(tlsf, row, col) = node;
    tlsf->column_bitap |= (1ULL << row);
    tlsf->row_bitmaps[row] |= (1ULL << col);
    node->allocated = false;
    return true;
}
header_function
Bool tlsfFreeAll (TLSF *tlsf)
{
    // Cheapest possible "free everything": throw the metadata away and
    // rebuild the allocator from the parameters it already carries.
    tlsfDelete(tlsf);
    *tlsf = tlsfCreate(tlsf->miface, tlsf->buffer_size, tlsf->min_alloc, tlsf->columns);
    return true;
}
header_function
Size tlsfGetSize (TLSF *tlsf, TLSF_Node *node)
{
    // Size query for an allocation; free nodes report 0.
    unused_variable(tlsf);
    return node->allocated ? node->size : 0;
}
/************************************************************************************************
* TLSF-based General Purpose Memory Allocator
*/
// General-purpose allocator: a TLSF index over one big arena obtained from a
// parent allocator.
typedef struct Memory {
    TLSF tlsf;                                 // Bookkeeping for `buffer`
    Memory_Allocator_Interface *parent_miface; // Source of `buffer` and the TLSF metadata
    Byte *buffer;                              // Arena that all allocations live in
    Memory_Allocator_Interface miface; // Fetch with memGetInterface
    Size max_mem;                              // Size of `buffer` in bytes
} Memory;
// Hidden header stored MEMORY_HEADER_SIZE_ bytes before every user pointer.
typedef struct Memory_Header_ {
    TLSF_Node *node; // TLSF bookkeeping node for this allocation
    Size size;       // User-requested size (excluding the header)
} Memory_Header_;
// For Memory_Allocator_Interface
header_function void* mem_Allocate (void *mt, Size size);
header_function void mem_Free (void *mt, void *ptr);
// NOTE(naman): With (512 MiB, 32 KiB, 16), it will need slightly more than 1 MiB of metadata.
header_function
void memCreate (Memory *memory, Memory_Allocator_Interface *parent_miface, Size memory_size, Size minimum_allocation_size, Uint8 increment_per_slot)
{
    // Set up a general-purpose allocator: build the TLSF index, request the
    // arena from the parent allocator, and expose an interface bound to it.
    Size max_mem = memory_size, min_mem = minimum_allocation_size;
    Uint8 inc = increment_per_slot;
    // TLSF's table math requires power-of-two sizing parameters
    debugAssert(max_mem > min_mem);
    debugAssert(bitIsPowerOf2(max_mem));
    debugAssert(bitIsPowerOf2(min_mem));
    debugAssert(bitIsPowerOf2(cast_val(Uint32, inc)));
    TLSF tlsf = tlsfCreate(parent_miface, max_mem, min_mem, inc);
    void *buffer = memIRequest(parent_miface, max_mem);
    *memory = compound_literal(Memory, {
            .tlsf = tlsf,
            .parent_miface = parent_miface,
            .buffer = cast_val(Byte*, buffer),
            .miface = memICreate(memory, mem_Allocate, mem_Free),
            .max_mem = max_mem,
        });
#if defined(ENV_SANITIZER_ADDRESS)
    // The whole arena starts poisoned; memAllocate unpoisons what it hands out.
    ASAN_POISON_MEMORY_REGION(memory->buffer, memory->max_mem);
#endif
    return;
}
header_function
void memRemove (Memory *mt)
{
    // Tear the allocator down: drop the TLSF metadata, unpoison the arena so
    // the parent allocator may legally touch it again, then return the arena.
    tlsfDelete(&mt->tlsf);
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_UNPOISON_MEMORY_REGION(mt->buffer, mt->max_mem);
#endif
    memIRescind(mt->parent_miface, mt->buffer);
}
// Space reserved in front of every allocation; two max_align_t units so the
// user pointer stays maximally aligned.
#define MEMORY_HEADER_SIZE_ (2 * alignof(max_align_t))
static_assert(sizeof(Memory_Header_) <= MEMORY_HEADER_SIZE_, "Size of memory header is too big to fit");
header_function
Memory_Header_* mem_GetHeader (Byte *ptr)
{
    // The header lives MEMORY_HEADER_SIZE_ bytes before every user pointer.
    Byte *raw = ptr - MEMORY_HEADER_SIZE_;
    return cast_val(Memory_Header_*, cast_val(void*, raw));
}
header_function
void* memAllocate (Memory *mt, Size size)
{
    // Allocate `size` usable bytes: ask TLSF for size + header, stash the
    // TLSF node (and the requested size) in the hidden header, and return the
    // pointer just past it. Returns nullptr when TLSF cannot satisfy the
    // request.
    Size real_size = size + MEMORY_HEADER_SIZE_;
    TLSF_Node *node = tlsfAllocate(&mt->tlsf, real_size);
    // BUGFIX: tlsfAllocate returns nullptr on exhaustion/oversized requests;
    // previously node->offset was dereferenced unconditionally.
    if (node == nullptr) return nullptr;
    Byte *result = mt->buffer + node->offset + MEMORY_HEADER_SIZE_;
    Memory_Header_ *header = mem_GetHeader(result);
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_UNPOISON_MEMORY_REGION(header, sizeof(*header));
#endif
    header->node = node;
    // BUGFIX: header->size was never initialized, yet memFree reads it to
    // decide how much memory to poison.
    header->size = size;
#if defined(ENV_SANITIZER_ADDRESS)
    // Keep the header poisoned between API calls; only the user region is live
    ASAN_POISON_MEMORY_REGION(header, sizeof(*header));
    ASAN_UNPOISON_MEMORY_REGION(result, size);
#endif
    return result;
}
// For Memory_Allocator_Interface
header_function
void* mem_Allocate (void *mt, Size size) {
    // Interface thunk: recover the Memory* and delegate.
    Memory *memory = cast_val(Memory*, mt);
    return memAllocate(memory, size);
}
header_function
void* memAdjustLeft (Memory *mt, void *ptr, Size size)
{
    // Resize an allocation by moving its left edge (see tlsfAdjustLeft). On
    // success the user pointer MOVES; the header is re-written at its new
    // location. Returns the new pointer, or nullptr on failure (the old
    // allocation is then still valid).
    Memory_Header_ *header = mem_GetHeader(cast_val(Byte*, ptr));
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_UNPOISON_MEMORY_REGION(header, sizeof(*header));
#endif
    Size real_size = size + MEMORY_HEADER_SIZE_;
    if (tlsfAdjustLeft(&mt->tlsf, header->node, real_size)) {
        Byte *result = mt->buffer + header->node->offset + MEMORY_HEADER_SIZE_;
        Memory_Header_ *header2 = mem_GetHeader(result);
#if defined(ENV_SANITIZER_ADDRESS)
        // BUGFIX: when the offset changed, header2 lands in previously-free
        // (poisoned) memory; writing it without unpoisoning traps under ASAN.
        ASAN_UNPOISON_MEMORY_REGION(header2, sizeof(*header2));
#endif
        header2->node = header->node;
        header2->size = size; // Keep the recorded user size in sync
#if defined(ENV_SANITIZER_ADDRESS)
        if (header2 != header) ASAN_POISON_MEMORY_REGION(header, sizeof(*header));
        ASAN_POISON_MEMORY_REGION(header2, sizeof(*header2));
        ASAN_UNPOISON_MEMORY_REGION(result, size);
#endif
        return result;
    } else {
#if defined(ENV_SANITIZER_ADDRESS)
        // BUGFIX: re-poison the header on the failure path too
        ASAN_POISON_MEMORY_REGION(header, sizeof(*header));
#endif
        return nullptr;
    }
}
header_function
void* memAdjustRight (Memory *mt, void *ptr, Size size)
{
    // Resize an allocation by moving its right edge (see tlsfAdjustRight).
    // The offset — and therefore the user pointer and header position — does
    // not change on success, so header2 == header here.
    Memory_Header_ *header = mem_GetHeader(cast_val(Byte*, ptr));
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_UNPOISON_MEMORY_REGION(header, sizeof(*header));
#endif
    Size real_size = size + MEMORY_HEADER_SIZE_;
    // NOTE(review): on the failure path below the header region is left
    // unpoisoned under ASAN, and header->size is not refreshed on success —
    // confirm both against the conventions used by memAllocate/memAdjustLeft.
    if (tlsfAdjustRight(&mt->tlsf, header->node, real_size)) {
        Byte *result = mt->buffer + header->node->offset + MEMORY_HEADER_SIZE_;
        Memory_Header_ *header2 = mem_GetHeader(result);
        header2->node = header->node;
#if defined(ENV_SANITIZER_ADDRESS)
        ASAN_POISON_MEMORY_REGION(header, sizeof(*header));
        ASAN_UNPOISON_MEMORY_REGION(result, size);
#endif
        return result;
    } else return nullptr;
}
header_function
void memFree (Memory *mt, void *ptr)
{
    // Release an allocation obtained from memAllocate/memAdjust*. Under ASAN
    // the freed user region and the header are poisoned again.
    Memory_Header_ *header = mem_GetHeader(cast_val(Byte*, ptr));
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_UNPOISON_MEMORY_REGION(header, sizeof(*header));
    // BUGFIX: header->size is not reliably initialized on every allocation
    // path, so derive the user-visible size from the TLSF node instead —
    // and do it BEFORE tlsfFree, which merges the node and changes its size.
    Size node_size = tlsfGetSize(&mt->tlsf, header->node);
    Size user_size = (node_size > MEMORY_HEADER_SIZE_) ? (node_size - MEMORY_HEADER_SIZE_) : 0;
#endif
    tlsfFree(&mt->tlsf, header->node);
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_POISON_MEMORY_REGION(ptr, user_size);
    ASAN_POISON_MEMORY_REGION(header, sizeof(*header));
#endif
}
// For Memory_Allocator_Interface
header_function
void mem_Free (void *mt, void *ptr)
{
memFree(cast_val(Memory*, mt), ptr);
}
header_function
void memFreeAll (Memory *mt)
{
    // Invalidate every outstanding allocation at once by resetting the TLSF
    // bookkeeping, then re-poison the entire arena since nothing survives.
    tlsfFreeAll(&mt->tlsf);
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_POISON_MEMORY_REGION(mt->buffer, mt->max_mem);
#endif
}
header_function
Size memGetSize (Memory *mt, void *ptr)
{
    // Report the usable size of an allocation; 0 if the node is not allocated.
    Memory_Header_ *header = mem_GetHeader(cast_val(Byte*, ptr));
#if defined(ENV_SANITIZER_ADDRESS)
    // BUGFIX: the header is kept poisoned between API calls (see memAllocate);
    // it must be unpoisoned before header->node is read.
    ASAN_UNPOISON_MEMORY_REGION(header, sizeof(*header));
#endif
    Size node_size = tlsfGetSize(&mt->tlsf, header->node);
#if defined(ENV_SANITIZER_ADDRESS)
    ASAN_POISON_MEMORY_REGION(header, sizeof(*header));
#endif
    // BUGFIX: tlsfGetSize returns 0 for a free node; subtracting the header
    // size from 0 would underflow to a huge value.
    if (node_size <= MEMORY_HEADER_SIZE_) return 0;
    return node_size - MEMORY_HEADER_SIZE_;
}
header_function
Memory_Allocator_Interface* memGetInterface (Memory *mt)
{
    // Expose the embedded interface so this allocator can back other systems.
    return &mt->miface;
}
/************************************************************************************************
* Label (Small immutable stack-allocated strings)
* *************************************
*/
// We put the string at the end since most reads will be about comparing the hash and length;
// and even in cases when one has to read the string, the likelihood is that the string will be less
// than 252 characters long, meaning that later cache lines will never actually load.
typedef struct Label {
    Uint16 crc;     // labelCRC16 over str[0..len)
    Uint8 pearson;  // labelPearson over str[0..len)
    Uint8 len;      // Number of bytes used in str
    Char str[252];  // String bytes; trailing bytes are zero when built via labelMakeL
} Label;
static_assert(sizeof(Label) == 256);
static_assert(elemin((Label){}.str) == 252);
// Reference: Fast CRCs by Gam D. Nguyen (https://arxiv.org/pdf/1009.5949) [Fig. 12, Page 19]
header_function
Uint16 labelCRC16 (Label label)
{
    // Table-less CRC-16, processing one byte of the label per iteration.
    const Sint shift = 8;      // s: bits consumed per step
    const Sint high_shift = 8; // hs = h - s, with h = 16
    Uint16 crc = 0;
    for (Uint8 idx = 0; idx < label.len; idx++) {                                           /* 2 */
        const Uint16 a = cast_val(Uint16, (crc >> high_shift) ^ cast_val(Uint16, label.str[idx])); /* 2 */
        const Uint16 b = cast_val(Uint16, (a << 2) ^ cast_val(Uint16, (a << 1) ^ a));       /* 4 */
        crc = cast_val(Uint16, b ^ (crc << shift));                                         /* 2 */
    }
    return crc;
}
// https://en.wikipedia.org/wiki/Pearson_hashing
header_function
Uint8 labelPearson (Label label)
{
    // 8-bit Pearson hash where the usual 256-byte permutation table is
    // replaced by a bit-permutation (mapping "4 0 3 7 5 2 1 6", generated
    // with https://programming.sirrida.de/calcperm.php).
    Uint8 hash = 0;
    for (Uint8 idx = 0; idx < label.len; idx++) {
        const Uint8 mixed = cast_val(Uint8, hash ^ label.str[idx]);
        hash = cast_val(Uint8, (((mixed & 0x41) << 1) |
                                ((mixed & 0x04) << 3) |
                                ((mixed & 0x02) << 5) |
                                ((mixed & 0x90) >> 4) |
                                ((mixed & 0x28) >> 1)));
    }
    return hash;
}
header_function
Label labelMakeL (const Char *str, Uint8 len)
{
    // Build a Label from an explicit-length string, precomputing both hashes.
    debugAssert(len < elemin((Label){}.str));
    Label label = {
        .len = len,
    };
    memcpy(label.str, str, len);
    label.crc = labelCRC16(label);
    label.pearson = labelPearson(label);
    return label;
}
header_function
Label labelMake (const Char *str)
{
    // Convenience wrapper: measure a NUL-terminated string, then build the Label.
    Size length = strlen(str);
    debugAssert(length < elemin((Label){}.str));
    return labelMakeL(str, cast_val(Uint8, length));
}
header_function
Uint8 labelLen (Label l)
{
    // Length accessor: number of bytes stored in l.str.
    return l.len;
}
header_function
Bool marshalLabel (Label *datum, MARSHAL_FUNC((*func)), void *funcdata, Bool write, Bool big_endian) {
    // Serialize (write) or deserialize (read) a Label: the length byte first,
    // then the raw string bytes. The hashes are never transmitted; on read
    // they are recomputed from the received bytes.
    if (!marshalUint8(&datum->len, func, funcdata, write, big_endian)) return false;
    if (!func(datum->str, datum->len, funcdata)) return false;
    if (!write) {
        datum->crc = labelCRC16(*datum);
        datum->pearson = labelPearson(*datum);
    }
    return true;
}
/************************************************************************************************
* Txt (String Library)
* ********************
*
* Define STD_TXT_INCLUDE_FORMATTED_APPEND to include formatted appending (txtfmtAppend).
* Currently, this depends of the stb_sprintf library.
* Including stb_sprintf.h before including std.h also works.
*/
// Arena backing all Txt allocations; a thin wrapper over a push allocator.
typedef struct Txt_Arena {
    Memory_Push mp; // Bump allocator that owns the string storage
} Txt_Arena;
header_function
Txt_Arena txtArenaCreate (Memory_Allocator_Interface *mai)
{
    // Create a string arena backed by a 1 MiB push (bump) allocator.
    Txt_Arena arena = {
        .mp = memPushCreate(mai, MiB(1)),
    };
    return arena;
}
header_function
void txtArenaDelete (Txt_Arena *ta)
{
    // Return the arena's backing memory; all Txt values from it are now dead.
    memPushDelete(&ta->mp);
}
header_function
void txtArenaReset (Txt_Arena *ta)
{
    // Discard every string at once; the backing block is kept for reuse.
    memPushRemitAll(&ta->mp);
}
typedef struct Txt { Char *str; } Txt; // Handle: points at a Txt_Header_'s str member
typedef struct Txt_Header_ {
    Uint32 len; // Byte length, excluding the terminating NUL
    Char str[]; // Flexible array: len bytes followed by '\0'
} Txt_Header_;
header_function
Txt txt_Allocate (Txt_Arena *ta, Size len)
{
    // Reserve header + len bytes + NUL from the arena, and return the Txt
    // whose str points just past the hidden Txt_Header_.
    Size size = len + 1 + sizeof(Txt_Header_);
    Txt_Header_ *hdr = cast_val(Txt_Header_*, memPushAllotA(&ta->mp, size, alignof(Txt_Header_)));
    // NOTE(review): this assert fires only AFTER the oversized allocation has
    // already been made — consider validating len before allotting.
    debugAssert(len < UINT32_MAX);
    hdr->len = cast_val(Uint32, len);
    hdr->str[len] = '\0';
    Char *s = hdr->str;
    Txt t = {.str = s};
    return t;
}
header_function
Txt txtNewL (Txt_Arena *ta, Char *s, Size len)
{
    // Copy len bytes of s into a freshly allocated (NUL-terminated) Txt.
    Txt txt = txt_Allocate(ta, len);
    memcpy(txt.str, s, len);
    return txt;
}
header_function
Txt txtNew (Txt_Arena *ta, Char *s)
{
    // Copy a NUL-terminated C string into a new Txt.
    return txtNewL(ta, s, strlen(s));
}
header_function
Char* txtStr (Txt t)
{
    // Accessor: the NUL-terminated C string stored inside a Txt handle.
    return t.str;
}
header_function
Size txtLen (Txt t)
{
    // Recover the hidden header that sits immediately before t.str and
    // return the length recorded at allocation time.
    Txt_Header_ *header = containerof(t.str, Txt_Header_, str);
    return header->len;
}
header_function
Bool txt_EqN (Txt t1, Txt t2, Size len)
{
    // Raw byte comparison of the first len bytes of both strings.
    return memcmp(t1.str, t2.str, len) == 0;
}
header_function
Bool txtEqTT (Txt t1, Txt t2)
{
    // Two Txts are equal iff their lengths match and their bytes compare equal.
    Size len1 = txtLen(t1);
    Size len2 = txtLen(t2);
    if (len1 != len2) return false;
    return txt_EqN(t1, t2, len1);
}
header_function
Bool txtEqTC (Txt t1, Char *s2)
{
    // A Txt equals a C string iff the lengths match and the bytes agree.
    Size len1 = txtLen(t1);
    Size len2 = strlen(s2);
    if (len1 != len2) return false;
    return memcmp(txtStr(t1), s2, len1) == 0;
}
header_function
Bool txtEqTL (Txt t1, Label l2)
{
    // A Txt equals a Label iff the lengths match and the bytes agree.
    Size len1 = txtLen(t1);
    Size len2 = labelLen(l2);
    if (len1 != len2) return false;
    return memcmp(txtStr(t1), l2.str, len1) == 0;
}
// Type-generic equality: dispatches on the type of the second argument
// (Txt, Char* or Label); the first argument is always a Txt.
#define txtEq(_a, _b) \
    _Generic((_b) \
             , Txt : txtEqTT \
             , Char* : txtEqTC \
             , Label : txtEqTL)(_a, _b)
header_function
Char* txtChr (Txt t, Char c)
{
    // Forward scan for the first occurrence of c; nullptr when absent.
    Char *str = txtStr(t);
    Char *end = str + txtLen(t);
    for (Char *p = str; p != end; p++) {
        if (*p == c) return p;
    }
    return nullptr;
}
header_function
Char* txtChrR (Txt t, Char c)
{
    // Reverse scan: return a pointer to the LAST occurrence of c in t,
    // or nullptr when absent.
    Size len = txtLen(t);
    Char *str = txtStr(t);
    // BUGFIX: the loop previously incremented i, so starting from len - 1 it
    // examined only the final character. Walk backwards instead; Size is
    // unsigned, so when i wraps past 0 the (i < len) test terminates the loop
    // (this also handles len == 0 correctly).
    for (Size i = len - 1; i < len; i--) {
        if (str[i] == c) return &str[i];
    }
    return nullptr;
}
// A view into a Txt; does not own the underlying storage.
typedef struct Txt_Span {
    Txt t;             // Backing string (must outlive the span)
    Uint32 begin, end; // begin inclusive, end exclusive { [begin, end) }
    Uint64 hash; // Top bit set to 0 means that hash has not been calculated
} Txt_Span;
header_function
Txt_Span txtGetSpan (Txt t, Sint64 begin, Sint64 end)
{
    // Python-style slicing: negative indices count back from the end of t.
    if (begin < 0) begin = cast_val(Uint32, cast_val(Sint32, txtLen(t)) - (-begin));
    if (end < 0) end = cast_val(Uint32, cast_val(Sint32, txtLen(t)) - (-end));
    // An empty or inverted range is widened to one character instead of
    // being treated as an error.
    if (end <= begin) {
        end = begin + 1; // So that we don't end up with unnecessary errors
    }
    if (end > cast_val(Sint, txtLen(t))) {
        end = cast_val(Sint, txtLen(t));
    }
    // NOTE(review): begin itself is never clamped to txtLen(t) — a begin past
    // the end (or more negative than -len) survives into the span unchecked;
    // confirm that callers guarantee in-range inputs.
    Txt_Span ts = {
        .t = t,
        .begin = cast_val(Uint32, begin),
        .end = cast_val(Uint32, end),
    };
    return ts;
}
#define TXT_FMT_BYTES_PER_CHUNK_ 55ull /* 55 + 1 + 8 = 64 */
#if defined(STB_SPRINTF_H_INCLUDE)
# define STD_TXT_INCLUDE_FORMATTED_APPEND
#elif defined(STD_TXT_INCLUDE_FORMATTED_APPEND)
disable_warnings();
# define STB_SPRINTF_MIN TXT_FMT_BYTES_PER_CHUNK_
# define STB_SPRINTF_IMPLEMENTATION
# define STB_SPRINTF_NOUNALIGNED
#define STB_SPRINTF_STATIC
# include "stb/stb_sprintf.h"
enable_warnings();
#endif
typedef struct Txt_Fmt_Chunk Txt_Fmt_Chunk;
// Pool of fixed-size chunks that Txt_Fmt builders draw from.
typedef struct Txt_Fmt_Pool {
    // NOTE(review): miface is never assigned by txtfmtPoolCreate (memset
    // leaves it null) — confirm whether it is still needed.
    Memory_Allocator_Interface *miface;
    Memory_Slab chunk_allocator; // Hands out 64-byte Txt_Fmt_Chunk objects
    Size chunk_count;            // Capacity of the slab, in chunks
} Txt_Fmt_Pool;
// An in-progress string: a chain of chunks plus the total length so far.
typedef struct Txt_Fmt {
    Txt_Fmt_Pool *pool;                      // Where chunks come from / return to
    Size length;                             // Total bytes appended so far
    Txt_Fmt_Chunk *first_chunk, *last_chunk; // Singly-linked chunk chain
} Txt_Fmt;
struct Txt_Fmt_Chunk {
    Txt_Fmt_Chunk *next_chunk;
    Char data[TXT_FMT_BYTES_PER_CHUNK_];
    Uint8 filled; // Bytes used in data
};
static_assert(sizeof(Txt_Fmt_Chunk) == 64, "Txt fmt chunk won't fit in a single cache line");
header_function
void txtfmtPoolCreate (Txt_Fmt_Pool *tp, Memory_Allocator_Interface *mem, Uint32 chunk_count)
{
    // Enforce a floor of 512 chunks (32 KiB of chunk storage, the same size
    // as the free-grid metadata).
    Uint32 count = maximum(chunk_count, 512u);
    // memset rather than a compound literal so a large temporary Txt_Fmt_Pool
    // never lands on the stack.
    memset(tp, 0, sizeof(*tp));
    tp->chunk_count = count;
    slabCreate(&tp->chunk_allocator, mem, count, sizeof(Txt_Fmt_Chunk));
}
header_function
void txtfmtPoolDelete (Txt_Fmt_Pool *tp)
{
    // The pool owns nothing but its slab of chunks.
    slabDelete(&tp->chunk_allocator);
}
header_function
Txt_Fmt txtfmtCreate (Txt_Fmt_Pool *tp)
{
    // Start every builder with one zeroed chunk so append paths never have
    // to handle a null last_chunk.
    Txt_Fmt_Chunk *chunk = cast_val(Txt_Fmt_Chunk*, slabAllocate(&tp->chunk_allocator));
    memzero(*chunk);
    Txt_Fmt txt = {
        .pool = tp,
        .first_chunk = chunk,
        .last_chunk = chunk,
    };
    return txt;
}
header_function
void txtfmtRemove (Txt_Fmt *tf)
{
    // Return every chunk to the pool, grabbing each next pointer before the
    // node it lives in is freed.
    Txt_Fmt_Chunk *chunk = tf->first_chunk;
    while (chunk != nullptr) {
        Txt_Fmt_Chunk *next = chunk->next_chunk;
        slabFree(&tf->pool->chunk_allocator, chunk);
        chunk = next;
    }
}
header_function
Txt_Fmt_Chunk* txtfmt_GetLastChunk (Txt_Fmt *tf)
{
    // Return a chunk with at least one free byte, growing the chain on demand.
    Txt_Fmt_Chunk *last = tf->last_chunk;
    if (last->filled == TXT_FMT_BYTES_PER_CHUNK_) {
        Txt_Fmt_Chunk *fresh = cast_val(Txt_Fmt_Chunk*, slabAllocate(&tf->pool->chunk_allocator));
        memzero(*fresh);
        last->next_chunk = fresh;
        tf->last_chunk = fresh;
        last = fresh;
    }
    return last;
}
header_function
Size txtfmtAffix (Txt_Fmt *tf, Char c)
{
    // Append a single character; always consumes exactly one byte.
    Txt_Fmt_Chunk *chunk = txtfmt_GetLastChunk(tf);
    chunk->data[chunk->filled] = c;
    chunk->filled += 1;
    tf->length += 1;
    return 1;
}
header_function
Char* txtfmt_StbspCallback(const Char *buf, void *user, Sint len)
{
    // stb_sprintf flush callback: copy len bytes from the staging buffer into
    // the chunk chain. Because STB_SPRINTF_MIN equals the chunk capacity,
    // len never exceeds one chunk's worth, so at most two chunks are touched.
    Txt_Fmt *tf = cast_val(Txt_Fmt*, user);
    Txt_Fmt_Chunk *s = txtfmt_GetLastChunk(tf);
    debugAssert(s);
    Size length = cast_val(Size, len);
    Size empty = TXT_FMT_BYTES_PER_CHUNK_ - s->filled;
    if (length > empty) {
        // Fill the current chunk, then spill the remainder into a fresh one
        memcpy(s->data + s->filled, buf, empty);
        s->filled += cast_val(Uint8, empty);
        s = txtfmt_GetLastChunk(tf);
        memcpy(s->data, buf + empty, length - empty);
        s->filled += cast_val(Uint8, length - empty);
    } else {
        memcpy(s->data + s->filled, buf, length);
        s->filled += cast_val(Uint8, length);
    }
    tf->length += length;
    // Returning buf tells stbsp to keep reusing the same staging buffer; the
    // const is cast away because stbsp's callback signature demands Char*.
    pragma_clang("clang diagnostic push");
    pragma_clang("clang diagnostic ignored \"-Wcast-qual\"");
    return cast_const(Char*, buf);
    pragma_clang("clang diagnostic pop");
}
#if defined(STD_TXT_INCLUDE_FORMATTED_APPEND)
with_clang_gcc(__attribute__(( format( __printf__, 2, 0 ))))
header_function
Size txtfmtAppendFV (Txt_Fmt *tf, const Char *fmt, va_list va)
{
    // printf-style append via stb_sprintf's callback API: buf is the staging
    // buffer that stbsp flushes through txtfmt_StbspCallback in chunk-sized
    // pieces. Returns the number of bytes produced.
    Char buf[TXT_FMT_BYTES_PER_CHUNK_] = {};
    Sint size = stbsp_vsprintfcb(txtfmt_StbspCallback, tf, buf, fmt, va);
    return cast_val(Size, size);
}
with_clang_gcc(__attribute__ (( format( __printf__, 2, 3 ))))
header_function
Size txtfmtAppendF (Txt_Fmt *tf, const Char *fmt, ...)
{
    // Varargs front-end for txtfmtAppendFV.
    va_list args;
    va_start(args, fmt);
    Size result = txtfmtAppendFV(tf, fmt, args);
    va_end(args);
    return result;
}
#endif
header_function
Size txtfmtAppendCL (Txt_Fmt *tf, const Char *tc, Size length)
{
    // Append length bytes of tc, chunk by chunk; returns the bytes copied
    // (always equal to length).
    Size copied = 0;
    Size len = length;
    while (len) {
        Txt_Fmt_Chunk *s = txtfmt_GetLastChunk(tf);
        Size l = minimum(len, TXT_FMT_BYTES_PER_CHUNK_);
        // Only the free space in this chunk can be used on this round
        l = minimum(l, TXT_FMT_BYTES_PER_CHUNK_ - s->filled);
        memcpy(s->data + s->filled, tc + copied, l);
        copied += l;
        len -= l;
        s->filled += cast_val(Uint8, l);
    }
    tf->length += length;
    return copied;
}
header_function
Size txtfmtAppendC (Txt_Fmt *tf, const Char *tc)
{
    // Append a NUL-terminated C string.
    return txtfmtAppendCL(tf, tc, strlen(tc));
}
header_function
Size txtfmtAppendT (Txt_Fmt *tf, Txt txt)
{
    // Append the full contents of a Txt.
    return txtfmtAppendCL(tf, txtStr(txt), txtLen(txt));
}
header_function
Size txtfmtAppendTs (Txt_Fmt *tf, Txt_Span ts)
{
    // Append the [begin, end) slice the span designates.
    return txtfmtAppendCL(tf, txtStr(ts.t) + ts.begin, cast_val(Size, ts.end - ts.begin));
}
header_function
Size txtfmtAppendLbl (Txt_Fmt *tf, Label lbl)
{
    // Append the bytes of a Label.
    return txtfmtAppendCL(tf, lbl.str, lbl.len);
}
header_function
Txt txtfmtFinish (Txt_Fmt *tf, Txt_Arena *ta)
{
    // Flatten the chunk chain into one contiguous arena-allocated Txt, then
    // release the builder. The Txt_Fmt must not be used after this call.
    Txt t = txt_Allocate(ta, tf->length);
    Size filled = 0;
    for (Txt_Fmt_Chunk *s = tf->first_chunk; s != nullptr; s = s->next_chunk) {
        debugAssert((filled + s->filled) < UINT32_MAX);
        memcpy(txtStr(t) + filled, s->data, s->filled);
        filled += s->filled;
    }
    (txtStr(t))[filled] = '\0';
    txtfmtRemove(tf); // Chunks go back to the pool
    return t;
}
#undef TXT_FMT_BYTES_PER_CHUNK_
/************************************************************************************************
* Ravel (Print to string)
* _What can change the nature of a man?_
*/
#if defined(STD_TXT_INCLUDE_FORMATTED_APPEND)
header_function
Size ravelSint8 (Txt_Fmt *fmt, Sint8 val)
{
    // Print a signed 8-bit integer (promoted to Sint for the format string).
    return txtfmtAppendF(fmt, "%d", cast_val(Sint, val));
}
header_function
Size ravelUint8 (Txt_Fmt *fmt, Uint8 val)
{
    // Print an unsigned 8-bit integer (promoted to Uint for the format string).
    return txtfmtAppendF(fmt, "%u", cast_val(Uint, val));
}
header_function
Size ravelSint16 (Txt_Fmt *fmt, Sint16 val)
{
    // Print a signed 16-bit integer (promoted to Sint for the format string).
    return txtfmtAppendF(fmt, "%d", cast_val(Sint, val));
}
header_function
Size ravelUint16 (Txt_Fmt *fmt, Uint16 val)
{
    // Print an unsigned 16-bit integer (promoted to Uint for the format string).
    return txtfmtAppendF(fmt, "%u", cast_val(Uint, val));
}
header_function
Size ravelSint32 (Txt_Fmt *fmt, Sint32 val)
{
    // Print a signed 32-bit integer.
    return txtfmtAppendF(fmt, "%d", val);
}
header_function
Size ravelUint32 (Txt_Fmt *fmt, Uint32 val)
{
    // Print an unsigned 32-bit integer.
    return txtfmtAppendF(fmt, "%u", val);
}
header_function
Size ravelFloat32 (Txt_Fmt *fmt, Float32 val)
{
    // Print a 32-bit float (promoted to Float64, as varargs require).
    return txtfmtAppendF(fmt, "%f", cast_val(Float64, val));
}
header_function
Size ravelSint64 (Txt_Fmt *fmt, Sint64 val)
{
    // Print a signed 64-bit integer.
    return txtfmtAppendF(fmt, "%lld", val);
}
header_function
Size ravelUint64 (Txt_Fmt *fmt, Uint64 val)
{
    // Print an unsigned 64-bit integer.
    return txtfmtAppendF(fmt, "%llu", val);
}
header_function
Size ravelFloat64 (Txt_Fmt *fmt, Float64 val)
{
    // Print a 64-bit float.
    return txtfmtAppendF(fmt, "%f", val);
}
// Type-generic print: dispatch to the ravel* function matching _val's type.
// BUGFIX: every branch previously dispatched to ravelSint8, so all non-Sint8
// values were misinterpreted (and most would not even compile past _Generic's
// argument conversion).
#define ravel(_fmt, _val) \
    _Generic((_val) \
             , Sint8: ravelSint8 \
             , Uint8: ravelUint8 \
             , Sint16: ravelSint16 \
             , Uint16: ravelUint16 \
             , Sint32: ravelSint32 \
             , Uint32: ravelUint32 \
             , Float32: ravelFloat32 \
             , Sint64: ravelSint64 \
             , Uint64: ravelUint64 \
             , Float64: ravelFloat64 \
        )(_fmt, _val)
#endif // STD_TXT_INCLUDE_FORMATTED_APPEND
/************************************************************************************************
* B-Tree
*/
// B-Tree implementation
// Reference: Chapter 18 of Introduction to Algorithms - Fourth Edition by Cormen, et al.
// Some changes have been made to make the implementation more amenable to future optimizations
// (SIMD or otherwise).
// First, the idea of "minimum degree" has been discarded in favour of having a maximum count.
// This means that we store an even number of keys instead of an odd number.
// Second, there is no leaf boolean; the leaf property is known by comparing the first child to nullptr.
// The value of M is chosen so that the all keys can be compared using 5 256-wide SSE instructions.
// In addition, this also makes the BTree_Node align with cache line sizes of 64, 128 and 256 and 512 bytes.
// Maximum number of keys per node (even, unlike Cormen's 2t-1 formulation).
#define M 20
typedef struct BTree_Node {
    // In the keys and data array, [M/2 - 1, M] elements have to be present
    // More often used members are on top together for cache friendliness.
    Uint64 keys[M]; // Called k in Cormen; sorted ascending
    Size count; // Called n in Cormen
    struct BTree_Node *children[M + 1]; // Called c in Cormen; children[0] == nullptr marks a leaf
    void *data[M]; // Payload pointers, parallel to keys
    // NOTE(review): next/prev sibling links are declared but not used in the
    // code visible here — confirm their purpose (leaf chaining?).
    struct BTree_Node *next, *prev;
} BTree_Node;
static_assert(sizeof(BTree_Node) == 512, "BTree_Node's size is not expected");
typedef struct BTree {
    Memory_Slab node_allocator; // All nodes are drawn from this slab
    BTree_Node *root;           // Allocated by btreeCreate; never nullptr afterwards
} BTree;
header_function
void btreeCreate (BTree *b, Memory_Allocator_Interface *miface, Uint32 node_count)
{
    // Initialize an empty tree: a slab sized for node_count nodes, plus one
    // root node allocated up front.
    memzero(*b);
    slabCreate(&b->node_allocator, miface, node_count, sizeof(BTree_Node));
    // NOTE(review): the root is used as-if zeroed (count == 0 and
    // children[0] == nullptr) — confirm slabAllocate returns zeroed memory.
    b->root = cast_val(BTree_Node *, slabAllocate(&b->node_allocator));
}
header_function
void* btreeFind (BTree *b, Uint64 key)
{
    // Standard B-Tree lookup: in each node, scan the sorted keys left to
    // right until an exact match or the first key greater than `key`, then
    // descend into the child straddling that gap.
    for (BTree_Node *node = b->root; node != nullptr; ) {
        Size idx = 0;
        Bool match = false;
        while (idx < node->count) {
            if (node->keys[idx] == key) {
                match = true;
                break;
            }
            if (node->keys[idx] > key) break;
            idx++;
        }
        if (match) return node->data[idx];
        if (node->children[0] == nullptr) return nullptr; // Leaf: key absent
        // idx points at the key just greater than `key`, so the idx-th child
        // covers the gap below it; when no greater key was found, idx equals
        // node->count, which selects the last child.
        node = node->children[idx];
    }
    return nullptr;
}
// NOTE(naman): Function returns the previous value, if any.
header_function
void btree_SplitChild (BTree *b, BTree_Node *parent, Size i)
{
    // All indices in this function's comments (I, T, etc.) are zero indexed. Pay close attention.
    // To mark them against regular numbers, they are written in capital. `i` and `I` are both the same,
    // `i` is used when talking about numbers (count, etc.), while `I` is used when talking about indices.
    // This function gets called if the I-th child of the node has become full (i.e., has m keys)
    // and needs to be split.
    BTree_Node *full = parent->children[i];
    BTree_Node *newnode = cast_val(BTree_Node *, slabAllocate(&b->node_allocator));
    // We will move the last m/2-1 keys from `full` into `newnode`. Since `full` had m keys, this means that
    // `full` will be left with m/2+1 keys from [0...M/2] and m/2-1 keys from [M/2+1...M-1] will become `newnode`'s keys
    // from [0..M/2-1].
    newnode->count = M/2 - 1;
    memcpy(&newnode->keys[0], &full->keys[M/2 + 1], newnode->count * sizeof(newnode->keys[0]));
    memcpy(&newnode->data[0], &full->data[M/2 + 1], newnode->count * sizeof(newnode->data[0]));
    // If the `full` node is not a leaf, it has a bunch of children. Since the trailing keys got moved,
    // the children whose pointers fall between those keys also have to be moved. Because we moved m/2-1 keys,
    // there would be m/2 children associated with them that also need to be moved. This means that the
    // M/2-th key in `full` will have no right-child anymore. But that's okay, since the M/2-th
    // key will get moved to `parent` anyways. So, we will move m/2 children from `full`'s [M/2+1...M]
    // to `newnode`'s [0...M/2-1].
    if (full->children[0] != nullptr) { // Not a leaf
        memcpy(&newnode->children[0], &full->children[M/2+1], (newnode->count + 1) * sizeof(newnode->children[0]));
    }
    // Finally, we set the `full`'s count to m/2. Remember that `full` actually still has a total of m/2+1 elements.
    // However, the element on index M/2 will soon get moved to the `parent`.
    full->count = M/2;
    // But before we can move `full`'s M/2 element and attach `newnode` as a new child, we have to make space
    // for it in `parent`. First, let's move the children. Let parent->count be n. If the total number
    // of keys that the parent has is n from 0 to N-1, the children would be n+1 from 0 to N. Since the
    // `full` was attached at index I, we need to move all children from [I+1...N] to [I+2...N+1], so that
    // we can add `newnode` as a child at index I+1.
    for (Size j = (parent->count + 1); j >= (i + 2); j--) {
        parent->children[j] = parent->children[j-1];
    }
    parent->children[i+1] = newnode;
    // Now, the turn for keys. Since `full`'s M/2 element has to be moved to `parent`'s I element (right between
    // the I-th and (I+1)-th child), we need to make space for it by moving elements from [I...N-1] to [I+1...N].
    for (Size j = parent->count; j >= (i + 1); j--) {
        parent->keys[j] = parent->keys[j-1];
        parent->data[j] = parent->data[j-1];
    }
    // Now that there is a space in `parent`'s key array, let's put `full`'s M/2 element there.
    parent->keys[i] = full->keys[M/2];
    parent->data[i] = full->data[M/2];
    parent->count++;
}
pragma_msvc("warning ( push )");
pragma_msvc("warning ( disable: 6385 )"); // Analyze: READ_OVERRUN (Reading invalid data from 'n->keys' and 'n->children')
// btreeAdd: insert `key` with payload `datum` into the B-tree, using the
// classic single-pass top-down insertion (split every full node on the way
// down so that a leaf always has room).
// Returns the datum previously stored under `key` (which gets replaced by
// `datum`), or nullptr if `key` was newly inserted.
header_function
void* btreeAdd (BTree *b, Uint64 key, void *datum)
{
    if (b->root->count == M) {
        // This is a clever hack. Cormen wanted to use the existing splitting function (btree_SplitChild) to split the root too.
        // But splitting requires transferring one item to our parent, and the root has no parent. So, they temporarily create
        // an invalid tree by creating a new node with zero elements and setting the root as its child. But the splitting process
        // adds one element and one more child (the new splitted node) to the new root, thus making the tree valid again.
        BTree_Node *s = cast_val(BTree_Node *, slabAllocate(&b->node_allocator));
        s->children[0] = b->root;
        b->root = s;
        btree_SplitChild(b, b->root, 0);
    }
    BTree_Node *n = b->root;
    while (n != nullptr) {
        Size i = n->count;
        if (n->children[0] == nullptr) { // Leaf
            // FIX: guard against an empty leaf (fresh root, count == 0) —
            // indexing keys[i-1] with unsigned Size i == 0 would read
            // keys[SIZE_MAX], the out-of-bounds read the analyzer flagged.
            if ((i > 0) && (key >= n->keys[0]) && (key <= n->keys[i-1])) { // If the key is in the span of this node
                for (Size j = 0; j < i; j++) { // Check if the key has already been inserted
                    if (key == n->keys[j]) {
                        void *result = n->data[j];
                        n->data[j] = datum;
                        return result;
                    }
                }
            }
            while ((i >= 1) && (key < n->keys[i-1])) { // Else shift greater entries right
                n->keys[i] = n->keys[i-1];
                n->data[i] = n->data[i-1];
                i--;
            }
            n->keys[i] = key; // And copy the key into the gap
            n->data[i] = datum;
            n->count++;
            return nullptr;
        } else {
            // Find the child whose key-range contains `key`.
            while ((i >= 1) && (key < n->keys[i-1])) {
                i--;
            }
            // FIX: in a B-tree, keys live in internal nodes too. If the key
            // matches here, replace in place and return the old datum —
            // descending would otherwise insert a duplicate into a leaf.
            if ((i >= 1) && (key == n->keys[i-1])) {
                void *result = n->data[i-1];
                n->data[i-1] = datum;
                return result;
            }
            if (n->children[i]->count == M) {
                btree_SplitChild(b, n, i);
                if (key > n->keys[i]) {
                    i++;
                } else if (key == n->keys[i]) {
                    // FIX: the median that just moved up into this node equals
                    // the key being added; replace here instead of descending.
                    void *result = n->data[i];
                    n->data[i] = datum;
                    return result;
                }
            }
            n = n->children[i];
        }
    }
    return nullptr;
}
pragma_msvc("warning ( pop )");
#undef M
/******************************************************************************************
* Intrusive Circular Doubly Linked List
* Inspired from github.com/torvalds/linux/blob/master/include/linux/list.h
*/
// Intrusive list link. Embed one of these inside the payload struct and
// recover the enclosing struct with listThisNode()/containerof().
// A list is circular: an empty list head points to itself in both directions.
typedef struct List_Node {
    struct List_Node *next, *prev;
} List_Node;
/* To get the node (container struct) holding the linked list node */
#define listThisNode(nodeptr, type, member) containerof((nodeptr), type, member)
// Container structs of the successor/predecessor links of `nodeptr`.
#define listNextNode(nodeptr, type, member) containerof((nodeptr)->next, type, member)
#define listPrevNode(nodeptr, type, member) containerof((nodeptr)->prev, type, member)
/*
 * Initialize the list like this:
 * *****************************
 * typedef struct N {
 *     List_Node l;
 * } N;
 *
 * N n;
 * n.l = listInit(n.l);
 */
// An empty list is a node that is its own next and prev.
#define listInit(name) (List_Node) {&(name), &(name)}
// Internal: re-point a node at itself (used after removal/splicing so the
// node is again a valid empty list).
#define list_Reset(ptr) do{(ptr)->next = (ptr); (ptr)->prev = (ptr);}while(0)
// Internal helper: stitch `newnode` into the chain between `prev` and `next`,
// which must be adjacent links of the same list.
header_function
void list_Add (List_Node *newnode, List_Node *prev, List_Node *next)
{
    newnode->prev = prev;
    newnode->next = next;
    prev->next = newnode;
    next->prev = newnode;
}
// Insert `newnode` immediately after `after_this`.
header_function
void listAddAfter (List_Node *newnode, List_Node *after_this)
{
    List_Node *follower = after_this->next;
    list_Add(newnode, after_this, follower);
}
// Insert `newnode` immediately before `before_this`.
header_function
void listAddBefore (List_Node *newnode, List_Node *before_this)
{
    List_Node *leader = before_this->prev;
    list_Add(newnode, leader, before_this);
}
// Internal helper: make `prev` and `next` adjacent, unlinking whatever lay
// between them. The removed node(s) still point into the list afterwards.
header_function
void list_RemoveNodeBetween (List_Node *prev, List_Node *next)
{
    prev->next = next;
    next->prev = prev;
}
// Unlink `entry` from its list and poison its links with nullptr, so any
// accidental later traversal through it faults loudly instead of corrupting
// the list. Use listRemoveAndInit if the node will be re-inserted.
header_function
void listRemove (List_Node *entry)
{
    list_RemoveNodeBetween(entry->prev, entry->next);
    entry->prev = nullptr;
    entry->next = nullptr;
}
// Unlink `entry` from its list and leave it as a valid, self-linked empty list.
header_function
void listRemoveAndInit (List_Node *entry)
{
    List_Node *leader = entry->prev;
    List_Node *follower = entry->next;
    list_RemoveNodeBetween(leader, follower);
    list_Reset(entry);
}
// Substitute `newnode` for `old` in whatever list `old` belongs to.
// `old`'s own links are left untouched (still pointing into the list).
header_function
void listReplace(List_Node *old, List_Node *newnode)
{
    List_Node *follower = old->next;
    List_Node *leader = old->prev;
    newnode->next = follower;
    follower->prev = newnode;
    newnode->prev = leader;
    leader->next = newnode;
}
// Substitute `newnode` for `old`, then reset `old` into a valid empty list so
// it can be reused or safely tested with listIsEmpty.
header_function
void listReplaceAndInit(List_Node *old, List_Node *newnode)
{
    listReplace(old, newnode);
    list_Reset(old);
}
// Swap the positions of `entry1` and `entry2` within their list(s).
// NOTE: the operation order below is load-bearing. `pos` remembers where
// entry2 came from; when the two entries are adjacent (entry2 directly follows
// entry1), that remembered neighbour IS entry1 — which has just been displaced
// by listReplace — so the re-insertion point must be redirected to entry2.
header_function
void listSwap(List_Node *entry1, List_Node *entry2)
{
    List_Node *pos = entry2->prev;   // where entry1 must end up
    listRemove(entry2);
    listReplace(entry1, entry2);     // entry2 takes entry1's slot
    if (pos == entry1) pos = entry2; // adjacent case: insert after entry2 instead
    listAddAfter(entry1, pos);
}
// Detach `list` from its current position and re-insert it after `after_this`.
header_function
void listMoveAfter (List_Node *list, List_Node *after_this)
{
    List_Node *leader = list->prev;
    List_Node *follower = list->next;
    list_RemoveNodeBetween(leader, follower);
    listAddAfter(list, after_this);
}
// Detach `list` from its current position and re-insert it before `before_this`.
header_function
void listMoveBefore (List_Node *list, List_Node *before_this)
{
    List_Node *leader = list->prev;
    List_Node *follower = list->next;
    list_RemoveNodeBetween(leader, follower);
    listAddBefore(list, before_this);
}
// A circular list head is empty exactly when it is its own successor.
header_function
Bool listIsEmpty (List_Node *node)
{
    return (node->next == node);
}
// Internal: splice the Nodes of `list` in between `node` and `node->next`.
// Assumes `list` is non-empty; the head `list` itself is NOT inserted — only
// the run of nodes from list->next through list->prev.
header_function
void list_Splice (List_Node *list, List_Node *node)
{
    List_Node *head = list->next; // first spliced-in node
    List_Node *tail = list->prev; // last spliced-in node
    List_Node *rest = node->next; // where the target list resumes
    node->next = head;
    head->prev = node;
    tail->next = rest;
    rest->prev = tail;
}
// Splice the Nodes of `list` in between `node` and `node->next`.
// A no-op for an empty `list`. Note: `list`'s own links are left pointing into
// the target list; use listSpliceInit if `list` will be reused.
header_function
void listSplice (List_Node *list, List_Node *node)
{
    if (listIsEmpty(list)) return;
    list_Splice(list, node);
}
// Splice `list`'s Nodes in after `node`, then reset `list` to a valid empty
// list so it can be reused immediately.
header_function
void listSpliceInit (List_Node *list, List_Node *node)
{
    if (listIsEmpty(list)) return;
    list_Splice(list, node);
    list_Reset(list);
}
// Iterate `idx` over every node of the list headed at `head` (head excluded).
// A one-step lookahead cursor (idx##_2_) is kept, so the CURRENT node may be
// removed from the list inside the loop body without breaking the iteration.
# define LIST_FOR_EACH(idx, head) \
for (List_Node * idx = (head)->next, * idx##_2_ = (head)->next->next; idx != (head); idx = idx##_2_, idx##_2_ = idx##_2_ -> next)
// Same as LIST_FOR_EACH, but walks the prev pointers (reverse order).
# define LIST_FOR_EACH_REVERSE(idx, head) \
for (List_Node * idx = (head)->prev, * idx##_2_ = (head)->prev->prev; idx != (head); idx = idx##_2_, idx##_2_ = idx##_2_ -> prev)
#if defined(ENV_LANG_CPP)
/******************************************************************************************
* C++ Data Structures
*****************************************************************************************/
// Fallback for pre-C++11 dialects that lack std::addressof: recover the true
// address of `arg` even when T overloads unary operator&, by laundering the
// reference through a byte reference.
// FIX: the guard previously tested the misspelled macro ENV_LANF_CPP (the rest
// of the file defines ENV_LANG_CPP, see the #if just above), so this fallback
// could never be compiled in. Also dropped `noexcept`, which is itself C++11
// syntax and would not parse under the < 2011 dialects this guard targets.
#if defined(ENV_LANG_CPP) && (ENV_LANG_CPP < 2011)
template<class T>
T* addressof(T& arg)
{
    return reinterpret_cast<T*>(&const_cast<Byte&>(reinterpret_cast<const volatile Byte&>(arg)));
}
#endif
// Growable array backed by a Memory_Allocator_Interface.
// Invariant: all `cap` slots are default-constructed (GrowToCap constructs
// every new slot), `len` of them hold live values.
template<typename T>
struct Vector {
    Memory_Allocator_Interface *miface;
    Size cap = 0; // NOTE(naman): Maximum number of elements that can be stored
    Size len = 0; // NOTE(naman): Count of elements actually stored
    T *buf = nullptr;
    // Shared constructor body: start empty, remember the allocator.
    void init_ (Memory_Allocator_Interface *MIface) {
        cap = 0;
        len = 0;
        buf = nullptr;
        miface = MIface;
    }
    Vector(Memory_Allocator_Interface *MIface) {
        init_(MIface);
    }
    // Pre-allocate (and default-construct) at least `elems` slots.
    Vector(Memory_Allocator_Interface *MIface, Size elems) {
        init_(MIface);
        GrowToCap(elems);
    }
    // NOTE(review): shallow copy — both Vectors then share `buf`, and whichever
    // destructor runs second will double-destroy and double-rescind it.
    // Callers must ensure only one copy is ever destroyed. TODO confirm the
    // intended ownership semantics (copy-assignment is also the implicit
    // shallow one).
    Vector(Vector &t) {
        cap = t.cap;
        len = t.len;
        buf = t.buf;
        miface = t.miface;
    }
    // Destroy every constructed slot (all `cap` of them, per the class
    // invariant) and return the buffer to the allocator.
    ~Vector () {
        for (Size i = 0; i < cap; i++) {
            (addressof(buf[i]))->~T();
        }
        memIRescind(miface, buf);
        cap = 0;
        len = 0;
    }
    T& operator[] (Size index) const {
        return buf[index];
    }
    // Overwrite slot `index` without touching `len`; caller guarantees index < cap.
    void AddInPlace_ (Size index, T elem) {
        buf[index] = elem;
    }
    Bool IsFull_ () {
        return len + 1 > cap;
    }
    void Grow () {
        GrowToCap(2 * cap);
    }
    // Append a copy of `elem`, doubling capacity when full.
    void Add (T elem) {
        if (unlikely(IsFull_())) Grow();
        buf[len] = elem;
        len++;
    }
    // O(1) removal that does NOT preserve order: the last element moves into
    // the vacated slot.
    void RemoveUnsorted (Size index) {
        buf[index] = buf[len - 1];
        len--;
    }
    // NOTE(review): zeroes the raw bytes of live elements — only meaningful
    // for trivially-copyable T; verify no T with non-trivial state is Clear()ed.
    void Clear () {
        memset(buf, 0, len * sizeof(T));
        len = 0;
    }
    // Ensure capacity for `new_count` elements (never shrinks).
    void Resize (Size new_count) {
        if (new_count > cap) {
            GrowToCap(new_count);
        }
    }
    Size SizeOf () { return len * sizeof(T); }
    Size ElemIn () { return len; }
    Size MaxSizeOf () { return cap * sizeof(T); }
    Size MaxElemIn () { return cap; } // FIX: previously returned cap * sizeof(T) — a byte size, not an element count (compare ElemIn/MaxSizeOf)
    Bool IsEmpty () { return len == 0; }
    Bool IsNotEmpty () { return !IsEmpty(); }
    Bool IsNull () { return buf == nullptr; }
    T* PtrOnePastLast () {
        return buf + len;
    }
    T* PtrLast () {
        return buf + (len - 1);
    }
    T* PtrFirst () {
        return buf;
    }
    // (Re)allocate the raw buffer to `new_size` bytes, byte-copying the old
    // contents across. Internal; callers maintain the construction invariant.
    void GrowToSize_ (Size new_size) {
        if (IsNull()) {
            buf = static_cast<T*>(memIRequest(miface, new_size));
        } else {
            T *new_buf = static_cast<T*>(memIRequest(miface, new_size));
            memcpy(new_buf, buf, cap * sizeof(T));
            memIRescind(miface, buf);
            buf = new_buf;
        }
    }
    // Grow capacity to at least `elem_count` (minimum 16) elements,
    // default-constructing every newly added slot.
    void GrowToCap (Size elem_count) {
        Size new_cap = maximum(elem_count, 16ULL);
        // FIX: refuse to shrink — GrowToSize_ copies `cap` elements, so a
        // smaller target would memcpy past the end of the new buffer.
        if (new_cap <= cap) return;
        GrowToSize_(new_cap * sizeof(T));
        for (Size i = cap; i < new_cap; i++) {
            new (addressof(buf[i])) T();
        }
        cap = new_cap;
    }
    // Ensure room for `elem_count` MORE elements beyond the current length.
    void IncreaseCapBy (Size elem_count) {
        // FIX: was `max(...)` (inconsistent with GrowToCap's `maximum`),
        // followed by a redundant `cap = new_cap;` — GrowToCap sets cap itself.
        GrowToCap(maximum(elem_count + len, 16ULL));
    }
};
// LIFO adapter over Vector<T>; the top of the stack is the vector's last slot.
template<typename T>
struct Stack {
    Vector<T> data;
    Stack (Memory_Allocator_Interface *MIface) : data{MIface} {}
    Stack (Memory_Allocator_Interface *MIface, Size elems) : data{MIface, elems} {}
    // Push a copy of `elem` onto the top.
    void Push (T elem) {
        data.Add(elem);
    }
    // Return the top element without removing it.
    // NOTE(naman): Check if stack is empty before calling.
    T Peek () {
        return *data.PtrLast();
    }
    // Remove and return the top element.
    // NOTE(naman): Check if stack is empty before calling.
    T Pop () {
        T top = Peek();
        data.len--;
        return top;
    }
    Bool IsEmpty () { return data.IsEmpty(); }
    Bool IsNotEmpty () { return data.IsNotEmpty(); }
    Size ElemIn () { return data.ElemIn(); }
};
// Double-ended queue built from two stacks: `head` holds the front portion
// (queue front on top), `tail` holds the back portion (queue back on top).
// When one side runs dry, the other stack is fully reversed into it, which
// surfaces the correct end; this gives amortized O(1) operations.
// TODO(naman): Convert this from two-stack approach to some better (more
// performant) one. Right now, the amortized time is constant, but it's not
// real-time. (A balanced partial transfer would need a third, temporary
// stack to preserve order.)
template<typename T>
struct Queue { // Double Ended
    Stack<T> head, tail;
    Queue (Memory_Allocator_Interface *MIface) : head{MIface}, tail{MIface} {}
    Queue (Memory_Allocator_Interface *MIface, Size elems) : head{MIface, elems}, tail{MIface, elems} {}
    void PushFront (T elem) {
        head.Push(elem);
    }
    T PeekFront () { // Check if queue is empty before calling
        if (head.IsEmpty()) {
            debugAssert(tail.IsEmpty() == false);
            // FIX: ALL of `tail` must be transferred, not half of it. Popping
            // reverses order, so the queue's front (the bottom of `tail`) only
            // ends up on top of `head` after a full transfer; a partial
            // transfer surfaced a middle/back element as the "front"
            // (e.g. PushBack 1,2,3 then PeekFront returned 3, not 1).
            // This also removes the old max(1, ...) int/Size mixing.
            Size iter = tail.ElemIn();
            for (Size i = 0; i < iter; i++) {
                head.Push(tail.Pop());
            }
        }
        return head.Peek();
    }
    T PopFront () { // Check if queue is empty before calling
        PeekFront(); // guarantees `head` is non-empty
        return head.Pop();
    }
    void PushBack (T elem) {
        tail.Push(elem);
    }
    T PeekBack () { // Check if queue is empty before calling
        if (tail.IsEmpty()) {
            debugAssert(head.IsEmpty() == false);
            // FIX: full transfer, for the same reason as in PeekFront.
            Size iter = head.ElemIn();
            for (Size i = 0; i < iter; i++) {
                tail.Push(head.Pop());
            }
        }
        return tail.Peek();
    }
    T PopBack () { // Check if queue is empty before calling
        PeekBack(); // guarantees `tail` is non-empty
        return tail.Pop();
    }
    Bool IsEmpty () {
        return head.IsEmpty() && tail.IsEmpty();
    }
    Bool IsNotEmpty () {
        return !IsEmpty();
    }
    Size ElemIn () {
        return head.ElemIn() + tail.ElemIn();
    }
};
#endif
#define STD_H_INCLUDE_GUARD
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment