/* * ST: Streaming Trace - A tiny tracing library * Copyright (C) 2025 Dominic Hoeglinger * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "st.h" #define ST_NIBBLE_TO_HEX(nibble) ((nibble) < 10 ? (nibble) + '0' : (nibble) - 10 + 'A') #define ST_NIBBLE(value, nibble) ((value >> nibble) & 0xF) #define ST_BYTE(value, offset) (char)(((value >> (8 * offset)) & 0xFF)) #define ST_ENCODE(value, nibble) (char)(ST_NIBBLE_TO_HEX(ST_NIBBLE(value, nibble))) #define ST_NELEMS(a) (sizeof(a) / sizeof(a[0])) #define ST_MACROPACK_FRAME_SIZE (2 * 8) #define ST_MACROPACK_SIZE(n) (ST_MACROPACK_FRAME_SIZE * n + (ST_MACROPACK_FRAME_SIZE * n) / 254 + 1 + n - 1) #define ST_MAX_RENDER_SIZE(n) (ST_MACROPACK_SIZE(n) + (ST_MACROPACK_SIZE(n) / 254) + 1) #define FASTLZ1_MAX_COPY 32 #define FASTLZ1_MAX_LEN 264 /* 256 + 8 */ #define FASTLZ1_MAX_L1_DISTANCE 8192 #define FASTLZ1_MAX_L2_DISTANCE 8191 #define FASTLZ1_MAX_FARDISTANCE (65535 + FASTLZ1_MAX_L2_DISTANCE - 1) #define FASTLZ1_HASH_LOG 13 #define FASTLZ1_HASH_SIZE (1 << FASTLZ1_HASH_LOG) #define FASTLZ1_HASH_MASK (FASTLZ1_HASH_SIZE - 1) typedef enum { st_d1 = 0x11, st_d2 = 0x12, st_d4 = 0x14, st_v1 = 0x21, st_v2 = 0x22, st_v4 = 0x24, st_v8 = 0x28, st_a1 = 0x31, st_a2 = 0x32, st_a4 = 0x34, st_s4 = 0x44, st_f4 = 0x54, st_f8 = 0x58, st_ev = 0x60, } st_type_t; typedef enum { st_packet_z = (1 << 0), } st_packetflags_t; typedef struct { const char* m_tag; uint16_t m_hash; } st_hashcache_t; typedef union { uint32_t u32value; uint64_t u64value; #if (!ST_FLOAT_DISABLE) float f32value; double f64value; #endif } st_convert_t; static bool s_enabled = false; static uint32_t s_last_time = 0; st_overflow_policy_t s_policy = st_overwrite; static st_trace_t* s_tracebuffer; static size_t s_tracebuffer_size = 0; static size_t s_tracebuffer_head = 0; static size_t s_tracebuffer_tail = 0; static size_t s_tracebuffer_items = 0; static bool s_tracebuffer_full = false; static bool s_locked = false; static bool s_tty_enabled = false; static size_t s_tty_rubout_len = 0; static uint32_t s_diag_avg_compression_total = 0; static uint32_t s_diag_avg_compression_n = 0; static uint32_t s_diag_items_sent = 0; static uint32_t s_diag_avg_ctime_total = 0; static uint32_t s_diag_avg_ctime_n = 0; static uint32_t s_diag_avg_rtime_total = 0; static uint32_t s_diag_avg_rtime_n = 0; #if (ST_HASHCACHE_LINES > 0) static st_hashcache_t s_hashcache[ST_HASHCACHE_LINES] = {0}; static size_t s_hashcache_index = 0; #endif size_t render_macropacket_payload(char buffer[], const size_t buffer_size, size_t n); static size_t frame_preamble(char buffer[], const size_t buffer_size); static size_t frame_epilouge(char buffer[], const size_t buffer_size); static size_t pack_frame(char packet_bytes[], const st_trace_t* const p_packet); static size_t ttyize_payload(char packet_buffer[], const size_t packet_size, const size_t buffer_size); static uint16_t bsd_hash(const char* const str, size_t n); static bool populate_time_delta_packet(const uint32_t timestamp, st_trace_t* const p_trace); static void tracebuffer_add(const uint32_t timestamp, const st_type_t type, const uint32_t value, const uint8_t sub, char const *tag); size_t cobs_encode(uint8_t* dst, size_t dst_buf_len, const uint8_t* ptr, int len, uint8_t delim); static int fastlz1_compress(const void* input, int length, void* output); void st_init(st_trace_t tracebuffer[], size_t ntraces) { s_tracebuffer = tracebuffer; s_tracebuffer_size = ntraces; } size_t st_get_buffer_items(void) { return s_tracebuffer_items; } void st_tty_mode(const bool enable) { s_tty_enabled = enable; } size_t st_tty_rubout(char buffer[], const size_t buffer_size) { const size_t rubout_packet_size = 3 + s_tty_rubout_len + 3; size_t buffer_pos = 0; if (buffer_size < rubout_packet_size) { return 0; } buffer_pos += frame_preamble(buffer, buffer_size); for (size_t i = 0; i < s_tty_rubout_len; ++i) { buffer[buffer_pos + i] = ' '; } buffer_pos += s_tty_rubout_len; buffer_pos += frame_epilouge(&buffer[buffer_pos], buffer_size - buffer_pos); s_tty_rubout_len = 0; return buffer_pos; } size_t st_output(size_t n, const bool compress, const bool rubout) { const size_t render_number_items = 4; const size_t render_max_size = ST_MAX_RENDER_SIZE(render_number_items); char packet_bytes[ST_MAX_RENDER_SIZE(4)] = {0}; size_t packet_size = 0; size_t i = 0; n = (n == 0) ? s_tracebuffer_size : n; while ((s_tracebuffer_full || (s_tracebuffer_tail != s_tracebuffer_head)) && (i < n)) { packet_size = st_render(packet_bytes, render_max_size, render_number_items, compress); i += render_number_items; st_out(packet_bytes, packet_size); } if (rubout) { packet_size = st_tty_rubout(packet_bytes, render_max_size); st_out(packet_bytes, packet_size); } return i; } size_t render_macropacket_payload(char buffer[], const size_t buffer_size, size_t n) { const size_t max_render_size = (8 + 1 + 1) * 2 - 1; char payload_buffer[8] = {0}; char delimimter = 0x00; st_trace_t timestamp_pack = {0}; size_t payload_size = 0; size_t i = 0; size_t buffer_pos = 0; size_t encode_size = 0; n = (n == 0) ? s_tracebuffer_size : n; while ((s_tracebuffer_full || (s_tracebuffer_tail != s_tracebuffer_head)) && (i < n) && ((buffer_pos + max_render_size) < buffer_size)) { if (populate_time_delta_packet(s_tracebuffer[s_tracebuffer_tail].m_timestamp, ×tamp_pack)) { payload_size = pack_frame(payload_buffer, ×tamp_pack); encode_size = cobs_encode(&buffer[buffer_pos], buffer_size - buffer_pos, payload_buffer, payload_size, delimimter); buffer_pos += encode_size; buffer[buffer_pos] = delimimter; buffer_pos++; } payload_size = pack_frame(payload_buffer, &s_tracebuffer[s_tracebuffer_tail]); encode_size = cobs_encode(&buffer[buffer_pos], buffer_size - buffer_pos, payload_buffer, payload_size, delimimter); buffer_pos += encode_size; buffer[buffer_pos] = delimimter; buffer_pos++; s_tracebuffer_tail = (s_tracebuffer_tail + 1) % s_tracebuffer_size; s_tracebuffer_full = false; s_tracebuffer_items = (s_tracebuffer_items > 0) ? (s_tracebuffer_items - 1) : 0; i++; s_diag_items_sent++; } // remove last delimiter if(buffer_pos) buffer_pos--; return buffer_pos; } size_t st_render(char buffer[], const size_t buffer_size, const size_t n, const bool compress) { const char delimimter = '\033'; size_t n_elems = (n == 0) ? s_tracebuffer_items : n; n_elems = (s_tty_enabled && (n > 4)) ? 4 : n_elems; size_t render_max_size = ST_MAX_RENDER_SIZE(n_elems); uint32_t ticks_render_start = st_timestamp(); char *final_payload_buffer = NULL; size_t final_payload_size = 0; while ((buffer_size < (1 + (render_max_size * 2))) && (n_elems > 0)) { n_elems--; render_max_size = ST_MAX_RENDER_SIZE(n_elems); } if (n_elems == 0) return 0; if (compress) { size_t macropack_payload_size = render_macropacket_payload(buffer, render_max_size, n_elems); if (macropack_payload_size == 0) return 0; size_t compressed_size = macropack_payload_size + (macropack_payload_size*15)/100; char *compressed_buffer = &buffer[buffer_size - compressed_size]; uint32_t ticks_comp_start = st_timestamp(); compressed_size = fastlz1_compress(buffer, macropack_payload_size, compressed_buffer); final_payload_buffer = compressed_buffer; final_payload_size = compressed_size; s_diag_avg_ctime_total += st_timestamp() - ticks_comp_start; s_diag_avg_ctime_n++; if (macropack_payload_size > 0) { s_diag_avg_compression_total += 100 - ((100 * compressed_size) / macropack_payload_size); s_diag_avg_compression_n++; } } else { char *macropack_buffer = &buffer[buffer_size - render_max_size]; size_t macropack_payload_size = render_macropacket_payload(macropack_buffer, render_max_size, n_elems); if (macropack_payload_size == 0) return 0; final_payload_buffer = macropack_buffer; final_payload_size = macropack_payload_size; } size_t out_pos = frame_preamble(buffer, buffer_size); uint8_t flags = 0; if (compress) flags |= (uint8_t)st_packet_z; buffer[out_pos] = flags; out_pos++; size_t payload_size = cobs_encode(&buffer[out_pos], buffer_size - out_pos, final_payload_buffer, final_payload_size, delimimter); if (s_tty_enabled == true) { payload_size = ttyize_payload(&buffer[out_pos], payload_size, buffer_size - out_pos); if (payload_size > s_tty_rubout_len) { s_tty_rubout_len = payload_size; } } out_pos += payload_size; out_pos += frame_epilouge(&buffer[out_pos], buffer_size - out_pos); s_diag_avg_rtime_total += st_timestamp() - ticks_render_start; s_diag_avg_rtime_n++; return out_pos; } void st_enable(const st_overflow_policy_t policy) { s_policy = policy; s_enabled = true; } void st_disable(void) { s_enabled = false; } void st_clear(void) { s_tracebuffer_tail = s_tracebuffer_head; s_tracebuffer_full = false; } void st_diagtrace(void) { const uint32_t buffer_health = ((s_tracebuffer_size - s_tracebuffer_items) * 0xFF)/s_tracebuffer_size; st_u32trace("ST.BufferItems", s_tracebuffer_items, false); st_u8trace("ST.BufferHealth", (uint8_t)buffer_health, true); if (s_diag_avg_compression_n > 0) { const uint32_t average = s_diag_avg_compression_total / s_diag_avg_compression_n; st_u8trace("ST.CompressionLevel", (uint8_t)average, true); s_diag_avg_compression_total = 0; s_diag_avg_compression_n = 0; } if (s_diag_items_sent > 0) { st_u32trace("ST.ItemsSent", s_diag_items_sent, true); s_diag_items_sent = 0; } if (s_diag_avg_ctime_n > 0) { const uint32_t average = s_diag_avg_ctime_total / s_diag_avg_ctime_n; st_u8trace("ST.CompressionTime", (uint8_t)average, true); s_diag_avg_ctime_total = 0; s_diag_avg_ctime_n = 0; } if (s_diag_avg_rtime_n > 0) { const uint32_t average = s_diag_avg_rtime_total / s_diag_avg_rtime_n; st_u8trace("ST.RenderTime", (uint8_t)average, true); s_diag_avg_rtime_total = 0; s_diag_avg_rtime_n = 0; } } void st_evtrace(const char* const tag, const bool skip_time) { if (s_enabled == true) { tracebuffer_add(skip_time ? 0UL : st_timestamp(), st_ev, 1UL, 0U, tag); } } void st_u8trace(const char* const tag, const uint8_t value, const bool skip_time) { if (s_enabled == true) { tracebuffer_add(skip_time ? 0UL : st_timestamp(), st_v1, (uint32_t)value, 0U, tag); } } void st_u16trace(const char* const tag, const uint16_t value, const bool skip_time) { if (s_enabled == true) { tracebuffer_add(skip_time ? 0UL : st_timestamp(), st_v2, (uint32_t)value, 0U, tag); } } void st_u32trace(const char* const tag, const uint32_t value, const bool skip_time) { if (s_enabled == true) { tracebuffer_add(skip_time ? 0UL : st_timestamp(), st_v4, (uint32_t)value, 0U, tag); } } void st_u64trace(const char* const tag, const uint64_t value, const bool skip_time) { if (s_enabled == true) { uint32_t ts = skip_time ? 0UL : st_timestamp(); tracebuffer_add(ts, st_v8, (uint32_t)(value & 0xFFFFFFFF), 0U, tag); tracebuffer_add(ts, st_v8, (uint32_t)(value >> 32), 1U, tag); } } void st_s8trace(const char* const tag, const int8_t value, const bool skip_time) { if (s_enabled == true) { tracebuffer_add(skip_time ? 0UL : st_timestamp(), st_v1, (uint32_t)value, 0U, tag); } } void st_s16trace(const char* const tag, const int16_t value, const bool skip_time) { if (s_enabled == true) { tracebuffer_add(skip_time ? 0UL : st_timestamp(), st_v2, (uint32_t)value, 0U, tag); } } void st_s32trace(const char* const tag, const int32_t value, const bool skip_time) { if (s_enabled == true) { tracebuffer_add(skip_time ? 0UL : st_timestamp(), st_v4, (uint32_t)value, 0U, tag); } } void st_s64trace(const char* const tag, const int64_t value, const bool skip_time) { if (s_enabled == true) { uint32_t ts = skip_time ? 0UL : st_timestamp(); tracebuffer_add(ts, st_v8, (uint32_t)(value & 0xFFFFFFFF), 0U, tag); tracebuffer_add(ts, st_v8, (uint32_t)(value >> 32), 1U, tag); } } void st_au8trace(const char* const tag, const uint8_t value[const], const uint8_t size, const bool skip_time) { if (s_enabled == true) { const size_t ts = skip_time ? 0UL : st_timestamp(); for (size_t i = 0; i < size; ++i) { tracebuffer_add(ts, st_a1, (uint32_t)value[i], (uint8_t)i, tag); } } } void st_au16trace(const char* const tag, const uint16_t value[const], const uint8_t size, const bool skip_time) { if (s_enabled == true) { const size_t ts = skip_time ? 0UL : st_timestamp(); for (size_t i = 0; i < size; ++i) { tracebuffer_add(ts, st_a2, (uint32_t)value[i], (uint8_t)i, tag); } } } void st_au32trace(const char* const tag, const uint32_t value[const], const uint8_t size, const bool skip_time) { if (s_enabled == true) { const size_t ts = skip_time ? 0UL : st_timestamp(); for (size_t i = 0; i < size; ++i) { uint16_t sub = (i) | (size << 8); tracebuffer_add(ts, st_a4, (uint32_t)value[i], (uint8_t)i, tag); } } } void st_as8trace(const char* const tag, const int8_t value[const], const uint8_t size, const bool skip_time) { if (s_enabled == true) { const size_t ts = skip_time ? 0UL : st_timestamp(); for (size_t i = 0; i < size; ++i) { tracebuffer_add(ts, st_a1, (uint32_t)value[i], (uint8_t)i, tag); } } } void st_as16trace(const char* const tag, const int16_t value[const], const uint8_t size, const bool skip_time) { if (s_enabled == true) { const size_t ts = skip_time ? 0UL : st_timestamp(); for (size_t i = 0; i < size; ++i) { tracebuffer_add(ts, st_a2, (uint32_t)value[i], (uint8_t)i, tag); } } } void st_as32trace(const char* const tag, const int32_t value[const], const uint8_t size, const bool skip_time) { if (s_enabled == true) { const size_t ts = skip_time ? 0UL : st_timestamp(); for (size_t i = 0; i < size; ++i) { uint16_t sub = (i) | (size << 8); tracebuffer_add(ts, st_a4, (uint32_t)value[i], (uint8_t)i, tag); } } } void st_strtrace(const char* const tag, const char* const str, const bool skip_time) { if (s_enabled == true) { const size_t ts = skip_time ? 0UL : st_timestamp(); uint32_t packword = 0; uint8_t packindex = 0; for (size_t i = 0; str[i] != '\0'; ++i) { packindex = (i % 4); packword |= (str[i] << (8 * packindex)); if (packindex == 3) { tracebuffer_add(ts, st_s4, packword, (uint8_t)str[i + 1] == '\0', tag); packword = 0; } } if (packindex != 3) { tracebuffer_add(ts, st_s4, packword, (uint8_t)true, tag); } } } #if (!ST_FLOAT_DISABLE) void st_f32trace(const char* const tag, const float value, const bool skip_time) { st_convert_t converter = { .f32value = value }; if (s_enabled == true) { tracebuffer_add(skip_time ? 0UL : st_timestamp(), st_f4, converter.u32value, 0U, tag); } } void st_f64trace(const char* const tag, const double value, const bool skip_time) { st_convert_t converter = { .f64value = value }; if (s_enabled == true) { uint32_t ts = skip_time ? 0UL : st_timestamp(); tracebuffer_add(ts, st_f8, (uint32_t)(converter.u64value & 0xFFFFFFFF), 0U, tag); tracebuffer_add(ts, st_f8, (uint32_t)(converter.u64value >> 32), 1U, tag); } } #endif static size_t frame_preamble(char buffer[], const size_t buffer_size) { size_t pack_pos = 0; if (buffer_size >= 3) { buffer[pack_pos++] = '\033'; buffer[pack_pos++] = '['; buffer[pack_pos++] = 's'; } return pack_pos; } static size_t frame_epilouge(char buffer[], const size_t buffer_size) { size_t pack_pos = 0; if (buffer_size >= 3) { buffer[pack_pos++] = '\033'; buffer[pack_pos++] = '['; buffer[pack_pos++] = 'u'; } return pack_pos; } static size_t pack_frame(char packet_bytes[], const st_trace_t* const p_packet) { size_t packet_size = 0; packet_bytes[packet_size++] = (char)p_packet->m_type; switch ((st_type_t)p_packet->m_type) { case st_v1: case st_a1: case st_d1: packet_bytes[packet_size++] = ST_BYTE(p_packet->m_value, 0); break; case st_v2: case st_a2: case st_d2: packet_bytes[packet_size++] = ST_BYTE(p_packet->m_value, 0); packet_bytes[packet_size++] = ST_BYTE(p_packet->m_value, 1); break; case st_v4: case st_a4: case st_s4: case st_f4: case st_d4: case st_v8: case st_f8: packet_bytes[packet_size++] = ST_BYTE(p_packet->m_value, 0); packet_bytes[packet_size++] = ST_BYTE(p_packet->m_value, 1); packet_bytes[packet_size++] = ST_BYTE(p_packet->m_value, 2); packet_bytes[packet_size++] = ST_BYTE(p_packet->m_value, 3); break; } switch ((st_type_t)p_packet->m_type) { case st_a1: case st_a2: case st_a4: case st_s4: case st_v8: case st_f8: packet_bytes[packet_size++] = (char)p_packet->m_sub; break; } switch ((st_type_t)p_packet->m_type) { case st_d1: case st_d2: case st_d4: break; default: const uint16_t hash_tag = bsd_hash(p_packet->m_tag, ST_MAX_TAG_LEN); packet_bytes[packet_size++] = ST_BYTE(hash_tag, 0); packet_bytes[packet_size++] = ST_BYTE(hash_tag, 1); break; } return packet_size; } static size_t ttyize_payload(char packet_buffer[], const size_t packet_size, const size_t buffer_size) { const size_t tty_packet_size = (packet_size * 2); size_t backpos = 0; char value = 0; if (tty_packet_size > buffer_size) return 0; for (size_t i = 0; i < packet_size; ++i) { value = packet_buffer[(packet_size - i - 1)]; backpos = (packet_size - i - 1) * 2; packet_buffer[backpos] = ST_ENCODE(value, 0); packet_buffer[backpos + 1] = ST_ENCODE(value, 4); } return tty_packet_size; } static uint16_t bsd_hash(const char* const str, size_t n) { uint32_t checksum = 0; if (str == NULL) return 0U; #if (ST_HASHCACHE_LINES > 0) for (size_t i = 0; i < ST_HASHCACHE_LINES; ++i) { size_t hash_index = (i + s_hashcache_index) % ST_HASHCACHE_LINES; if (s_hashcache[hash_index].m_tag == str) { return s_hashcache[hash_index].m_hash; } } #endif for (size_t i = 0; (i < n) && (str[i] != '\0'); ++i) { checksum = (checksum >> 1) + ((checksum & 1) << 15); checksum += str[i]; checksum &= 0xFFFF; } #if (ST_HASHCACHE_LINES > 0) s_hashcache_index = (s_hashcache_index + 1) % ST_HASHCACHE_LINES; s_hashcache[s_hashcache_index].m_hash = checksum; s_hashcache[s_hashcache_index].m_tag = str; #endif return checksum; } static bool populate_time_delta_packet(const uint32_t timestamp, st_trace_t* const p_trace) { const uint32_t dt = timestamp - s_last_time; uint16_t type = 0; if (timestamp == 0UL) return false; if (dt != 0) { if (dt > 0xFFFF) { type = st_d4; } else if (dt > 0xFF) { type = st_d2; } else { type = st_d1; } p_trace->m_type = type; p_trace->m_value = dt; p_trace->m_sub = 0U; p_trace->m_tag = NULL; p_trace->m_timestamp = 0UL; s_last_time = timestamp; return true; } return false; } static void tracebuffer_add(const uint32_t timestamp, const st_type_t type, const uint32_t value, const uint8_t sub, char const *tag) { const size_t insertion_pos = s_tracebuffer_head; uint32_t critical_h = 0UL; if (s_tracebuffer_full == false || s_policy == st_overwrite) { st_crit_on(&critical_h); s_tracebuffer_head = (s_tracebuffer_head + 1) % s_tracebuffer_size; s_tracebuffer_full = (s_tracebuffer_head == s_tracebuffer_tail); s_tracebuffer_items++; st_crit_off(critical_h); s_tracebuffer[insertion_pos].m_type = (uint8_t)type; s_tracebuffer[insertion_pos].m_value = value; s_tracebuffer[insertion_pos].m_sub = sub; s_tracebuffer[insertion_pos].m_tag = tag; s_tracebuffer[insertion_pos].m_timestamp = timestamp; } } size_t cobs_encode(uint8_t* dst, size_t dst_buf_len, const uint8_t* ptr, int len, uint8_t delim) { size_t encode_pos = 1; size_t ins = 0; uint8_t code = 1; bool add_last_code = true; size_t dst_size = 0; for (size_t i = 0; i < len; ++i) { uint8_t byte = ptr[i]; if (byte != 0) { dst[encode_pos] = byte ^ delim; encode_pos++; code = code + 1; } add_last_code = true; if ((byte == 0) || (code == 0xFF)) { if (code == 0xFF) add_last_code = false; dst[ins] = code ^ delim; code = 1; ins = encode_pos; dst[encode_pos] = 0xFF ^ delim; encode_pos++; } } if (add_last_code) { dst[ins] = code ^ delim; dst[encode_pos] = delim; encode_pos++; } else { dst[ins] = delim; } encode_pos--; return encode_pos; } static uint32_t flz_readu32(const void* ptr) { const uint8_t* p = (const uint8_t*)ptr; return (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0]; } static uint16_t flz_hash(uint32_t v) { uint32_t h = (v * 2654435769LL) >> (32 - FASTLZ1_HASH_LOG); return h & FASTLZ1_HASH_MASK; } static void st_memcpy(uint8_t* dest, const uint8_t* src, uint32_t count) { for(size_t i = 0; i < count; ++i) { dest[i] = src[i]; } } static uint32_t flz_cmp(const uint8_t* p, const uint8_t* q, const uint8_t* r) { const uint8_t* start = p; while (q < r) if (*p++ != *q++) break; return p - start; } static uint8_t* flz_literals(uint32_t runs, const uint8_t* src, uint8_t* dest) { while (runs >= FASTLZ1_MAX_COPY) { *dest++ = FASTLZ1_MAX_COPY - 1; st_memcpy(dest, src, FASTLZ1_MAX_COPY); src += FASTLZ1_MAX_COPY; dest += FASTLZ1_MAX_COPY; runs -= FASTLZ1_MAX_COPY; } if (runs > 0) { *dest++ = runs - 1; st_memcpy(dest, src, runs); dest += runs; } return dest; } static uint8_t* flz1_match(uint32_t len, uint32_t distance, uint8_t* op) { --distance; if (len > FASTLZ1_MAX_LEN - 2) while (len > FASTLZ1_MAX_LEN - 2) { *op++ = (7 << 5) + (distance >> 8); *op++ = FASTLZ1_MAX_LEN - 2 - 7 - 2; *op++ = (distance & 255); len -= FASTLZ1_MAX_LEN - 2; } if (len < 7) { *op++ = (len << 5) + (distance >> 8); *op++ = (distance & 255); } else { *op++ = (7 << 5) + (distance >> 8); *op++ = len - 7; *op++ = (distance & 255); } return op; } static int fastlz1_compress(const void* input, int length, void* output) { const uint8_t* ip = (const uint8_t*)input; const uint8_t* ip_start = ip; const uint8_t* ip_bound = ip + length - 4; /* because readU32 */ const uint8_t* ip_limit = ip + length - 12 - 1; uint8_t* op = (uint8_t*)output; uint32_t htab[FASTLZ1_HASH_SIZE]; uint32_t seq, hash; /* initializes hash table */ for (hash = 0; hash < FASTLZ1_HASH_SIZE; ++hash) htab[hash] = 0; /* we start with literal copy */ const uint8_t* anchor = ip; ip += 2; /* main loop */ while (ip < ip_limit) { const uint8_t* ref; uint32_t distance, cmp; /* find potential match */ do { seq = flz_readu32(ip) & 0xffffff; hash = flz_hash(seq); ref = ip_start + htab[hash]; htab[hash] = ip - ip_start; distance = ip - ref; cmp = (distance < FASTLZ1_MAX_L1_DISTANCE) ? flz_readu32(ref) & 0xffffff : 0x1000000; if (ip >= ip_limit) break; ++ip; } while (seq != cmp); if (ip >= ip_limit) break; --ip; if (ip > anchor) { op = flz_literals(ip - anchor, anchor, op); } uint32_t len = flz_cmp(ref + 3, ip + 3, ip_bound); op = flz1_match(len, distance, op); /* update the hash at match boundary */ ip += len; seq = flz_readu32(ip); hash = flz_hash(seq & 0xffffff); htab[hash] = ip++ - ip_start; seq >>= 8; hash = flz_hash(seq); htab[hash] = ip++ - ip_start; anchor = ip; } uint32_t copy = (uint8_t*)input + length - anchor; op = flz_literals(copy, anchor, op); return op - (uint8_t*)output; }