/* Written by Krzysztof Kowalczyk (http://blog.kowalczyk.info) The author disclaims copyright to this source code. */ /* The most basic things, including string handling functions */ #include "base_util.h" #include "str_util.h" #include "str_strsafe.h" /* TODO: should probably be based on MSVC version */ #if defined(__GNUC__) || !defined(_WIN32) || (_MSC_VER < 1400) void strcpy_s(char *dst, size_t dstLen, const char *src) { size_t toCopy; assert(dst); assert(src); assert(dstLen > 0); if (!dst || !src || dstLen <= 0) return; toCopy = strlen(src); if (toCopy > (dstLen-1)) toCopy = dstLen - 1; strncpy(dst, src, toCopy); dst[toCopy] = 0; } #endif void no_op(void) { /* This really is a no-op, just to silence the compiler */ } int char_is_ws_or_zero(char c) { switch (c) { case ' ': case '\t': case '\r': case '\n': case 0: return TRUE; } return FALSE; } int char_is_ws(char c) { switch (c) { case ' ': case '\t': case '\r': case '\n': return TRUE; } return FALSE; } int char_is_digit(char c) { if ((c >= '0') && (c <= '9')) return TRUE; return FALSE; } /* Concatenate 4 strings. Any string can be NULL. Caller needs to free() memory. */ char *str_cat4(const char *str1, const char *str2, const char *str3, const char *str4) { char *str; char *tmp; size_t str1_len = 0; size_t str2_len = 0; size_t str3_len = 0; size_t str4_len = 0; if (str1) str1_len = strlen(str1); if (str2) str2_len = strlen(str2); if (str3) str3_len = strlen(str3); if (str4) str4_len = strlen(str4); str = (char*)zmalloc(str1_len + str2_len + str3_len + str4_len + 1); if (!str) return NULL; tmp = str; if (str1) { memcpy(tmp, str1, str1_len); tmp += str1_len; } if (str2) { memcpy(tmp, str2, str2_len); tmp += str2_len; } if (str3) { memcpy(tmp, str3, str3_len); tmp += str3_len; } if (str4) { memcpy(tmp, str4, str1_len); } return str; } /* Concatenate 3 strings. Any string can be NULL. Caller needs to free() memory. */ char *str_cat3(const char *str1, const char *str2, const char *str3) { return str_cat4(str1, str2, str3, NULL); } /* Concatenate 2 strings. Any string can be NULL. Caller needs to free() memory. */ char *str_cat(const char *str1, const char *str2) { return str_cat4(str1, str2, NULL, NULL); } char *str_dup(const char *str) { return str_cat4(str, NULL, NULL, NULL); } char *str_dupn(const char *str, size_t str_len_cch) { char *copy; if (!str) return NULL; copy = (char*)malloc(str_len_cch+1); if (!copy) return NULL; memcpy(copy, str, str_len_cch); copy[str_len_cch] = 0; return copy; } int str_copyn(char *dst, size_t dst_cch_size, const char *src, size_t src_cch_size) { char *end = dst + dst_cch_size - 1; if (0 == dst_cch_size) { if (0 == src_cch_size) return TRUE; else return FALSE; } while ((dst < end) && (src_cch_size > 0)) { *dst++ = *src++; --src_cch_size; } *dst = 0; if (0 == src_cch_size) return TRUE; else return FALSE; } int str_copy(char *dst, size_t dst_cch_size, const char *src) { char *end = dst + dst_cch_size - 1; if (0 == dst_cch_size) return FALSE; while ((dst < end) && *src) { *dst++ = *src++; } *dst = 0; if (0 == *src) return TRUE; else return FALSE; } int str_eq(const char *str1, const char *str2) { if (!str1 && !str2) return TRUE; if (!str1 || !str2) return FALSE; if (0 == strcmp(str1, str2)) return TRUE; return FALSE; } int str_ieq(const char *str1, const char *str2) { if (!str1 && !str2) return TRUE; if (!str1 || !str2) return FALSE; if (0 == _stricmp(str1, str2)) return TRUE; return FALSE; } int str_eqn(const char *str1, const char *str2, int len) { if (!str1 && !str2) return TRUE; if (!str1 || !str2) return FALSE; if (0 == strncmp(str1, str2, len)) return TRUE; return FALSE; } /* return true if 'str' starts with 'txt', case-sensitive */ int str_startswith(const char *str, const char *txt) { if (!str && !txt) return TRUE; if (!str || !txt) return FALSE; if (0 == strncmp(str, txt, strlen(txt))) return TRUE; return FALSE; } /* return true if 'str' starts with 'txt', NOT case-sensitive */ int str_startswithi(const char *str, const char *txt) { if (!str && !txt) return TRUE; if (!str || !txt) return FALSE; if (0 == _strnicmp(str, txt, strlen(txt))) return TRUE; return FALSE; } int str_endswith(const char *txt, const char *end) { size_t end_len; size_t txt_len; if (!txt || !end) return FALSE; txt_len = strlen(txt); end_len = strlen(end); if (end_len > txt_len) return FALSE; if (str_eq(txt+txt_len-end_len, end)) return TRUE; return FALSE; } int str_endswithi(const char *txt, const char *end) { size_t end_len; size_t txt_len; if (!txt || !end) return FALSE; txt_len = strlen(txt); end_len = strlen(end); if (end_len > txt_len) return FALSE; if (str_ieq(txt+txt_len-end_len, end)) return TRUE; return FALSE; } int str_endswith_char(const char *str, char c) { char end[2]; end[0] = c; end[1] = 0; return str_endswith(str, end); } int str_empty(const char *str) { if (!str) return TRUE; if (0 == *str) return TRUE; return FALSE; } /* Find character 'c' in string 'txt'. Return pointer to this character or NULL if not found */ const char *str_find_char(const char *txt, char c) { while (*txt != c) { if (0 == *txt) return NULL; ++txt; } return txt; } /* split a string '*txt' at the border character 'c'. Something like python's string.split() except called iteratively. Returns a copy of the string (must be free()d by the caller). Returns NULL to indicate there's no more items. */ char *str_split_iter(char **txt, char c) { const char *tmp; const char *pos; char *result; tmp = (const char*)*txt; if (!tmp) return NULL; pos = str_find_char(tmp, c); if (pos) { result = str_dupn(tmp, (int)(pos-tmp)); *txt = (char*)pos+1; } else { result = str_dup(tmp); *txt = NULL; /* next iteration will return NULL */ } return result; } /* Replace all posible versions (Unix, Windows, Mac) of newline character with 'replace'. Returns newly allocated string with normalized newlines or NULL if error. Caller needs to free() the result */ char *str_normalize_newline(const char *txt, const char *replace) { size_t replace_len; char c; char * result; const char * tmp; char * tmp_out; size_t result_len = 0; replace_len = strlen(replace); tmp = txt; for (;;) { c = *tmp++; if (!c) break; if (0xa == c) { /* a single 0xa => Unix */ result_len += replace_len; } else if (0xd == c) { if (0xa == *tmp) { /* 0xd 0xa => dos */ result_len += replace_len; ++tmp; } else { /* just 0xd => Mac */ result_len += replace_len; } } else ++result_len; } if (0 == result_len) return NULL; result = (char*)malloc(result_len+1); if (!result) return NULL; tmp_out = result; for (;;) { c = *txt++; if (!c) break; if (0xa == c) { /* a single 0xa => Unix */ memcpy(tmp_out, replace, replace_len); tmp_out += replace_len; } else if (0xd == c) { if (0xa == *txt) { /* 0xd 0xa => dos */ memcpy(tmp_out, replace, replace_len); tmp_out += replace_len; ++txt; } else { /* just 0xd => Mac */ memcpy(tmp_out, replace, replace_len); tmp_out += replace_len; } } else *tmp_out++ = c; } *tmp_out = 0; return result; } #define WHITE_SPACE_CHARS " \n\t\r" /* Strip all 'to_strip' characters from the beginning of the string. Does stripping in-place */ void str_strip_left(char *txt, const char *to_strip) { char *new_start = txt; char c; if (!txt || !to_strip) return; for (;;) { c = *new_start; if (0 == c) break; if (!str_contains(to_strip, c)) break; ++new_start; } if (new_start != txt) { memmove(txt, new_start, strlen(new_start)+1); } } /* Strip white-space characters from the beginning of the string. Does stripping in-place */ void str_strip_ws_left(char *txt) { str_strip_left(txt, WHITE_SPACE_CHARS); } void str_strip_right(char *txt, const char *to_strip) { char * new_end; char c; if (!txt || !to_strip) return; if (0 == *txt) return; /* point at the last character in the string */ new_end = txt + strlen(txt) - 1; for (;;) { c = *new_end; if (!str_contains(to_strip, c)) break; if (txt == new_end) break; --new_end; } if (str_contains(to_strip, *new_end)) new_end[0] = 0; else new_end[1] = 0; } void str_strip_ws_right(char *txt) { str_strip_right(txt, WHITE_SPACE_CHARS); } void str_strip_both(char *txt, const char *to_strip) { str_strip_left(txt, to_strip); str_strip_right(txt, to_strip); } void str_strip_ws_both(char *txt) { str_strip_ws_left(txt); str_strip_ws_right(txt); } #if 0 int utf8_eq(const utf8* str1, const utf8* str2) { return str_eq(str1, str2); } int utf8_eqn(const utf8* str1, const utf8* str2, int len) { return str_eqn(str1, str2, len); } int utf8_copy(utf8 *dst, int dst_size_bytes, utf8* src) { return str_copy(dst, dst_size_bytes, src); } utf8 *utf8_dup(const utf8 *str) { return str_dup(str); } utf8 *utf8_cat4(const utf8 *str1, const utf8 *str2, const utf8 *str3, const utf8 *str4) { return str_cat4(str1, str2, str3, str4); } utf8 *utf8_cat3(const utf8 *str1, const utf8 *str2, const utf8 *str3) { return str_cat4(str1, str2, str3, NULL); } utf8 *utf8_cat(const utf8 *str1, const utf8 *str2) { return str_cat4(str1, str2, NULL, NULL); } int utf8_endswith(const utf8 *str, const utf8 *end) { return str_endswith(str, end); } #endif #define HEX_NUMBERS "0123456789ABCDEF" static void char_to_hex(unsigned char c, char* buffer) { buffer[0] = HEX_NUMBERS[c / 16]; buffer[1] = HEX_NUMBERS[c % 16]; } int str_contains(const char *str, char c) { const char *pos = str_find_char(str, c); if (!pos) return FALSE; return TRUE; } #define CHAR_URL_DONT_ENCODE "-_.!~*'()" int char_needs_url_encode(char c) { if ((c >= 'a') && (c <= 'z')) return FALSE; if ((c >= 'A') && (c <= 'Z')) return FALSE; if ((c >= '0') && (c <= '9')) return FALSE; if (str_contains(CHAR_URL_DONT_ENCODE, c)) return FALSE; return TRUE; } /* url-encode 'str'. Returns NULL in case of error. Caller needs to free() the result */ char *str_url_encode(const char *str) { char * encoded; char * result; int res_len = 0; const char * tmp = str; /* calc the size of the string after url encoding */ while (*tmp) { if (char_needs_url_encode(*tmp)) res_len += 3; else ++res_len; tmp++; } if (0 == res_len) return NULL; encoded = (char*)malloc(res_len+1); if (!encoded) return NULL; result = encoded; tmp = str; while (*tmp) { if (char_needs_url_encode(*tmp)) { *encoded++ = '%'; char_to_hex(*tmp, encoded); encoded += 2; } else { if (' ' == *tmp) *encoded++ = '+'; else *encoded++ = *tmp; } tmp++; } *encoded = 0; return result; } char *str_escape(const char *txt) { /* TODO: */ return str_dup(txt); } char *str_printf(const char *format, ...) { char *result; va_list args; va_start(args, format); result = str_printf_args(format, args); va_end(args); return result; } char *str_printf_args(const char *format, va_list args) { #ifdef _WIN32 HRESULT hr; char message[256]; char * buf; size_t bufCchSize; char * result = NULL; buf = &(message[0]); bufCchSize = sizeof(message); for (;;) { /* TODO: this only works on windows with recent C library */ hr = StringCchVPrintfA(buf, bufCchSize, format, args); if (S_OK == hr) break; if (STRSAFE_E_INSUFFICIENT_BUFFER != hr) { /* any error other than buffer not big enough: a) should not happen b) means we give up */ assert(FALSE); goto Error; } /* we have to make the buffer bigger. The algorithm used to calculate the new size is arbitrary (aka. educated guess) */ if (buf != &(message[0])) free(buf); if (bufCchSize < 4*1024) bufCchSize += bufCchSize; else bufCchSize += 1024; buf = (char *)malloc(bufCchSize*sizeof(char)); if (NULL == buf) goto Error; } /* free the buffer if it was dynamically allocated */ if (buf == &(message[0])) return str_dup(buf); return buf; Error: if (buf != &(message[0])) free((void*)buf); return NULL; #else char* buf; int len = vasprintf(&buf, format, args); return buf; #endif } #ifdef _WIN32 void win32_dbg_out(const char *format, ...) { char buf[4096]; char * p = buf; int written; va_list args; va_start(args, format); written = _vsnprintf(p,sizeof(buf), format, args); /* printf(buf); fflush(stdout); */ OutputDebugStringA(buf); va_end(args); } void win32_dbg_out_hex(const char *dsc, const unsigned char *data, int dataLen) { unsigned char buf[64+1]; unsigned char * curPos; int bufCharsLeft; if (dsc) win32_dbg_out(dsc); /* a bit dangerous if contains formatting codes */ if (!data) return; bufCharsLeft = sizeof(buf)-1; curPos = buf; while (dataLen > 0) { if (bufCharsLeft <= 1) { *curPos = 0; win32_dbg_out((char*)buf); bufCharsLeft = sizeof(buf)-1; curPos = buf; } char_to_hex(*data, curPos); curPos += 2; bufCharsLeft -= 2; --dataLen; ++data; } if (curPos != buf) { *curPos = 0; win32_dbg_out(buf); } win32_dbg_out("\n"); } #endif /* Given a pointer to a string in '*txt', skip past whitespace in the string and put the result in '*txt' */ void str_skip_ws(char **txtInOut) { char *cur; if (!txtInOut) return; cur = *txtInOut; if (!cur) return; while (char_is_ws(*cur)) { ++cur; } *txtInOut = cur; } char *str_parse_quoted(char **txt) { char * strStart; char * strCopy; char * cur; char * dst; char c; size_t len; assert(txt); if (!txt) return NULL; strStart = *txt; assert(strStart); if (!strStart) return NULL; assert('"' == *strStart); /* TODO: rewrite as 2-phase logic so that counting and copying are always in sync */ ++strStart; cur = strStart; len = 0; for (;;) { c = *cur; if ((0 == c) || ('"' == c)) break; if ('\\' == c) { /* TODO: should I un-escape more than '"' ? I used to un-escape '\' as well, but it wasn't right and files with UNC path like "\\foo\file.pdf" failed to load */ if ('"' == cur[1]) { ++cur; c = *cur; } } ++cur; ++len; } strCopy = (char*)malloc(len+1); if (!strCopy) return NULL; cur = strStart; dst = strCopy; for (;;) { c = *cur; if (0 == c) break; if ('"' == c) { ++cur; break; } if ('\\' == c) { /* TODO: should I un-escape more than '"' ? I used to un-escape '\' as well, but it wasn't right and files with UNC path like "\\foo\file.pdf" failed to load */ if ('"' == cur[1]) { ++cur; c = *cur; } } *dst++ = c; ++cur; } *dst = 0; *txt = cur; return strCopy; } char *str_parse_non_quoted(char **txt) { char * cur; char * strStart; char * strCopy; char c; size_t strLen; strStart = *txt; assert(strStart); if (!strStart) return NULL; assert('"' != *strStart); cur = strStart; for (;;) { c = *cur; if (char_is_ws_or_zero(c)) break; ++cur; } strLen = cur - strStart; assert(strLen > 0); strCopy = str_dupn(strStart, strLen); *txt = cur; return strCopy; } /* 'txt' is path that can be: - escaped, in which case it starts with '"', ends with '"' and each '"' that is part of the name is escaped with '\' - unescaped, in which case it start with != '"' and ends with ' ' or eol (0) This function extracts escaped or unescaped path from 'txt'. Returns NULL in case of error. Caller needs to free() the result. */ char *str_parse_possibly_quoted(char **txt) { char * cur; char * str_copy; if (!txt) return NULL; cur = *txt; if (!cur) return NULL; str_skip_ws(&cur); if (0 == *cur) return NULL; if ('"' == *cur) str_copy = str_parse_quoted(&cur); else str_copy = str_parse_non_quoted(&cur); *txt = cur; return str_copy; } void str_array_init(str_array *str_arr) { assert(str_arr); if (!str_arr) return; memzero(str_arr, sizeof(str_array)); } void str_array_free(str_array *str_arr) { int i; assert(str_arr); if (!str_arr) return; for (i = 0; i < str_arr->items_count; i++) free(str_arr->items[i]); free(str_arr->items); str_array_init(str_arr); } void str_array_delete(str_array *str_arr) { assert(str_arr); if (!str_arr) return; str_array_free(str_arr); free((void*)str_arr); } str_item *str_array_get(str_array *str_arr, int index) { assert(str_arr); if (!str_arr) return NULL; assert(index >= 0); assert(index < str_arr->items_count); if ((index < 0) || (index >= str_arr->items_count)) return NULL; return str_arr->items[index]; } int str_array_get_count(str_array *str_arr) { assert(str_arr); if (!str_arr) return 0; return str_arr->items_count; } /* Set one string at position 'index' in 'str_arr'. Space for the item must already be allocated. */ str_item *str_array_set(str_array *str_arr, int index, const char *str) { str_item * new_item; size_t str_len_cch; assert(str_arr); if (!str_arr) return NULL; if (index >= str_arr->items_count) return NULL; str_len_cch = str_len(str); new_item = (str_item*)malloc(sizeof(str_item) + str_len_cch*sizeof(char)); if (!new_item) return NULL; str_copy(new_item->str, str_len_cch+1, str); if (str_arr->items[index]) free(str_arr->items[index]); str_arr->items[index] = new_item; return new_item; } #define STR_ARR_GROW_VALUE 32 /* make a generic array alloc */ str_item *str_array_add(str_array *str_arr, const char *str) { str_item ** tmp; str_item * new_item; void * data; int n; if (str_arr->items_count >= str_arr->items_allocated) { /* increase memory for items if necessary */ n = str_arr->items_allocated + STR_ARR_GROW_VALUE; tmp = (str_item**)realloc(str_arr->items, n * sizeof(str_item *)); if (!tmp) return NULL; str_arr->items = tmp; data = &(str_arr->items[str_arr->items_count]); memzero(data, STR_ARR_GROW_VALUE * sizeof(str_item *)); str_arr->items_allocated = n; } str_arr->items_count++; new_item = str_array_set(str_arr, str_arr->items_count - 1, str); if (!new_item) --str_arr->items_count; return new_item; } int str_array_exists_no_case(str_array *str_arr, const char *str) { int count, i; str_item * item; char * item_str; if (!str_arr || !str) return FALSE; count = str_arr->items_count; for (i = 0; i < count; i++) { item = str_arr->items[i]; item_str = item->str; if (str_ieq(str, item_str)) return TRUE; } return FALSE; } str_item *str_array_add_no_dups(str_array *str_arr, const char *str) { if (str_array_exists_no_case(str_arr, str)) return NULL; return str_array_add(str_arr, str); }