reactos/rosapps/smartpdf/baseutils/str_util.c
Daniel Reimer a7fddf9c07 Delete all Trailing spaces in code.
svn path=/trunk/; revision=29689
2007-10-19 23:05:02 +00:00

971 lines
21 KiB
C

/* Written by Krzysztof Kowalczyk (http://blog.kowalczyk.info)
The author disclaims copyright to this source code. */
/* The most basic things, including string handling functions */
#include "base_util.h"
#include "str_util.h"
#include "str_strsafe.h"
/* TODO: should probably be based on MSVC version */
#if defined(__GNUC__) || !defined(_WIN32) || (_MSC_VER < 1400)
/* Fallback strcpy_s(), compiled in by the surrounding #if.
   Copies 'src' into 'dst', truncating so that the result (terminator
   included) fits in 'dstLen' bytes. 'dst' is always NUL-terminated when the
   arguments are valid. A NULL pointer or a zero-sized buffer trips an assert
   and, if asserts are compiled out, makes the function return without
   touching 'dst'. */
void strcpy_s(char *dst, size_t dstLen, const char *src)
{
    size_t srcLen;
    size_t n;

    assert(dst);
    assert(src);
    assert(dstLen > 0);
    if (!dst || !src || (0 == dstLen))
        return;

    srcLen = strlen(src);
    /* keep one byte for the terminator */
    n = (srcLen < dstLen) ? srcLen : dstLen - 1;
    memcpy(dst, src, n);
    dst[n] = 0;
}
#endif
/* Deliberately empty function; it exists only to silence the compiler. */
void no_op(void)
{
    return;
}
/* Return TRUE if 'c' is a space, tab, carriage return, newline or the
   NUL terminator; FALSE otherwise. */
int char_is_ws_or_zero(char c)
{
    if ((' ' == c) || ('\t' == c) || ('\r' == c) || ('\n' == c) || (0 == c))
        return TRUE;
    return FALSE;
}
/* Return TRUE if 'c' is a space, tab, carriage return or newline;
   FALSE otherwise (including for the NUL terminator). */
int char_is_ws(char c)
{
    if ((' ' == c) || ('\t' == c) || ('\r' == c) || ('\n' == c))
        return TRUE;
    return FALSE;
}
/* Return TRUE if 'c' is one of the characters '0'..'9', FALSE otherwise. */
int char_is_digit(char c)
{
    return ((c < '0') || (c > '9')) ? FALSE : TRUE;
}
/* Concatenate up to 4 strings. Any of them can be NULL, in which case it
   contributes nothing to the result.
   Returns a newly allocated, NUL-terminated string, or NULL if the
   allocation fails. Caller needs to free() the result. */
char *str_cat4(const char *str1, const char *str2, const char *str3, const char *str4)
{
    const char *parts[4];
    size_t      lens[4];
    size_t      total = 0;
    char       *str;
    char       *tmp;
    int         i;

    parts[0] = str1;
    parts[1] = str2;
    parts[2] = str3;
    parts[3] = str4;

    for (i = 0; i < 4; i++) {
        lens[i] = parts[i] ? strlen(parts[i]) : 0;
        total += lens[i];
    }

    str = (char*)zmalloc(total + 1);
    if (!str)
        return NULL;

    tmp = str;
    for (i = 0; i < 4; i++) {
        if (0 == lens[i])
            continue;
        /* each part is copied with its OWN length; the fourth part used to
           be copied with the length of the first one */
        memcpy(tmp, parts[i], lens[i]);
        tmp += lens[i];
    }
    /* terminate explicitly instead of relying on the allocator's contents */
    *tmp = 0;
    return str;
}
/* Concatenate 3 strings by delegating to str_cat4() with no fourth part.
   Any string can be NULL. Caller needs to free() the result. */
char *str_cat3(const char *str1, const char *str2, const char *str3)
{
    const char *no_str4 = NULL;

    return str_cat4(str1, str2, str3, no_str4);
}
/* Concatenate 2 strings by delegating to str_cat4() with the last two
   parts missing. Any string can be NULL. Caller needs to free() the
   result. */
char *str_cat(const char *str1, const char *str2)
{
    const char *unused = NULL;

    return str_cat4(str1, str2, unused, unused);
}
/* Duplicate 'str' by handing it to str_cat4() as the only part.
   Caller needs to free() the result. */
char *str_dup(const char *str)
{
    const char *unused = NULL;

    return str_cat4(str, unused, unused, unused);
}
/* Duplicate the first 'str_len_cch' characters of 'str' into a freshly
   malloc()ed, NUL-terminated buffer.
   Returns NULL if 'str' is NULL or the allocation fails.
   Caller needs to free() the result. */
char *str_dupn(const char *str, size_t str_len_cch)
{
    char *dup = NULL;

    if (str)
        dup = (char*)malloc(str_len_cch + 1);
    if (dup) {
        memcpy(dup, str, str_len_cch);
        dup[str_len_cch] = 0;
    }
    return dup;
}
/* Copy at most 'src_cch_size' characters from 'src' into 'dst', which has
   room for 'dst_cch_size' characters including the terminator. 'dst' is
   always NUL-terminated unless 'dst_cch_size' is 0.
   Returns TRUE if all 'src_cch_size' characters were copied, FALSE if the
   copy was truncated. With a zero-sized destination nothing is written and
   the result is TRUE only when there was nothing to copy. */
int str_copyn(char *dst, size_t dst_cch_size, const char *src, size_t src_cch_size)
{
    char *end;

    if (0 == dst_cch_size)
        return (0 == src_cch_size) ? TRUE : FALSE;

    /* computed only after the size check: with a size of 0 this would form
       a pointer before the start of 'dst' */
    end = dst + (dst_cch_size - 1);
    while ((dst < end) && (src_cch_size > 0)) {
        *dst++ = *src++;
        --src_cch_size;
    }
    *dst = 0;
    return (0 == src_cch_size) ? TRUE : FALSE;
}
/* Copy the NUL-terminated string 'src' into 'dst', which has room for
   'dst_cch_size' characters including the terminator. 'dst' is always
   NUL-terminated unless 'dst_cch_size' is 0.
   Returns TRUE if the whole string fit, FALSE if it was truncated or the
   destination has zero size. */
int str_copy(char *dst, size_t dst_cch_size, const char *src)
{
    char *end;

    if (0 == dst_cch_size)
        return FALSE;

    /* computed only after the size check: with a size of 0 this would form
       a pointer before the start of 'dst' */
    end = dst + (dst_cch_size - 1);
    while ((dst < end) && *src)
        *dst++ = *src++;
    *dst = 0;
    return (0 == *src) ? TRUE : FALSE;
}
/* Case-sensitive string equality. Two NULL pointers compare equal; a NULL
   and a non-NULL pointer do not. Returns TRUE or FALSE. */
int str_eq(const char *str1, const char *str2)
{
    if (!str1 || !str2)
        return (!str1 && !str2) ? TRUE : FALSE;
    return (0 == strcmp(str1, str2)) ? TRUE : FALSE;
}
/* String equality using _stricmp() (i.e. ignoring case). Two NULL pointers
   compare equal; a NULL and a non-NULL pointer do not. Returns TRUE or
   FALSE. */
int str_ieq(const char *str1, const char *str2)
{
    if (!str1 || !str2)
        return (!str1 && !str2) ? TRUE : FALSE;
    return (0 == _stricmp(str1, str2)) ? TRUE : FALSE;
}
/* Case-sensitive comparison of at most 'len' characters via strncmp().
   Two NULL pointers compare equal; a NULL and a non-NULL pointer do not.
   Returns TRUE or FALSE.
   NOTE(review): 'len' is an int that gets converted to strncmp()'s size_t
   parameter; callers presumably never pass a negative value - confirm. */
int str_eqn(const char *str1, const char *str2, int len)
{
    if (!str1 || !str2)
        return (!str1 && !str2) ? TRUE : FALSE;
    return (0 == strncmp(str1, str2, len)) ? TRUE : FALSE;
}
/* Return TRUE if 'str' starts with 'txt' (case-sensitive), FALSE otherwise.
   Two NULL pointers count as a match; a single NULL does not. */
int str_startswith(const char *str, const char *txt)
{
    size_t prefix_len;

    if (!str || !txt)
        return (!str && !txt) ? TRUE : FALSE;
    prefix_len = strlen(txt);
    return (0 == strncmp(str, txt, prefix_len)) ? TRUE : FALSE;
}
/* Return TRUE if 'str' starts with 'txt', compared with _strnicmp()
   (NOT case-sensitive), FALSE otherwise.
   Two NULL pointers count as a match; a single NULL does not. */
int str_startswithi(const char *str, const char *txt)
{
    size_t prefix_len;

    if (!str || !txt)
        return (!str && !txt) ? TRUE : FALSE;
    prefix_len = strlen(txt);
    return (0 == _strnicmp(str, txt, prefix_len)) ? TRUE : FALSE;
}
/* Return TRUE if 'txt' ends with 'end' (case-sensitive), FALSE otherwise.
   A NULL 'txt' or 'end' yields FALSE. */
int str_endswith(const char *txt, const char *end)
{
    size_t txt_len;
    size_t end_len;

    if (!txt || !end)
        return FALSE;

    txt_len = strlen(txt);
    end_len = strlen(end);
    if (txt_len < end_len)
        return FALSE;
    /* compare the tail of 'txt' that has the same length as 'end' */
    return str_eq(txt + (txt_len - end_len), end) ? TRUE : FALSE;
}
/* Return TRUE if 'txt' ends with 'end', compared with str_ieq() (i.e. not
   case-sensitive), FALSE otherwise. A NULL 'txt' or 'end' yields FALSE. */
int str_endswithi(const char *txt, const char *end)
{
    size_t txt_len;
    size_t end_len;

    if (!txt || !end)
        return FALSE;

    txt_len = strlen(txt);
    end_len = strlen(end);
    if (txt_len < end_len)
        return FALSE;
    /* compare the tail of 'txt' that has the same length as 'end' */
    return str_ieq(txt + (txt_len - end_len), end) ? TRUE : FALSE;
}
/* Return the result of str_endswith() for 'str' and the one-character
   string made of 'c'. */
int str_endswith_char(const char *str, char c)
{
    char suffix[2];

    suffix[1] = 0;
    suffix[0] = c;
    return str_endswith(str, suffix);
}
/* Return TRUE if 'str' is NULL or points at an empty string, FALSE
   otherwise. */
int str_empty(const char *str)
{
    return (!str || (0 == *str)) ? TRUE : FALSE;
}
/* Find the first occurrence of character 'c' in string 'txt'.
   Returns a pointer to that character, or NULL if the end of the string is
   reached first. When 'c' is 0 the pointer to the terminator is returned.
   'txt' must not be NULL. */
const char *str_find_char(const char *txt, char c)
{
    const char *p;

    for (p = txt; *p != c; ++p) {
        if (0 == *p)
            return NULL;
    }
    return p;
}
/* Split the string '*txt' at the border character 'c'. Something like
   python's string.split() except called iteratively: each call returns the
   next piece and advances '*txt' just past the separator that ended it.
   After the last piece '*txt' is set to NULL, so the following call returns
   NULL to indicate there are no more items.
   Returns a copy of the piece (must be free()d by the caller), or NULL when
   'txt' or '*txt' is NULL. */
char *str_split_iter(char **txt, char c)
{
    const char *start;
    const char *pos;
    char       *result;

    if (!txt)
        return NULL;
    start = (const char*)*txt;
    if (!start)
        return NULL;

    pos = str_find_char(start, c);
    if (!pos) {
        /* no more separators: the rest of the string is the last piece */
        result = str_dup(start);
        *txt = NULL; /* next iteration will return NULL */
        return result;
    }

    /* pos >= start, so the difference is passed on as size_t without being
       narrowed to int first */
    result = str_dupn(start, (size_t)(pos - start));
    *txt = (char*)pos + 1;
    return result;
}
/* Replace all possible versions of a newline (Unix 0xa, Windows 0xd 0xa,
   Mac 0xd) in 'txt' with the string 'replace'.
   Works in two passes: the first one computes the size of the result, the
   second one writes it.
   Returns a newly allocated string with normalized newlines. Returns NULL
   if the allocation fails or if the result would be empty.
   Caller needs to free() the result. */
char *str_normalize_newline(const char *txt, const char *replace)
{
    const size_t    replace_len = strlen(replace);
    const char *    in;
    char *          out;
    char *          result;
    size_t          result_len = 0;

    /* pass 1: how many characters will the result have? */
    for (in = txt; *in; ++in) {
        if (0xd == *in) {
            /* 0xd 0xa (dos) counts as ONE newline, a lone 0xd is Mac */
            if (0xa == in[1])
                ++in;
            result_len += replace_len;
        } else if (0xa == *in) {
            /* a single 0xa => Unix */
            result_len += replace_len;
        } else {
            ++result_len;
        }
    }

    if (0 == result_len)
        return NULL;

    result = (char*)malloc(result_len + 1);
    if (!result)
        return NULL;

    /* pass 2: same walk, this time producing the output */
    out = result;
    for (in = txt; *in; ++in) {
        if (0xd == *in) {
            if (0xa == in[1])
                ++in;
            memcpy(out, replace, replace_len);
            out += replace_len;
        } else if (0xa == *in) {
            memcpy(out, replace, replace_len);
            out += replace_len;
        } else {
            *out++ = *in;
        }
    }
    *out = 0;
    return result;
}
#define WHITE_SPACE_CHARS " \n\t\r"
/* Remove from the beginning of 'txt' every leading character that appears
   in 'to_strip'. The stripping is done in-place by moving the remainder of
   the string (terminator included) to the front.
   Does nothing if 'txt' or 'to_strip' is NULL. */
void str_strip_left(char *txt, const char *to_strip)
{
    char *keep;

    if (!txt || !to_strip)
        return;

    /* find the first character that must stay */
    keep = txt;
    while ((0 != *keep) && str_contains(to_strip, *keep))
        ++keep;

    if (keep == txt)
        return;
    memmove(txt, keep, strlen(keep) + 1);
}
/* Strip the characters listed in WHITE_SPACE_CHARS from the beginning of
   the string. Does stripping in-place. */
void str_strip_ws_left(char *txt)
{
    const char *ws = WHITE_SPACE_CHARS;

    str_strip_left(txt, ws);
}
/* Remove from the end of 'txt' every trailing character that appears in
   'to_strip'. The stripping is done in-place by writing a new terminator.
   Does nothing if 'txt' or 'to_strip' is NULL, or if 'txt' is empty (an
   empty string is never written to). */
void str_strip_right(char *txt, const char *to_strip)
{
    size_t keep_len;

    if (!txt || !to_strip)
        return;
    if (0 == *txt)
        return;

    /* shrink the kept length while the last kept character is strippable */
    keep_len = strlen(txt);
    while ((keep_len > 0) && str_contains(to_strip, txt[keep_len - 1]))
        --keep_len;
    txt[keep_len] = 0;
}
/* Strip the characters listed in WHITE_SPACE_CHARS from the end of the
   string. Does stripping in-place. */
void str_strip_ws_right(char *txt)
{
    const char *ws = WHITE_SPACE_CHARS;

    str_strip_right(txt, ws);
}
/* Strip all 'to_strip' characters from both ends of 'txt', in-place:
   first the beginning of the string, then its end. */
void str_strip_both(char *txt, const char *to_strip)
{
    /* front first ... */
    str_strip_left(txt, to_strip);
    /* ... then the back */
    str_strip_right(txt, to_strip);
}
void str_strip_ws_both(char *txt)
{
str_strip_ws_left(txt);
str_strip_ws_right(txt);
}
/* The utf8_* functions below are excluded from compilation by '#if 0'.
   Each one simply forwards its arguments to the str_* function of the
   same name. */
#if 0
int utf8_eq(const utf8* str1, const utf8* str2)
{
return str_eq(str1, str2);
}
int utf8_eqn(const utf8* str1, const utf8* str2, int len)
{
return str_eqn(str1, str2, len);
}
int utf8_copy(utf8 *dst, int dst_size_bytes, utf8* src)
{
return str_copy(dst, dst_size_bytes, src);
}
utf8 *utf8_dup(const utf8 *str)
{
return str_dup(str);
}
utf8 *utf8_cat4(const utf8 *str1, const utf8 *str2, const utf8 *str3, const utf8 *str4)
{
return str_cat4(str1, str2, str3, str4);
}
utf8 *utf8_cat3(const utf8 *str1, const utf8 *str2, const utf8 *str3)
{
return str_cat4(str1, str2, str3, NULL);
}
utf8 *utf8_cat(const utf8 *str1, const utf8 *str2)
{
return str_cat4(str1, str2, NULL, NULL);
}
int utf8_endswith(const utf8 *str, const utf8 *end)
{
return str_endswith(str, end);
}
#endif
#define HEX_NUMBERS "0123456789ABCDEF"
/* Write the two upper-case hexadecimal digits of 'c' to buffer[0] (high
   nibble) and buffer[1] (low nibble). No terminator is written, so
   'buffer' needs room for exactly 2 characters. */
static void char_to_hex(unsigned char c, char* buffer)
{
    const char *digits = HEX_NUMBERS;

    buffer[0] = digits[c >> 4];
    buffer[1] = digits[c & 0x0F];
}
/* Return TRUE if str_find_char() locates character 'c' in 'str', FALSE
   otherwise. */
int str_contains(const char *str, char c)
{
    return str_find_char(str, c) ? TRUE : FALSE;
}
#define CHAR_URL_DONT_ENCODE "-_.!~*'()"
/* Return FALSE if 'c' can be put into a url as-is, i.e. it is an ASCII
   letter, a digit or one of the characters in CHAR_URL_DONT_ENCODE.
   Return TRUE for every other character (a space included). */
int char_needs_url_encode(char c)
{
    int is_lower = (c >= 'a') && (c <= 'z');
    int is_upper = (c >= 'A') && (c <= 'Z');
    int is_digit = (c >= '0') && (c <= '9');

    if (is_lower || is_upper || is_digit)
        return FALSE;
    return str_contains(CHAR_URL_DONT_ENCODE, c) ? FALSE : TRUE;
}
/* url-encode 'str': every character for which char_needs_url_encode()
   returns TRUE is written as '%' followed by its two hex digits, every
   other character is copied unchanged. A space is reported as needing
   encoding, so it always comes out as "%20".
   Returns NULL in case of error: 'str' is NULL, 'str' is empty, or the
   allocation fails. Caller needs to free() the result. */
char *str_url_encode(const char *str)
{
    const char *    src;
    char *          dst;
    char *          result;
    size_t          res_len = 0;

    if (!str)
        return NULL;

    /* calc the size of the string after url encoding */
    for (src = str; *src; ++src)
        res_len += char_needs_url_encode(*src) ? 3 : 1;

    if (0 == res_len)
        return NULL;

    result = (char*)malloc(res_len + 1);
    if (!result)
        return NULL;

    dst = result;
    for (src = str; *src; ++src) {
        if (char_needs_url_encode(*src)) {
            *dst++ = '%';
            char_to_hex((unsigned char)*src, dst);
            dst += 2;
        } else {
            /* the former "space becomes '+'" special case lived here but
               could never run, because a space always takes the branch
               above; it was removed without changing the output */
            *dst++ = *src;
        }
    }
    *dst = 0;
    return result;
}
/* TODO: escaping is not implemented yet; for now this only returns what
   str_dup() produces for 'txt'. */
char *str_escape(const char *txt)
{
    char *copy = str_dup(txt);

    return copy;
}
/* printf()-style formatting into a string: collects the variable
   arguments and hands them, together with 'format', to str_printf_args().
   Returns whatever str_printf_args() returns. */
char *str_printf(const char *format, ...)
{
    va_list ap;
    char *  formatted;

    va_start(ap, format);
    formatted = str_printf_args(format, ap);
    va_end(ap);
    return formatted;
}
/* Format 'format' with the arguments in 'args' into a newly allocated
   string. Returns NULL if formatting or an allocation fails.
   Caller needs to free() the result. */
char *str_printf_args(const char *format, va_list args)
{
#ifdef _WIN32
    HRESULT hr;
    char    message[256];
    char  * buf;
    size_t  bufCchSize;

    /* start with a buffer on the stack; switch to the heap only if the
       formatted text does not fit */
    buf = &(message[0]);
    bufCchSize = sizeof(message);

    for (;;)
    {
        /* TODO: this only works on windows with recent C library */
        /* NOTE(review): 'args' is handed to the formatter again on every
           retry; confirm that reusing a va_list this way is valid on all
           targeted toolchains */
        hr = StringCchVPrintfA(buf, bufCchSize, format, args);
        if (S_OK == hr)
            break;
        if (STRSAFE_E_INSUFFICIENT_BUFFER != hr)
        {
            /* any error other than buffer not big enough:
               a) should not happen
               b) means we give up */
            assert(FALSE);
            goto Error;
        }
        /* we have to make the buffer bigger. The algorithm used to calculate
           the new size is arbitrary (aka. educated guess) */
        if (buf != &(message[0]))
            free(buf);
        if (bufCchSize < 4*1024)
            bufCchSize += bufCchSize;
        else
            bufCchSize += 1024;
        buf = (char *)malloc(bufCchSize*sizeof(char));
        if (NULL == buf)
            goto Error;
    }

    /* the stack buffer cannot be returned, so hand out a heap copy of it;
       a dynamically allocated buffer is returned as-is */
    if (buf == &(message[0]))
        return str_dup(buf);
    return buf;
Error:
    if (buf != &(message[0]))
        free((void*)buf);
    return NULL;
#else
    char *buf = NULL;

    /* when vasprintf() fails the contents of 'buf' are not defined, so the
       return value has to be checked before 'buf' is handed out */
    if (vasprintf(&buf, format, args) < 0)
        return NULL;
    return buf;
#endif
}
#ifdef _WIN32
/* printf()-style output to the debugger via OutputDebugStringA().
   The formatted text is limited to the size of an internal 4096 byte
   buffer; anything longer is truncated. */
void win32_dbg_out(const char *format, ...)
{
    char    buf[4096];
    va_list args;

    /* make sure the buffer holds a valid string even if formatting fails */
    buf[0] = 0;

    va_start(args, format);
    /* _vsnprintf() does not write a terminator when the output fills the
       whole count, so one byte is kept in reserve and the terminator is
       written explicitly */
    _vsnprintf(buf, sizeof(buf) - 1, format, args);
    buf[sizeof(buf) - 1] = 0;
    va_end(args);

    OutputDebugStringA(buf);
}
/* Send the description 'dsc' (if not NULL) and then the 'dataLen' bytes at
   'data' as upper-case hex digits to the debugger, followed by a newline.
   The hex digits are emitted in chunks of at most 64 characters.
   If 'data' is NULL only the description is written (no newline). */
void win32_dbg_out_hex(const char *dsc, const unsigned char *data, int dataLen)
{
    char    buf[64+1];
    char *  curPos;
    int     bufCharsLeft;

    /* 'dsc' is passed as an argument, never as the format string, so any
       '%' it contains is printed literally */
    if (dsc) win32_dbg_out("%s", dsc);
    if (!data) return;

    bufCharsLeft = sizeof(buf)-1;
    curPos = buf;
    while (dataLen > 0) {
        if (bufCharsLeft <= 1) {
            /* no room for another pair of digits: flush and start over */
            *curPos = 0;
            win32_dbg_out("%s", buf);
            bufCharsLeft = sizeof(buf)-1;
            curPos = buf;
        }
        char_to_hex(*data, curPos);
        curPos += 2;
        bufCharsLeft -= 2;
        --dataLen;
        ++data;
    }

    if (curPos != buf) {
        *curPos = 0;
        win32_dbg_out("%s", buf);
    }
    win32_dbg_out("\n");
}
#endif
/* Given a pointer to a string in '*txtInOut', skip past the leading
   whitespace (as defined by char_is_ws()) and store the new position back
   in '*txtInOut'. Does nothing if 'txtInOut' or '*txtInOut' is NULL. */
void str_skip_ws(char **txtInOut)
{
    char *p;

    if (!txtInOut || !*txtInOut)
        return;

    for (p = *txtInOut; char_is_ws(*p); ++p) {
        /* just advancing */
    }
    *txtInOut = p;
}
/* Walk the body of a quoted string starting at 'src' (the character right
   after the opening quote) up to the closing quote or the end of the
   string. The sequence \" is un-escaped to a single quote; every other
   backslash is kept verbatim.
   TODO: should more than '"' be un-escaped? '\' used to be un-escaped as
   well, but it wasn't right and files with UNC path like "\\foo\file.pdf"
   failed to load.
   If 'dst' is not NULL the un-escaped, NUL-terminated text is written to
   it; with a NULL 'dst' the characters are only counted. '*end_out'
   receives the position after the closing quote (or of the terminator if
   there is no closing quote).
   Returns the number of un-escaped characters, terminator not included. */
static size_t str_unquote_span(char *src, char *dst, char **end_out)
{
    size_t  count = 0;
    char *  p = src;

    while ((0 != *p) && ('"' != *p)) {
        if (('\\' == *p) && ('"' == p[1]))
            ++p; /* drop the backslash, keep the quote */
        if (dst)
            dst[count] = *p;
        ++count;
        ++p;
    }
    if (dst)
        dst[count] = 0;
    if ('"' == *p)
        ++p; /* step over the closing quote */
    *end_out = p;
    return count;
}

/* Parse the quoted string that starts at '*txt' (which is expected to
   point at the opening '"'). The same walk is used for counting and for
   copying so the two can never get out of sync.
   Returns a newly allocated copy of the un-escaped text between the quotes
   and advances '*txt' past the closing quote. Returns NULL, leaving '*txt'
   untouched, if 'txt' or '*txt' is NULL or the allocation fails.
   Caller needs to free() the result. */
char *str_parse_quoted(char **txt)
{
    char *  body;
    char *  after;
    char *  copy;
    size_t  len;

    assert(txt);
    if (!txt) return NULL;
    body = *txt;
    assert(body);
    if (!body) return NULL;
    assert('"' == *body);
    ++body; /* skip the opening quote */

    len = str_unquote_span(body, NULL, &after);
    copy = (char*)malloc(len + 1);
    if (!copy)
        return NULL;

    str_unquote_span(body, copy, &after);
    *txt = after;
    return copy;
}
/* Parse the unquoted token that starts at '*txt': everything up to (not
   including) the first whitespace character or the end of the string.
   '*txt' is expected not to start with '"'.
   Returns the result of str_dupn() for the token and advances '*txt' to
   the character that ended it. Returns NULL if '*txt' is NULL.
   Caller needs to free() the result. */
char *str_parse_non_quoted(char **txt)
{
    char *  start = *txt;
    char *  stop;
    size_t  token_len;

    assert(start);
    if (!start) return NULL;
    assert('"' != *start);

    stop = start;
    while (!char_is_ws_or_zero(*stop))
        ++stop;

    token_len = stop - start;
    assert(token_len > 0);
    *txt = stop;
    return str_dupn(start, token_len);
}
/* '*txt' is a path that can be:
   - escaped, in which case it starts with '"', ends with '"' and each '"'
     that is part of the name is escaped with '\'
   - unescaped, in which case it starts with a character other than '"' and
     ends with whitespace or the end of the string (0)
   Leading whitespace is skipped, then the escaped or unescaped path is
   extracted and '*txt' is advanced past it.
   Returns NULL if 'txt' or '*txt' is NULL, or if nothing but whitespace is
   left (in that case '*txt' is not changed); otherwise returns what the
   quoted / non-quoted parser returns.
   Caller needs to free() the result. */
char *str_parse_possibly_quoted(char **txt)
{
    char *  pos;
    char *  parsed;

    if (!txt || !*txt)
        return NULL;

    pos = *txt;
    str_skip_ws(&pos);
    if (0 == *pos)
        return NULL;

    parsed = ('"' == *pos) ? str_parse_quoted(&pos)
                           : str_parse_non_quoted(&pos);
    *txt = pos;
    return parsed;
}
/* Initialize 'str_arr' by filling the whole structure with zeros.
   Does nothing if 'str_arr' is NULL. */
void str_array_init(str_array *str_arr)
{
    assert(str_arr);
    if (str_arr)
        memzero(str_arr, sizeof(*str_arr));
}
/* Free every item stored in 'str_arr' and the item table itself, then
   re-initialize the array so that it is empty. 'str_arr' itself is NOT
   freed. Does nothing if 'str_arr' is NULL. */
void str_array_free(str_array *str_arr)
{
    int idx = 0;

    assert(str_arr);
    if (!str_arr) return;

    while (idx < str_arr->items_count) {
        free(str_arr->items[idx]);
        ++idx;
    }
    free(str_arr->items);
    str_array_init(str_arr);
}
/* Release the contents of 'str_arr' with str_array_free() and then free
   the 'str_arr' structure itself. Does nothing if 'str_arr' is NULL. */
void str_array_delete(str_array *str_arr)
{
    assert(str_arr);
    if (str_arr) {
        str_array_free(str_arr);
        free((void*)str_arr);
    }
}
/* Return the item stored at position 'index' of 'str_arr'.
   Returns NULL if 'str_arr' is NULL or 'index' is outside of
   0 .. items_count-1 (both cases also trip an assert). */
str_item *str_array_get(str_array *str_arr, int index)
{
    int in_range;

    assert(str_arr);
    if (!str_arr) return NULL;

    assert(index >= 0);
    assert(index < str_arr->items_count);
    in_range = (index >= 0) && (index < str_arr->items_count);
    return in_range ? str_arr->items[index] : NULL;
}
/* Return the number of items in 'str_arr', or 0 if 'str_arr' is NULL. */
int str_array_get_count(str_array *str_arr)
{
    assert(str_arr);
    return str_arr ? str_arr->items_count : 0;
}
/* Set one string at position 'index' in 'str_arr'. Space for the item
   must already be allocated, i.e. 'index' must be within
   0 .. items_count-1. A copy of 'str' is stored; an item previously stored
   at that position is freed.
   Returns the new item, or NULL if 'str_arr' is NULL, 'index' is out of
   range or the allocation fails (the old item is kept in that case). */
str_item *str_array_set(str_array *str_arr, int index, const char *str)
{
    str_item *  new_item;
    size_t      str_len_cch;

    assert(str_arr);
    if (!str_arr) return NULL;

    /* a negative index must be rejected too, otherwise the stores below
       would land in front of the item table */
    if ((index < 0) || (index >= str_arr->items_count))
        return NULL;

    str_len_cch = str_len(str);
    /* NOTE(review): presumably sizeof(str_item) already includes room for
       the terminator of its 'str' member - confirm against the definition
       of str_item */
    new_item = (str_item*)malloc(sizeof(str_item) + str_len_cch*sizeof(char));
    if (!new_item)
        return NULL;
    str_copy(new_item->str, str_len_cch+1, str);

    /* free(NULL) is a no-op, so an empty slot needs no special casing */
    free(str_arr->items[index]);
    str_arr->items[index] = new_item;
    return new_item;
}
#define STR_ARR_GROW_VALUE 32
/* Append a copy of 'str' to 'str_arr', growing the item table by
   STR_ARR_GROW_VALUE slots when it is full.
   Returns the new item, or NULL if 'str_arr' is NULL or an allocation
   fails (the array is left unchanged in that case, except that the item
   table may already have been enlarged).
   TODO: make a generic array alloc */
str_item *str_array_add(str_array *str_arr, const char *str)
{
    str_item ** tmp;
    str_item *  new_item;
    int         n;

    /* same NULL handling as the other str_array_* functions */
    assert(str_arr);
    if (!str_arr) return NULL;

    if (str_arr->items_count >= str_arr->items_allocated) {
        /* increase memory for items if necessary */
        n = str_arr->items_allocated + STR_ARR_GROW_VALUE;
        tmp = (str_item**)realloc(str_arr->items, n * sizeof(str_item *));
        if (!tmp)
            return NULL;
        str_arr->items = tmp;
        /* zero exactly the slots that were just added, which start at the
           old capacity */
        memzero(&(str_arr->items[str_arr->items_allocated]),
                STR_ARR_GROW_VALUE * sizeof(str_item *));
        str_arr->items_allocated = n;
    }

    /* str_array_set() only accepts indexes below items_count, so the count
       is bumped first and rolled back if storing the item fails */
    str_arr->items_count++;
    new_item = str_array_set(str_arr, str_arr->items_count - 1, str);
    if (!new_item)
        --str_arr->items_count;
    return new_item;
}
/* Return TRUE if 'str_arr' contains an item whose string equals 'str'
   according to str_ieq() (i.e. ignoring case), FALSE otherwise.
   Returns FALSE if 'str_arr' or 'str' is NULL. */
int str_array_exists_no_case(str_array *str_arr, const char *str)
{
    int idx;
    int total;

    if (!str_arr || !str)
        return FALSE;

    total = str_arr->items_count;
    idx = 0;
    while (idx < total) {
        if (str_ieq(str, str_arr->items[idx]->str))
            return TRUE;
        ++idx;
    }
    return FALSE;
}
/* Append 'str' to 'str_arr' unless str_array_exists_no_case() reports that
   it is already there.
   Returns NULL if the string is already present, otherwise whatever
   str_array_add() returns. */
str_item *str_array_add_no_dups(str_array *str_arr, const char *str)
{
    int already_there = str_array_exists_no_case(str_arr, str);

    if (already_there)
        return NULL;
    return str_array_add(str_arr, str);
}