- Update RtlIsTextUnicode (fixes some ntdll rtlstr winetests).

svn path=/trunk/; revision=33868
2025-08-05 08:53:02 +00:00 · 2008-06-06 21:33:43 +00:00 · 2008-06-06 21:33:43 +00:00 · d409591250
commit d409591250
parent 036789044b
2 changed files with 80 additions and 53 deletions
--- a/reactos/include/ndk/rtlfuncs.h
+++ b/reactos/include/ndk/rtlfuncs.h
@@ -1690,12 +1690,12 @@ RtlInitUnicodeStringEx(
 );
 NTSYSAPI
-ULONG
+BOOLEAN
 NTAPI
 RtlIsTextUnicode(
-    PVOID Buffer,
+    LPCVOID Buffer,
-    ULONG Length,
+    INT Length,
-    ULONG *Flags
+    INT *Flags
 );
 NTSYSAPI
--- a/reactos/lib/rtl/unicode.c
+++ b/reactos/lib/rtl/unicode.c
@@ -1055,58 +1055,85 @@ RtlUnicodeStringToOemString(
 * RETURNS
 *  The length of the string if all tests were passed, 0 otherwise.
 */
-ULONG NTAPI
+BOOLEAN
-RtlIsTextUnicode (PVOID Buffer,
+NTAPI
-                  ULONG Length,
+RtlIsTextUnicode( LPCVOID buf, INT len, INT *pf )
-                  ULONG *Flags)
 {
-   PWSTR s = Buffer;
+    const WCHAR *s = buf;
-   ULONG in_flags = (ULONG)-1;
+    int i;
-   ULONG out_flags = 0;
+    unsigned int flags = ~0U, out_flags = 0;
-
+    
-   if (Length == 0)
+    if (len < sizeof(WCHAR))
-      goto done;
+    {
-
+        /* FIXME: MSDN documents IS_TEXT_UNICODE_BUFFER_TOO_SMALL but there is no such thing... */
-   if (Flags != 0)
+        if (pf) *pf = 0;
-      in_flags = *Flags;
+        return FALSE;
-
+    }
-   /*
+    if (pf)
-    * Apply various tests to the text string. According to the
+        flags = *pf;
-    * docs, each test "passed" sets the corresponding flag in
+    /*
-    * the output flags. But some of the tests are mutually
+     * Apply various tests to the text string. According to the
-    * exclusive, so I don't see how you could pass all tests ...
+     * docs, each test "passed" sets the corresponding flag in
-    */
+     * the output flags. But some of the tests are mutually
-
+     * exclusive, so I don't see how you could pass all tests ...
-   /* Check for an odd length ... pass if even. */
+     */
-   if (!(Length & 1))
+    
-      out_flags |= IS_TEXT_UNICODE_ODD_LENGTH;
+    /* Check for an odd length ... pass if even. */
-
+    if (len & 1) out_flags |= IS_TEXT_UNICODE_ODD_LENGTH;
-   /* Check for the BOM (byte order mark). */
+    
-   if (*s == 0xFEFF)
+    if (((char *)buf)[len - 1] == 0)
-      out_flags |= IS_TEXT_UNICODE_SIGNATURE;
+        len--;  /* Windows seems to do something like that to avoid e.g. false IS_TEXT_UNICODE_NULL_BYTES  */
-
+    
-#if 0
+    len /= sizeof(WCHAR);
-   /* Check for the reverse BOM (byte order mark). */
+    /* Windows only checks the first 256 characters */
-   if (*s == 0xFFFE)
+    if (len > 256) len = 256;
-      out_flags |= IS_TEXT_UNICODE_REVERSE_SIGNATURE;
+    
-#endif
+    /* Check for the special byte order unicode marks. */
-
+    if (*s == 0xFEFF) out_flags |= IS_TEXT_UNICODE_SIGNATURE;
-   /* FIXME: Add more tests */
+    if (*s == 0xFFFE) out_flags |= IS_TEXT_UNICODE_REVERSE_SIGNATURE;
-
+    
-   /*
+    /* apply some statistical analysis */
-    * Check whether the string passed all of the tests.
+    if (flags & IS_TEXT_UNICODE_STATISTICS)
-    */
+    {
-   in_flags &= ITU_IMPLEMENTED_TESTS;
+        int stats = 0;
-   if ((out_flags & in_flags) != in_flags)
+        /* FIXME: checks only for ASCII characters in the unicode stream */
-      Length = 0;
+        for (i = 0; i < len; i++)
-
+        {
-done:
+            if (s[i] <= 255) stats++;
-   if (Flags != 0)
+        }
-      *Flags = out_flags;
+        if (stats > len / 2)
-
+            out_flags |= IS_TEXT_UNICODE_STATISTICS;
-   return Length;
+    }
+    /* Check for unicode NULL chars */
+    if (flags & IS_TEXT_UNICODE_NULL_BYTES)
+    {
+        for (i = 0; i < len; i++)
+        {
+            if (!(s[i] & 0xff) || !(s[i] >> 8))
+            {
+                out_flags |= IS_TEXT_UNICODE_NULL_BYTES;
+                break;
+            }
+        }
+    }
+    if (pf)
+    {
+        out_flags &= *pf;
+        *pf = out_flags;
+    }
+    /* check for flags that indicate it's definitely not valid Unicode */
+    if (out_flags & (IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_NOT_UNICODE_MASK)) return FALSE;
+    /* now check for invalid ASCII, and assume Unicode if so */
+    if (out_flags & IS_TEXT_UNICODE_NOT_ASCII_MASK) return TRUE;
+    /* now check for Unicode flags */
+    if (out_flags & IS_TEXT_UNICODE_UNICODE_MASK) return TRUE;
+    /* no flags set */
+    return FALSE;
 }
 /*
 * @implemented
 *