From d4095912507004d45d83482f7a37d36936bd4de3 Mon Sep 17 00:00:00 2001
From: Aleksey Bragin <aleksey@reactos.org>
Date: Fri, 6 Jun 2008 21:33:43 +0000
Subject: [PATCH] - Update RtlIsTextUnicode (fixes some ntdll rtlstr
 winetests).

svn path=/trunk/; revision=33868
---
 reactos/include/ndk/rtlfuncs.h |   8 +--
 reactos/lib/rtl/unicode.c      | 125 ++++++++++++++++++++-------------
 2 files changed, 80 insertions(+), 53 deletions(-)

diff --git a/reactos/include/ndk/rtlfuncs.h b/reactos/include/ndk/rtlfuncs.h
index d1ec45a427c..677d583d5ae 100644
--- a/reactos/include/ndk/rtlfuncs.h
+++ b/reactos/include/ndk/rtlfuncs.h
@@ -1690,12 +1690,12 @@ RtlInitUnicodeStringEx(
 );
 
 NTSYSAPI
-ULONG
+BOOLEAN
 NTAPI
 RtlIsTextUnicode(
-    PVOID Buffer,
-    ULONG Length,
-    ULONG *Flags
+    LPCVOID Buffer,
+    INT Length,
+    INT *Flags
 );
 
 NTSYSAPI
diff --git a/reactos/lib/rtl/unicode.c b/reactos/lib/rtl/unicode.c
index afe29068046..eb2f28debb8 100644
--- a/reactos/lib/rtl/unicode.c
+++ b/reactos/lib/rtl/unicode.c
@@ -1055,58 +1055,85 @@ RtlUnicodeStringToOemString(
  * RETURNS
  *  The length of the string if all tests were passed, 0 otherwise.
  */
-ULONG NTAPI
-RtlIsTextUnicode (PVOID Buffer,
-                  ULONG Length,
-                  ULONG *Flags)
+BOOLEAN
+NTAPI
+RtlIsTextUnicode( LPCVOID buf, INT len, INT *pf )
 {
-   PWSTR s = Buffer;
-   ULONG in_flags = (ULONG)-1;
-   ULONG out_flags = 0;
-
-   if (Length == 0)
-      goto done;
-
-   if (Flags != 0)
-      in_flags = *Flags;
-
-   /*
-    * Apply various tests to the text string. According to the
-    * docs, each test "passed" sets the corresponding flag in
-    * the output flags. But some of the tests are mutually
-    * exclusive, so I don't see how you could pass all tests ...
-    */
-
-   /* Check for an odd length ... pass if even. */
-   if (!(Length & 1))
-      out_flags |= IS_TEXT_UNICODE_ODD_LENGTH;
-
-   /* Check for the BOM (byte order mark). */
-   if (*s == 0xFEFF)
-      out_flags |= IS_TEXT_UNICODE_SIGNATURE;
-
-#if 0
-   /* Check for the reverse BOM (byte order mark). */
-   if (*s == 0xFFFE)
-      out_flags |= IS_TEXT_UNICODE_REVERSE_SIGNATURE;
-#endif
-
-   /* FIXME: Add more tests */
-
-   /*
-    * Check whether the string passed all of the tests.
-    */
-   in_flags &= ITU_IMPLEMENTED_TESTS;
-   if ((out_flags & in_flags) != in_flags)
-      Length = 0;
-
-done:
-   if (Flags != 0)
-      *Flags = out_flags;
-
-   return Length;
+    const WCHAR *s = buf;
+    int i;
+    unsigned int flags = ~0U, out_flags = 0;
+
+    /* The cast keeps this comparison signed, so a negative len is rejected too */
+    if (len < (INT)sizeof(WCHAR))
+    {
+        /* FIXME: MSDN documents IS_TEXT_UNICODE_BUFFER_TOO_SMALL but there is no such thing... */
+        if (pf) *pf = 0;
+        return FALSE;
+    }
+    if (pf) flags = *pf;
+    /*
+     * Run the tests; each one that fires sets its flag in out_flags.
+     * The statistics and null-byte tests run only when requested in
+     * flags (all bits are set when pf is NULL); the odd-length and
+     * byte-order-mark tests always run.
+     */
+    /* An odd byte count is reported through IS_TEXT_UNICODE_ODD_LENGTH */
+    if (len & 1) out_flags |= IS_TEXT_UNICODE_ODD_LENGTH;
+
+    if (((const char *)buf)[len - 1] == 0)
+        len--;  /* Windows seems to do something like that to avoid e.g. false IS_TEXT_UNICODE_NULL_BYTES */
+
+    /* Convert len to a WCHAR count; Windows only checks the first 256 characters */
+    len /= (INT)sizeof(WCHAR);
+    if (len > 256) len = 256;
+
+    /* Check for the special byte order unicode marks. */
+    if (*s == 0xFEFF) out_flags |= IS_TEXT_UNICODE_SIGNATURE;
+    if (*s == 0xFFFE) out_flags |= IS_TEXT_UNICODE_REVERSE_SIGNATURE;
+
+    /* apply some statistical analysis */
+    if (flags & IS_TEXT_UNICODE_STATISTICS)
+    {
+        int stats = 0;
+        /* FIXME: only counts characters <= 255 in the unicode stream */
+        for (i = 0; i < len; i++)
+        {
+            if (s[i] <= 255) stats++;
+        }
+        if (stats > len / 2)
+            out_flags |= IS_TEXT_UNICODE_STATISTICS;
+    }
+
+    /* Flag the text as soon as one character has a zero low byte or a zero high byte */
+    if (flags & IS_TEXT_UNICODE_NULL_BYTES)
+    {
+        for (i = 0; i < len; i++)
+        {
+            if (!(s[i] & 0xff) || !(s[i] >> 8))
+            {
+                out_flags |= IS_TEXT_UNICODE_NULL_BYTES;
+                break;
+            }
+        }
+    }
+
+    /* Keep only the results the caller asked about, and hand them back through pf */
+    if (pf)
+    {
+        out_flags &= *pf;
+        *pf = out_flags;
+    }
+    /* check for flags that indicate it's definitely not valid Unicode */
+    if (out_flags & (IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_NOT_UNICODE_MASK)) return FALSE;
+    /* now check for invalid ASCII, and assume Unicode if so */
+    if (out_flags & IS_TEXT_UNICODE_NOT_ASCII_MASK) return TRUE;
+    /* now check for Unicode flags */
+    if (out_flags & IS_TEXT_UNICODE_UNICODE_MASK) return TRUE;
+    /* no flags set */
+    return FALSE;
 }
 
+
 /*
  * @implemented
  *