reactos/dll/win32/usp10/breaking.c

445 lines
14 KiB
C
Raw Normal View History

/*
* Implementation of line breaking algorithm for the Uniscribe Script Processor
*
* Copyright 2011 CodeWeavers, Aric Stewart
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*
*/
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include "windef.h"
#include "winbase.h"
#include "winuser.h"
#include "wingdi.h"
#include "winnls.h"
#include "usp10.h"
#include "winternl.h"
#include "wine/debug.h"
#include "wine/heap.h"
#include "usp10_internal.h"
WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
extern const unsigned short wine_linebreak_table[] DECLSPEC_HIDDEN;
/*
 * Line-break classes.  BREAK_line() compares these constants against the
 * values read from wine_linebreak_table, so the numeric values must stay
 * exactly as they are.  The names appear to follow the class abbreviations
 * used by the Unicode line breaking rules (LB1..LB31) referenced in
 * BREAK_line().
 */
enum breaking_types
{
    b_BK  = 1,
    b_CR  = 2,
    b_LF  = 3,
    b_CM  = 4,
    b_SG  = 5,
    b_GL  = 6,
    b_CB  = 7,
    b_SP  = 8,
    b_ZW  = 9,
    b_NL  = 10,
    b_WJ  = 11,
    b_JL  = 12,
    b_JV  = 13,
    b_JT  = 14,
    b_H2  = 15,
    b_H3  = 16,
    b_XX  = 17,
    b_OP  = 18,
    b_CL  = 19,
    b_CP  = 20,
    b_QU  = 21,
    b_NS  = 22,
    b_EX  = 23,
    b_SY  = 24,
    b_IS  = 25,
    b_PR  = 26,
    b_PO  = 27,
    b_NU  = 28,
    b_AL  = 29,
    b_ID  = 30,
    b_IN  = 31,
    b_HY  = 32,
    b_BB  = 33,
    b_BA  = 34,
    b_SA  = 35,
    b_AI  = 36,
    b_B2  = 37,
    b_HL  = 38,
    b_CJ  = 39,
    b_RI  = 40,
    b_EB  = 41,
    b_EM  = 42,
    b_ZWJ = 43
};
/*
 * Decision recorded for the position in front of a character.  A slot that
 * still holds 0 has not been decided by any rule yet.
 */
enum breaking_class
{
    b_r = 1,    /* set after BK/CR/LF/NL by BREAK_line(); traced as "!" */
    b_s = 2,    /* break opportunity; traced as "+" */
    b_x = 3     /* no break; the only value that suppresses fSoftBreak; traced as "x" */
};
/*
 * Trace the break decisions as one bracketed string: "x" for b_x, "!" for
 * b_r, "+" for b_s and "*" for anything else (including undecided slots).
 * At most 200 entries are printed; "..." is appended when the walk stopped
 * at that limit.  Does nothing unless the uniscribe trace channel is on.
 */
static void debug_output_breaks(const short* breaks, int count)
{
    enum { max_traced = 200 };
    int pos = 0;

    if (!TRACE_ON(uniscribe))
    {
        return;
    }

    TRACE("[");
    while (pos < count && pos < max_traced)
    {
        const short decision = breaks[pos];

        if (decision == b_x)
        {
            TRACE("x");
        }
        else if (decision == b_r)
        {
            TRACE("!");
        }
        else if (decision == b_s)
        {
            TRACE("+");
        }
        else
        {
            TRACE("*");
        }
        pos++;
    }
    if (pos == max_traced)
    {
        TRACE("...");
    }
    TRACE("]\n");
}
/*
 * Store a break decision in *before unless one is already there.  A slot
 * holding 0 counts as undecided; any non-zero value is left untouched, so
 * the first rule to claim a position wins.
 */
static inline void else_break(short* before, short class)
{
    if (*before != 0)
    {
        return;
    }
    *before = class;
}
/***********************************************************************
 * BREAK_line
 *
 * Fill the logical attributes of a run of characters using the line
 * breaking rules LB1..LB31.
 *
 * PARAMS
 *  chars [I] characters to analyse
 *  count [I] number of entries in chars and la
 *  sa    [I] script analysis of the run (not referenced by this code)
 *  la    [O] receives one SCRIPT_LOGATTR per character
 *
 * For every character the attribute is zeroed, then:
 *  - fCharStop is set, except for combining marks (b_CM);
 *  - fWhiteSpace is set for the b_BK, b_ZW and b_SP classes;
 *  - fSoftBreak and fWordStop are set wherever the final decision for the
 *    position in front of the character is anything other than b_x.
 *
 * Rules are applied in order and else_break() only writes undecided
 * positions, so an earlier rule always takes precedence over a later one.
 *
 * If count is not positive nothing is written.  If the scratch buffers
 * cannot be allocated, la is zeroed and no break information is produced.
 */
void BREAK_line(const WCHAR *chars, int count, const SCRIPT_ANALYSIS *sa, SCRIPT_LOGATTR *la)
{
    int i,j;
    short *break_class;
    short *break_before;

    TRACE("In %s\n",debugstr_wn(chars,count));

    /* Nothing to classify; this also keeps break_before[0] below in bounds. */
    if (count <= 0)
        return;

    break_class = heap_alloc(count * sizeof(*break_class));
    break_before = heap_alloc(count * sizeof(*break_before));
    if (!break_class || !break_before)
    {
        TRACE("Failed to allocate break buffers\n");
        /* Hand back defined (empty) attributes instead of uninitialised memory. */
        memset(la, 0, count * sizeof(*la));
        heap_free(break_before);
        heap_free(break_class);
        return;
    }

    for (i = 0; i < count; i++)
    {
        break_class[i] = get_table_entry( wine_linebreak_table, chars[i] );
        break_before[i] = 0;

        memset(&la[i],0,sizeof(SCRIPT_LOGATTR));

        la[i].fCharStop = TRUE;
        switch (break_class[i])
        {
            case b_BK:
            case b_ZW:
            case b_SP:
                la[i].fWhiteSpace = TRUE;
                break;
            case b_CM:
                la[i].fCharStop = FALSE;
                break;
        }
    }

    /* LB1 */
    /* TODO: Have outside algorithms for these scripts */
    for (i = 0; i < count; i++)
    {
        switch(break_class[i])
        {
            case b_AI:
            case b_SA:
            case b_SG:
            case b_XX:
                break_class[i] = b_AL;
                break;
            case b_CJ:
                break_class[i] = b_NS;
                break;
        }
    }

    /* LB2 - LB3 */
    break_before[0] = b_x;
    for (i = 0; i < count; i++)
    {
        switch(break_class[i])
        {
            /* LB4 - LB6 */
            case b_CR:
                if (i < count-1 && break_class[i+1] == b_LF)
                {
                    else_break(&break_before[i],b_x);
                    else_break(&break_before[i+1],b_x);
                    break;
                }
                /* fall through: a CR not followed by LF is handled like the other hard breaks */
            case b_LF:
            case b_NL:
            case b_BK:
                if (i < count-1) else_break(&break_before[i+1],b_r);
                else_break(&break_before[i],b_x);
                break;
            /* LB7 */
            case b_SP:
                else_break(&break_before[i],b_x);
                break;
            case b_ZW:
                else_break(&break_before[i],b_x);
            /* LB8 */
                /* NOTE(review): this marks the last character of the ZW SP* run
                 * itself rather than the character following the run; kept
                 * unchanged to preserve existing results - confirm against the
                 * line breaking specification. */
                while (i < count-1 && break_class[i+1] == b_SP)
                    i++;
                else_break(&break_before[i],b_s);
                break;
        }
    }

    debug_output_breaks(break_before,count);

    /* LB9 - LB10 */
    for (i = 0; i < count; i++)
    {
        if (break_class[i] == b_CM)
        {
            if (i > 0)
            {
                switch (break_class[i-1])
                {
                    case b_SP:
                    case b_BK:
                    case b_CR:
                    case b_LF:
                    case b_NL:
                    case b_ZW:
                        break_class[i] = b_AL;
                        break;
                    default:
                        break_class[i] = break_class[i-1];
                }
            }
            else break_class[i] = b_AL;
        }
    }

    for (i = 0; i < count; i++)
    {
        switch(break_class[i])
        {
            /* LB11 */
            case b_WJ:
                else_break(&break_before[i],b_x);
                if (i < count-1)
                    else_break(&break_before[i+1],b_x);
                break;
            /* LB12 */
            case b_GL:
                if (i < count-1)
                    else_break(&break_before[i+1],b_x);
            /* LB12a */
                if (i > 0)
                {
                    if (break_class[i-1] != b_SP &&
                        break_class[i-1] != b_BA &&
                        break_class[i-1] != b_HY)
                        else_break(&break_before[i],b_x);
                }
                break;
            /* LB13 */
            case b_CL:
            case b_CP:
            case b_EX:
            case b_IS:
            case b_SY:
                else_break(&break_before[i],b_x);
                break;
            /* LB14 */
            case b_OP:
                while (i < count-1 && break_class[i+1] == b_SP)
                {
                    else_break(&break_before[i+1],b_x);
                    i++;
                }
                /* Only mark the following character if there is one; OP (plus
                 * trailing spaces) may be the end of the text. */
                if (i < count-1)
                    else_break(&break_before[i+1],b_x);
                break;
            /* LB15 */
            case b_QU:
                j = i+1;
                while (j < count-1 && break_class[j] == b_SP)
                    j++;
                /* j == count when QU is the last character. */
                if (j < count && break_class[j] == b_OP)
                {
                    for (; j > i; j--)
                        else_break(&break_before[j],b_x);
                }
                break;
            /* LB16 */
            case b_NS:
                j = i-1;
                while(j > 0 && break_class[j] == b_SP)
                    j--;
                /* j == -1 when NS is the first character. */
                if (j >= 0 && (break_class[j] == b_CL || break_class[j] == b_CP))
                {
                    for (j++; j <= i; j++)
                        else_break(&break_before[j],b_x);
                }
                break;
            /* LB17 */
            case b_B2:
                j = i+1;
                while (j < count && break_class[j] == b_SP)
                    j++;
                /* j == count when only spaces (or nothing) follow. */
                if (j < count && break_class[j] == b_B2)
                {
                    for (; j > i; j--)
                        else_break(&break_before[j],b_x);
                }
                break;
        }
    }

    debug_output_breaks(break_before,count);

    for (i = 0; i < count; i++)
    {
        switch(break_class[i])
        {
            /* LB18 */
            case b_SP:
                if (i < count-1)
                    else_break(&break_before[i+1],b_s);
                break;
            /* LB19 */
            case b_QU:
                else_break(&break_before[i],b_x);
                if (i < count-1)
                    else_break(&break_before[i+1],b_x);
                break;
            /* LB20 */
            case b_CB:
                else_break(&break_before[i],b_s);
                if (i < count-1)
                    else_break(&break_before[i+1],b_s);
                break;
            /* LB21 */
            case b_BA:
            case b_HY:
            case b_NS:
                else_break(&break_before[i],b_x);
                break;
            case b_BB:
                if (i < count-1)
                    else_break(&break_before[i+1],b_x);
                break;
            /* LB21a */
            case b_HL:
                if (i < count-2)
                {
                    switch (break_class[i+1])
                    {
                        case b_HY:
                        case b_BA:
                            else_break(&break_before[i+2], b_x);
                    }
                }
                break;
            /* LB22 */
            case b_IN:
                if (i > 0)
                {
                    switch (break_class[i-1])
                    {
                        case b_AL:
                        case b_HL:
                        case b_ID:
                        case b_IN:
                        case b_NU:
                            else_break(&break_before[i], b_x);
                    }
                }
                break;
        }

        if (i < count-1)
        {
            /* LB23 */
            if ((break_class[i] == b_ID && break_class[i+1] == b_PO) ||
                (break_class[i] == b_AL && break_class[i+1] == b_NU) ||
                (break_class[i] == b_HL && break_class[i+1] == b_NU) ||
                (break_class[i] == b_NU && break_class[i+1] == b_AL) ||
                (break_class[i] == b_NU && break_class[i+1] == b_HL))
                    else_break(&break_before[i+1],b_x);
            /* LB24 */
            if ((break_class[i] == b_PR && break_class[i+1] == b_ID) ||
                (break_class[i] == b_PR && break_class[i+1] == b_AL) ||
                (break_class[i] == b_PR && break_class[i+1] == b_HL) ||
                (break_class[i] == b_PO && break_class[i+1] == b_AL) ||
                (break_class[i] == b_PO && break_class[i+1] == b_HL))
                    else_break(&break_before[i+1],b_x);

            /* LB25 */
            if ((break_class[i] == b_CL && break_class[i+1] == b_PO) ||
                (break_class[i] == b_CP && break_class[i+1] == b_PO) ||
                (break_class[i] == b_CL && break_class[i+1] == b_PR) ||
                (break_class[i] == b_CP && break_class[i+1] == b_PR) ||
                (break_class[i] == b_NU && break_class[i+1] == b_PO) ||
                (break_class[i] == b_NU && break_class[i+1] == b_PR) ||
                (break_class[i] == b_PO && break_class[i+1] == b_OP) ||
                (break_class[i] == b_PO && break_class[i+1] == b_NU) ||
                (break_class[i] == b_PR && break_class[i+1] == b_OP) ||
                (break_class[i] == b_PR && break_class[i+1] == b_NU) ||
                (break_class[i] == b_HY && break_class[i+1] == b_NU) ||
                (break_class[i] == b_IS && break_class[i+1] == b_NU) ||
                (break_class[i] == b_NU && break_class[i+1] == b_NU) ||
                (break_class[i] == b_SY && break_class[i+1] == b_NU))
                    else_break(&break_before[i+1],b_x);

            /* LB26 */
            if (break_class[i] == b_JL)
            {
                switch (break_class[i+1])
                {
                    case b_JL:
                    case b_JV:
                    case b_H2:
                    case b_H3:
                        else_break(&break_before[i+1],b_x);
                }
            }
            if ((break_class[i] == b_JV || break_class[i] == b_H2) &&
                (break_class[i+1] == b_JV || break_class[i+1] == b_JT))
                    else_break(&break_before[i+1],b_x);
            if ((break_class[i] == b_JT || break_class[i] == b_H3) &&
                 break_class[i+1] == b_JT)
                    else_break(&break_before[i+1],b_x);

            /* LB27 */
            switch (break_class[i])
            {
                case b_JL:
                case b_JV:
                case b_JT:
                case b_H2:
                case b_H3:
                    if (break_class[i+1] == b_IN || break_class[i+1] == b_PO)
                        else_break(&break_before[i+1],b_x);
            }
            if (break_class[i] == b_PR)
            {
                switch (break_class[i+1])
                {
                    case b_JL:
                    case b_JV:
                    case b_JT:
                    case b_H2:
                    case b_H3:
                        else_break(&break_before[i+1],b_x);
                }
            }

            /* LB28 */
            if ((break_class[i] == b_AL && break_class[i+1] == b_AL) ||
                (break_class[i] == b_AL && break_class[i+1] == b_HL) ||
                (break_class[i] == b_HL && break_class[i+1] == b_AL) ||
                (break_class[i] == b_HL && break_class[i+1] == b_HL))
                else_break(&break_before[i+1],b_x);

            /* LB29 */
            if ((break_class[i] == b_IS && break_class[i+1] == b_AL) ||
                (break_class[i] == b_IS && break_class[i+1] == b_HL))
                else_break(&break_before[i+1],b_x);

            /* LB30 */
            if ((break_class[i] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU) &&
                 break_class[i+1] == b_OP)
                else_break(&break_before[i+1],b_x);
            if (break_class[i] == b_CP &&
                (break_class[i+1] == b_AL || break_class[i+1] == b_HL || break_class[i+1] == b_NU))
                else_break(&break_before[i+1],b_x);

            /* LB30a */
            if (break_class[i] == b_RI && break_class[i+1] == b_RI)
                else_break(&break_before[i+1],b_x);
        }
    }

    debug_output_breaks(break_before,count);

    /* LB31 */
    for (i = 0; i < count-1; i++)
        else_break(&break_before[i+1],b_s);

    debug_output_breaks(break_before,count);

    /* Every position that is not an explicit "no break" becomes a stop. */
    for (i = 0; i < count; i++)
    {
        if (break_before[i] != b_x)
        {
            la[i].fSoftBreak = TRUE;
            la[i].fWordStop = TRUE;
        }
    }

    heap_free(break_before);
    heap_free(break_class);
}