/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-2007, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************
 ************************************************************************
 *   Date        Name        Description
 *   02/28/2001  aliu        Creation
 *   03/01/2001  George      port to HP/UX
 ************************************************************************/

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "jamotest.h"
#include "unicode/utypes.h"
#include "unicode/translit.h"
#include "cpdtrans.h"

// SEP is the disambiguation separator used by Latin-Jamo and Jamo-Latin
#define SEP "-"

/**
 * Build the two helper transliterators from JAMO_NAMES_RULES:
 * NAME_JAMO applies the rules forward ("Name-Jamo") and JAMO_NAME
 * applies them in reverse ("Jamo-Name").  If either creation reports
 * a failure status, that member is deleted and left as NULL.
 */
JamoTest::JamoTest()
{
    UParseError parseError;

    // Forward direction: short name -> jamo.
    UErrorCode forwardStatus = U_ZERO_ERROR;
    NAME_JAMO = Transliterator::createFromRules("Name-Jamo",
                                                JAMO_NAMES_RULES,
                                                UTRANS_FORWARD,
                                                parseError,
                                                forwardStatus);
    if (U_FAILURE(forwardStatus)) {
        delete NAME_JAMO;
        NAME_JAMO = NULL;
    }

    // Reverse direction: jamo -> short name.  A fresh status is used so
    // that a forward failure does not affect this creation.
    UErrorCode reverseStatus = U_ZERO_ERROR;
    JAMO_NAME = Transliterator::createFromRules("Jamo-Name",
                                                JAMO_NAMES_RULES,
                                                UTRANS_REVERSE,
                                                parseError,
                                                reverseStatus);
    if (U_FAILURE(reverseStatus)) {
        delete JAMO_NAME;
        JAMO_NAME = NULL;
    }
}

/**
 * Release the helper transliterators created by the constructor.
 * Either pointer may be NULL; deleting NULL is harmless.
 */
JamoTest::~JamoTest()
{
    delete NAME_JAMO;
    delete JAMO_NAME;
}

/**
 * Select a test by index through the TESTCASE macro.  Indices 0..2 map
 * to TestJamo, TestRealText and TestPiecemeal; any other index sets
 * name to the empty string.
 */
void
JamoTest::runIndexedTest(int32_t index, UBool exec,
                         const char* &name, char* /*par*/)
{
    switch (index) {
        TESTCASE(0,TestJamo);
        TESTCASE(1,TestRealText);
        TESTCASE(2,TestPiecemeal);
        default:
            name = "";
            break;
    }
}

void
JamoTest::TestJamo()
{
    UParseError parseError;
    UErrorCode status = U_ZERO_ERROR;
    Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);

    if (latinJamo == 0 || U_FAILURE(status)) {
        errln("FAIL: createInstance() returned 0");
        return;
    }

    Transliterator* jamoLatin = latinJamo->createInverse(status);

    if (jamoLatin == 0) {
        delete latinJamo;
        errln("FAIL: createInverse() returned 0");
        return;
    }

    static const char* CASE[] = {
        // Column 1 is the latin text L1 to be fed to
Latin-Jamo // to yield output J. // Column 2 is expected value of J. J is fed to // Jamo-Latin to yield output L2. // Column 3 is expected value of L2. If the expected // value of L2 is L1, then L2 is NULL. // add tests for the update to fix problems where it didn't follow the standard // see also http://www.unicode.org/cldr/data/charts/transforms/Latin-Hangul.html "gach", "(Gi)(A)(Cf)", NULL, "geumhui", "(Gi)(EU)(Mf)(Hi)(YI)", NULL, "choe", "(Ci)(OE)", NULL, "wo", "(IEUNG)(WEO)", NULL, "Wonpil", "(IEUNG)(WEO)(Nf)(Pi)(I)(L)", "wonpil", "GIPPEUM", "(Gi)(I)(BB)(EU)(Mf)", "gippeum", "EUTTEUM", "(IEUNG)(EU)(DD)(EU)(Mf)", "eutteum", "KKOTNAE", "(GGi)(O)(Tf)(Ni)(AE)", "kkotnae", "gaga", "(Gi)(A)(Gi)(A)", NULL, "gag-a", "(Gi)(A)(Gf)(IEUNG)(A)", NULL, "gak-ka", "(Gi)(A)(Kf)(Ki)(A)", NULL, "gakka", "(Gi)(A)(GGi)(A)", NULL, "gakk-a", "(Gi)(A)(GGf)(IEUNG)(A)", NULL, "gakkka", "(Gi)(A)(GGf)(Ki)(A)", NULL, "gak-kka", "(Gi)(A)(Kf)(GGi)(A)", NULL, "bab", "(Bi)(A)(Bf)", NULL, "babb", "(Bi)(A)(Bf)(Bi)(EU)", "babbeu", "babbba", "(Bi)(A)(Bf)(Bi)(EU)(Bi)(A)", "babbeuba", "bagg", "(Bi)(A)(Gf)(Gi)(EU)", "baggeu", "baggga", "(Bi)(A)(Gf)(Gi)(EU)(Gi)(A)", "baggeuga", //"bag" SEP "gga", "(Bi)(A)(Gf)" SEP "(Gi)(EU)(Gi)(A)", "bag" SEP "geuga", "kabsa", "(Ki)(A)(Bf)(Si)(A)", NULL, "kabska", "(Ki)(A)(BS)(Ki)(A)", NULL, "gabsbka", "(Gi)(A)(BS)(Bi)(EU)(Ki)(A)", "gabsbeuka", // not (Kf) "gga", "(Gi)(EU)(Gi)(A)", "geuga", "bsa", "(Bi)(EU)(Si)(A)", "beusa", "agg", "(IEUNG)(A)(Gf)(Gi)(EU)", "aggeu", "agga", "(IEUNG)(A)(Gf)(Gi)(A)", NULL, "la", "(R)(A)", NULL, "bs", "(Bi)(EU)(Sf)", "beus", "kalgga", "(Ki)(A)(L)(Gi)(EU)(Gi)(A)", "kalgeuga", // 'r' in a final position is treated like 'l' "karka", "(Ki)(A)(L)(Ki)(A)", "kalka", }; enum { CASE_length = sizeof(CASE) / sizeof(CASE[0]) }; int32_t i; for (i=0; icreateInverse(status); Transliterator* hangulJamo = jamoHangul->createInverse(status); if (jamoLatin == 0 || hangulJamo == 0) { errln("FAIL: createInverse returned NULL"); delete latinJamo; delete 
jamoLatin; delete jamoHangul; delete hangulJamo; return; } Transliterator* tarray[4] = { hangulJamo, jamoLatin, latinJamo, jamoHangul }; CompoundTransliterator rt(tarray, 4); UnicodeString buf; int32_t total = 0; int32_t errors = 0; int32_t i; for (i=0; i < WHAT_IS_UNICODE_length; ++i) { ++total; UnicodeString hangul = WHAT_IS_UNICODE[i]; hangul = hangul.unescape(); // Parse backslash-u escapes UnicodeString hangulX = hangul; rt.transliterate(hangulX); if (hangul != hangulX) { ++errors; UnicodeString jamo = hangul; hangulJamo->transliterate(jamo); UnicodeString latin = jamo; jamoLatin->transliterate(latin); UnicodeString jamo2 = latin; latinJamo->transliterate(jamo2); UnicodeString hangul2 = jamo2; jamoHangul->transliterate(hangul2); buf.remove(0); buf.append("FAIL: "); if (hangul2 != hangulX) { buf.append((UnicodeString)"(Weird: " + hangulX + " != " + hangul2 + ")"); } // The Hangul-Jamo conversion is not usually the // bug here, so we hide it from display. // Uncomment lines to see the Hangul. 
buf.append(//hangul + " => " + jamoToName(jamo) + " => " + latin + " => " + jamoToName(jamo2) //+ " => " + hangul2 ); errln(prettify(buf)); } } if (errors != 0) { errln((UnicodeString)"Test word failures: " + errors + " out of " + total); } else { logln((UnicodeString)"All " + total + " test words passed"); } delete latinJamo; delete jamoLatin; delete jamoHangul; delete hangulJamo; } // Override TransliteratorTest void JamoTest::expectAux(const UnicodeString& tag, const UnicodeString& summary, UBool pass, const UnicodeString& expectedResult) { UnicodeString jsum = jamoToName(summary); UnicodeString jexp = jamoToName(expectedResult); TransliteratorTest::expectAux(tag, jsum, pass, jexp); } const char* JamoTest::JAMO_NAMES_RULES = "'(Gi)' <> \\u1100;" "'(GGi)' <> \\u1101;" "'(Ni)' <> \\u1102;" "'(Di)' <> \\u1103;" "'(DD)' <> \\u1104;" "'(R)' <> \\u1105;" "'(Mi)' <> \\u1106;" "'(Bi)' <> \\u1107;" "'(BB)' <> \\u1108;" "'(Si)' <> \\u1109;" "'(SSi)' <> \\u110A;" "'(IEUNG)' <> \\u110B;" "'(Ji)' <> \\u110C;" "'(JJ)' <> \\u110D;" "'(Ci)' <> \\u110E;" "'(Ki)' <> \\u110F;" "'(Ti)' <> \\u1110;" "'(Pi)' <> \\u1111;" "'(Hi)' <> \\u1112;" "'(A)' <> \\u1161;" "'(AE)' <> \\u1162;" "'(YA)' <> \\u1163;" "'(YAE)' <> \\u1164;" "'(EO)' <> \\u1165;" "'(E)' <> \\u1166;" "'(YEO)' <> \\u1167;" "'(YE)' <> \\u1168;" "'(O)' <> \\u1169;" "'(WA)' <> \\u116A;" "'(WAE)' <> \\u116B;" "'(OE)' <> \\u116C;" "'(YO)' <> \\u116D;" "'(U)' <> \\u116E;" "'(WEO)' <> \\u116F;" "'(WE)' <> \\u1170;" "'(WI)' <> \\u1171;" "'(YU)' <> \\u1172;" "'(EU)' <> \\u1173;" "'(YI)' <> \\u1174;" "'(I)' <> \\u1175;" "'(Gf)' <> \\u11A8;" "'(GGf)' <> \\u11A9;" "'(GS)' <> \\u11AA;" "'(Nf)' <> \\u11AB;" "'(NJ)' <> \\u11AC;" "'(NH)' <> \\u11AD;" "'(Df)' <> \\u11AE;" "'(L)' <> \\u11AF;" "'(LG)' <> \\u11B0;" "'(LM)' <> \\u11B1;" "'(LB)' <> \\u11B2;" "'(LS)' <> \\u11B3;" "'(LT)' <> \\u11B4;" "'(LP)' <> \\u11B5;" "'(LH)' <> \\u11B6;" "'(Mf)' <> \\u11B7;" "'(Bf)' <> \\u11B8;" "'(BS)' <> \\u11B9;" "'(Sf)' <> \\u11BA;" "'(SSf)' <> 
\\u11BB;" "'(NG)' <> \\u11BC;" "'(Jf)' <> \\u11BD;" "'(Cf)' <> \\u11BE;" "'(Kf)' <> \\u11BF;" "'(Tf)' <> \\u11C0;" "'(Pf)' <> \\u11C1;" "'(Hf)' <> \\u11C2;"; /** * Convert short names to actual jamo. E.g., "x(LG)y" returns * "x\u11B0y". See JAMO_NAMES for table of names. */ UnicodeString JamoTest::nameToJamo(const UnicodeString& input) { if (NAME_JAMO == 0) { errln("Failed to create NAME_JAMO"); return input; /* failure! */ } UnicodeString result(input); NAME_JAMO->transliterate(result); return result; } /** * Convert jamo to short names. E.g., "x\u11B0y" returns * "x(LG)y". See JAMO_NAMES for table of names. */ UnicodeString JamoTest::jamoToName(const UnicodeString& input) { if (NAME_JAMO == 0) { errln("Failed to create NAME_JAMO"); return input; /* failure! */ } UnicodeString result(input); JAMO_NAME->transliterate(result); return result; } #endif /* #if !UCONFIG_NO_TRANSLITERATION */