/* ********************************************************************** * Copyright (C) 2002-2006, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description * 10/11/02 aliu Creation. ********************************************************************** */ #include "unicode/utypes.h" #include "unicode/putil.h" #include "unicode/uclean.h" #include "cmemory.h" #include "cstring.h" #include "filestrm.h" #include "uarrsort.h" #include "unewdata.h" #include "uoptions.h" #include "uprops.h" #include "propname.h" #include "uassert.h" #include U_NAMESPACE_USE // TODO: Clean up and comment this code. //---------------------------------------------------------------------- // BEGIN DATA // // This is the raw data to be output. We define the data structure, // then include a machine-generated header that contains the actual // data. #include "unicode/uchar.h" #include "unicode/uscript.h" #include "unicode/unorm.h" class AliasName { public: const char* str; int32_t index; AliasName(const char* str, int32_t index); int compare(const AliasName& other) const; UBool operator==(const AliasName& other) const { return compare(other) == 0; } UBool operator!=(const AliasName& other) const { return compare(other) != 0; } }; AliasName::AliasName(const char* _str, int32_t _index) : str(_str), index(_index) { } int AliasName::compare(const AliasName& other) const { return uprv_comparePropertyNames(str, other.str); } class Alias { public: int32_t enumValue; int32_t nameGroupIndex; Alias(int32_t enumValue, int32_t nameGroupIndex); int32_t getUniqueNames(int32_t* nameGroupIndices) const; }; Alias::Alias(int32_t anEnumValue, int32_t aNameGroupIndex) : enumValue(anEnumValue), nameGroupIndex(aNameGroupIndex) { } class Property : public Alias { public: int32_t valueCount; const Alias* valueList; Property(int32_t enumValue, int32_t nameGroupIndex, int32_t valueCount, const Alias* valueList); }; Property::Property(int32_t _enumValue, int32_t _nameGroupIndex, int32_t _valueCount, const Alias* _valueList) : Alias(_enumValue, _nameGroupIndex), valueCount(_valueCount), valueList(_valueList) { } // *** Include the data header *** #include "data.h" /* return a list of unique names, not including "", for this property * @param stringIndices array of at least MAX_NAMES_PER_GROUP * elements, will be filled with indices into STRING_TABLE * @return number of indices, >= 1 */ int32_t Alias::getUniqueNames(int32_t* stringIndices) const { int32_t count = 0; int32_t i = nameGroupIndex; UBool done = FALSE; while (!done) { int32_t j = NAME_GROUP[i++]; if (j < 0) { done = TRUE; j = -j; } if (j == 0) continue; // omit "" entries UBool dupe = FALSE; for (int32_t k=0; knameIndex]. compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]); } //---------------------------------------------------------------------- /** * An element in an enum index. It maps an enum into a name group entry * (given by index). */ class EnumToNameGroupEntry { public: int32_t enumValue; int32_t nameGroupIndex; EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; } // are enumValues contiguous for count entries starting with this one? // ***!!!*** we assume we are in an array and look at neighbors ***!!!*** UBool isContiguous(int32_t count) const { const EnumToNameGroupEntry* p = this; for (int32_t i=1; ienumValue + i)) { return FALSE; } } return TRUE; } }; // Sort function for EnumToNameGroupEntry (sort by name index) U_CFUNC int32_t compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) { return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue; } //---------------------------------------------------------------------- /** * An element in the map from enumerated property enums to value maps. */ class EnumToValueEntry { public: int32_t enumValue; EnumToNameGroupEntry* enumToName; int32_t enumToName_count; NameToEnumEntry* nameToEnum; int32_t nameToEnum_count; // are enumValues contiguous for count entries starting with this one? // ***!!!*** we assume we are in an array and look at neighbors ***!!!*** UBool isContiguous(int32_t count) const { const EnumToValueEntry* p = this; for (int32_t i=1; ienumValue + i)) { return FALSE; } } return TRUE; } }; // Sort function for EnumToValueEntry (sort by enum) U_CFUNC int32_t compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) { return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue; } //---------------------------------------------------------------------- // BEGIN Builder #define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET)) class Builder { // header: PropertyAliases header; // 0: NonContiguousEnumToOffset* enumToName; int32_t enumToName_size; Offset enumToName_offset; // 1: (deleted) // 2: NameToEnum* nameToEnum; int32_t nameToEnum_size; Offset nameToEnum_offset; // 3: NonContiguousEnumToOffset* enumToValue; int32_t enumToValue_size; Offset enumToValue_offset; // 4: ValueMap* valueMap; int32_t valueMap_size; int32_t valueMap_count; Offset valueMap_offset; // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is // NULL and one is not. valueEnumToName_size[i] is the size of // the non-NULL one. i=0..valueMapCount-1 // 5a: EnumToOffset** valueEnumToName; // 5b: NonContiguousEnumToOffset** valueNCEnumToName; int32_t* valueEnumToName_size; Offset* valueEnumToName_offset; // 6: // arrays of valueMap_count pointers, sizes, & offsets NameToEnum** valueNameToEnum; int32_t* valueNameToEnum_size; Offset* valueNameToEnum_offset; // 98: Offset* nameGroupPool; int32_t nameGroupPool_count; int32_t nameGroupPool_size; Offset nameGroupPool_offset; // 99: char* stringPool; int32_t stringPool_count; int32_t stringPool_size; Offset stringPool_offset; Offset* stringPool_offsetArray; // relative to stringPool int32_t total_size; // size of everything int32_t debug; public: Builder(int32_t debugLevel); ~Builder(); void buildTopLevelProperties(const NameToEnumEntry* propName, int32_t propNameCount, const EnumToNameGroupEntry* propEnum, int32_t propEnumCount); void buildValues(const EnumToValueEntry* e2v, int32_t count); void buildStringPool(const AliasName* propertyNames, int32_t propertyNameCount, const int32_t* nameGroupIndices, int32_t nameGroupIndicesCount); void fixup(); int8_t* createData(int32_t& length) const; private: static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng, int32_t count, int32_t& size); static NonContiguousEnumToOffset* buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng, int32_t count, int32_t& size); static NonContiguousEnumToOffset* buildNCEnumToValue(const EnumToValueEntry* e2v, int32_t count, int32_t& size); static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum, int32_t count, int32_t& size); Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const; void fixupNameToEnum(NameToEnum* n); void fixupEnumToNameGroup(EnumToOffset* e2ng); void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng); void computeOffsets(); void fixupStringPoolOffsets(); void fixupNameGroupPoolOffsets(); void fixupMiscellaneousOffsets(); static int32_t align(int32_t a); static void erase(void* p, int32_t size); }; Builder::Builder(int32_t debugLevel) { debug = debugLevel; enumToName = 0; nameToEnum = 0; enumToValue = 0; valueMap_count = 0; valueMap = 0; valueEnumToName = 0; valueNCEnumToName = 0; valueEnumToName_size = 0; valueEnumToName_offset = 0; valueNameToEnum = 0; valueNameToEnum_size = 0; valueNameToEnum_offset = 0; nameGroupPool = 0; stringPool = 0; stringPool_offsetArray = 0; } Builder::~Builder() { uprv_free(enumToName); uprv_free(nameToEnum); uprv_free(enumToValue); uprv_free(valueMap); for (int32_t i=0; i= 0); int32_t k = a % sizeof(int32_t); if (k == 0) { return a; } a += sizeof(int32_t) - k; return a; } void Builder::erase(void* p, int32_t size) { U_ASSERT(size >= 0); int8_t* q = (int8_t*) p; while (size--) { *q++ = 0; } } EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng, int32_t count, int32_t& size) { U_ASSERT(e2ng->isContiguous(count)); size = align(EnumToOffset::getSize(count)); EnumToOffset* result = (EnumToOffset*) uprv_malloc(size); erase(result, size); result->enumStart = e2ng->enumValue; result->enumLimit = e2ng->enumValue + count; Offset* p = result->getOffsetArray(); for (int32_t i=0; iisContiguous(count)); size = align(NonContiguousEnumToOffset::getSize(count)); NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size); erase(nc, size); nc->count = count; EnumValue* e = nc->getEnumArray(); Offset* p = nc->getOffsetArray(); for (int32_t i=0; iisContiguous(count)); size = align(NonContiguousEnumToOffset::getSize(count)); NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size); erase(result, size); result->count = count; EnumValue* e = result->getEnumArray(); for (int32_t i=0; i= stringPool_count) { die("String pool index too large"); } Offset result = stringPool_offset + stringPool_offsetArray[index]; U_ASSERT(result >= 0 && result < total_size); return result; } return 0; // never executed; make compiler happy } NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum, int32_t count, int32_t& size) { size = align(NameToEnum::getSize(count)); NameToEnum* n2e = (NameToEnum*) uprv_malloc(size); erase(n2e, size); n2e->count = count; Offset* p = n2e->getNameArray(); EnumValue* e = n2e->getEnumArray(); for (int32_t i=0; iisContiguous(count)); valueMap_count = count; enumToValue = buildNCEnumToValue(e2v, count, enumToValue_size); valueMap_size = align(count * sizeof(ValueMap)); valueMap = (ValueMap*) uprv_malloc(valueMap_size); erase(valueMap, valueMap_size); valueEnumToName = MALLOC(EnumToOffset*, count); valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count); valueEnumToName_size = MALLOC(int32_t, count); valueEnumToName_offset = MALLOC(Offset, count); valueNameToEnum = MALLOC(NameToEnum*, count); valueNameToEnum_size = MALLOC(int32_t, count); valueNameToEnum_offset = MALLOC(Offset, count); for (i=0; iisContiguous(e2v[i].enumToName_count); valueEnumToName[i] = 0; valueNCEnumToName[i] = 0; if (isContiguous) { valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName, e2v[i].enumToName_count, valueEnumToName_size[i]); } else { valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName, e2v[i].enumToName_count, valueEnumToName_size[i]); } valueNameToEnum[i] = buildNameToEnum(e2v[i].nameToEnum, e2v[i].nameToEnum_count, valueNameToEnum_size[i]); } } void Builder::buildStringPool(const AliasName* propertyNames, int32_t propertyNameCount, const int32_t* nameGroupIndices, int32_t nameGroupIndicesCount) { int32_t i; nameGroupPool_count = nameGroupIndicesCount; nameGroupPool_size = sizeof(Offset) * nameGroupPool_count; nameGroupPool = MALLOC(Offset, nameGroupPool_count); for (i=0; i0) { printf("header \t offset=%4d size=%5d\n", 0, off); } // PropertyAliases must have no v-table and must be // padded (if necessary) to the next 32-bit boundary. //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above U_ASSERT(sizeof(header) % sizeof(int32_t) == 0); #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t) #define COMPUTE_OFFSET2(foo,type) \ if (debug>0)\ printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\ foo##_offset = off;\ U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\ U_ASSERT(foo##_offset % sizeof(type) == 0);\ off = (Offset) (off + foo##_size); COMPUTE_OFFSET(enumToName); // 0: COMPUTE_OFFSET(nameToEnum); // 2: COMPUTE_OFFSET(enumToValue); // 3: COMPUTE_OFFSET(valueMap); // 4: for (i=0; i0) { printf(" enumToName[%d]\t offset=%4d size=%5d\n", (int)i, off, (int)valueEnumToName_size[i]); } valueEnumToName_offset[i] = off; // 5: U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i])); off = (Offset) (off + valueEnumToName_size[i]); if (debug>0) { printf(" nameToEnum[%d]\t offset=%4d size=%5d\n", (int)i, off, (int)valueNameToEnum_size[i]); } valueNameToEnum_offset[i] = off; // 6: U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i])); off = (Offset) (off + valueNameToEnum_size[i]); } // These last two chunks have weaker alignment needs COMPUTE_OFFSET2(nameGroupPool,Offset); // 98: COMPUTE_OFFSET2(stringPool,char); // 99: total_size = off; if (debug>0) printf("total size=%5d\n\n", (int)total_size); U_ASSERT(total_size <= (MAX_OFFSET+1)); } void Builder::fixupNameToEnum(NameToEnum* n) { // Fix the string pool offsets in n Offset* p = n->getNameArray(); for (int32_t i=0; icount; ++i) { p[i] = stringIndexToOffset(p[i]); } } void Builder::fixupStringPoolOffsets() { int32_t i; // 2: fixupNameToEnum(nameToEnum); // 6: for (i=0; igetOffsetArray(); for (i=e2ng->enumStart, j=0; ienumLimit; ++i, ++j) { p[j] = nameGroupPool_offset + sizeof(Offset) * p[j]; } } void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) { int32_t i; /*EnumValue* e = e2ng->getEnumArray();*/ Offset* p = e2ng->getOffsetArray(); for (i=0; icount; ++i) { p[i] = nameGroupPool_offset + sizeof(Offset) * p[i]; } } void Builder::fixupNameGroupPoolOffsets() { int32_t i; // 0: fixupNCEnumToNameGroup(enumToName); // 1: (deleted) // 5: for (i=0; i 0 && total_size < 0x7FFF); header.total_size = (int16_t) total_size; header.valueMap_offset = valueMap_offset; header.valueMap_count = (int16_t) valueMap_count; header.nameGroupPool_offset = nameGroupPool_offset; header.nameGroupPool_count = (int16_t) nameGroupPool_count; header.stringPool_offset = stringPool_offset; header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry U_ASSERT(valueMap_count <= 0x7FFF); U_ASSERT(nameGroupPool_count <= 0x7FFF); U_ASSERT(stringPool_count <= 0x7FFF); // 3: Offset* p = enumToValue->getOffsetArray(); /*EnumValue* e = enumToValue->getEnumArray();*/ U_ASSERT(valueMap_count == enumToValue->count); for (i=0; i enum map // This is an n->1 map. There are typically multiple names // mapping to one enum. The name index is sorted in order of the name, // as defined by the uprv_compareAliasNames() function. int32_t i, j; int32_t count = list.count(); // compute upper limit on number of names in the index int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP; NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity); nameIndexCount = 0; int32_t names[MAX_NAMES_PER_GROUP]; for (i=0; i1) { printf("Alias names: %d\n", (int)nameIndexCount); for (i=0; i %d\n", STRING_TABLE[nameIndex[i].nameIndex].str, (int)nameIndex[i].enumValue); } printf("\n"); } // make sure there are no duplicates. for a sorted list we need // only compare adjacent items. Alias.getUniqueNames() has // already eliminated duplicate names for a single property, which // does occur, so we're checking for duplicate names between two // properties, which should never occur. UBool ok = TRUE; for (i=1; i name map // This is a 1->n map. Each enum maps to 1 or more names. To // accomplish this the index entry points to an element of the // NAME_GROUP array. This is the short name (which may be empty). // From there, subsequent elements of NAME_GROUP are alternate // names for this enum, up to and including the first one that is // negative (negate for actual index). int32_t i, j, k; int32_t count = list.count(); EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count); for (i=0; i1) { printf("Property enums: %d\n", (int)count); for (i=0; i %d: ", (int)enumIndex[i].enumValue, (int)enumIndex[i].nameGroupIndex); UBool done = FALSE; for (j=enumIndex[i].nameGroupIndex; !done; ++j) { k = NAME_GROUP[j]; if (k < 0) { k = -k; done = TRUE; } printf("\"%s\"", STRING_TABLE[k].str); if (!done) printf(", "); } printf("\n"); } printf("\n"); } return enumIndex; } int genpname::MMain(int argc, char* argv[]) { int32_t i, j; UErrorCode status = U_ZERO_ERROR; u_init(&status); if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status)); status = U_ZERO_ERROR; } /* preset then read command line options */ options[3].value=u_getDataDirectory(); argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); /* error handling, printing usage message */ if (argc<0) { fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); } debug = options[5].doesOccur ? (*options[5].value - '0') : 0; if (argc!=1 || options[0].doesOccur || options[1].doesOccur || debug < 0 || debug > 9) { fprintf(stderr, "usage: %s [-options]\n" "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n" "options:\n" "\t-h or -? or --help this usage text\n" "\t-v or --verbose turn on verbose output\n" "\t-c or --copyright include a copyright notice\n" "\t-d or --destdir destination directory, followed by the path\n" "\t-D or --debug 0..9 emit debugging messages (if > 0)\n", argv[0]); return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; } /* get the options values */ useCopyright=options[2].doesOccur; verbose = options[4].doesOccur; // ------------------------------------------------------------ // Do not sort the string table, instead keep it in data.h order. // This simplifies data swapping and testing thereof because the string // table itself need not be sorted during swapping. // The NameToEnum sorter sorts each such map's string offsets instead. if (debug>1) { printf("String pool: %d\n", (int)STRING_COUNT); for (i=0; i