[UTF16LE]

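- Implement the ability to disable BOM writing: passing "nobom" as the optional third command-line argument suppresses the UTF-16LE BOM in the output file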

svn path=/trunk/; revision=72426
This commit is contained in:
Dmitry Chapyshev 2016-08-22 13:14:41 +00:00
parent 6ba5e47c0e
commit 457d4cff98

View file

@ -13,6 +13,7 @@
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <string.h>
//#define DISPLAY_DETECTED_UNICODE //#define DISPLAY_DETECTED_UNICODE
@ -25,14 +26,16 @@ public:
// due to ambiguous BOM // due to ambiguous BOM
enum enc_types { detect, utf8, utf16le, utf16be, utf32le, utf32be }; enum enc_types { detect, utf8, utf16le, utf16be, utf32le, utf32be };
enum err_types { none, iopen, oopen, eof, read, write, decode }; enum err_types { none, iopen, oopen, eof, read, write, decode };
enum bom_types { bom, nobom };
protected: protected:
err_types error; err_types error;
enc_types encoding; enc_types encoding;
bom_types bom_type;
unsigned char buffer[4], fill, index; // need 4 char buffer for optional BOM handling unsigned char buffer[4], fill, index; // need 4 char buffer for optional BOM handling
fstream inputfile,outputfile; fstream inputfile,outputfile;
static const unsigned char utf8table[64]; static const unsigned char utf8table[64];
public: public:
utf_converter(string ifname, string ofname, enc_types enc = detect) : error(none), encoding(enc), fill(0), index(0) utf_converter(string ifname, string ofname, bom_types ofbom = bom, enc_types enc = detect) : error(none), bom_type(ofbom), encoding(enc), fill(0), index(0)
{ {
enc_types tmp_enc; enc_types tmp_enc;
inputfile.open(ifname.c_str(), ios::in | ios::binary); inputfile.open(ifname.c_str(), ios::in | ios::binary);
@ -230,10 +233,15 @@ public:
} }
void convert2utf16le() void convert2utf16le()
{ {
wchar_t c; unsigned char buffer[2] = { 0xff, 0xfe };
unsigned char buffer[2] = {0xff, 0xfe};
outputfile.write(reinterpret_cast<char*>(&buffer),2); // write BOM if (bom_type == bom)
c = get_wchar_t(); {
outputfile.write(reinterpret_cast<char*>(&buffer), 2); // write BOM
}
wchar_t c = get_wchar_t();
while (!inputfile.eof()) while (!inputfile.eof())
{ {
buffer[0] = c & 0xff; buffer[0] = c & 0xff;
@ -262,12 +270,22 @@ const unsigned char utf_converter::utf8table[64] = {
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
utf_converter::err_types err; utf_converter::err_types err;
if (argc < 3) if (argc < 3)
{ {
cout << "usage: " << argv[0] << " inputfile outputfile" << endl; cout << "usage: " << argv[0] << " inputfile outputfile" << endl;
return -1; return -1;
} }
utf_converter conv(argv[1],argv[2]);
utf_converter::bom_types bom_type = utf_converter::bom;
if (argc == 4 && _stricmp(argv[3], "nobom") == 0)
{
bom_type = utf_converter::nobom;
}
utf_converter conv(argv[1],argv[2], bom_type);
if ((err = conv.getError())!=utf_converter::none) if ((err = conv.getError())!=utf_converter::none)
{ {
switch (err) switch (err)
@ -282,7 +300,11 @@ int main(int argc, char* argv[])
cerr << "Unknown error." << endl; cerr << "Unknown error." << endl;
} }
return -1; return -1;
} else }
conv.convert2utf16le(); else
{
conv.convert2utf16le();
}
return 0; return 0;
} }