mirror of
https://github.com/reactos/reactos.git
synced 2024-08-06 19:36:50 +00:00
[UTF16LE]
- Implement ability to disable BOM writing
svn path=/trunk/; revision=72426
This commit is contained in:
parent
6ba5e47c0e
commit
457d4cff98
|
@ -13,6 +13,7 @@
|
||||||
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
//#define DISPLAY_DETECTED_UNICODE
|
//#define DISPLAY_DETECTED_UNICODE
|
||||||
|
|
||||||
|
@ -25,14 +26,16 @@ public:
|
||||||
// due to ambiguous BOM
|
// due to ambiguous BOM
|
||||||
enum enc_types { detect, utf8, utf16le, utf16be, utf32le, utf32be };
|
enum enc_types { detect, utf8, utf16le, utf16be, utf32le, utf32be };
|
||||||
enum err_types { none, iopen, oopen, eof, read, write, decode };
|
enum err_types { none, iopen, oopen, eof, read, write, decode };
|
||||||
|
enum bom_types { bom, nobom };
|
||||||
protected:
|
protected:
|
||||||
err_types error;
|
err_types error;
|
||||||
enc_types encoding;
|
enc_types encoding;
|
||||||
|
bom_types bom_type;
|
||||||
unsigned char buffer[4], fill, index; // need 4 char buffer for optional BOM handling
|
unsigned char buffer[4], fill, index; // need 4 char buffer for optional BOM handling
|
||||||
fstream inputfile,outputfile;
|
fstream inputfile,outputfile;
|
||||||
static const unsigned char utf8table[64];
|
static const unsigned char utf8table[64];
|
||||||
public:
|
public:
|
||||||
utf_converter(string ifname, string ofname, enc_types enc = detect) : error(none), encoding(enc), fill(0), index(0)
|
utf_converter(string ifname, string ofname, bom_types ofbom = bom, enc_types enc = detect) : error(none), bom_type(ofbom), encoding(enc), fill(0), index(0)
|
||||||
{
|
{
|
||||||
enc_types tmp_enc;
|
enc_types tmp_enc;
|
||||||
inputfile.open(ifname.c_str(), ios::in | ios::binary);
|
inputfile.open(ifname.c_str(), ios::in | ios::binary);
|
||||||
|
@ -230,10 +233,15 @@ public:
|
||||||
}
|
}
|
||||||
void convert2utf16le()
|
void convert2utf16le()
|
||||||
{
|
{
|
||||||
wchar_t c;
|
unsigned char buffer[2] = { 0xff, 0xfe };
|
||||||
unsigned char buffer[2] = {0xff, 0xfe};
|
|
||||||
outputfile.write(reinterpret_cast<char*>(&buffer),2); // write BOM
|
if (bom_type == bom)
|
||||||
c = get_wchar_t();
|
{
|
||||||
|
outputfile.write(reinterpret_cast<char*>(&buffer), 2); // write BOM
|
||||||
|
}
|
||||||
|
|
||||||
|
wchar_t c = get_wchar_t();
|
||||||
|
|
||||||
while (!inputfile.eof())
|
while (!inputfile.eof())
|
||||||
{
|
{
|
||||||
buffer[0] = c & 0xff;
|
buffer[0] = c & 0xff;
|
||||||
|
@ -262,12 +270,22 @@ const unsigned char utf_converter::utf8table[64] = {
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
utf_converter::err_types err;
|
utf_converter::err_types err;
|
||||||
|
|
||||||
if (argc < 3)
|
if (argc < 3)
|
||||||
{
|
{
|
||||||
cout << "usage: " << argv[0] << " inputfile outputfile" << endl;
|
cout << "usage: " << argv[0] << " inputfile outputfile" << endl;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
utf_converter conv(argv[1],argv[2]);
|
|
||||||
|
utf_converter::bom_types bom_type = utf_converter::bom;
|
||||||
|
|
||||||
|
if (argc == 4 && _stricmp(argv[3], "nobom") == 0)
|
||||||
|
{
|
||||||
|
bom_type = utf_converter::nobom;
|
||||||
|
}
|
||||||
|
|
||||||
|
utf_converter conv(argv[1],argv[2], bom_type);
|
||||||
|
|
||||||
if ((err = conv.getError())!=utf_converter::none)
|
if ((err = conv.getError())!=utf_converter::none)
|
||||||
{
|
{
|
||||||
switch (err)
|
switch (err)
|
||||||
|
@ -282,7 +300,11 @@ int main(int argc, char* argv[])
|
||||||
cerr << "Unknown error." << endl;
|
cerr << "Unknown error." << endl;
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
} else
|
}
|
||||||
conv.convert2utf16le();
|
else
|
||||||
|
{
|
||||||
|
conv.convert2utf16le();
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue