Return libdxfrw internal text codec converter.
This commit is contained in:
parent
c2fea54864
commit
5f091344fa
7812
src/libs/vdxf/libdxfrw/intern/drw_cptable932.h
Normal file
7812
src/libs/vdxf/libdxfrw/intern/drw_cptable932.h
Normal file
File diff suppressed because it is too large
Load Diff
21943
src/libs/vdxf/libdxfrw/intern/drw_cptable936.h
Normal file
21943
src/libs/vdxf/libdxfrw/intern/drw_cptable936.h
Normal file
File diff suppressed because it is too large
Load Diff
17199
src/libs/vdxf/libdxfrw/intern/drw_cptable949.h
Normal file
17199
src/libs/vdxf/libdxfrw/intern/drw_cptable949.h
Normal file
File diff suppressed because it is too large
Load Diff
13654
src/libs/vdxf/libdxfrw/intern/drw_cptable950.h
Normal file
13654
src/libs/vdxf/libdxfrw/intern/drw_cptable950.h
Normal file
File diff suppressed because it is too large
Load Diff
1330
src/libs/vdxf/libdxfrw/intern/drw_cptables.h
Normal file
1330
src/libs/vdxf/libdxfrw/intern/drw_cptables.h
Normal file
File diff suppressed because it is too large
Load Diff
|
@ -1,25 +1,26 @@
|
|||
#include "drw_textcodec.h"
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include "../drw_base.h"
|
||||
#include "drw_cptable932.h"
|
||||
#include "drw_cptable936.h"
|
||||
#include "drw_cptable949.h"
|
||||
#include "drw_cptable950.h"
|
||||
#include "drw_cptables.h"
|
||||
#include <QDebug>
|
||||
#include <QString>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <QString>
|
||||
#include <QDebug>
|
||||
#include "../drw_base.h"
|
||||
#include "../vmisc/vabstractvalapplication.h"
|
||||
#include "../ifc/exception/vexception.h"
|
||||
|
||||
#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
|
||||
#include "../vmisc/vtextcodec.h"
|
||||
#else
|
||||
#include <QTextCodec>
|
||||
#endif
|
||||
#include <iomanip>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
|
||||
DRW_TextCodec::DRW_TextCodec()
|
||||
: version(DRW::AC1021)
|
||||
{}
|
||||
: version(DRW::AC1021),
|
||||
conv(new DRW_Converter(nullptr, 0))
|
||||
{
|
||||
}
|
||||
|
||||
void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){
|
||||
void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat)
|
||||
{
|
||||
switch (v)
|
||||
{
|
||||
case DRW::UNKNOWNV:
|
||||
|
@ -47,10 +48,10 @@ void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){
|
|||
case DRW::AC1018:
|
||||
{
|
||||
version = DRW::AC1015;
|
||||
// if (cp.empty()) { //codepage not set, initialize
|
||||
cp = "ANSI_1252";
|
||||
setCodePage(cp, dxfFormat);
|
||||
// }
|
||||
// if (cp.empty()) { //codepage not set, initialize
|
||||
cp = "ANSI_1252";
|
||||
setCodePage(cp, dxfFormat);
|
||||
// }
|
||||
break;
|
||||
}
|
||||
case DRW::AC1021:
|
||||
|
@ -61,11 +62,11 @@ void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){
|
|||
version = DRW::AC1021;
|
||||
if (dxfFormat)
|
||||
{
|
||||
cp = "UTF-8";//RLZ: can be UCS2 or UTF-16 16bits per char
|
||||
cp = "UTF-8"; // RLZ: can be UCS2 or UTF-16 16bits per char
|
||||
}
|
||||
else
|
||||
{
|
||||
cp = "UTF-16";//RLZ: can be UCS2 or UTF-16 16bits per char
|
||||
cp = "UTF-16"; // RLZ: can be UCS2 or UTF-16 16bits per char
|
||||
}
|
||||
setCodePage(cp, dxfFormat);
|
||||
break;
|
||||
|
@ -75,134 +76,609 @@ void DRW_TextCodec::setVersion(DRW::Version v, bool dxfFormat){
|
|||
}
|
||||
}
|
||||
|
||||
void DRW_TextCodec::setVersion(const std::string &v, bool dxfFormat){
|
||||
void DRW_TextCodec::setVersion(const std::string &v, bool dxfFormat)
|
||||
{
|
||||
version = DRW::UNKNOWNV;
|
||||
for (auto dwgVersionString : DRW::dwgVersionStrings)
|
||||
{
|
||||
if (std::strcmp( v.c_str(), dwgVersionString.first ) == 0)
|
||||
if (std::strcmp(v.c_str(), dwgVersionString.first) == 0)
|
||||
{
|
||||
version = dwgVersionString.second;
|
||||
setVersion( dwgVersionString.second, dxfFormat);
|
||||
setVersion(dwgVersionString.second, dxfFormat);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DRW_TextCodec::setCodePage(const std::string &c, bool dxfFormat){
|
||||
void DRW_TextCodec::setCodePage(const std::string &c, bool dxfFormat)
|
||||
{
|
||||
cp = correctCodePage(c);
|
||||
if (version < DRW::AC1021)
|
||||
conv.reset();
|
||||
if (version == DRW::AC1009 || version == DRW::AC1015)
|
||||
{
|
||||
if (cp == "UTF-8")
|
||||
if (cp == "ANSI_874")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvTable>(DRW_Table874, CPLENGTHCOMMON);
|
||||
}
|
||||
else if (cp == "ANSI_932")
|
||||
{
|
||||
conv = std::make_unique<DRW_Conv932Table>();
|
||||
}
|
||||
else if (cp == "ANSI_936")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvDBCSTable>(DRW_Table936, DRW_LeadTable936, DRW_DoubleTable936, CPLENGTH936);
|
||||
}
|
||||
else if (cp == "ANSI_949")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvDBCSTable>(DRW_Table949, DRW_LeadTable949, DRW_DoubleTable949, CPLENGTH949);
|
||||
}
|
||||
else if (cp == "ANSI_950")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvDBCSTable>(DRW_Table950, DRW_LeadTable950, DRW_DoubleTable950, CPLENGTH950);
|
||||
}
|
||||
else if (cp == "ANSI_1250")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvTable>(DRW_Table1250, CPLENGTHCOMMON);
|
||||
}
|
||||
else if (cp == "ANSI_1251")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvTable>(DRW_Table1251, CPLENGTHCOMMON);
|
||||
}
|
||||
else if (cp == "ANSI_1253")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvTable>(DRW_Table1253, CPLENGTHCOMMON);
|
||||
}
|
||||
else if (cp == "ANSI_1254")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvTable>(DRW_Table1254, CPLENGTHCOMMON);
|
||||
}
|
||||
else if (cp == "ANSI_1255")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvTable>(DRW_Table1255, CPLENGTHCOMMON);
|
||||
}
|
||||
else if (cp == "ANSI_1256")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvTable>(DRW_Table1256, CPLENGTHCOMMON);
|
||||
}
|
||||
else if (cp == "ANSI_1257")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvTable>(DRW_Table1257, CPLENGTHCOMMON);
|
||||
}
|
||||
else if (cp == "ANSI_1258")
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvTable>(DRW_Table1258, CPLENGTHCOMMON);
|
||||
}
|
||||
else if (cp == "UTF-8")
|
||||
{ // DXF older than 2007 are write in win codepages
|
||||
cp = "ANSI_1252";
|
||||
conv = std::make_unique<DRW_Converter>(nullptr, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvTable>(DRW_Table1252, CPLENGTHCOMMON);
|
||||
}
|
||||
conv = DRW_TextCodec::CodecForName(QString::fromStdString(cp));
|
||||
}
|
||||
else
|
||||
{
|
||||
conv = DRW_TextCodec::CodecForName(dxfFormat ? QStringLiteral("UTF-8") : QStringLiteral("UTF-16"));
|
||||
}
|
||||
|
||||
if (conv == nullptr)
|
||||
{
|
||||
const QString errorMsg = QCoreApplication::translate("DRW_TextCodec", "No available codec for code page '%1'.")
|
||||
.arg(cp.c_str());
|
||||
VAbstractApplication::VApp()->IsPedantic()
|
||||
? throw VException(errorMsg) : qWarning() << VAbstractValApplication::warningMessageSignature + errorMsg;
|
||||
|
||||
if (version < DRW::AC1021 && cp == "UTF-8")
|
||||
if (dxfFormat)
|
||||
{
|
||||
cp = "ANSI_1252";
|
||||
conv = DRW_TextCodec::CodecForName(QString::fromStdString(cp)); // Fallback to latin
|
||||
conv = std::make_unique<DRW_Converter>(nullptr, 0); // utf16 to utf8
|
||||
}
|
||||
else
|
||||
{
|
||||
conv = std::make_unique<DRW_ConvUTF16>(); // utf16 to utf8
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto DRW_TextCodec::DXFCodePageMap() -> QMap<QString, QStringList>
|
||||
{
|
||||
static auto map = QMap<QString, QStringList>
|
||||
{
|
||||
{"ANSI_874", {"ANSI_874", "CP874", "ISO8859-11", "TIS-620"}}, // Latin/Thai
|
||||
{"ANSI_932", {"ANSI_932", "SHIFT-JIS", "SHIFT_JIS", "CSSHIFTJIS", "CSWINDOWS31J", "MS_KANJI", "X-MS-CP932",
|
||||
"X-SJIS", "EUCJP", "EUC-JP", "CSEUCPKDFMTJAPANESE", "X-EUC", "X-EUC-JP", "CP932",
|
||||
"JIS7"}}, // Japanese
|
||||
{"ANSI_936", {"ANSI_936", "GBK", "GB2312", "CHINESE", "CN-GB", "CSGB2312", "CSGB231280", "CSISO58BG231280",
|
||||
"GB_2312-80", "GB231280", "GB2312-80", "ISO-IR-58", "GB18030"}}, // Chinese PRC GBK (XGB) simplified
|
||||
{"ANSI_949", {"ANSI_949", "EUCKR"}}, // Korean
|
||||
{"ANSI_950", {"ANSI_950", "BIG5", "CN-BIG5", "CSBIG5", "X-X-BIG5",
|
||||
"BIG5-HKSCS"}}, // Chinese Big5 (Taiwan, Hong Kong SAR)
|
||||
{"ANSI_1250", {"ANSI_1250", "CP1250", "ISO8859-2"}}, //Central Europe and Eastern Europe
|
||||
{"ANSI_1251", {"ANSI_1251", "CP1251", "ISO8859-5", "KOI8-R", "KOI8-U", "IBM 866"}}, // Cyrillic script
|
||||
{"ANSI_1252", {"ANSI_1252", "CP1252", "LATIN1", "ISO-8859-1", "CP819", "CSISO", "IBM819", "L1",
|
||||
"ISO_8859-1", "APPLE ROMAN", "ISO8859-1", "ISO8859-15", "ISO-IR-100", "IBM 850"}}, // Western Europe
|
||||
{"ANSI_1253", {"ANSI_1253", "CP1253", "ISO8859-7"}}, // Greek
|
||||
{"ANSI_1254", {"ANSI_1254", "CP1254", "ISO8859-9", "iso8859-3"}}, // Turkish
|
||||
{"ANSI_1255", {"ANSI_1255", "CP1255", "ISO8859-8"}}, // Hebrew
|
||||
{"ANSI_1256", {"ANSI_1256", "CP1256", "ISO8859-6"}}, // Arabic
|
||||
{"ANSI_1257", {"ANSI_1257", "CP1257", "ISO8859-4", "ISO8859-10", "ISO8859-13"}}, // Baltic
|
||||
{"ANSI_1258", {"ANSI_1258", "CP1258"}}, // Vietnamese
|
||||
{"UTF-8", {"UTF-8", "UTF8", "UTF8-BIT"}},
|
||||
{"UTF-16", {"UTF-16", "UTF16", "UTF16-BIT"}},
|
||||
};
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
auto DRW_TextCodec::CodecForName(const QString &name) -> VTextCodec *
|
||||
{
|
||||
QMap<QString, QStringList> knownCodecs = DXFCodePageMap();
|
||||
if (knownCodecs.contains(name))
|
||||
{
|
||||
QStringList aliases = knownCodecs.value(name);
|
||||
for (auto &alias : aliases)
|
||||
{
|
||||
if (VTextCodec *codec = VTextCodec::codecForName(alias.toLatin1()))
|
||||
{
|
||||
return codec;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto DRW_TextCodec::toUtf8(const std::string &s) -> std::string
|
||||
{
|
||||
if (conv == nullptr)
|
||||
{
|
||||
return s;
|
||||
}
|
||||
|
||||
const QString encodedString = conv->toUnicode(s.c_str());
|
||||
return encodedString.toStdString();
|
||||
return conv->toUtf8(s);
|
||||
}
|
||||
|
||||
auto DRW_TextCodec::fromUtf8(const std::string &s) -> std::string
|
||||
{
|
||||
if (conv == nullptr)
|
||||
{
|
||||
return s;
|
||||
}
|
||||
|
||||
const QByteArray encodedString = conv->fromUnicode(QString::fromStdString(s));
|
||||
return std::string(encodedString.constData());
|
||||
return conv->fromUtf8(s);
|
||||
}
|
||||
|
||||
auto DRW_TextCodec::correctCodePage(const std::string& s) -> std::string {
|
||||
//stringstream cause crash in OS/X, bug#3597944
|
||||
QString codePage = QString::fromStdString(s);
|
||||
codePage = codePage.toUpper();
|
||||
QMap<QString, QStringList> codeMap = DRW_TextCodec::DXFCodePageMap();
|
||||
auto DRW_TextCodec::correctCodePage(const std::string &s) -> std::string
|
||||
{
|
||||
// stringstream cause crash in OS/X, bug#3597944
|
||||
std::string cp = s;
|
||||
transform(cp.begin(), cp.end(), cp.begin(), toupper);
|
||||
|
||||
auto i = codeMap.constBegin();
|
||||
while (i != codeMap.constEnd())
|
||||
static auto map = QMap<std::string, QSet<std::string>>{
|
||||
// Latin/Thai
|
||||
{"ANSI_874", {"ANSI_874", "CP874", "ISO8859-11", "TIS-620"}}, // Central Europe and Eastern Europe
|
||||
{"ANSI_1250", {"ANSI_1250", "CP1250", "ISO8859-2"}}, // Cyrillic script
|
||||
{"ANSI_1251", {"ANSI_1251", "CP1251", "ISO8859-5", "KOI8-R", "KOI8-U", "IBM 866"}}, // Western Europe
|
||||
{"ANSI_1252",
|
||||
{"ANSI_1252", "CP1252", "LATIN1", "ISO-8859-1", "CP819", "CSISO", "IBM819", "ISO_8859-1", "APPLE ROMAN",
|
||||
"ISO8859-1", "ISO8859-15", "ISO-IR-100", "L1", "IBM 850"}}, // Greek
|
||||
{"ANSI_1253", {"ANSI_1253", "CP1253", "ISO8859-7"}}, // Turkish
|
||||
{"ANSI_1254", {"ANSI_1254", "CP1254", "ISO8859-9", "ISO8859-3"}}, // Hebrew
|
||||
{"ANSI_1255", {"ANSI_1255", "CP1255", "ISO8859-8"}}, // Arabic
|
||||
{"ANSI_1256", {"ANSI_1256", "CP1256", "ISO8859-6"}}, // Baltic
|
||||
{"ANSI_1257", {"ANSI_1257", "CP1257", "ISO8859-4", "ISO8859-10", "ISO8859-13"}}, // Vietnamese
|
||||
{"ANSI_1258", {"ANSI_1258", "CP1258"}}, // Japanese
|
||||
{"ANSI_932",
|
||||
{"ANSI_932", "SHIFT-JIS", "SHIFT_JIS", "CSSHIFTJIS", "CSWINDOWS31J", "MS_KANJI", "X-MS-CP932", "X-SJIS",
|
||||
"EUCJP", "EUC-JP", "CSEUCPKDFMTJAPANESE", "X-EUC", "X-EUC-JP", "JIS7"}}, // Chinese PRC GBK (XGB) simplified
|
||||
{"ANSI_936",
|
||||
{"ANSI_936", "GBK", "GB2312", "CHINESE", "CN-GB", "CSGB2312", "CSGB231280", "CSISO58BG231280", "GB_2312-80",
|
||||
"GB231280", "GB2312-80", "ISO-IR-58", "GB18030"}}, // Korean
|
||||
{"ANSI_949", {"ANSI_949", "EUCKR"}}, // Chinese Big5 (Taiwan, Hong Kong SAR)
|
||||
{"ANSI_950", {"ANSI_950", "BIG5", "CN-BIG5", "CSBIG5", "X-X-BIG5", "BIG5-HKSCS"}}, // celtic
|
||||
// {"ISO8859-14", {"ISO8859-14"}},
|
||||
// {"TSCII", {"TSCII"}},//tamil
|
||||
{"UTF-8", {"UTF-8", "UTF8", "UTF8-BIT"}},
|
||||
{"UTF-16", {"UTF-16", "UTF16", "UTF16-BIT"}}};
|
||||
|
||||
auto i = map.constBegin();
|
||||
while (i != map.constEnd())
|
||||
{
|
||||
if (i.value().contains(codePage))
|
||||
if (i.value().contains(cp))
|
||||
{
|
||||
return i.key().toStdString();
|
||||
return i.key();
|
||||
}
|
||||
++i;
|
||||
}
|
||||
|
||||
return "ANSI_1252";
|
||||
}
|
||||
|
||||
auto DRW_Converter::toUtf8(const std::string &s) -> std::string
|
||||
{
|
||||
std::string result;
|
||||
unsigned int j = 0;
|
||||
unsigned int i = 0;
|
||||
for (i = 0; i < s.length(); i++)
|
||||
{
|
||||
auto c = static_cast<unsigned char>(s.at(i));
|
||||
if (c < 0x80)
|
||||
{ // ascii check for /U+????
|
||||
if (c == '\\' && i + 6 < s.length() && s.at(i + 1) == 'U' && s.at(i + 2) == '+')
|
||||
{
|
||||
result += s.substr(j, i - j);
|
||||
result += encodeText(s.substr(i, 7));
|
||||
i += 6;
|
||||
j = i + 1;
|
||||
}
|
||||
}
|
||||
else if (c < 0xE0)
|
||||
{ // 2 bits
|
||||
i++;
|
||||
}
|
||||
else if (c < 0xF0)
|
||||
{ // 3 bits
|
||||
i += 2;
|
||||
}
|
||||
else if (c < 0xF8)
|
||||
{ // 4 bits
|
||||
i += 3;
|
||||
}
|
||||
}
|
||||
result += s.substr(j);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
auto DRW_ConvTable::fromUtf8(const std::string &s) -> std::string
|
||||
{
|
||||
std::string result;
|
||||
bool notFound;
|
||||
int code;
|
||||
|
||||
unsigned int j = 0;
|
||||
for (unsigned int i = 0; i < s.length(); i++)
|
||||
{
|
||||
auto c = static_cast<unsigned char>(s.at(i));
|
||||
if (c > 0x7F)
|
||||
{ // need to decode
|
||||
result += s.substr(j, i - j);
|
||||
std::string part1 = s.substr(i, 4);
|
||||
unsigned int l;
|
||||
code = decodeNum(part1, &l);
|
||||
j = i + l;
|
||||
i = j - 1;
|
||||
notFound = true;
|
||||
for (int k = 0; k < cpLength; k++)
|
||||
{
|
||||
if (table[k] == code)
|
||||
{
|
||||
result += static_cast<char>(CPOFFSET + k); // translate from table
|
||||
notFound = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (notFound)
|
||||
{
|
||||
result += decodeText(code);
|
||||
}
|
||||
}
|
||||
}
|
||||
result += s.substr(j);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
auto DRW_ConvTable::toUtf8(const std::string &s) -> std::string
|
||||
{
|
||||
std::string res;
|
||||
for (auto it = s.begin(); it < s.end(); ++it)
|
||||
{
|
||||
auto c = static_cast<unsigned char>(*it);
|
||||
if (c < 0x80)
|
||||
{
|
||||
// check for \U+ encoded text
|
||||
if (c == '\\')
|
||||
{
|
||||
if (s.end() - it > 6 && *(it + 1) == 'U' && *(it + 2) == '+')
|
||||
{
|
||||
res += encodeText(std::string(it, it + 7));
|
||||
it += 6;
|
||||
}
|
||||
else
|
||||
{
|
||||
res += static_cast<char>(c); // no \U+ encoded text write
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
res += static_cast<char>(c); // c!='\' ascii char write
|
||||
}
|
||||
}
|
||||
else
|
||||
{ // end c < 0x80
|
||||
res += encodeNum(table[c - 0x80]); // translate from table
|
||||
}
|
||||
} // end for
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
auto DRW_Converter::encodeText(const std::string &stmp) -> std::string
|
||||
{
|
||||
int code;
|
||||
#if defined(__APPLE__)
|
||||
int Succeeded = sscanf(&(stmp.substr(3, 4)[0]), "%x", &code);
|
||||
if (!Succeeded || Succeeded == EOF)
|
||||
code = 0;
|
||||
#else
|
||||
std::istringstream sd(stmp.substr(3, 4));
|
||||
sd >> std::hex >> code;
|
||||
#endif
|
||||
return encodeNum(code);
|
||||
}
|
||||
|
||||
/**
 * Format a code point as a `\U+XXXX` escape.
 *
 * The number is written in upper-case hexadecimal, zero-padded to at least
 * four digits.
 *
 * @param c code point to format
 * @return the escape text
 */
auto DRW_Converter::decodeText(int c) -> std::string
{
    std::string res = "\\U+";
#if defined(__APPLE__)
    // Format into a plain buffer and append it as a C string. Copying a
    // pre-sized std::string here would also append its unused '\0' padding.
    char buf[16] = {};
    snprintf(buf, sizeof(buf), "%04X", static_cast<unsigned int>(c));
    res += buf;
#else
    std::string num;
    std::stringstream ss;
    ss << std::uppercase << std::setfill('0') << std::setw(4) << std::hex << c;
    ss >> num;
    res += num;
#endif
    return res;
}
|
||||
|
||||
/**
 * Encode a code point as a UTF-8 byte sequence.
 *
 * Values below 128 give one byte, below 0x800 two bytes, below 0x10000 three
 * bytes and everything else four bytes. A one-byte value whose byte is 0
 * yields an empty string.
 *
 * @param c code point
 * @return the encoded bytes
 */
auto DRW_Converter::encodeNum(int c) -> std::string
{
    // Narrow to a byte first, then reinterpret it as char.
    const auto toChar = [](int value) { return static_cast<char>(static_cast<unsigned char>(value)); };
    // Continuation byte carrying the low six bits of 'bits'.
    const auto trail = [&toChar](int bits) { return toChar(0x80 | (bits & 0x3f)); };

    std::string utf8;
    if (c < 128)
    { // 0-7F US-ASCII 7 bits
        const auto single = static_cast<unsigned char>(c);
        if (single != 0)
        {
            utf8 += static_cast<char>(single);
        }
        return utf8;
    }

    if (c < 0x800)
    { // 80-07FF 2 bytes
        utf8 += toChar(0xC0 | (c >> 6));
    }
    else if (c < 0x10000)
    { // 800-FFFF 3 bytes
        utf8 += toChar(0xe0 | (c >> 12));
        utf8 += trail(c >> 6);
    }
    else
    { // 10000-10FFFF 4 bytes
        utf8 += toChar(0xf0 | (c >> 18));
        utf8 += trail(c >> 12);
        utf8 += trail(c >> 6);
    }
    utf8 += trail(c);

    return utf8;
}
|
||||
|
||||
/** 's' is a string with at least 4 bytes length
|
||||
** returned 'b' is byte length of encoded char: 2,3 or 4
|
||||
**/
|
||||
auto DRW_Converter::decodeNum(const std::string &s, unsigned int *b) -> int
|
||||
{
|
||||
int code = 0;
|
||||
auto c = static_cast<unsigned char>(s.at(0));
|
||||
if ((c & 0xE0) == 0xC0)
|
||||
{ // 2 bytes
|
||||
code = (c & 0x1F) << 6;
|
||||
code = (s.at(1) & 0x3F) | code;
|
||||
*b = 2;
|
||||
}
|
||||
else if ((c & 0xF0) == 0xE0)
|
||||
{ // 3 bytes
|
||||
code = (c & 0x0F) << 12;
|
||||
code = ((s.at(1) & 0x3F) << 6) | code;
|
||||
code = (s.at(2) & 0x3F) | code;
|
||||
*b = 3;
|
||||
}
|
||||
else if ((c & 0xF8) == 0xF0)
|
||||
{ // 4 bytes
|
||||
code = (c & 0x07) << 18;
|
||||
code = ((s.at(1) & 0x3F) << 12) | code;
|
||||
code = ((s.at(2) & 0x3F) << 6) | code;
|
||||
code = (s.at(3) & 0x3F) | code;
|
||||
*b = 4;
|
||||
}
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
auto DRW_ConvDBCSTable::fromUtf8(const std::string &s) -> std::string
|
||||
{
|
||||
std::string result;
|
||||
bool notFound;
|
||||
int code;
|
||||
|
||||
unsigned int j = 0;
|
||||
for (unsigned int i = 0; i < s.length(); i++)
|
||||
{
|
||||
auto c = static_cast<unsigned char>(s.at(i));
|
||||
if (c > 0x7F)
|
||||
{ // need to decode
|
||||
result += s.substr(j, i - j);
|
||||
std::string part1 = s.substr(i, 4);
|
||||
unsigned int l;
|
||||
code = decodeNum(part1, &l);
|
||||
j = i + l;
|
||||
i = j - 1;
|
||||
notFound = true;
|
||||
for (int k = 0; k < cpLength; k++)
|
||||
{
|
||||
if (doubleTable[k][1] == code)
|
||||
{
|
||||
int data = doubleTable[k][0];
|
||||
char d[3];
|
||||
d[0] = static_cast<char>(data >> 8);
|
||||
d[1] = static_cast<char>(data & 0xFF);
|
||||
d[2] = '\0';
|
||||
result += d; // translate from table
|
||||
notFound = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (notFound)
|
||||
{
|
||||
result += decodeText(code);
|
||||
}
|
||||
} // direct conversion
|
||||
}
|
||||
result += s.substr(j);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
auto DRW_ConvDBCSTable::toUtf8(const std::string &s) -> std::string
|
||||
{
|
||||
std::string res;
|
||||
for (auto it = s.begin(); it < s.end(); ++it)
|
||||
{
|
||||
bool notFound = true;
|
||||
auto c = static_cast<unsigned char>(*it);
|
||||
if (c < 0x80)
|
||||
{
|
||||
notFound = false;
|
||||
// check for \U+ encoded text
|
||||
if (c == '\\')
|
||||
{
|
||||
if (s.end() - it > 6 && *(it + 1) == 'U' && *(it + 2) == '+')
|
||||
{
|
||||
res += encodeText(std::string(it, it + 7));
|
||||
it += 6;
|
||||
}
|
||||
else
|
||||
{
|
||||
res += static_cast<char>(c); // no \U+ encoded text write
|
||||
}
|
||||
}
|
||||
else
|
||||
res += static_cast<char>(c); // c!='\' ascii char write
|
||||
}
|
||||
else if (c == 0x80)
|
||||
{ // 1 byte table
|
||||
notFound = false;
|
||||
res += encodeNum(0x20AC); // euro sign
|
||||
}
|
||||
else
|
||||
{ // 2 bytes
|
||||
++it;
|
||||
int code = (c << 8) | static_cast<unsigned char>(*it);
|
||||
int sta = leadTable[c - 0x81];
|
||||
int end = leadTable[c - 0x80];
|
||||
for (int k = sta; k < end; k++)
|
||||
{
|
||||
if (doubleTable[k][0] == code)
|
||||
{
|
||||
res += encodeNum(doubleTable[k][1]); // translate from table
|
||||
notFound = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// not found
|
||||
if (notFound)
|
||||
{
|
||||
res += encodeNum(NOTFOUND936);
|
||||
}
|
||||
} // end for
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/** Create the code page 932 converter on top of DRW_Table932 / CPLENGTH932. */
DRW_Conv932Table::DRW_Conv932Table()
    : DRW_Converter(DRW_Table932, CPLENGTH932)
{}
|
||||
|
||||
auto DRW_Conv932Table::fromUtf8(const std::string &s) -> std::string
|
||||
{
|
||||
std::string result;
|
||||
bool notFound;
|
||||
int code;
|
||||
|
||||
unsigned int j = 0;
|
||||
for (unsigned int i = 0; i < s.length(); i++)
|
||||
{
|
||||
auto c = static_cast<unsigned char>(s.at(i));
|
||||
if (c > 0x7F)
|
||||
{ // need to decode
|
||||
result += s.substr(j, i - j);
|
||||
std::string part1 = s.substr(i, 4);
|
||||
unsigned int l;
|
||||
code = decodeNum(part1, &l);
|
||||
j = i + l;
|
||||
i = j - 1;
|
||||
notFound = true;
|
||||
// 1 byte table
|
||||
if (code > 0xff60 && code < 0xFFA0)
|
||||
{
|
||||
result += static_cast<char>(code - CPOFFSET932); // translate from table
|
||||
notFound = false;
|
||||
}
|
||||
if (notFound &&
|
||||
(code < 0xF8 || (code > 0x390 && code < 0x542) || (code > 0x200F && code < 0x9FA1) || code > 0xF928))
|
||||
{
|
||||
for (int k = 0; k < cpLength; k++)
|
||||
{
|
||||
if (DRW_DoubleTable932[k][1] == code)
|
||||
{
|
||||
int data = DRW_DoubleTable932[k][0];
|
||||
char d[3];
|
||||
d[0] = static_cast<char>(data >> 8);
|
||||
d[1] = static_cast<char>(data & 0xFF);
|
||||
d[2] = '\0';
|
||||
result += d; // translate from table
|
||||
notFound = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (notFound)
|
||||
result += decodeText(code);
|
||||
} // direct conversion
|
||||
}
|
||||
result += s.substr(j);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
auto DRW_Conv932Table::toUtf8(const std::string &s) -> std::string
|
||||
{
|
||||
std::string res;
|
||||
for (auto it = s.begin(); it < s.end(); ++it)
|
||||
{
|
||||
bool notFound = true;
|
||||
auto c = static_cast<unsigned char>(*it);
|
||||
if (c < 0x80)
|
||||
{
|
||||
notFound = false;
|
||||
// check for \U+ encoded text
|
||||
if (c == '\\')
|
||||
{
|
||||
if (s.end() - it > 6 && *(it + 1) == 'U' && *(it + 2) == '+')
|
||||
{
|
||||
res += encodeText(std::string(it, it + 7));
|
||||
it += 6;
|
||||
}
|
||||
else
|
||||
{
|
||||
res += static_cast<char>(c); // no \U+ encoded text write
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
res += static_cast<char>(c); // c!='\' ascii char write
|
||||
}
|
||||
}
|
||||
else if (c > 0xA0 && c < 0xE0)
|
||||
{ // 1 byte table
|
||||
notFound = false;
|
||||
res += encodeNum(c + CPOFFSET932); // translate from table
|
||||
}
|
||||
else
|
||||
{ // 2 bytes
|
||||
++it;
|
||||
int code = (c << 8) | static_cast<unsigned char>(*it);
|
||||
int sta = 0;
|
||||
int end = 0;
|
||||
if (c > 0x80 && c < 0xA0)
|
||||
{
|
||||
sta = DRW_LeadTable932[c - 0x81];
|
||||
end = DRW_LeadTable932[c - 0x80];
|
||||
}
|
||||
else if (c > 0xDF && c < 0xFD)
|
||||
{
|
||||
sta = DRW_LeadTable932[c - 0xC1];
|
||||
end = DRW_LeadTable932[c - 0xC0];
|
||||
}
|
||||
if (end > 0)
|
||||
{
|
||||
for (int k = sta; k < end; k++)
|
||||
{
|
||||
if (DRW_DoubleTable932[k][0] == code)
|
||||
{
|
||||
res += encodeNum(DRW_DoubleTable932[k][1]); // translate from table
|
||||
notFound = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// not found
|
||||
if (notFound)
|
||||
{
|
||||
res += encodeNum(NOTFOUND932);
|
||||
}
|
||||
} // end for
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
auto DRW_ConvUTF16::fromUtf8(const std::string &s) -> std::string
|
||||
{
|
||||
DRW_UNUSED(s);
|
||||
// RLZ: to be written (only needed for write dwg 2007+)
|
||||
return {};
|
||||
}
|
||||
|
||||
auto DRW_ConvUTF16::toUtf8(const std::string &s) -> std::string
|
||||
{ // RLZ: pending to write
|
||||
std::string res;
|
||||
for (auto it = s.begin(); it < s.end(); ++it)
|
||||
{
|
||||
auto c1 = static_cast<unsigned char>(*it);
|
||||
auto c2 = static_cast<unsigned char>(*(++it));
|
||||
auto ch = static_cast<duint16>((c2 << 8) | c1);
|
||||
res += encodeNum(ch);
|
||||
} // end for
|
||||
|
||||
return res;
|
||||
}
|
||||
|
|
|
@ -1,26 +1,23 @@
|
|||
#ifndef DRW_TEXTCODEC_H
|
||||
#define DRW_TEXTCODEC_H
|
||||
|
||||
#include <string>
|
||||
#include <QtGlobal>
|
||||
#include "../drw_base.h"
|
||||
#include <QtCore/qcontainerfwd.h>
|
||||
|
||||
#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
|
||||
#include "../vmisc/vtextcodec.h"
|
||||
#else
|
||||
#include "../vmisc/defglobal.h"
|
||||
#include <QTextCodec>
|
||||
#endif
|
||||
#include <QtGlobal>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
|
||||
class QStringList;
|
||||
#endif
|
||||
|
||||
class DRW_Converter;
|
||||
|
||||
class DRW_TextCodec
|
||||
{
|
||||
public:
|
||||
DRW_TextCodec();
|
||||
~DRW_TextCodec() = default;
|
||||
auto fromUtf8(const std::string &s) -> std::string;
|
||||
auto toUtf8(const std::string &s) -> std::string;
|
||||
auto getVersion() const -> int { return version; }
|
||||
|
@ -29,18 +26,84 @@ public:
|
|||
void setCodePage(const std::string &c, bool dxfFormat);
|
||||
auto getCodePage() const -> std::string { return cp; }
|
||||
|
||||
static auto DXFCodePageMap() -> QMap<QString, QStringList>;
|
||||
static auto CodecForName(const QString &name) -> VTextCodec *;
|
||||
|
||||
private:
|
||||
static auto correctCodePage(const std::string& s) -> std::string;
|
||||
static auto correctCodePage(const std::string &s) -> std::string;
|
||||
|
||||
private:
|
||||
// cppcheck-suppress unknownMacro
|
||||
Q_DISABLE_COPY_MOVE(DRW_TextCodec) // NOLINT
|
||||
DRW::Version version{DRW::UNKNOWNV};
|
||||
std::string cp{};
|
||||
VTextCodec *conv{nullptr};
|
||||
std::unique_ptr<DRW_Converter> conv;
|
||||
};
|
||||
|
||||
/**
 * Base class of the text converters.
 *
 * Holds a pointer to a translation table and its length for the derived
 * converters, and declares the static helpers shared by all of them.
 */
class DRW_Converter
{
public:
    /**
     * @param t translation table (stored, not copied); may be nullptr
     * @param l value stored in cpLength
     */
    DRW_Converter(const int *t, int l)
        : table(t),
          cpLength(l)
    {}
    virtual ~DRW_Converter() = default;

    /** Returns @p s unchanged. */
    virtual auto fromUtf8(const std::string &s) -> std::string
    {
        return s;
    }
    virtual auto toUtf8(const std::string &s) -> std::string;

    // Helpers for `\U+XXXX` escapes and UTF-8 sequences.
    static auto encodeText(const std::string &stmp) -> std::string;
    static auto decodeText(int c) -> std::string;
    static auto encodeNum(int c) -> std::string;
    static auto decodeNum(const std::string &s, unsigned int *b) -> int;

    const int *table{nullptr}; // translation table handed to the constructor
    int cpLength;              // length handed to the constructor
};
|
||||
|
||||
class DRW_ConvUTF16 : public DRW_Converter
|
||||
{
|
||||
public:
|
||||
DRW_ConvUTF16()
|
||||
: DRW_Converter(nullptr, 0)
|
||||
{
|
||||
}
|
||||
auto fromUtf8(const std::string &s) -> std::string override;
|
||||
auto toUtf8(const std::string &s) -> std::string override;
|
||||
};
|
||||
|
||||
class DRW_ConvTable : public DRW_Converter
|
||||
{
|
||||
public:
|
||||
DRW_ConvTable(const int *t, int l)
|
||||
: DRW_Converter(t, l)
|
||||
{
|
||||
}
|
||||
auto fromUtf8(const std::string &s) -> std::string override;
|
||||
auto toUtf8(const std::string &s) -> std::string override;
|
||||
};
|
||||
|
||||
class DRW_ConvDBCSTable : public DRW_Converter
|
||||
{
|
||||
public:
|
||||
DRW_ConvDBCSTable(const int *t, const int *lt, const int dt[][2], int l)
|
||||
: DRW_Converter(t, l),
|
||||
leadTable{lt},
|
||||
doubleTable{dt}
|
||||
{
|
||||
}
|
||||
|
||||
auto fromUtf8(const std::string &s) -> std::string override;
|
||||
auto toUtf8(const std::string &s) -> std::string override;
|
||||
|
||||
private:
|
||||
const int *leadTable{nullptr};
|
||||
const int (*doubleTable)[2];
|
||||
};
|
||||
|
||||
class DRW_Conv932Table : public DRW_Converter
|
||||
{
|
||||
public:
|
||||
DRW_Conv932Table();
|
||||
auto fromUtf8(const std::string &s) -> std::string override;
|
||||
auto toUtf8(const std::string &s) -> std::string override;
|
||||
};
|
||||
|
||||
#endif // DRW_TEXTCODEC_H
|
||||
|
|
|
@ -9,6 +9,11 @@ SOURCES += \
|
|||
$$PWD/libdxfrw/intern/drw_textcodec.cpp \
|
||||
$$PWD/libdxfrw/intern/dxfreader.cpp \
|
||||
$$PWD/libdxfrw/intern/dxfwriter.cpp \
|
||||
$$PWD/libdxfrw/intern/drw_cptable932.h \
|
||||
$$PWD/libdxfrw/intern/drw_cptable936.h \
|
||||
$$PWD/libdxfrw/intern/drw_cptable949.h \
|
||||
$$PWD/libdxfrw/intern/drw_cptable950.h \
|
||||
$$PWD/libdxfrw/intern/drw_cptables.h \
|
||||
$$PWD/libdxfrw/drw_classes.cpp \
|
||||
$$PWD/libdxfrw/drw_entities.cpp \
|
||||
$$PWD/libdxfrw/drw_header.cpp \
|
||||
|
|
|
@ -21,6 +21,11 @@ VLib {
|
|||
prefix: "libdxfrw/"
|
||||
files: [
|
||||
"drw_base.cpp",
|
||||
"intern/drw_cptable932.h",
|
||||
"intern/drw_cptable936.h",
|
||||
"intern/drw_cptable949.h",
|
||||
"intern/drw_cptable950.h",
|
||||
"intern/drw_cptables.h",
|
||||
"intern/drw_dbg.cpp",
|
||||
"intern/drw_textcodec.cpp",
|
||||
"intern/dxfreader.cpp",
|
||||
|
|
Loading…
Reference in New Issue
Block a user