Program Listing for File support.h
↰ Return to documentation for file (sdv_idl_compiler\support.h)
#ifndef SUPPORT_H
#define SUPPORT_H
#include "exception.h"
#include "codepos.h"
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <sstream>
#include <string>
#include <utility>
/**
 * @brief Interpret C-style literal text and translate its escape sequences into characters.
 * @details Characters are read from the supplied position until the delimiter pattern is detected (the delimiter itself is not
 * consumed) or, when no delimiter is supplied, until the terminating '\0'. Supported escape sequences are the simple escapes
 * (\' \" \? \\ \a \b \f \n \r \t \v), octal escapes of up to three digits (maximum 0377), hexadecimal escapes (\x followed by at
 * most two digits per byte of TCharType) and the Unicode escapes \uXXXX and \UXXXXXXXX. Unicode escapes are stored as UTF-32,
 * UTF-16 or UTF-8 depending on the size of TCharType.
 * @tparam TLiteral Type of the text position. It must support operator!, unary operator*, operator++, operator[] and must be
 * accepted by the CCodePos constructor.
 * @tparam TCharType Character type of the target string.
 * @param[in] rtLiteral Position of the first character to interpret.
 * @param[in] szDelim Zero terminated delimiter pattern ending the text, or nullptr (or an empty string) to process until the
 * terminating '\0'.
 * @param[out] rssText String receiving the interpreted text. The string is cleared first.
 * @param[out] ruiByteCnt Receives the amount of source characters that were consumed.
 * @param[in] bIgnoreEscape When set, a backslash is copied as an ordinary character and escape sequences are not interpreted.
 * @param[in] bNotUnicode When set, the target is a plain 8-bit (non UTF-8) string: Unicode escapes must fit into one byte. Only
 * allowed for 1-byte character types.
 * @throw CCompileException on an invalid parameter, an invalid escape sequence or when the text ends before the delimiter.
 */
template <typename TLiteral, typename TCharType>
inline void InterpretCText(TLiteral rtLiteral, const char* szDelim, std::basic_string<TCharType>& rssText, uint32_t& ruiByteCnt,
    bool bIgnoreEscape = false, bool bNotUnicode = false)
{
    if (!rtLiteral)
        throw CCompileException("Internal error: invalid parameter.");

    // Work on a copy of the position; the parameter itself is left untouched (some compilers don't like changing it).
    TLiteral rtLiteralLocal = rtLiteral;
    rssText.clear();
    ruiByteCnt = 0;

    // Check for an incompatibility with the parameter 'bNotUnicode'.
    if (bNotUnicode && sizeof(TCharType) != 1)
        throw CCompileException(CCodePos(rtLiteralLocal).GetLocation(), "Internal error: invalid parameter.");

    // Digit classification by plain range comparison. Unlike std::isdigit/std::isxdigit this is well defined for negative
    // character values (bytes >= 0x80 stored in a signed char).
    auto fnIsOctalDigit = [](auto tChar) -> bool { return tChar >= '0' && tChar <= '7'; };
    auto fnIsDecimalDigit = [](auto tChar) -> bool { return tChar >= '0' && tChar <= '9'; };
    auto fnIsHexDigit = [](auto tChar) -> bool
    {
        return (tChar >= '0' && tChar <= '9') || (tChar >= 'a' && tChar <= 'f') || (tChar >= 'A' && tChar <= 'F');
    };

    // Read an octal escape value: one to three octal digits, not exceeding 0377.
    auto fnGetOctalChar = [&]() -> uint32_t
    {
        std::string ssNumber;
        size_t n = 0;
        do
        {
            if (!fnIsOctalDigit(*rtLiteralLocal))
                throw CCompileException(CCodePos(rtLiteralLocal).GetLocation(),
                    "Invalid escape sequence in string; expecting a valid octal number.");
            ssNumber += *rtLiteralLocal;
            ruiByteCnt++;
            rtLiteralLocal++;
        } while (++n < 3 && fnIsOctalDigit(*rtLiteralLocal));
        const uint32_t uiResult = static_cast<uint32_t>(std::stoul(ssNumber, nullptr, 8));

        // Byte character literals cannot have a value higher than 0377 (0xff).
        if (uiResult > 0377)
            throw CCompileException(CCodePos(rtLiteralLocal).GetLocation(),
                "Invalid escape sequence in string; expecting a valid octal number of at the most 0377.");
        return uiResult;
    };

    // Read a hexadecimal escape value. With nFixedDigits == 0 at most two digits per byte of the character type are read; with
    // nFixedDigits != 0 exactly that amount of digits is required.
    auto fnGetHexChar = [&](size_t nFixedDigits = 0) -> uint32_t
    {
        // Determine the amount of digits
        const size_t nFixedDigitsLocal = nFixedDigits ? nFixedDigits : sizeof(TCharType) * 2;

        std::string ssNumber;
        size_t n = 0;
        do
        {
            if (!fnIsHexDigit(*rtLiteralLocal))
                throw CCompileException(CCodePos(rtLiteralLocal).GetLocation(), "Invalid escape sequence in string; expecting a "
                    "valid hexadecimal number of ", nFixedDigitsLocal, " digits.");
            ssNumber += *rtLiteralLocal;
            rtLiteralLocal++;
            ruiByteCnt++;
        } while (++n < nFixedDigitsLocal && fnIsHexDigit(*rtLiteralLocal));
        if (nFixedDigits && n != nFixedDigits)
            throw CCompileException(CCodePos(rtLiteralLocal).GetLocation(), "Invalid escape sequence in string; expecting a "
                "valid hexadecimal number of ", nFixedDigitsLocal, " digits.");
        return static_cast<uint32_t>(std::stoul(ssNumber, nullptr, 16));
    };

    // Read a \u (4 digits) or \U (8 digits) code point, validate it and append it in the encoding of the target string.
    auto fnAddUnicodeCharacter = [&](bool b32BitCodePoint = false)
    {
        uint32_t uiCodePoint = fnGetHexChar(b32BitCodePoint ? 8 : 4);

        // Range U+D800...U+DFFF is reserved for UTF-16 surrogates and not assigned to any character.
        if (uiCodePoint >= 0xD800 && uiCodePoint < 0xE000)
            throw CCompileException(CCodePos(rtLiteralLocal).GetLocation(), "Invalid escape sequence in string; Unicode range"
                " U+D800...U+DFFF are not valid code point.");

        // Everything above U+10FFFF is invalid, regardless of the width of the target character type.
        if (uiCodePoint >= 0x110000)
            throw CCompileException(CCodePos(rtLiteralLocal).GetLocation(), "Invalid escape sequence in string; expecting a"
                " valid Unicode code point.");

        // Wide characters are considered to correspond to a UTF-16 or UTF-32 character.
        if constexpr (sizeof(TCharType) == 4) // UTF-32
            rssText.push_back(static_cast<TCharType>(uiCodePoint));
        else if constexpr (sizeof(TCharType) == 2) // UTF-16
        {
            if (uiCodePoint < 0x10000) // U+0000...U+D7FF and U+E000...U+FFFF fit in one 16-bit unit.
                rssText.push_back(static_cast<TCharType>(uiCodePoint));
            else // U+10000...U+10FFFF need a surrogate pair.
            {
                // Subtract 0x10000 from the code point
                uiCodePoint -= 0x10000;

                // High ten bits form the first unit, low ten bits the second unit.
                rssText.push_back(static_cast<TCharType>(0xD800 | ((uiCodePoint >> 10) & 0x3FF)));
                rssText.push_back(static_cast<TCharType>(0xDC00 | (uiCodePoint & 0x3FF)));
            }
        }
        else if constexpr (sizeof(TCharType) == 1) // Plain 8-bit text or UTF-8
        {
            if (uiCodePoint < 0x0080) // U+0000...U+007F fit in one byte.
                rssText.push_back(static_cast<TCharType>(uiCodePoint));
            else if (bNotUnicode)
            {
                // Only U+00A0...U+00FF can be stored as a single byte; U+0080...U+009F and everything from U+0100 cannot.
                if (uiCodePoint < 0x00A0 || uiCodePoint >= 0x0100)
                    throw CCompileException(CCodePos(rtLiteralLocal).GetLocation(), "Invalid escape sequence in string; Unicode"
                        " code point doesn't fit into ASCII character.");
                rssText.push_back(static_cast<TCharType>(uiCodePoint));
            }
            else if (uiCodePoint < 0x0800) // U+0080...U+07FF: two bytes (5 + 6 bits).
            {
                rssText.push_back(static_cast<TCharType>(0xC0 | ((uiCodePoint >> 6) & 0x1F)));
                rssText.push_back(static_cast<TCharType>(0x80 | (uiCodePoint & 0x3F)));
            }
            else if (uiCodePoint < 0x10000) // U+0800...U+FFFF: three bytes (4 + 6 + 6 bits).
            {
                rssText.push_back(static_cast<TCharType>(0xE0 | ((uiCodePoint >> 12) & 0x0F)));
                rssText.push_back(static_cast<TCharType>(0x80 | ((uiCodePoint >> 6) & 0x3F)));
                rssText.push_back(static_cast<TCharType>(0x80 | (uiCodePoint & 0x3F)));
            }
            else // U+10000...U+10FFFF: four bytes (3 + 6 + 6 + 6 bits).
            {
                rssText.push_back(static_cast<TCharType>(0xF0 | ((uiCodePoint >> 18) & 0x07)));
                rssText.push_back(static_cast<TCharType>(0x80 | ((uiCodePoint >> 12) & 0x3F)));
                rssText.push_back(static_cast<TCharType>(0x80 | ((uiCodePoint >> 6) & 0x3F)));
                rssText.push_back(static_cast<TCharType>(0x80 | (uiCodePoint & 0x3F)));
            }
        }
    };

    // Check whether to continue processing based on the delimiter.
    auto fnContinueProcessing = [&]() -> bool
    {
        // Without a delimiter (nullptr or empty pattern), end processing when the literal is '\0'.
        if (!szDelim || szDelim[0] == '\0') return *rtLiteralLocal ? true : false;

        // There is a delimiter. Continue processing as long as the complete delimiter pattern hasn't been detected.
        for (uint32_t uiIndex = 0; szDelim[uiIndex] != '\0'; ++uiIndex)
        {
            if (szDelim[uiIndex] != rtLiteralLocal[uiIndex])
                return true;
        }
        return false;
    };

    while (fnContinueProcessing())
    {
        switch (*rtLiteralLocal)
        {
        case '\0':
            // The text ended before the delimiter was found.
            throw CCompileException(CCodePos(rtLiteralLocal).GetLocation(), "Invalid string; unexpected end of file detected.");
        case '\\':
            // Skip backslash
            rtLiteralLocal++;
            ruiByteCnt++;

            // Handle as escape or as character
            if (bIgnoreEscape)
            {
                rssText.push_back('\\');
                break;
            }

            // Handle escape
            if (fnIsDecimalDigit(*rtLiteralLocal))
            {
                // Octal character value ('8' and '9' are rejected by the octal reader).
                rssText.push_back(static_cast<TCharType>(fnGetOctalChar()));
            }
            else
            {
                const char cEscapeChar = *rtLiteralLocal;

                // A backslash at the very end of the text: report it without stepping beyond the terminating '\0'.
                if (cEscapeChar == '\0')
                    throw CCompileException(CCodePos(rtLiteralLocal).GetLocation(), "Invalid escape sequence in string");

                rtLiteralLocal++;
                ruiByteCnt++;
                switch (cEscapeChar)
                {
                case '\'': // Single quote
                case '\"': // Double quote
                case '?':  // Question mark
                case '\\': // Backslash
                    rssText.push_back(cEscapeChar);
                    break;
                case 'a': // Bell
                    rssText.push_back('\a');
                    break;
                case 'b': // Backspace
                    rssText.push_back('\b');
                    break;
                case 'f': // Form feed
                    rssText.push_back('\f');
                    break;
                case 'n': // Line feed
                    rssText.push_back('\n');
                    break;
                case 'r': // Carriage return
                    rssText.push_back('\r');
                    break;
                case 't': // Horizontal tab
                    rssText.push_back('\t');
                    break;
                case 'v': // Vertical tab
                    rssText.push_back('\v');
                    break;
                case 'x': // Hex character value
                    rssText.push_back(static_cast<TCharType>(fnGetHexChar()));
                    break;
                case 'u': // 4 digit Unicode character
                    fnAddUnicodeCharacter();
                    break;
                case 'U': // 8 digit Unicode character
                    fnAddUnicodeCharacter(true);
                    break;
                default:
                    throw CCompileException(CCodePos(rtLiteralLocal).GetLocation(), "Invalid escape sequence in string");
                }
            }
            break;
        default:
            // Ordinary character: copy and skip
            rssText.push_back(*rtLiteralLocal);
            rtLiteralLocal++;
            ruiByteCnt++;
            break;
        }
    }
}
/**
 * @brief Generate C-style literal text (without the surrounding quotes) from a character string.
 * @details Quotes, the backslash and the common control characters are written as simple escape sequences, the remaining
 * control characters below 0x20 as three digit octal escapes and the other characters below 0x80 as they are. Characters
 * from 0x80 upward are written as \uXXXX or \UXXXXXXXX escapes; for that the text is decoded as UTF-32, UTF-16 or UTF-8
 * depending on the size of TCharType. With bNotUnicode set, bytes from 0x80 upward are written as octal escapes instead.
 * Code units that do not form a valid sequence (stray or missing UTF-8 continuation bytes, unpaired UTF-16 surrogates) are
 * written as numeric escapes of the unit itself (octal for bytes, \xhhhh for 16-bit units); no unit beyond the terminating
 * zero or beyond uiLen is read.
 * @remarks Octal escapes are always written with three digits, so that a following digit character cannot be absorbed into
 * the escape sequence when the text is interpreted again.
 * @tparam TCharType Character type of the source text.
 * @param[in] szText Pointer to the text. When nullptr, an empty string is returned.
 * @param[in] uiLen Maximum amount of code units (elements of szText) to process. Processing also stops at the first zero
 * unit.
 * @param[in] bNotUnicode When set, the text is plain 8-bit (non UTF-8) text. Only allowed for 1-byte character types.
 * @return The generated text.
 * @throw CCompileException when bNotUnicode is set for a character type wider than one byte.
 */
template <typename TCharType>
inline std::string GenerateCText(const TCharType* szText, uint32_t uiLen = 0xFFFFFFFF, bool bNotUnicode = false)
{
    std::stringstream sstreamResult;
    if (!szText) return sstreamResult.str();

    // Check for an incompatibility with the parameter 'bNotUnicode'.
    if (bNotUnicode && sizeof(TCharType) != 1)
        throw CCompileException("Internal error: invalid parameter.");

    // Write a three digit octal escape. The fixed width prevents "\1" followed by '2' from being read back as "\12".
    auto fnWriteOctal = [&](uint32_t uiValue)
    {
        sstreamResult << "\\" << std::oct << std::setfill('0') << std::setw(3) << uiValue;
    };

    // Write a Unicode escape: 4 digits for the basic multilingual plane, 8 digits above it.
    auto fnWriteUnicode = [&](uint32_t uiCodePoint)
    {
        if (uiCodePoint < 0x10000)
            sstreamResult << "\\u" << std::hex << std::setfill('0') << std::setw(4) << uiCodePoint;
        else
            sstreamResult << "\\U" << std::hex << std::setfill('0') << std::setw(8) << uiCodePoint;
    };

    // Standard ASCII code generation (values below 0x80). Returns false when the value is not handled here.
    auto fnGenerateASCII = [&](uint32_t c) -> bool
    {
        switch (c)
        {
        case '\'': sstreamResult << "\\\'"; break;
        case '\"': sstreamResult << "\\\""; break;
        case '\\': sstreamResult << "\\\\"; break;
        case '\a': sstreamResult << "\\a"; break;
        case '\b': sstreamResult << "\\b"; break;
        case '\f': sstreamResult << "\\f"; break;
        case '\n': sstreamResult << "\\n"; break;
        case '\r': sstreamResult << "\\r"; break;
        case '\t': sstreamResult << "\\t"; break;
        case '\v': sstreamResult << "\\v"; break;
        default:
            if (c < 0x20)
                fnWriteOctal(c);
            else if (c < 0x80)
                sstreamResult << static_cast<char>(c);
            else
                return false;
        }
        return true;
    };

    // The length is checked before the unit is read, so that nothing beyond uiLen units is accessed.
    for (size_t nPos = 0; nPos < uiLen && szText[nPos];)
    {
        if constexpr (sizeof(TCharType) == 4) // UTF-32 and possibly wide character
        {
            const uint32_t uiChar = static_cast<uint32_t>(szText[nPos++]);
            if (!fnGenerateASCII(uiChar))
                fnWriteUnicode(uiChar);
        }
        else if constexpr (sizeof(TCharType) == 2) // UTF-16 and possibly wide character
        {
            const uint16_t uiUnit = static_cast<uint16_t>(szText[nPos++]);
            if (fnGenerateASCII(uiUnit))
                continue;
            if (uiUnit < 0xD800 || uiUnit >= 0xE000) // One unit
            {
                fnWriteUnicode(uiUnit);
                continue;
            }

            // A high surrogate must be followed by a low surrogate within the text. The terminating zero is not a low
            // surrogate, so it is never consumed here.
            if (uiUnit < 0xDC00 && nPos < uiLen)
            {
                const uint16_t uiLow = static_cast<uint16_t>(szText[nPos]);
                if (uiLow >= 0xDC00 && uiLow < 0xE000)
                {
                    ++nPos;
                    fnWriteUnicode(0x10000 + ((static_cast<uint32_t>(uiUnit) & 0x3FF) << 10)
                        + (static_cast<uint32_t>(uiLow) & 0x3FF));
                    continue;
                }
            }

            // Unpaired surrogate: there is no valid \u escape for it, write the raw unit as hexadecimal escape.
            sstreamResult << "\\x" << std::hex << std::setfill('0') << std::setw(4) << static_cast<uint32_t>(uiUnit);
        }
        else // UTF-8 or plain 8-bit text
        {
            const uint8_t uiLead = static_cast<uint8_t>(szText[nPos++]);
            if (fnGenerateASCII(uiLead))
                continue;
            if (bNotUnicode) // Plain 8-bit character
            {
                fnWriteOctal(uiLead);
                continue;
            }

            // Determine the amount of continuation bytes, the payload bits of the lead byte and the smallest code point
            // allowed for this length (to reject overlong encodings). Stray continuation bytes (0x80...0xBF) and the
            // invalid lead bytes 0xF8...0xFF keep nFollow at zero.
            size_t nFollow = 0;
            uint32_t uiCodePoint = 0;
            uint32_t uiMinimum = 0;
            if (uiLead >= 0xC0 && uiLead < 0xE0)
            {
                nFollow = 1;
                uiCodePoint = uiLead & 0x1Fu;
                uiMinimum = 0x80;
            }
            else if (uiLead >= 0xE0 && uiLead < 0xF0)
            {
                nFollow = 2;
                uiCodePoint = uiLead & 0x0Fu;
                uiMinimum = 0x800;
            }
            else if (uiLead >= 0xF0 && uiLead < 0xF8)
            {
                nFollow = 3;
                uiCodePoint = uiLead & 0x07u;
                uiMinimum = 0x10000;
            }

            // Collect the continuation bytes without consuming them yet. The terminating zero is not a continuation byte,
            // so reading stops there.
            bool bValid = nFollow != 0;
            size_t nNext = nPos;
            for (size_t nIndex = 0; bValid && nIndex < nFollow; ++nIndex)
            {
                if (nNext >= uiLen)
                {
                    bValid = false;
                    break;
                }
                const uint8_t uiFollow = static_cast<uint8_t>(szText[nNext]);
                if ((uiFollow & 0xC0) != 0x80)
                {
                    bValid = false;
                    break;
                }
                uiCodePoint = (uiCodePoint << 6) | (uiFollow & 0x3Fu);
                ++nNext;
            }

            // Overlong encodings, surrogates and values above U+10FFFF are not valid either.
            if (bValid && (uiCodePoint < uiMinimum || uiCodePoint > 0x10FFFF || (uiCodePoint >= 0xD800 && uiCodePoint < 0xE000)))
                bValid = false;

            if (bValid)
            {
                nPos = nNext;
                fnWriteUnicode(uiCodePoint);
            }
            else
                fnWriteOctal(uiLead); // Invalid sequence: write the byte itself and re-examine the following bytes.
        }
    }
    return sstreamResult.str();
}
inline std::string GenerateCText(char cChar, bool bNotUnicode = false)
{
return GenerateCText(&cChar, 1, bNotUnicode);
}
inline std::string GenerateCText(char16_t cChar, bool bNotUnicode = false)
{
return GenerateCText(&cChar, 1, bNotUnicode);
}
inline std::string GenerateCText(char32_t cChar, bool bNotUnicode = false)
{
return GenerateCText(&cChar, 1, bNotUnicode);
}
inline std::string GenerateCText(wchar_t cChar, bool bNotUnicode = false)
{
return GenerateCText(&cChar, 1, bNotUnicode);
}
#endif // !defined(SUPPORT_H)