summaryrefslogtreecommitdiffstats
path: root/CPP/Common/UTFConvert.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'CPP/Common/UTFConvert.cpp')
-rwxr-xr-xCPP/Common/UTFConvert.cpp145
1 files changed, 145 insertions, 0 deletions
diff --git a/CPP/Common/UTFConvert.cpp b/CPP/Common/UTFConvert.cpp
new file mode 100755
index 0000000..d3935b1
--- /dev/null
+++ b/CPP/Common/UTFConvert.cpp
@@ -0,0 +1,145 @@
+// UTFConvert.cpp
+
+#include "StdAfx.h"
+
+#include "UTFConvert.h"
+#include "Types.h"
+
+static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen)
+{
+ size_t destPos = 0, srcPos = 0;
+ for (;;)
+ {
+ Byte c;
+ int numAdds;
+ if (srcPos == srcLen)
+ {
+ *destLen = destPos;
+ return True;
+ }
+ c = (Byte)src[srcPos++];
+
+ if (c < 0x80)
+ {
+ if (dest)
+ dest[destPos] = (wchar_t)c;
+ destPos++;
+ continue;
+ }
+ if (c < 0xC0)
+ break;
+ for (numAdds = 1; numAdds < 5; numAdds++)
+ if (c < kUtf8Limits[numAdds])
+ break;
+ UInt32 value = (c - kUtf8Limits[numAdds - 1]);
+
+ do
+ {
+ Byte c2;
+ if (srcPos == srcLen)
+ break;
+ c2 = (Byte)src[srcPos++];
+ if (c2 < 0x80 || c2 >= 0xC0)
+ break;
+ value <<= 6;
+ value |= (c2 - 0x80);
+ }
+ while (--numAdds != 0);
+
+ if (value < 0x10000)
+ {
+ if (dest)
+ dest[destPos] = (wchar_t)value;
+ destPos++;
+ }
+ else
+ {
+ value -= 0x10000;
+ if (value >= 0x100000)
+ break;
+ if (dest)
+ {
+ dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
+ dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
+ }
+ destPos += 2;
+ }
+ }
+ *destLen = destPos;
+ return False;
+}
+
+static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
+{
+ size_t destPos = 0, srcPos = 0;
+ for (;;)
+ {
+ unsigned numAdds;
+ UInt32 value;
+ if (srcPos == srcLen)
+ {
+ *destLen = destPos;
+ return True;
+ }
+ value = src[srcPos++];
+ if (value < 0x80)
+ {
+ if (dest)
+ dest[destPos] = (char)value;
+ destPos++;
+ continue;
+ }
+ if (value >= 0xD800 && value < 0xE000)
+ {
+ UInt32 c2;
+ if (value >= 0xDC00 || srcPos == srcLen)
+ break;
+ c2 = src[srcPos++];
+ if (c2 < 0xDC00 || c2 >= 0xE000)
+ break;
+ value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
+ }
+ for (numAdds = 1; numAdds < 5; numAdds++)
+ if (value < (((UInt32)1) << (numAdds * 5 + 6)))
+ break;
+ if (dest)
+ dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
+ destPos++;
+ do
+ {
+ numAdds--;
+ if (dest)
+ dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
+ destPos++;
+ }
+ while (numAdds != 0);
+ }
+ *destLen = destPos;
+ return False;
+}
+
+bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
+{
+ dest.Empty();
+ size_t destLen = 0;
+ Utf8_To_Utf16(NULL, &destLen, src, src.Length());
+ wchar_t *p = dest.GetBuffer((int)destLen);
+ Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length());
+ p[destLen] = 0;
+ dest.ReleaseBuffer();
+ return res ? true : false;
+}
+
+bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
+{
+ dest.Empty();
+ size_t destLen = 0;
+ Utf16_To_Utf8(NULL, &destLen, src, src.Length());
+ char *p = dest.GetBuffer((int)destLen);
+ Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length());
+ p[destLen] = 0;
+ dest.ReleaseBuffer();
+ return res ? true : false;
+}