mirror of
https://github.com/deneraraujo/OpenVPNAdapter.git
synced 2026-04-06 00:00:03 +08:00
Merge commit '86cc97e55fe346502462284d2e636a2b3708163e' as 'Sources/OpenVPN3'
This commit is contained in:
301
Sources/OpenVPN3/openvpn/common/unicode.hpp
Normal file
301
Sources/OpenVPN3/openvpn/common/unicode.hpp
Normal file
@@ -0,0 +1,301 @@
|
||||
// OpenVPN -- An application to securely tunnel IP networks
|
||||
// over a single port, with support for SSL/TLS-based
|
||||
// session authentication and key exchange,
|
||||
// packet encryption, packet authentication, and
|
||||
// packet compression.
|
||||
//
|
||||
// Copyright (C) 2012-2017 OpenVPN Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License Version 3
|
||||
// as published by the Free Software Foundation.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program in the COPYING file.
|
||||
// If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// General-purpose function for dealing with unicode.
|
||||
|
||||
#ifndef OPENVPN_COMMON_UNICODE_H
|
||||
#define OPENVPN_COMMON_UNICODE_H
|
||||
|
||||
#include <string>
|
||||
#include <cstring> // for std::memcpy
|
||||
#include <algorithm> // for std::min
|
||||
#include <memory>
|
||||
#include <cctype>
|
||||
|
||||
#include <openvpn/common/size.hpp>
|
||||
#include <openvpn/common/exception.hpp>
|
||||
#include <openvpn/common/unicode-impl.hpp>
|
||||
#include <openvpn/buffer/buffer.hpp>
|
||||
|
||||
namespace openvpn {
|
||||
namespace Unicode {
|
||||
|
||||
OPENVPN_SIMPLE_EXCEPTION(unicode_src_overflow);
|
||||
OPENVPN_SIMPLE_EXCEPTION(unicode_dest_overflow);
|
||||
OPENVPN_SIMPLE_EXCEPTION(unicode_malformed);
|
||||
|
||||
// Return true if the given buffer is a valid UTF-8 string.
|
||||
// Extra constraints:
|
||||
enum {
|
||||
UTF8_NO_CTRL = (1<<30), // no control chars allowed
|
||||
UTF8_NO_SPACE = (1<<31), // no space chars allowed
|
||||
};
|
||||
inline bool is_valid_utf8_uchar_buf(const unsigned char *source,
|
||||
size_t size,
|
||||
const size_t max_len_flags=0) // OR max length (or 0 to disable) with UTF8_x flags above
|
||||
{
|
||||
const size_t max_len = max_len_flags & ((size_t)UTF8_NO_CTRL-1); // NOTE -- use smallest flag value here
|
||||
size_t unicode_len = 0;
|
||||
while (size)
|
||||
{
|
||||
const unsigned char c = *source;
|
||||
if (c == '\0')
|
||||
return false;
|
||||
const int length = trailingBytesForUTF8[c]+1;
|
||||
if ((size_t)length > size)
|
||||
return false;
|
||||
if (!isLegalUTF8(source, length))
|
||||
return false;
|
||||
if (length == 1)
|
||||
{
|
||||
if ((max_len_flags & UTF8_NO_CTRL) && std::iscntrl(c))
|
||||
return false;
|
||||
if ((max_len_flags & UTF8_NO_SPACE) && std::isspace(c))
|
||||
return false;
|
||||
}
|
||||
|
||||
source += length;
|
||||
size -= length;
|
||||
++unicode_len;
|
||||
if (max_len && unicode_len > max_len)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename STRING>
|
||||
inline bool is_valid_utf8(const STRING& str, const size_t max_len_flags=0)
|
||||
{
|
||||
return is_valid_utf8_uchar_buf((const unsigned char *)str.c_str(), str.length(), max_len_flags);
|
||||
}
|
||||
|
||||
// Return the byte position in the string that corresponds with
|
||||
// the given character index. Return values:
|
||||
enum {
|
||||
UTF8_GOOD=0, // succeeded, result in index
|
||||
UTF8_BAD, // failed, string is not legal UTF8
|
||||
UTF8_RANGE, // failed, index is beyond end of string
|
||||
};
|
||||
template <typename STRING>
|
||||
inline int utf8_index(STRING& str, size_t& index)
|
||||
{
|
||||
const size_t size = str.length();
|
||||
size_t upos = 0;
|
||||
size_t pos = 0;
|
||||
while (pos < size)
|
||||
{
|
||||
const int len = trailingBytesForUTF8[(unsigned char)str[pos]]+1;
|
||||
if (pos + len > size || !isLegalUTF8((const unsigned char *)&str[pos], len))
|
||||
return UTF8_BAD;
|
||||
if (upos >= index)
|
||||
{
|
||||
index = pos;
|
||||
return UTF8_GOOD;
|
||||
}
|
||||
pos += len;
|
||||
++upos;
|
||||
}
|
||||
return UTF8_RANGE;
|
||||
}
|
||||
|
||||
// Truncate a UTF8 string if its length exceeds max_len
|
||||
template <typename STRING>
|
||||
inline void utf8_truncate(STRING& str, size_t max_len)
|
||||
{
|
||||
const int status = utf8_index(str, max_len);
|
||||
if (status == UTF8_GOOD || status == UTF8_BAD)
|
||||
str = str.substr(0, max_len);
|
||||
}
|
||||
|
||||
// Return a printable UTF-8 string, where bad UTF-8 chars and
|
||||
// control chars are mapped to '?'.
|
||||
// If max_len_flags > 0, print a maximum of max_len_flags chars.
|
||||
// If UTF8_PASS_FMT flag is set in max_len_flags, pass through \r\n\t
|
||||
enum {
|
||||
UTF8_PASS_FMT=(1<<31),
|
||||
UTF8_FILTER=(1<<30),
|
||||
};
|
||||
template <typename STRING>
|
||||
inline STRING utf8_printable(const STRING& str, size_t max_len_flags)
|
||||
{
|
||||
STRING ret;
|
||||
const size_t size = str.length();
|
||||
const size_t max_len = max_len_flags & ((size_t)UTF8_FILTER-1); // NOTE -- use smallest flag value here
|
||||
size_t upos = 0;
|
||||
size_t pos = 0;
|
||||
ret.reserve(std::min(str.length(), max_len) + 3); // add 3 for "..."
|
||||
while (pos < size)
|
||||
{
|
||||
if (!max_len || upos < max_len)
|
||||
{
|
||||
unsigned char c = str[pos];
|
||||
int len = trailingBytesForUTF8[c]+1;
|
||||
if (pos + len <= size
|
||||
&& c >= 0x20 && c != 0x7F
|
||||
&& isLegalUTF8((const unsigned char *)&str[pos], len))
|
||||
{
|
||||
// non-control, legal UTF-8
|
||||
ret.append(str, pos, len);
|
||||
}
|
||||
else
|
||||
{
|
||||
// control char or bad UTF-8 char
|
||||
if (c == '\r' || c == '\n' || c == '\t')
|
||||
{
|
||||
if (!(max_len_flags & UTF8_PASS_FMT))
|
||||
c = ' ';
|
||||
}
|
||||
else if (max_len_flags & UTF8_FILTER)
|
||||
c = 0;
|
||||
else
|
||||
c = '?';
|
||||
if (c)
|
||||
ret += c;
|
||||
len = 1;
|
||||
}
|
||||
pos += len;
|
||||
++upos;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret.append("...");
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename STRING>
|
||||
inline size_t utf8_length(const STRING& str)
|
||||
{
|
||||
const size_t size = str.length();
|
||||
size_t upos = 0;
|
||||
size_t pos = 0;
|
||||
while (pos < size)
|
||||
{
|
||||
int len = std::min((int)trailingBytesForUTF8[(unsigned char)str[pos]]+1,
|
||||
(int)size);
|
||||
if (!isLegalUTF8((const unsigned char *)&str[pos], len))
|
||||
len = 1;
|
||||
pos += len;
|
||||
++upos;
|
||||
}
|
||||
return upos;
|
||||
}
|
||||
|
||||
inline void conversion_result_throw(const ConversionResult res)
|
||||
{
|
||||
switch (res)
|
||||
{
|
||||
case conversionOK:
|
||||
return;
|
||||
case sourceExhausted:
|
||||
throw unicode_src_overflow();
|
||||
case targetExhausted:
|
||||
throw unicode_dest_overflow();
|
||||
case sourceIllegal:
|
||||
throw unicode_malformed();
|
||||
}
|
||||
}
|
||||
|
||||
// Convert a UTF-8 string to UTF-16 little endian (no null termination in return)
|
||||
template <typename STRING>
|
||||
inline BufferPtr string_to_utf16(const STRING& str)
|
||||
{
|
||||
std::unique_ptr<UTF16[]> utf16_dest(new UTF16[str.length()]);
|
||||
const UTF8 *src = (UTF8 *)str.c_str();
|
||||
UTF16 *dest = utf16_dest.get();
|
||||
const ConversionResult res = ConvertUTF8toUTF16(&src, src + str.length(),
|
||||
&dest, dest + str.length(),
|
||||
lenientConversion);
|
||||
conversion_result_throw(res);
|
||||
BufferPtr ret(new BufferAllocated((dest - utf16_dest.get()) * 2, BufferAllocated::ARRAY));
|
||||
UTF8 *d = ret->data();
|
||||
for (const UTF16 *s = utf16_dest.get(); s < dest; ++s)
|
||||
{
|
||||
*d++ = *s & 0xFF;
|
||||
*d++ = (*s >> 8) & 0xFF;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
class UTF8Iterator
|
||||
{
|
||||
public:
|
||||
struct Char
|
||||
{
|
||||
unsigned int len;
|
||||
unsigned char data[4];
|
||||
bool valid;
|
||||
|
||||
const bool is_valid() const
|
||||
{
|
||||
return valid && len >= 1 && len <= sizeof(data);
|
||||
}
|
||||
|
||||
std::string str(const char *malformed)
|
||||
{
|
||||
if (is_valid())
|
||||
return std::string((char *)data, len);
|
||||
else
|
||||
return malformed;
|
||||
}
|
||||
};
|
||||
|
||||
UTF8Iterator(const std::string& str_arg)
|
||||
: str((unsigned char *)str_arg.c_str()),
|
||||
size(str_arg.length())
|
||||
{
|
||||
}
|
||||
|
||||
bool get(Char &c)
|
||||
{
|
||||
if (size)
|
||||
{
|
||||
unsigned int len = std::min((unsigned int)trailingBytesForUTF8[*str]+1,
|
||||
(unsigned int)size);
|
||||
if (isLegalUTF8(str, len))
|
||||
{
|
||||
c.valid = true;
|
||||
c.len = std::min(len, (unsigned int)sizeof(c.data));
|
||||
std::memcpy(c.data, str, c.len);
|
||||
}
|
||||
else
|
||||
{
|
||||
c.valid = false;
|
||||
c.len = 1;
|
||||
}
|
||||
str += c.len;
|
||||
size -= c.len;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
const unsigned char *str;
|
||||
unsigned int size;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user