Merge commit '86cc97e55fe346502462284d2e636a2b3708163e' as 'Sources/OpenVPN3'

2026-04-24 00:00:05 +08:00 · 2020-02-24 14:43:11 +03:00
parent 133b3756e6 86cc97e55f
commit 32f1555929
655 changed files with 146468 additions and 0 deletions
@@ -0,0 +1,301 @@
+//    OpenVPN -- An application to securely tunnel IP networks
+//               over a single port, with support for SSL/TLS-based
+//               session authentication and key exchange,
+//               packet encryption, packet authentication, and
+//               packet compression.
+//
+//    Copyright (C) 2012-2017 OpenVPN Inc.
+//
+//    This program is free software: you can redistribute it and/or modify
+//    it under the terms of the GNU Affero General Public License Version 3
+//    as published by the Free Software Foundation.
+//
+//    This program is distributed in the hope that it will be useful,
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//    GNU Affero General Public License for more details.
+//
+//    You should have received a copy of the GNU Affero General Public License
+//    along with this program in the COPYING file.
+//    If not, see <http://www.gnu.org/licenses/>.
+
+// General-purpose function for dealing with unicode.
+
+#ifndef OPENVPN_COMMON_UNICODE_H
+#define OPENVPN_COMMON_UNICODE_H
+
+#include <string>
+#include <cstring>           // for std::memcpy
+#include <algorithm>         // for std::min
+#include <memory>
+#include <cctype>
+
+#include <openvpn/common/size.hpp>
+#include <openvpn/common/exception.hpp>
+#include <openvpn/common/unicode-impl.hpp>
+#include <openvpn/buffer/buffer.hpp>
+
+namespace openvpn {
+  namespace Unicode {
+
+    OPENVPN_SIMPLE_EXCEPTION(unicode_src_overflow);
+    OPENVPN_SIMPLE_EXCEPTION(unicode_dest_overflow);
+    OPENVPN_SIMPLE_EXCEPTION(unicode_malformed);
+
+    // Return true if the given buffer is a valid UTF-8 string.
+    // Extra constraints:
+    enum {
+      UTF8_NO_CTRL  = (1<<30), // no control chars allowed
+      UTF8_NO_SPACE = (1<<31), // no space chars allowed
+    };
+    inline bool is_valid_utf8_uchar_buf(const unsigned char *source,
+					size_t size,
+					const size_t max_len_flags=0) // OR max length (or 0 to disable) with UTF8_x flags above
+    {
+      const size_t max_len = max_len_flags & ((size_t)UTF8_NO_CTRL-1); // NOTE -- use smallest flag value here
+      size_t unicode_len = 0;
+      while (size)
+	{
+	  const unsigned char c = *source;
+	  if (c == '\0')
+	    return false;
+	  const int length = trailingBytesForUTF8[c]+1;
+	  if ((size_t)length > size)
+	    return false;
+	  if (!isLegalUTF8(source, length))
+	    return false;
+	  if (length == 1)
+	    {
+	      if ((max_len_flags & UTF8_NO_CTRL) && std::iscntrl(c))
+		return false;
+	      if ((max_len_flags & UTF8_NO_SPACE) && std::isspace(c))
+		return false;
+	    }
+
+	  source += length;
+	  size -= length;
+	  ++unicode_len;
+	  if (max_len && unicode_len > max_len)
+	    return false;
+	}
+      return true;
+    }
+
+    template <typename STRING>
+    inline bool is_valid_utf8(const STRING& str, const size_t max_len_flags=0)
+    {
+      return is_valid_utf8_uchar_buf((const unsigned char *)str.c_str(), str.length(), max_len_flags);
+    }
+
+    // Return the byte position in the string that corresponds with
+    // the given character index.  Return values:
+    enum {
+      UTF8_GOOD=0, // succeeded, result in index
+      UTF8_BAD,    // failed, string is not legal UTF8
+      UTF8_RANGE,  // failed, index is beyond end of string
+    };
+    template <typename STRING>
+    inline int utf8_index(STRING& str, size_t& index)
+    {
+      const size_t size = str.length();
+      size_t upos = 0;
+      size_t pos = 0;
+      while (pos < size)
+	{
+	  const int len = trailingBytesForUTF8[(unsigned char)str[pos]]+1;
+	  if (pos + len > size || !isLegalUTF8((const unsigned char *)&str[pos], len))
+	    return UTF8_BAD;
+	  if (upos >= index)
+	    {
+	      index = pos;
+	      return UTF8_GOOD;
+	    }
+	  pos += len;
+	  ++upos;
+	}
+      return UTF8_RANGE;
+    }
+
+    // Truncate a UTF8 string if its length exceeds max_len
+    template <typename STRING>
+    inline void utf8_truncate(STRING& str, size_t max_len)
+    {
+      const int status = utf8_index(str, max_len);
+      if (status == UTF8_GOOD || status == UTF8_BAD)
+	str = str.substr(0, max_len);
+    }
+
+    // Return a printable UTF-8 string, where bad UTF-8 chars and
+    // control chars are mapped to '?'.
+    // If max_len_flags > 0, print a maximum of max_len_flags chars.
+    // If UTF8_PASS_FMT flag is set in max_len_flags, pass through \r\n\t
+    enum {
+      UTF8_PASS_FMT=(1<<31),
+      UTF8_FILTER=(1<<30),
+    };
+    template <typename STRING>
+    inline STRING utf8_printable(const STRING& str, size_t max_len_flags)
+    {
+      STRING ret;
+      const size_t size = str.length();
+      const size_t max_len = max_len_flags & ((size_t)UTF8_FILTER-1); // NOTE -- use smallest flag value here
+      size_t upos = 0;
+      size_t pos = 0;
+      ret.reserve(std::min(str.length(), max_len) + 3); // add 3 for "..."
+      while (pos < size)
+	{
+	  if (!max_len || upos < max_len)
+	    {
+	      unsigned char c = str[pos];
+	      int len = trailingBytesForUTF8[c]+1;
+	      if (pos + len <= size
+		  && c >= 0x20 && c != 0x7F
+		  && isLegalUTF8((const unsigned char *)&str[pos], len))
+		{
+		  // non-control, legal UTF-8
+		  ret.append(str, pos, len);
+		}
+	      else
+		{
+		  // control char or bad UTF-8 char
+		  if (c == '\r' || c == '\n' || c == '\t')
+		    {
+		      if (!(max_len_flags & UTF8_PASS_FMT))
+			c = ' ';
+		    }
+		  else if (max_len_flags & UTF8_FILTER)
+		    c = 0;
+		  else
+		    c = '?';
+		  if (c)
+		    ret += c;
+		  len = 1;
+		}
+	      pos += len;
+	      ++upos;
+	    }
+	  else
+	    {
+	      ret.append("...");
+	      break;
+	    }
+	}
+      return ret;
+    }
+
+    template <typename STRING>
+    inline size_t utf8_length(const STRING& str)
+    {
+      const size_t size = str.length();
+      size_t upos = 0;
+      size_t pos = 0;
+      while (pos < size)
+	{
+	  int len = std::min((int)trailingBytesForUTF8[(unsigned char)str[pos]]+1,
+			     (int)size);
+	  if (!isLegalUTF8((const unsigned char *)&str[pos], len))
+	    len = 1;
+	  pos += len;
+	  ++upos;
+	}
+      return upos;
+    }
+
+    inline void conversion_result_throw(const ConversionResult res)
+    {
+      switch (res)
+	{
+	case conversionOK:
+	  return;
+	case sourceExhausted:
+	  throw unicode_src_overflow();
+	case targetExhausted:
+	  throw unicode_dest_overflow();
+	case sourceIllegal:
+	  throw unicode_malformed();
+	}
+    }
+
+    // Convert a UTF-8 string to UTF-16 little endian (no null termination in return)
+    template <typename STRING>
+    inline BufferPtr string_to_utf16(const STRING& str)
+    {
+      std::unique_ptr<UTF16[]> utf16_dest(new UTF16[str.length()]);
+      const UTF8 *src = (UTF8 *)str.c_str();
+      UTF16 *dest = utf16_dest.get();
+      const ConversionResult res = ConvertUTF8toUTF16(&src, src + str.length(),
+						      &dest, dest + str.length(),
+						      lenientConversion);
+      conversion_result_throw(res);
+      BufferPtr ret(new BufferAllocated((dest - utf16_dest.get()) * 2, BufferAllocated::ARRAY));
+      UTF8 *d = ret->data();
+      for (const UTF16 *s = utf16_dest.get(); s < dest; ++s)
+	{
+	  *d++ = *s & 0xFF;
+	  *d++ = (*s >> 8) & 0xFF;
+	}
+      return ret;
+    }
+
+    class UTF8Iterator
+    {
+    public:
+      struct Char
+      {
+	unsigned int len;
+	unsigned char data[4];
+	bool valid;
+
+	const bool is_valid() const
+	{
+	  return valid && len >= 1 && len <= sizeof(data);
+	}
+
+	std::string str(const char *malformed)
+	{
+	  if (is_valid())
+	    return std::string((char *)data, len);
+	  else
+	    return malformed;
+	}
+      };
+
+      UTF8Iterator(const std::string& str_arg)
+	: str((unsigned char *)str_arg.c_str()),
+	  size(str_arg.length())
+      {
+      }
+
+      bool get(Char &c)
+      {
+	if (size)
+	  {
+	    unsigned int len = std::min((unsigned int)trailingBytesForUTF8[*str]+1,
+					(unsigned int)size);
+	    if (isLegalUTF8(str, len))
+	      {
+		c.valid = true;
+		c.len = std::min(len, (unsigned int)sizeof(c.data));
+		std::memcpy(c.data, str, c.len);
+	      }
+	    else
+	      {
+		c.valid = false;
+		c.len = 1;
+	      }
+	    str += c.len;
+	    size -= c.len;
+	    return true;
+	  }
+	else
+	  return false;
+      }
+
+    private:
+      const unsigned char *str;
+      unsigned int size;
+    };
+  }
+}
+
+#endif