mirror of
https://github.com/OpenVPN/openvpn3.git
synced 2024-09-20 12:12:15 +02:00
7728ab5bc3
attributes such as length and presence of control and/or space chars.
302 lines
8.0 KiB
C++
302 lines
8.0 KiB
C++
// OpenVPN -- An application to securely tunnel IP networks
|
|
// over a single port, with support for SSL/TLS-based
|
|
// session authentication and key exchange,
|
|
// packet encryption, packet authentication, and
|
|
// packet compression.
|
|
//
|
|
// Copyright (C) 2012-2015 OpenVPN Technologies, Inc.
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License Version 3
|
|
// as published by the Free Software Foundation.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program in the COPYING file.
|
|
// If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
// General-purpose function for dealing with unicode.
|
|
|
|
#ifndef OPENVPN_COMMON_UNICODE_H
|
|
#define OPENVPN_COMMON_UNICODE_H
|
|
|
|
#include <string>
|
|
#include <cstring> // for std::memcpy
|
|
#include <algorithm> // for std::min
|
|
#include <memory>
|
|
#include <cctype>
|
|
|
|
#include <openvpn/common/size.hpp>
|
|
#include <openvpn/common/exception.hpp>
|
|
#include <openvpn/common/unicode-impl.hpp>
|
|
#include <openvpn/buffer/buffer.hpp>
|
|
|
|
namespace openvpn {
|
|
namespace Unicode {
|
|
|
|
OPENVPN_SIMPLE_EXCEPTION(unicode_src_overflow);
|
|
OPENVPN_SIMPLE_EXCEPTION(unicode_dest_overflow);
|
|
OPENVPN_SIMPLE_EXCEPTION(unicode_malformed);
|
|
|
|
// Return true if the given buffer is a valid UTF-8 string.
|
|
// Extra constraints:
|
|
enum {
|
|
UTF8_NO_CTRL = (1<<30), // no control chars allowed
|
|
UTF8_NO_SPACE = (1<<31), // no space chars allowed
|
|
};
|
|
inline bool is_valid_utf8_uchar_buf(const unsigned char *source,
|
|
size_t size,
|
|
const size_t max_len_flags=0) // OR max length (or 0 to disable) with UTF8_x flags above
|
|
{
|
|
const size_t max_len = max_len_flags & ((size_t)UTF8_NO_CTRL-1); // NOTE -- use smallest flag value here
|
|
size_t unicode_len = 0;
|
|
while (size)
|
|
{
|
|
const unsigned char c = *source;
|
|
if (c == '\0')
|
|
return false;
|
|
const int length = trailingBytesForUTF8[c]+1;
|
|
if ((size_t)length > size)
|
|
return false;
|
|
if (!isLegalUTF8(source, length))
|
|
return false;
|
|
if (length == 1)
|
|
{
|
|
if ((max_len_flags & UTF8_NO_CTRL) && std::iscntrl(c))
|
|
return false;
|
|
if ((max_len_flags & UTF8_NO_SPACE) && std::isspace(c))
|
|
return false;
|
|
}
|
|
|
|
source += length;
|
|
size -= length;
|
|
++unicode_len;
|
|
if (max_len && unicode_len > max_len)
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
template <typename STRING>
|
|
inline bool is_valid_utf8(const STRING& str, const size_t max_len_flags=0)
|
|
{
|
|
return is_valid_utf8_uchar_buf((const unsigned char *)str.c_str(), str.length(), max_len_flags);
|
|
}
|
|
|
|
// Return the byte position in the string that corresponds with
|
|
// the given character index. Return values:
|
|
enum {
|
|
UTF8_GOOD=0, // succeeded, result in index
|
|
UTF8_BAD, // failed, string is not legal UTF8
|
|
UTF8_RANGE, // failed, index is beyond end of string
|
|
};
|
|
template <typename STRING>
|
|
inline int utf8_index(STRING& str, size_t& index)
|
|
{
|
|
const size_t size = str.length();
|
|
size_t upos = 0;
|
|
size_t pos = 0;
|
|
while (pos < size)
|
|
{
|
|
const int len = trailingBytesForUTF8[(unsigned char)str[pos]]+1;
|
|
if (pos + len > size || !isLegalUTF8((const unsigned char *)&str[pos], len))
|
|
return UTF8_BAD;
|
|
if (upos >= index)
|
|
{
|
|
index = pos;
|
|
return UTF8_GOOD;
|
|
}
|
|
pos += len;
|
|
++upos;
|
|
}
|
|
return UTF8_RANGE;
|
|
}
|
|
|
|
// Truncate a UTF8 string if its length exceeds max_len
|
|
template <typename STRING>
|
|
inline void utf8_truncate(STRING& str, size_t max_len)
|
|
{
|
|
const int status = utf8_index(str, max_len);
|
|
if (status == UTF8_GOOD || status == UTF8_BAD)
|
|
str = str.substr(0, max_len);
|
|
}
|
|
|
|
// Return a printable UTF-8 string, where bad UTF-8 chars and
|
|
// control chars are mapped to '?'.
|
|
// If max_len_flags > 0, print a maximum of max_len_flags chars.
|
|
// If UTF8_PASS_FMT flag is set in max_len_flags, pass through \r\n\t
|
|
enum {
|
|
UTF8_PASS_FMT=(1<<31),
|
|
UTF8_FILTER=(1<<30),
|
|
};
|
|
template <typename STRING>
|
|
inline STRING utf8_printable(const STRING& str, size_t max_len_flags)
|
|
{
|
|
STRING ret;
|
|
const size_t size = str.length();
|
|
const size_t max_len = max_len_flags & ((size_t)UTF8_FILTER-1); // NOTE -- use smallest flag value here
|
|
size_t upos = 0;
|
|
size_t pos = 0;
|
|
ret.reserve(std::min(str.length(), max_len) + 3); // add 3 for "..."
|
|
while (pos < size)
|
|
{
|
|
if (!max_len || upos < max_len)
|
|
{
|
|
unsigned char c = str[pos];
|
|
int len = trailingBytesForUTF8[c]+1;
|
|
if (pos + len <= size
|
|
&& c >= 0x20 && c != 0x7F
|
|
&& isLegalUTF8((const unsigned char *)&str[pos], len))
|
|
{
|
|
// non-control, legal UTF-8
|
|
ret.append(str, pos, len);
|
|
}
|
|
else
|
|
{
|
|
// control char or bad UTF-8 char
|
|
if (c == '\r' || c == '\n' || c == '\t')
|
|
{
|
|
if (!(max_len_flags & UTF8_PASS_FMT))
|
|
c = ' ';
|
|
}
|
|
else if (max_len_flags & UTF8_FILTER)
|
|
c = 0;
|
|
else
|
|
c = '?';
|
|
if (c)
|
|
ret += c;
|
|
len = 1;
|
|
}
|
|
pos += len;
|
|
++upos;
|
|
}
|
|
else
|
|
{
|
|
ret.append("...");
|
|
break;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
template <typename STRING>
|
|
inline size_t utf8_length(const STRING& str)
|
|
{
|
|
const size_t size = str.length();
|
|
size_t upos = 0;
|
|
size_t pos = 0;
|
|
while (pos < size)
|
|
{
|
|
int len = std::min((int)trailingBytesForUTF8[(unsigned char)str[pos]]+1,
|
|
(int)size);
|
|
if (!isLegalUTF8((const unsigned char *)&str[pos], len))
|
|
len = 1;
|
|
pos += len;
|
|
++upos;
|
|
}
|
|
return upos;
|
|
}
|
|
|
|
inline void conversion_result_throw(const ConversionResult res)
|
|
{
|
|
switch (res)
|
|
{
|
|
case conversionOK:
|
|
return;
|
|
case sourceExhausted:
|
|
throw unicode_src_overflow();
|
|
case targetExhausted:
|
|
throw unicode_dest_overflow();
|
|
case sourceIllegal:
|
|
throw unicode_malformed();
|
|
}
|
|
}
|
|
|
|
// Convert a UTF-8 string to UTF-16 little endian (no null termination in return)
|
|
template <typename STRING>
|
|
inline BufferPtr string_to_utf16(const STRING& str)
|
|
{
|
|
std::unique_ptr<UTF16[]> utf16_dest(new UTF16[str.length()]);
|
|
const UTF8 *src = (UTF8 *)str.c_str();
|
|
UTF16 *dest = utf16_dest.get();
|
|
const ConversionResult res = ConvertUTF8toUTF16(&src, src + str.length(),
|
|
&dest, dest + str.length(),
|
|
lenientConversion);
|
|
conversion_result_throw(res);
|
|
BufferPtr ret(new BufferAllocated((dest - utf16_dest.get()) * 2, BufferAllocated::ARRAY));
|
|
UTF8 *d = ret->data();
|
|
for (const UTF16 *s = utf16_dest.get(); s < dest; ++s)
|
|
{
|
|
*d++ = *s & 0xFF;
|
|
*d++ = (*s >> 8) & 0xFF;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
class UTF8Iterator
|
|
{
|
|
public:
|
|
struct Char
|
|
{
|
|
unsigned int len;
|
|
unsigned char data[4];
|
|
bool valid;
|
|
|
|
const bool is_valid() const
|
|
{
|
|
return valid && len >= 1 && len <= sizeof(data);
|
|
}
|
|
|
|
std::string str(const char *malformed)
|
|
{
|
|
if (is_valid())
|
|
return std::string((char *)data, len);
|
|
else
|
|
return malformed;
|
|
}
|
|
};
|
|
|
|
UTF8Iterator(const std::string& str_arg)
|
|
: str((unsigned char *)str_arg.c_str()),
|
|
size(str_arg.length())
|
|
{
|
|
}
|
|
|
|
bool get(Char &c)
|
|
{
|
|
if (size)
|
|
{
|
|
unsigned int len = std::min((unsigned int)trailingBytesForUTF8[*str]+1,
|
|
(unsigned int)size);
|
|
if (isLegalUTF8(str, len))
|
|
{
|
|
c.valid = true;
|
|
c.len = std::min(len, (unsigned int)sizeof(c.data));
|
|
std::memcpy(c.data, str, c.len);
|
|
}
|
|
else
|
|
{
|
|
c.valid = false;
|
|
c.len = 1;
|
|
}
|
|
str += c.len;
|
|
size -= c.len;
|
|
return true;
|
|
}
|
|
else
|
|
return false;
|
|
}
|
|
|
|
private:
|
|
const unsigned char *str;
|
|
unsigned int size;
|
|
};
|
|
}
|
|
}
|
|
|
|
#endif
|