0
0
mirror of https://github.com/florisboard/florisboard.git synced 2024-09-19 19:42:20 +02:00

Move icu4c to separate repository

Preparation step to build ICU separately for F-Droid builds. Also the
main repo's stats are now not polluted again from the sheer massive
size of ICU.
This commit is contained in:
Patrick Goldinger 2021-08-16 20:20:53 +02:00 committed by Patrick Goldinger
parent 5bede68a82
commit 06c585885e
47 changed files with 10 additions and 26863 deletions

6
.gitmodules vendored
View File

@ -1,3 +1,3 @@
[submodule "app/src/main/cpp/icu4c/android"]
path = app/src/main/cpp/icu4c/android
url = https://github.com/patrickgold/icu4c-android
[submodule "app/src/main/icu4c"]
path = app/src/main/icu4c
url = https://github.com/florisboard/icu4c

View File

@ -54,8 +54,11 @@ android {
sourceSets {
maybeCreate("main").apply {
assets {
srcDirs("src/main/assets", "src/main/icu4c/prebuilt/assets")
}
jniLibs {
srcDirs("src/main/jniLibs")
srcDirs("src/main/icu4c/prebuilt/jniLibs")
}
}
}

Binary file not shown.

View File

@ -10,8 +10,8 @@ set(CMAKE_CXX_STANDARD 17)
include_directories(.)
### ICU4C ###
include_directories(icu4c/include)
set(JNI_LIBS ${CMAKE_SOURCE_DIR}/../jniLibs/${ANDROID_ABI})
include_directories(../icu4c/prebuilt/include)
set(JNI_LIBS ${CMAKE_SOURCE_DIR}/../icu4c/prebuilt/jniLibs/${ANDROID_ABI})
add_library(ICU::data STATIC IMPORTED)
set_property(TARGET ICU::data PROPERTY IMPORTED_LOCATION "${JNI_LIBS}/libicudata.a")
add_library(ICU::uc STATIC IMPORTED)

@ -1 +0,0 @@
Subproject commit ff5c585a438ba3b94199f95fc0eb0d569178a1c3

View File

@ -1,67 +0,0 @@
{
"strategy": "subtractive",
"featureFilters": {
"coll_ucadata": {
"filterType": "exclude"
},
"coll_tree": {
"filterType": "exclude"
},
"confusables": {
"filterType": "exclude"
},
"curr_supplemental": {
"filterType": "exclude"
},
"curr_tree": {
"filterType": "exclude"
},
"lang_tree": {
"filterType": "exclude"
},
"locales_tree": {
"excludelist": [
"en_US_POSIX"
]
},
"misc": {
"excludelist": [
"currencyNumericCodes",
"dayPeriods",
"genderList",
"metaZones",
"numberingSystems",
"pluralRanges",
"plurals",
"supplementalData",
"timezoneTypes",
"windowsZones",
"zoneinfo64"
]
},
"region_tree": {
"filterType": "exclude"
},
"rbnf_tree": {
"filterType": "exclude"
},
"stringprep": {
"filterType": "exclude"
},
"translit": {
"filterType": "exclude"
},
"unames": {
"filterType": "exclude"
},
"unit_tree": {
"filterType": "exclude"
},
"zone_supplemental": {
"filterType": "exclude"
},
"zone_tree": {
"filterType": "exclude"
}
}
}

View File

@ -1,118 +0,0 @@
#!/bin/bash
# Copyright 2021 Patrick Goldinger
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build script for ICU4C, tailored exactly for FlorisBoard's needs.
# Before executing this script to manually rebuild the ICU libraries, make sure to execute
# git submodule update --init --recursive
# else the script won't find the ICU source files!
###### Build ICU4C ######
./android/cc-icu4c.sh build \
--arch=arm,arm64 \
--api=23 \
--library-type=static \
--build-dir=./build \
--icu-src-dir=./android/icu/icu4c \
--install-include-dir=./include \
--install-libs-dir=./../../jniLibs \
--install-data-dir=./../../assets/icu \
--data-filter-file=./data-feature-filter.json \
--data-packaging=archive \
--enable-collation=no \
--enable-formatting=no \
--enable-legacy-converters=yes \
--enable-regex=no \
--enable-transliteration=no
# shellcheck disable=SC2181
if ! [ $? == 0 ]; then
exit
fi
###### Clean up unused header files in include/unicode header dir ######
readonly SEP=":"
readonly NUSPELL_DIR=$(realpath ../nuspell)
readonly JNI_BRIDGE_DIR=$(realpath ..)
readonly UNICODE_DIR=$(realpath include/unicode)
scan_file() {
file=$1
local -n var=$2
#echo "Scanning '$file'..."
while IFS= read -r line; do
case $line in
*"#include <unicode/"*)
# shellcheck disable=SC2001
header=$(sed -e 's/.*<unicode\/\(.*\)>.*/\1/' <<< "$line")
;;
*"#include \"unicode/"*)
# shellcheck disable=SC2001
header=$(sed -e 's/.*\"unicode\/\(.*\)\".*/\1/' <<< "$line")
;;
*)
header=""
;;
esac
if [ -z "$header" ]; then
continue
fi
# shellcheck disable=SC2091
# shellcheck disable=SC2086
if [[ ! "$var" == *"$header"* ]]; then
# shellcheck disable=SC2140
var+="$SEP$header"
fi
done < "$file"
}
req_headers=""
for nsrcfile in "$NUSPELL_DIR"/*; do
scan_file "$nsrcfile" "req_headers"
done
for nsrcfile in "$JNI_BRIDGE_DIR"/*; do
scan_file "$nsrcfile" "req_headers"
done
if [ -n "$req_headers" ]; then
req_headers=${req_headers:1}
fi
while true; do
old_req_headers=$req_headers
IFS="$SEP" read -ra req_header_splitted <<< "$req_headers"
for req_header in "${req_header_splitted[@]}"; do
scan_file "$UNICODE_DIR/$req_header" "req_headers"
done
[ ! $req_headers = $old_req_headers ] || break
done
#echo "$req_headers"
for headerfile in "$UNICODE_DIR"/*; do
header=$(basename "$headerfile")
if [[ "$req_headers" == *"$header"* ]]; then
echo "KEEP '$headerfile'"
else
echo "DELETE '$headerfile'"
rm "$headerfile"
fi
done

View File

@ -1,307 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// Copyright (C) 2009-2012, International Business Machines
// Corporation and others. All Rights Reserved.
//
// Copyright 2007 Google Inc. All Rights Reserved.
// Author: sanjay@google.com (Sanjay Ghemawat)
//
// Abstract interface that consumes a sequence of bytes (ByteSink).
//
// Used so that we can write a single piece of code that can operate
// on a variety of output string types.
//
// Various implementations of this interface are provided:
// ByteSink:
// CheckedArrayByteSink Write to a flat array, with bounds checking
// StringByteSink Write to an STL string
// This code is a contribution of Google code, and the style used here is
// a compromise between the original Google code and the ICU coding guidelines.
// For example, data types are ICU-ified (size_t,int->int32_t),
// and API comments doxygen-ified, but function names and behavior are
// as in the original, if possible.
// Assertion-style error handling, not available in ICU, was changed to
// parameter "pinning" similar to UnicodeString.
//
// In addition, this is only a partial port of the original Google code,
// limited to what was needed so far. The (nearly) complete original code
// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
// (see ICU ticket 6765, r25517).
#ifndef __BYTESTREAM_H__
#define __BYTESTREAM_H__
/**
* \file
* \brief C++ API: Interface for writing bytes, and implementation classes.
*/
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/uobject.h"
#include "unicode/std_string.h"
U_NAMESPACE_BEGIN
/**
* A ByteSink can be filled with bytes.
* @stable ICU 4.2
*/
class U_COMMON_API ByteSink : public UMemory {
public:
/**
* Default constructor.
* @stable ICU 4.2
*/
ByteSink() { }
/**
* Virtual destructor.
* @stable ICU 4.2
*/
virtual ~ByteSink();
/**
* Append "bytes[0,n-1]" to this.
* @param bytes the pointer to the bytes
* @param n the number of bytes; must be non-negative
* @stable ICU 4.2
*/
virtual void Append(const char* bytes, int32_t n) = 0;
/**
* Appends n bytes to this. Same as Append().
* Call AppendU8() with u8"string literals" which are const char * in C++11
* but const char8_t * in C++20.
* If the compiler does support char8_t as a distinct type,
* then an AppendU8() overload for that is defined and will be chosen.
*
* @param bytes the pointer to the bytes
* @param n the number of bytes; must be non-negative
* @stable ICU 67
*/
inline void AppendU8(const char* bytes, int32_t n) {
Append(bytes, n);
}
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Appends n bytes to this. Same as Append() but for a const char8_t * pointer.
* Call AppendU8() with u8"string literals" which are const char * in C++11
* but const char8_t * in C++20.
* If the compiler does support char8_t as a distinct type,
* then this AppendU8() overload for that is defined and will be chosen.
*
* @param bytes the pointer to the bytes
* @param n the number of bytes; must be non-negative
* @stable ICU 67
*/
inline void AppendU8(const char8_t* bytes, int32_t n) {
Append(reinterpret_cast<const char*>(bytes), n);
}
#endif
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
* *result_capacity. Guarantees *result_capacity>=min_capacity.
* May return a pointer to the caller-owned scratch buffer which must have
* scratch_capacity>=min_capacity.
* The returned buffer is only valid until the next operation
* on this ByteSink.
*
* After writing at most *result_capacity bytes, call Append() with the
* pointer returned from this function and the number of bytes written.
* Many Append() implementations will avoid copying bytes if this function
* returned an internal buffer.
*
* Partial usage example:
* int32_t capacity;
* char* buffer = sink->GetAppendBuffer(..., &capacity);
* ... Write n bytes into buffer, with n <= capacity.
* sink->Append(buffer, n);
* In many implementations, that call to Append will avoid copying bytes.
*
* If the ByteSink allocates or reallocates an internal buffer, it should use
* the desired_capacity_hint if appropriate.
* If a caller cannot provide a reasonable guess at the desired capacity,
* it should pass desired_capacity_hint=0.
*
* If a non-scratch buffer is returned, the caller may only pass
* a prefix to it to Append().
* That is, it is not correct to pass an interior pointer to Append().
*
* The default implementation always returns the scratch buffer.
*
* @param min_capacity required minimum capacity of the returned buffer;
* must be non-negative
* @param desired_capacity_hint desired capacity of the returned buffer;
* must be non-negative
* @param scratch default caller-owned buffer
* @param scratch_capacity capacity of the scratch buffer
* @param result_capacity pointer to an integer which will be set to the
* capacity of the returned buffer
* @return a buffer with *result_capacity>=min_capacity
* @stable ICU 4.2
*/
virtual char* GetAppendBuffer(int32_t min_capacity,
int32_t desired_capacity_hint,
char* scratch, int32_t scratch_capacity,
int32_t* result_capacity);
/**
* Flush internal buffers.
* Some byte sinks use internal buffers or provide buffering
* and require calling Flush() at the end of the stream.
* The ByteSink should be ready for further Append() calls after Flush().
* The default implementation of Flush() does nothing.
* @stable ICU 4.2
*/
virtual void Flush();
private:
ByteSink(const ByteSink &) = delete;
ByteSink &operator=(const ByteSink &) = delete;
};
// -------------------------------------------------------------
// Some standard implementations
/**
* Implementation of ByteSink that writes to a flat byte array,
* with bounds-checking:
* This sink will not write more than capacity bytes to outbuf.
* If more than capacity bytes are Append()ed, then excess bytes are ignored,
* and Overflowed() will return true.
* Overflow does not cause a runtime error.
* @stable ICU 4.2
*/
class U_COMMON_API CheckedArrayByteSink : public ByteSink {
public:
/**
* Constructs a ByteSink that will write to outbuf[0..capacity-1].
* @param outbuf buffer to write to
* @param capacity size of the buffer
* @stable ICU 4.2
*/
CheckedArrayByteSink(char* outbuf, int32_t capacity);
/**
* Destructor.
* @stable ICU 4.2
*/
virtual ~CheckedArrayByteSink();
/**
* Returns the sink to its original state, without modifying the buffer.
* Useful for reusing both the buffer and the sink for multiple streams.
* Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
* and Overflowed()=false.
* @return *this
* @stable ICU 4.6
*/
virtual CheckedArrayByteSink& Reset();
/**
* Append "bytes[0,n-1]" to this.
* @param bytes the pointer to the bytes
* @param n the number of bytes; must be non-negative
* @stable ICU 4.2
*/
virtual void Append(const char* bytes, int32_t n);
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
* *result_capacity. For details see the base class documentation.
* @param min_capacity required minimum capacity of the returned buffer;
* must be non-negative
* @param desired_capacity_hint desired capacity of the returned buffer;
* must be non-negative
* @param scratch default caller-owned buffer
* @param scratch_capacity capacity of the scratch buffer
* @param result_capacity pointer to an integer which will be set to the
* capacity of the returned buffer
* @return a buffer with *result_capacity>=min_capacity
* @stable ICU 4.2
*/
virtual char* GetAppendBuffer(int32_t min_capacity,
int32_t desired_capacity_hint,
char* scratch, int32_t scratch_capacity,
int32_t* result_capacity);
/**
* Returns the number of bytes actually written to the sink.
* @return number of bytes written to the buffer
* @stable ICU 4.2
*/
int32_t NumberOfBytesWritten() const { return size_; }
/**
* Returns true if any bytes were discarded, i.e., if there was an
* attempt to write more than 'capacity' bytes.
* @return true if more than 'capacity' bytes were Append()ed
* @stable ICU 4.2
*/
UBool Overflowed() const { return overflowed_; }
/**
* Returns the number of bytes appended to the sink.
* If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
* else they return the same number.
* @return number of bytes written to the buffer
* @stable ICU 4.6
*/
int32_t NumberOfBytesAppended() const { return appended_; }
private:
char* outbuf_;
const int32_t capacity_;
int32_t size_;
int32_t appended_;
UBool overflowed_;
CheckedArrayByteSink() = delete;
CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
};
/**
* Implementation of ByteSink that writes to a "string".
* The StringClass is usually instantiated with a std::string.
* @stable ICU 4.2
*/
template<typename StringClass>
class StringByteSink : public ByteSink {
public:
/**
* Constructs a ByteSink that will append bytes to the dest string.
* @param dest pointer to string object to append to
* @stable ICU 4.2
*/
StringByteSink(StringClass* dest) : dest_(dest) { }
/**
* Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
*
* @param dest pointer to string object to append to
* @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
* @stable ICU 60
*/
StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
if (initialAppendCapacity > 0 &&
(uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) {
dest->reserve(dest->length() + initialAppendCapacity);
}
}
/**
* Append "bytes[0,n-1]" to this.
* @param data the pointer to the bytes
* @param n the number of bytes; must be non-negative
* @stable ICU 4.2
*/
virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
private:
StringClass* dest_;
StringByteSink() = delete;
StringByteSink(const StringByteSink &) = delete;
StringByteSink &operator=(const StringByteSink &) = delete;
};
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // __BYTESTREAM_H__

View File

@ -1,313 +0,0 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// char16ptr.h
// created: 2017feb28 Markus W. Scherer
#ifndef __CHAR16PTR_H__
#define __CHAR16PTR_H__
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include <cstddef>
/**
* \file
* \brief C++ API: char16_t pointer wrappers with
* implicit conversion from bit-compatible raw pointer types.
* Also conversion functions from char16_t * to UChar * and OldUChar *.
*/
U_NAMESPACE_BEGIN
/**
* \def U_ALIASING_BARRIER
* Barrier for pointer anti-aliasing optimizations even across function boundaries.
* @internal
*/
#ifdef U_ALIASING_BARRIER
// Use the predefined value.
#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT
# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
#elif defined(U_IN_DOXYGEN)
# define U_ALIASING_BARRIER(ptr)
#endif
/**
* char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
* @stable ICU 59
*/
class U_COMMON_API Char16Ptr U_FINAL {
public:
/**
* Copies the pointer.
* @param p pointer
* @stable ICU 59
*/
inline Char16Ptr(char16_t *p);
#if !U_CHAR16_IS_TYPEDEF
/**
* Converts the pointer to char16_t *.
* @param p pointer to be converted
* @stable ICU 59
*/
inline Char16Ptr(uint16_t *p);
#endif
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* Converts the pointer to char16_t *.
* (Only defined if U_SIZEOF_WCHAR_T==2.)
* @param p pointer to be converted
* @stable ICU 59
*/
inline Char16Ptr(wchar_t *p);
#endif
/**
* nullptr constructor.
* @param p nullptr
* @stable ICU 59
*/
inline Char16Ptr(std::nullptr_t p);
/**
* Destructor.
* @stable ICU 59
*/
inline ~Char16Ptr();
/**
* Pointer access.
* @return the wrapped pointer
* @stable ICU 59
*/
inline char16_t *get() const;
/**
* char16_t pointer access via type conversion (e.g., static_cast).
* @return the wrapped pointer
* @stable ICU 59
*/
inline operator char16_t *() const { return get(); }
private:
Char16Ptr() = delete;
#ifdef U_ALIASING_BARRIER
template<typename T> static char16_t *cast(T *t) {
U_ALIASING_BARRIER(t);
return reinterpret_cast<char16_t *>(t);
}
char16_t *p_;
#else
union {
char16_t *cp;
uint16_t *up;
wchar_t *wp;
} u_;
#endif
};
/// \cond
#ifdef U_ALIASING_BARRIER
Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF
Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {}
#endif
Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {}
Char16Ptr::~Char16Ptr() {
U_ALIASING_BARRIER(p_);
}
char16_t *Char16Ptr::get() const { return p_; }
#else
Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF
Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; }
#endif
#if U_SIZEOF_WCHAR_T==2
Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; }
#endif
Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; }
Char16Ptr::~Char16Ptr() {}
char16_t *Char16Ptr::get() const { return u_.cp; }
#endif
/// \endcond
/**
* const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
* @stable ICU 59
*/
class U_COMMON_API ConstChar16Ptr U_FINAL {
public:
/**
* Copies the pointer.
* @param p pointer
* @stable ICU 59
*/
inline ConstChar16Ptr(const char16_t *p);
#if !U_CHAR16_IS_TYPEDEF
/**
* Converts the pointer to char16_t *.
* @param p pointer to be converted
* @stable ICU 59
*/
inline ConstChar16Ptr(const uint16_t *p);
#endif
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* Converts the pointer to char16_t *.
* (Only defined if U_SIZEOF_WCHAR_T==2.)
* @param p pointer to be converted
* @stable ICU 59
*/
inline ConstChar16Ptr(const wchar_t *p);
#endif
/**
* nullptr constructor.
* @param p nullptr
* @stable ICU 59
*/
inline ConstChar16Ptr(const std::nullptr_t p);
/**
* Destructor.
* @stable ICU 59
*/
inline ~ConstChar16Ptr();
/**
* Pointer access.
* @return the wrapped pointer
* @stable ICU 59
*/
inline const char16_t *get() const;
/**
* char16_t pointer access via type conversion (e.g., static_cast).
* @return the wrapped pointer
* @stable ICU 59
*/
inline operator const char16_t *() const { return get(); }
private:
ConstChar16Ptr() = delete;
#ifdef U_ALIASING_BARRIER
template<typename T> static const char16_t *cast(const T *t) {
U_ALIASING_BARRIER(t);
return reinterpret_cast<const char16_t *>(t);
}
const char16_t *p_;
#else
union {
const char16_t *cp;
const uint16_t *up;
const wchar_t *wp;
} u_;
#endif
};
/// \cond
#ifdef U_ALIASING_BARRIER
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {}
#endif
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {}
ConstChar16Ptr::~ConstChar16Ptr() {
U_ALIASING_BARRIER(p_);
}
const char16_t *ConstChar16Ptr::get() const { return p_; }
#else
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; }
#endif
#if U_SIZEOF_WCHAR_T==2
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; }
#endif
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; }
ConstChar16Ptr::~ConstChar16Ptr() {}
const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
#endif
/// \endcond
/**
* Converts from const char16_t * to const UChar *.
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as const UChar *
* @stable ICU 59
*/
inline const UChar *toUCharPtr(const char16_t *p) {
#ifdef U_ALIASING_BARRIER
U_ALIASING_BARRIER(p);
#endif
return reinterpret_cast<const UChar *>(p);
}
/**
* Converts from char16_t * to UChar *.
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as UChar *
* @stable ICU 59
*/
inline UChar *toUCharPtr(char16_t *p) {
#ifdef U_ALIASING_BARRIER
U_ALIASING_BARRIER(p);
#endif
return reinterpret_cast<UChar *>(p);
}
/**
* Converts from const char16_t * to const OldUChar *.
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as const OldUChar *
* @stable ICU 59
*/
inline const OldUChar *toOldUCharPtr(const char16_t *p) {
#ifdef U_ALIASING_BARRIER
U_ALIASING_BARRIER(p);
#endif
return reinterpret_cast<const OldUChar *>(p);
}
/**
* Converts from char16_t * to OldUChar *.
* Includes an aliasing barrier if available.
* @param p pointer
* @return p as OldUChar *
* @stable ICU 59
*/
inline OldUChar *toOldUCharPtr(char16_t *p) {
#ifdef U_ALIASING_BARRIER
U_ALIASING_BARRIER(p);
#endif
return reinterpret_cast<OldUChar *>(p);
}
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // __CHAR16PTR_H__

View File

@ -1,595 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 2009-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: localpointer.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009nov13
* created by: Markus W. Scherer
*/
#ifndef __LOCALPOINTER_H__
#define __LOCALPOINTER_H__
/**
* \file
* \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code.
*
* These classes are inspired by
* - std::auto_ptr
* - boost::scoped_ptr & boost::scoped_array
* - Taligent Safe Pointers (TOnlyPointerTo)
*
* but none of those provide for all of the goals for ICU smart pointers:
* - Smart pointer owns the object and releases it when it goes out of scope.
* - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust.
* - ICU-compatible: No exceptions.
* - Need to be able to orphan/release the pointer and its ownership.
* - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects.
*
* For details see http://site.icu-project.org/design/cpp/scoped_ptr
*/
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include <memory>
U_NAMESPACE_BEGIN
/**
* "Smart pointer" base class; do not use directly: use LocalPointer etc.
*
* Base class for smart pointer classes that do not throw exceptions.
*
* Do not use this base class directly, since it does not delete its pointer.
* A subclass must implement methods that delete the pointer:
* Destructor and adoptInstead().
*
* There is no operator T *() provided because the programmer must decide
* whether to use getAlias() (without transfer of ownership) or orphan()
* (with transfer of ownership and NULLing of the pointer).
*
* @see LocalPointer
* @see LocalArray
* @see U_DEFINE_LOCAL_OPEN_POINTER
* @stable ICU 4.4
*/
template<typename T>
class LocalPointerBase {
public:
// No heap allocation. Use only on the stack.
static void* U_EXPORT2 operator new(size_t) = delete;
static void* U_EXPORT2 operator new[](size_t) = delete;
#if U_HAVE_PLACEMENT_NEW
static void* U_EXPORT2 operator new(size_t, void*) = delete;
#endif
/**
* Constructor takes ownership.
* @param p simple pointer to an object that is adopted
* @stable ICU 4.4
*/
explicit LocalPointerBase(T *p=NULL) : ptr(p) {}
/**
* Destructor deletes the object it owns.
* Subclass must override: Base class does nothing.
* @stable ICU 4.4
*/
~LocalPointerBase() { /* delete ptr; */ }
/**
* NULL check.
* @return true if ==NULL
* @stable ICU 4.4
*/
UBool isNull() const { return ptr==NULL; }
/**
* NULL check.
* @return true if !=NULL
* @stable ICU 4.4
*/
UBool isValid() const { return ptr!=NULL; }
/**
* Comparison with a simple pointer, so that existing code
* with ==NULL need not be changed.
* @param other simple pointer for comparison
* @return true if this pointer value equals other
* @stable ICU 4.4
*/
bool operator==(const T *other) const { return ptr==other; }
/**
* Comparison with a simple pointer, so that existing code
* with !=NULL need not be changed.
* @param other simple pointer for comparison
* @return true if this pointer value differs from other
* @stable ICU 4.4
*/
bool operator!=(const T *other) const { return ptr!=other; }
/**
* Access without ownership change.
* @return the pointer value
* @stable ICU 4.4
*/
T *getAlias() const { return ptr; }
/**
* Access without ownership change.
* @return the pointer value as a reference
* @stable ICU 4.4
*/
T &operator*() const { return *ptr; }
/**
* Access without ownership change.
* @return the pointer value
* @stable ICU 4.4
*/
T *operator->() const { return ptr; }
/**
* Gives up ownership; the internal pointer becomes NULL.
* @return the pointer value;
* caller becomes responsible for deleting the object
* @stable ICU 4.4
*/
T *orphan() {
T *p=ptr;
ptr=NULL;
return p;
}
/**
* Deletes the object it owns,
* and adopts (takes ownership of) the one passed in.
* Subclass must override: Base class does not delete the object.
* @param p simple pointer to an object that is adopted
* @stable ICU 4.4
*/
void adoptInstead(T *p) {
// delete ptr;
ptr=p;
}
protected:
/**
* Actual pointer.
* @internal
*/
T *ptr;
private:
// No comparison operators with other LocalPointerBases.
bool operator==(const LocalPointerBase<T> &other);
bool operator!=(const LocalPointerBase<T> &other);
// No ownership sharing: No copy constructor, no assignment operator.
LocalPointerBase(const LocalPointerBase<T> &other);
void operator=(const LocalPointerBase<T> &other);
};
/**
* "Smart pointer" class, deletes objects via the standard C++ delete operator.
* For most methods see the LocalPointerBase base class.
*
* Usage example:
* \code
* LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005));
* int32_t length=s->length(); // 2
* char16_t lead=s->charAt(0); // 0xd900
* if(some condition) { return; } // no need to explicitly delete the pointer
* s.adoptInstead(new UnicodeString((char16_t)0xfffc));
* length=s->length(); // 1
* // no need to explicitly delete the pointer
* \endcode
*
* @see LocalPointerBase
* @stable ICU 4.4
*/
template<typename T>
class LocalPointer : public LocalPointerBase<T> {
public:
using LocalPointerBase<T>::operator*;
using LocalPointerBase<T>::operator->;
/**
* Constructor takes ownership.
* @param p simple pointer to an object that is adopted
* @stable ICU 4.4
*/
explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {}
/**
* Constructor takes ownership and reports an error if NULL.
*
* This constructor is intended to be used with other-class constructors
* that may report a failure UErrorCode,
* so that callers need to check only for U_FAILURE(errorCode)
* and not also separately for isNull().
*
* @param p simple pointer to an object that is adopted
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
* if p==NULL and no other failure code had been set
* @stable ICU 55
*/
LocalPointer(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
if(p==NULL && U_SUCCESS(errorCode)) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
}
/**
* Move constructor, leaves src with isNull().
* @param src source smart pointer
* @stable ICU 56
*/
LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL;
}
/**
* Constructs a LocalPointer from a C++11 std::unique_ptr.
* The LocalPointer steals the object owned by the std::unique_ptr.
*
* This constructor works via move semantics. If your std::unique_ptr is
* in a local variable, you must use std::move.
*
* @param p The std::unique_ptr from which the pointer will be stolen.
* @stable ICU 64
*/
explicit LocalPointer(std::unique_ptr<T> &&p)
: LocalPointerBase<T>(p.release()) {}
/**
* Destructor deletes the object it owns.
* @stable ICU 4.4
*/
~LocalPointer() {
delete LocalPointerBase<T>::ptr;
}
/**
* Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object.
* @param src source smart pointer
* @return *this
* @stable ICU 56
*/
LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT {
delete LocalPointerBase<T>::ptr;
LocalPointerBase<T>::ptr=src.ptr;
src.ptr=NULL;
return *this;
}
/**
* Move-assign from an std::unique_ptr to this LocalPointer.
* Steals the pointer from the std::unique_ptr.
*
* @param p The std::unique_ptr from which the pointer will be stolen.
* @return *this
* @stable ICU 64
*/
LocalPointer<T> &operator=(std::unique_ptr<T> &&p) U_NOEXCEPT {
adoptInstead(p.release());
return *this;
}
/**
* Swap pointers.
* @param other other smart pointer
* @stable ICU 56
*/
void swap(LocalPointer<T> &other) U_NOEXCEPT {
T *temp=LocalPointerBase<T>::ptr;
LocalPointerBase<T>::ptr=other.ptr;
other.ptr=temp;
}
/**
* Non-member LocalPointer swap function.
* @param p1 will get p2's pointer
* @param p2 will get p1's pointer
* @stable ICU 56
*/
friend inline void swap(LocalPointer<T> &p1, LocalPointer<T> &p2) U_NOEXCEPT {
p1.swap(p2);
}
/**
* Deletes the object it owns,
* and adopts (takes ownership of) the one passed in.
* @param p simple pointer to an object that is adopted
* @stable ICU 4.4
*/
void adoptInstead(T *p) {
delete LocalPointerBase<T>::ptr;
LocalPointerBase<T>::ptr=p;
}
/**
* Deletes the object it owns,
* and adopts (takes ownership of) the one passed in.
*
* If U_FAILURE(errorCode), then the current object is retained and the new one deleted.
*
* If U_SUCCESS(errorCode) but the input pointer is NULL,
* then U_MEMORY_ALLOCATION_ERROR is set,
* the current object is deleted, and NULL is set.
*
* @param p simple pointer to an object that is adopted
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
* if p==NULL and no other failure code had been set
* @stable ICU 55
*/
void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
if(U_SUCCESS(errorCode)) {
delete LocalPointerBase<T>::ptr;
LocalPointerBase<T>::ptr=p;
if(p==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
} else {
delete p;
}
}
/**
* Conversion operator to a C++11 std::unique_ptr.
* Disowns the object and gives it to the returned std::unique_ptr.
*
* This operator works via move semantics. If your LocalPointer is
* in a local variable, you must use std::move.
*
* @return An std::unique_ptr owning the pointer previously owned by this
* icu::LocalPointer.
* @stable ICU 64
*/
operator std::unique_ptr<T> () && {
return std::unique_ptr<T>(LocalPointerBase<T>::orphan());
}
};
/**
* "Smart pointer" class, deletes objects via the C++ array delete[] operator.
* For most methods see the LocalPointerBase base class.
* Adds operator[] for array item access.
*
* Usage example:
* \code
* LocalArray<UnicodeString> a(new UnicodeString[2]);
* a[0].append((char16_t)0x61);
* if(some condition) { return; } // no need to explicitly delete the array
* a.adoptInstead(new UnicodeString[4]);
* a[3].append((char16_t)0x62).append((char16_t)0x63).reverse();
* // no need to explicitly delete the array
* \endcode
*
* @see LocalPointerBase
* @stable ICU 4.4
*/
template<typename T>
class LocalArray : public LocalPointerBase<T> {
public:
using LocalPointerBase<T>::operator*;
using LocalPointerBase<T>::operator->;
/**
* Constructor takes ownership.
* @param p simple pointer to an array of T objects that is adopted
* @stable ICU 4.4
*/
explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {}
/**
* Constructor takes ownership and reports an error if NULL.
*
* This constructor is intended to be used with other-class constructors
* that may report a failure UErrorCode,
* so that callers need to check only for U_FAILURE(errorCode)
* and not also separately for isNull().
*
* @param p simple pointer to an array of T objects that is adopted
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
* if p==NULL and no other failure code had been set
* @stable ICU 56
*/
LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
if(p==NULL && U_SUCCESS(errorCode)) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
}
/**
* Move constructor, leaves src with isNull().
* @param src source smart pointer
* @stable ICU 56
*/
LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL;
}
/**
* Constructs a LocalArray from a C++11 std::unique_ptr of an array type.
* The LocalPointer steals the array owned by the std::unique_ptr.
*
* This constructor works via move semantics. If your std::unique_ptr is
* in a local variable, you must use std::move.
*
* @param p The std::unique_ptr from which the array will be stolen.
* @stable ICU 64
*/
explicit LocalArray(std::unique_ptr<T[]> &&p)
: LocalPointerBase<T>(p.release()) {}
/**
* Destructor deletes the array it owns.
* @stable ICU 4.4
*/
~LocalArray() {
delete[] LocalPointerBase<T>::ptr;
}
/**
* Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object.
* @param src source smart pointer
* @return *this
* @stable ICU 56
*/
LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT {
delete[] LocalPointerBase<T>::ptr;
LocalPointerBase<T>::ptr=src.ptr;
src.ptr=NULL;
return *this;
}
/**
* Move-assign from an std::unique_ptr to this LocalPointer.
* Steals the array from the std::unique_ptr.
*
* @param p The std::unique_ptr from which the array will be stolen.
* @return *this
* @stable ICU 64
*/
LocalArray<T> &operator=(std::unique_ptr<T[]> &&p) U_NOEXCEPT {
adoptInstead(p.release());
return *this;
}
/**
* Swap pointers.
* @param other other smart pointer
* @stable ICU 56
*/
void swap(LocalArray<T> &other) U_NOEXCEPT {
T *temp=LocalPointerBase<T>::ptr;
LocalPointerBase<T>::ptr=other.ptr;
other.ptr=temp;
}
/**
* Non-member LocalArray swap function.
* @param p1 will get p2's pointer
* @param p2 will get p1's pointer
* @stable ICU 56
*/
friend inline void swap(LocalArray<T> &p1, LocalArray<T> &p2) U_NOEXCEPT {
p1.swap(p2);
}
/**
* Deletes the array it owns,
* and adopts (takes ownership of) the one passed in.
* @param p simple pointer to an array of T objects that is adopted
* @stable ICU 4.4
*/
void adoptInstead(T *p) {
delete[] LocalPointerBase<T>::ptr;
LocalPointerBase<T>::ptr=p;
}
/**
* Deletes the array it owns,
* and adopts (takes ownership of) the one passed in.
*
* If U_FAILURE(errorCode), then the current array is retained and the new one deleted.
*
* If U_SUCCESS(errorCode) but the input pointer is NULL,
* then U_MEMORY_ALLOCATION_ERROR is set,
* the current array is deleted, and NULL is set.
*
* @param p simple pointer to an array of T objects that is adopted
* @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
* if p==NULL and no other failure code had been set
* @stable ICU 56
*/
void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
if(U_SUCCESS(errorCode)) {
delete[] LocalPointerBase<T>::ptr;
LocalPointerBase<T>::ptr=p;
if(p==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
} else {
delete[] p;
}
}
/**
* Array item access (writable).
* No index bounds check.
* @param i array index
* @return reference to the array item
* @stable ICU 4.4
*/
T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
/**
* Conversion operator to a C++11 std::unique_ptr.
* Disowns the object and gives it to the returned std::unique_ptr.
*
* This operator works via move semantics. If your LocalPointer is
* in a local variable, you must use std::move.
*
* @return An std::unique_ptr owning the pointer previously owned by this
* icu::LocalPointer.
* @stable ICU 64
*/
operator std::unique_ptr<T[]> () && {
return std::unique_ptr<T[]>(LocalPointerBase<T>::orphan());
}
};
/**
* \def U_DEFINE_LOCAL_OPEN_POINTER
* "Smart pointer" definition macro, deletes objects via the closeFunction.
* Defines a subclass of LocalPointerBase which works just
* like LocalPointer<Type> except that this subclass will use the closeFunction
* rather than the C++ delete operator.
*
* Usage example:
* \code
* LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode));
* utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
* utf8Out, (int32_t)sizeof(utf8Out),
* utf8In, utf8InLength, &errorCode);
* if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap
* \endcode
*
* @see LocalPointerBase
* @see LocalPointer
* @stable ICU 4.4
*/
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
class LocalPointerClassName : public LocalPointerBase<Type> { \
public: \
using LocalPointerBase<Type>::operator*; \
using LocalPointerBase<Type>::operator->; \
explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
LocalPointerClassName(LocalPointerClassName &&src) U_NOEXCEPT \
: LocalPointerBase<Type>(src.ptr) { \
src.ptr=NULL; \
} \
/* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
explicit LocalPointerClassName(std::unique_ptr<Type, decltype(&closeFunction)> &&p) \
: LocalPointerBase<Type>(p.release()) {} \
~LocalPointerClassName() { if (ptr != NULL) { closeFunction(ptr); } } \
LocalPointerClassName &operator=(LocalPointerClassName &&src) U_NOEXCEPT { \
if (ptr != NULL) { closeFunction(ptr); } \
LocalPointerBase<Type>::ptr=src.ptr; \
src.ptr=NULL; \
return *this; \
} \
/* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
LocalPointerClassName &operator=(std::unique_ptr<Type, decltype(&closeFunction)> &&p) { \
adoptInstead(p.release()); \
return *this; \
} \
void swap(LocalPointerClassName &other) U_NOEXCEPT { \
Type *temp=LocalPointerBase<Type>::ptr; \
LocalPointerBase<Type>::ptr=other.ptr; \
other.ptr=temp; \
} \
friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \
p1.swap(p2); \
} \
void adoptInstead(Type *p) { \
if (ptr != NULL) { closeFunction(ptr); } \
ptr=p; \
} \
operator std::unique_ptr<Type, decltype(&closeFunction)> () && { \
return std::unique_ptr<Type, decltype(&closeFunction)>(LocalPointerBase<Type>::orphan(), closeFunction); \
} \
}
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif /* __LOCALPOINTER_H__ */

File diff suppressed because it is too large Load Diff

View File

@ -1,885 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1997-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : platform.h
*
* Date Name Description
* 05/13/98 nos Creation (content moved here from ptypes.h).
* 03/02/99 stephen Added AS400 support.
* 03/30/99 stephen Added Linux support.
* 04/13/99 stephen Reworked for autoconf.
******************************************************************************
*/
#ifndef _PLATFORM_H
#define _PLATFORM_H
#include "unicode/uconfig.h"
#include "unicode/uvernum.h"
/**
* \file
* \brief Basic types for the platform.
*
* This file used to be generated by autoconf/configure.
* Starting with ICU 49, platform.h is a normal source file,
* to simplify cross-compiling and working with non-autoconf/make build systems.
*
* When a value in this file does not work on a platform, then please
* try to derive it from the U_PLATFORM value
* (for which we might need a new value constant in rare cases)
* and/or from other macros that are predefined by the compiler
* or defined in standard (POSIX or platform or compiler) headers.
*
* As a temporary workaround, you can add an explicit \#define for some macros
* before it is first tested, or add an equivalent -D macro definition
* to the compiler's command line.
*
* Note: Some compilers provide ways to show the predefined macros.
* For example, with gcc you can compile an empty .c file and have the compiler
* print the predefined macros with
* \code
* gcc -E -dM -x c /dev/null | sort
* \endcode
* (You can provide an actual empty .c file rather than /dev/null.
* <code>-x c++</code> is for C++.)
*/
/**
* Define some things so that they can be documented.
* @internal
*/
#ifdef U_IN_DOXYGEN
/*
* Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented.
* Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented.
*/
/* None for now. */
#endif
/**
* \def U_PLATFORM
* The U_PLATFORM macro defines the platform we're on.
*
* We used to define one different, value-less macro per platform.
* That made it hard to know the set of relevant platforms and macros,
* and hard to deal with variants of platforms.
*
* Starting with ICU 49, we define platforms as numeric macros,
* with ranges of values for related platforms and their variants.
* The U_PLATFORM macro is set to one of these values.
*
* Historical note from the Solaris Wikipedia article:
* AT&T and Sun collaborated on a project to merge the most popular Unix variants
* on the market at that time: BSD, System V, and Xenix.
* This became Unix System V Release 4 (SVR4).
*
* @internal
*/
/** Unknown platform. @internal */
#define U_PF_UNKNOWN 0
/** Windows @internal */
#define U_PF_WINDOWS 1000
/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */
#define U_PF_MINGW 1800
/**
* Cygwin. Windows, calls to cygwin1.dll for Posix functions,
* using MSVC or GNU gcc and binutils.
* @internal
*/
#define U_PF_CYGWIN 1900
/* Reserve 2000 for U_PF_UNIX? */
/** HP-UX is based on UNIX System V. @internal */
#define U_PF_HPUX 2100
/** Solaris is a Unix operating system based on SVR4. @internal */
#define U_PF_SOLARIS 2600
/** BSD is a UNIX operating system derivative. @internal */
#define U_PF_BSD 3000
/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */
#define U_PF_AIX 3100
/** IRIX is based on UNIX System V with BSD extensions. @internal */
#define U_PF_IRIX 3200
/**
* Darwin is a POSIX-compliant operating system, composed of code developed by Apple,
* as well as code derived from NeXTSTEP, BSD, and other projects,
* built around the Mach kernel.
* Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based.
* (Original description modified from WikiPedia.)
* @internal
*/
#define U_PF_DARWIN 3500
/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */
#define U_PF_IPHONE 3550
/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */
#define U_PF_QNX 3700
/** Linux is a Unix-like operating system. @internal */
#define U_PF_LINUX 4000
/**
* Native Client is pretty close to Linux.
* See https://developer.chrome.com/native-client and
* http://www.chromium.org/nativeclient
* @internal
*/
#define U_PF_BROWSER_NATIVE_CLIENT 4020
/** Android is based on Linux. @internal */
#define U_PF_ANDROID 4050
/** Fuchsia is a POSIX-ish platform. @internal */
#define U_PF_FUCHSIA 4100
/* Maximum value for Linux-based platform is 4499 */
/**
* Emscripten is a C++ transpiler for the Web that can target asm.js or
* WebAssembly. It provides some POSIX-compatible wrappers and stubs and
* some Linux-like functionality, but is not fully compatible with
* either.
* @internal
*/
#define U_PF_EMSCRIPTEN 5010
/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
#define U_PF_OS390 9000
/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */
#define U_PF_OS400 9400
#ifdef U_PLATFORM
/* Use the predefined value. */
#elif defined(__MINGW32__)
# define U_PLATFORM U_PF_MINGW
#elif defined(__CYGWIN__)
# define U_PLATFORM U_PF_CYGWIN
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
# define U_PLATFORM U_PF_WINDOWS
#elif defined(__ANDROID__)
# define U_PLATFORM U_PF_ANDROID
/* Android wchar_t support depends on the API level. */
# include <android/api-level.h>
#elif defined(__pnacl__) || defined(__native_client__)
# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
#elif defined(__Fuchsia__)
# define U_PLATFORM U_PF_FUCHSIA
#elif defined(linux) || defined(__linux__) || defined(__linux)
# define U_PLATFORM U_PF_LINUX
#elif defined(__APPLE__) && defined(__MACH__)
# include <TargetConditionals.h>
# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */
# define U_PLATFORM U_PF_IPHONE
# else
# define U_PLATFORM U_PF_DARWIN
# endif
#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__)
# if defined(__FreeBSD__)
# include <sys/endian.h>
# endif
# define U_PLATFORM U_PF_BSD
#elif defined(sun) || defined(__sun)
/* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */
# define U_PLATFORM U_PF_SOLARIS
# if defined(__GNUC__)
/* Solaris/GCC needs this header file to get the proper endianness. Normally, this
* header file is included with stddef.h but on Solairs/GCC, the GCC version of stddef.h
* is included which does not include this header file.
*/
# include <sys/isa_defs.h>
# endif
#elif defined(_AIX) || defined(__TOS_AIX__)
# define U_PLATFORM U_PF_AIX
#elif defined(_hpux) || defined(hpux) || defined(__hpux)
# define U_PLATFORM U_PF_HPUX
#elif defined(sgi) || defined(__sgi)
# define U_PLATFORM U_PF_IRIX
#elif defined(__QNX__) || defined(__QNXNTO__)
# define U_PLATFORM U_PF_QNX
#elif defined(__TOS_MVS__)
# define U_PLATFORM U_PF_OS390
#elif defined(__OS400__) || defined(__TOS_OS400__)
# define U_PLATFORM U_PF_OS400
#elif defined(__EMSCRIPTEN__)
# define U_PLATFORM U_PF_EMSCRIPTEN
#else
# define U_PLATFORM U_PF_UNKNOWN
#endif
/**
* \def CYGWINMSVC
* Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
* Otherwise undefined.
* @internal
*/
/* Commented out because this is already set in mh-cygwin-msvc
#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER)
# define CYGWINMSVC
#endif
*/
#ifdef U_IN_DOXYGEN
# define CYGWINMSVC
#endif
/**
* \def U_PLATFORM_USES_ONLY_WIN32_API
* Defines whether the platform uses only the Win32 API.
* Set to 1 for Windows/MSVC and MinGW but not Cygwin.
* @internal
*/
#ifdef U_PLATFORM_USES_ONLY_WIN32_API
/* Use the predefined value. */
#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC)
# define U_PLATFORM_USES_ONLY_WIN32_API 1
#else
/* Cygwin implements POSIX. */
# define U_PLATFORM_USES_ONLY_WIN32_API 0
#endif
/**
* \def U_PLATFORM_HAS_WIN32_API
* Defines whether the Win32 API is available on the platform.
* Set to 1 for Windows/MSVC, MinGW and Cygwin.
* @internal
*/
#ifdef U_PLATFORM_HAS_WIN32_API
/* Use the predefined value. */
#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
# define U_PLATFORM_HAS_WIN32_API 1
#else
# define U_PLATFORM_HAS_WIN32_API 0
#endif
/**
* \def U_PLATFORM_HAS_WINUWP_API
* Defines whether target is intended for Universal Windows Platform API
* Set to 1 for Windows10 Release Solution Configuration
* @internal
*/
#ifdef U_PLATFORM_HAS_WINUWP_API
/* Use the predefined value. */
#else
# define U_PLATFORM_HAS_WINUWP_API 0
#endif
/**
* \def U_PLATFORM_IMPLEMENTS_POSIX
* Defines whether the platform implements (most of) the POSIX API.
* Set to 1 for Cygwin and most other platforms.
* @internal
*/
#ifdef U_PLATFORM_IMPLEMENTS_POSIX
/* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
# define U_PLATFORM_IMPLEMENTS_POSIX 0
#else
# define U_PLATFORM_IMPLEMENTS_POSIX 1
#endif
/**
* \def U_PLATFORM_IS_LINUX_BASED
* Defines whether the platform is Linux or one of its derivatives.
* @internal
*/
#ifdef U_PLATFORM_IS_LINUX_BASED
/* Use the predefined value. */
#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= 4499
# define U_PLATFORM_IS_LINUX_BASED 1
#else
# define U_PLATFORM_IS_LINUX_BASED 0
#endif
/**
* \def U_PLATFORM_IS_DARWIN_BASED
* Defines whether the platform is Darwin or one of its derivatives.
* @internal
*/
#ifdef U_PLATFORM_IS_DARWIN_BASED
/* Use the predefined value. */
#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE
# define U_PLATFORM_IS_DARWIN_BASED 1
#else
# define U_PLATFORM_IS_DARWIN_BASED 0
#endif
/**
* \def U_HAVE_STDINT_H
* Defines whether stdint.h is available. It is a C99 standard header.
* We used to include inttypes.h which includes stdint.h but we usually do not need
* the additional definitions from inttypes.h.
* @internal
*/
#ifdef U_HAVE_STDINT_H
/* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600)
/* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */
# define U_HAVE_STDINT_H 1
# else
# define U_HAVE_STDINT_H 0
# endif
#elif U_PLATFORM == U_PF_SOLARIS
/* Solaris has inttypes.h but not stdint.h. */
# define U_HAVE_STDINT_H 0
#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
/* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
# define U_HAVE_STDINT_H 0
#else
# define U_HAVE_STDINT_H 1
#endif
/**
* \def U_HAVE_INTTYPES_H
* Defines whether inttypes.h is available. It is a C99 standard header.
* We include inttypes.h where it is available but stdint.h is not.
* @internal
*/
#ifdef U_HAVE_INTTYPES_H
/* Use the predefined value. */
#elif U_PLATFORM == U_PF_SOLARIS
/* Solaris has inttypes.h but not stdint.h. */
# define U_HAVE_INTTYPES_H 1
#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
/* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
# define U_HAVE_INTTYPES_H 1
#else
/* Most platforms have both inttypes.h and stdint.h, or neither. */
# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
#endif
/*===========================================================================*/
/** @{ Compiler and environment features */
/*===========================================================================*/
/**
* \def U_GCC_MAJOR_MINOR
* Indicates whether the compiler is gcc (test for != 0),
* and if so, contains its major (times 100) and minor version numbers.
* If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0.
*
* For example, for testing for whether we have gcc, and whether it's 4.6 or higher,
* use "#if U_GCC_MAJOR_MINOR >= 406".
* @internal
*/
#ifdef __GNUC__
# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__)
#else
# define U_GCC_MAJOR_MINOR 0
#endif
/**
* \def U_IS_BIG_ENDIAN
* Determines the endianness of the platform.
* @internal
*/
#ifdef U_IS_BIG_ENDIAN
/* Use the predefined value. */
#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN)
# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
/* gcc */
# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
# define U_IS_BIG_ENDIAN 1
#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN)
# define U_IS_BIG_ENDIAN 0
#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__)
/* These platforms do not appear to predefine any endianness macros. */
# define U_IS_BIG_ENDIAN 1
#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0)
/* HPPA do not appear to predefine any endianness macros. */
# define U_IS_BIG_ENDIAN 1
#elif defined(sparc) || defined(__sparc) || defined(__sparc__)
/* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */
# define U_IS_BIG_ENDIAN 1
#else
# define U_IS_BIG_ENDIAN 0
#endif
/**
* \def U_HAVE_PLACEMENT_NEW
* Determines whether to override placement new and delete for STL.
* @stable ICU 2.6
*/
#ifdef U_HAVE_PLACEMENT_NEW
/* Use the predefined value. */
#elif defined(__BORLANDC__)
# define U_HAVE_PLACEMENT_NEW 0
#else
# define U_HAVE_PLACEMENT_NEW 1
#endif
/**
* \def U_HAVE_DEBUG_LOCATION_NEW
* Define this to define the MFC debug version of the operator new.
*
* @stable ICU 3.4
*/
#ifdef U_HAVE_DEBUG_LOCATION_NEW
/* Use the predefined value. */
#elif defined(_MSC_VER)
# define U_HAVE_DEBUG_LOCATION_NEW 1
#else
# define U_HAVE_DEBUG_LOCATION_NEW 0
#endif
/* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */
#ifdef __has_attribute
# define UPRV_HAS_ATTRIBUTE(x) __has_attribute(x)
#else
# define UPRV_HAS_ATTRIBUTE(x) 0
#endif
#ifdef __has_cpp_attribute
# define UPRV_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
#else
# define UPRV_HAS_CPP_ATTRIBUTE(x) 0
#endif
#ifdef __has_declspec_attribute
# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) __has_declspec_attribute(x)
#else
# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) 0
#endif
#ifdef __has_builtin
# define UPRV_HAS_BUILTIN(x) __has_builtin(x)
#else
# define UPRV_HAS_BUILTIN(x) 0
#endif
#ifdef __has_feature
# define UPRV_HAS_FEATURE(x) __has_feature(x)
#else
# define UPRV_HAS_FEATURE(x) 0
#endif
#ifdef __has_extension
# define UPRV_HAS_EXTENSION(x) __has_extension(x)
#else
# define UPRV_HAS_EXTENSION(x) 0
#endif
#ifdef __has_warning
# define UPRV_HAS_WARNING(x) __has_warning(x)
#else
# define UPRV_HAS_WARNING(x) 0
#endif
/**
* \def U_MALLOC_ATTR
* Attribute to mark functions as malloc-like
* @internal
*/
#if defined(__GNUC__) && __GNUC__>=3
# define U_MALLOC_ATTR __attribute__ ((__malloc__))
#else
# define U_MALLOC_ATTR
#endif
/**
* \def U_ALLOC_SIZE_ATTR
* Attribute to specify the size of the allocated buffer for malloc-like functions
* @internal
*/
#if (defined(__GNUC__) && \
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || \
UPRV_HAS_ATTRIBUTE(alloc_size)
# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X)))
# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y)))
#else
# define U_ALLOC_SIZE_ATTR(X)
# define U_ALLOC_SIZE_ATTR2(X,Y)
#endif
/**
* \def U_CPLUSPLUS_VERSION
* 0 if no C++; 1, 11, 14, ... if C++.
* Support for specific features cannot always be determined by the C++ version alone.
* @internal
*/
#ifdef U_CPLUSPLUS_VERSION
# if U_CPLUSPLUS_VERSION != 0 && !defined(__cplusplus)
# undef U_CPLUSPLUS_VERSION
# define U_CPLUSPLUS_VERSION 0
# endif
/* Otherwise use the predefined value. */
#elif !defined(__cplusplus)
# define U_CPLUSPLUS_VERSION 0
#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
# define U_CPLUSPLUS_VERSION 14
#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
# define U_CPLUSPLUS_VERSION 11
#else
// C++98 or C++03
# define U_CPLUSPLUS_VERSION 1
#endif
#if (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
// add in std::nullptr_t
namespace std {
typedef decltype(nullptr) nullptr_t;
};
#endif
/**
* \def U_NOEXCEPT
* "noexcept" if supported, otherwise empty.
* Some code, especially STL containers, uses move semantics of objects only
* if the move constructor and the move operator are declared as not throwing exceptions.
* @internal
*/
#ifdef U_NOEXCEPT
/* Use the predefined value. */
#else
# define U_NOEXCEPT noexcept
#endif
/**
* \def U_FALLTHROUGH
* Annotate intentional fall-through between switch labels.
* http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
* @internal
*/
#ifndef __cplusplus
// Not for C.
#elif defined(U_FALLTHROUGH)
// Use the predefined value.
#elif defined(__clang__)
// Test for compiler vs. feature separately.
// Other compilers might choke on the feature test.
# if UPRV_HAS_CPP_ATTRIBUTE(clang::fallthrough) || \
(UPRV_HAS_FEATURE(cxx_attributes) && \
UPRV_HAS_WARNING("-Wimplicit-fallthrough"))
# define U_FALLTHROUGH [[clang::fallthrough]]
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 7)
# define U_FALLTHROUGH __attribute__((fallthrough))
#endif
#ifndef U_FALLTHROUGH
# define U_FALLTHROUGH
#endif
/** @} */
/*===========================================================================*/
/** @{ Character data types */
/*===========================================================================*/
/**
* U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
* @stable ICU 2.0
*/
#define U_ASCII_FAMILY 0
/**
* U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
* @stable ICU 2.0
*/
#define U_EBCDIC_FAMILY 1
/**
* \def U_CHARSET_FAMILY
*
* <p>These definitions allow to specify the encoding of text
* in the char data type as defined by the platform and the compiler.
* It is enough to determine the code point values of "invariant characters",
* which are the ones shared by all encodings that are in use
* on a given platform.</p>
*
* <p>Those "invariant characters" should be all the uppercase and lowercase
* latin letters, the digits, the space, and "basic punctuation".
* Also, '\\n', '\\r', '\\t' should be available.</p>
*
* <p>The list of "invariant characters" is:<br>
* \code
* A-Z a-z 0-9 SPACE " % &amp; ' ( ) * + , - . / : ; < = > ? _
* \endcode
* <br>
* (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
*
* <p>This matches the IBM Syntactic Character Set (CS 640).</p>
*
* <p>In other words, all the graphic characters in 7-bit ASCII should
* be safely accessible except the following:</p>
*
* \code
* '\' <backslash>
* '[' <left bracket>
* ']' <right bracket>
* '{' <left brace>
* '}' <right brace>
* '^' <circumflex>
* '~' <tilde>
* '!' <exclamation mark>
* '#' <number sign>
* '|' <vertical line>
* '$' <dollar sign>
* '@' <commercial at>
* '`' <grave accent>
* \endcode
* @stable ICU 2.0
*/
#ifdef U_CHARSET_FAMILY
/* Use the predefined value. */
#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB)
# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__)
# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
#else
# define U_CHARSET_FAMILY U_ASCII_FAMILY
#endif
/**
* \def U_CHARSET_IS_UTF8
*
* Hardcode the default charset to UTF-8.
*
* If this is set to 1, then
* - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
* contain UTF-8 text, regardless of what the system API uses
* - some ICU code will use fast functions like u_strFromUTF8()
* rather than the more general and more heavy-weight conversion API (ucnv.h)
* - ucnv_getDefaultName() always returns "UTF-8"
* - ucnv_setDefaultName() is disabled and will not change the default charset
* - static builds of ICU are smaller
* - more functionality is available with the UCONFIG_NO_CONVERSION build-time
* configuration option (see unicode/uconfig.h)
* - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable
*
* @stable ICU 4.2
* @see UCONFIG_NO_CONVERSION
*/
#ifdef U_CHARSET_IS_UTF8
/* Use the predefined value. */
#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED || \
U_PLATFORM == U_PF_EMSCRIPTEN
# define U_CHARSET_IS_UTF8 1
#else
# define U_CHARSET_IS_UTF8 0
#endif
/** @} */
/*===========================================================================*/
/** @{ Information about wchar support */
/*===========================================================================*/
/**
* \def U_HAVE_WCHAR_H
* Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
*
* @stable ICU 2.0
*/
#ifdef U_HAVE_WCHAR_H
/* Use the predefined value. */
#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9
/*
* Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t.
* The type and header existed, but the library functions did not work as expected.
* The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway.
*/
# define U_HAVE_WCHAR_H 0
#else
# define U_HAVE_WCHAR_H 1
#endif
/**
* \def U_SIZEOF_WCHAR_T
* U_SIZEOF_WCHAR_T==sizeof(wchar_t)
*
* @stable ICU 2.0
*/
#ifdef U_SIZEOF_WCHAR_T
/* Use the predefined value. */
#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9)
/*
* Classic Mac OS and Mac OS X before 10.3 (Panther) did not support wchar_t or wstring.
* Newer Mac OS X has size 4.
*/
# define U_SIZEOF_WCHAR_T 1
#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN
# define U_SIZEOF_WCHAR_T 2
#elif U_PLATFORM == U_PF_AIX
/*
* AIX 6.1 information, section "Wide character data representation":
* "... the wchar_t datatype is 32-bit in the 64-bit environment and
* 16-bit in the 32-bit environment."
* and
* "All locales use Unicode for their wide character code values (process code),
* except the IBM-eucTW codeset."
*/
# ifdef __64BIT__
# define U_SIZEOF_WCHAR_T 4
# else
# define U_SIZEOF_WCHAR_T 2
# endif
#elif U_PLATFORM == U_PF_OS390
/*
* z/OS V1R11 information center, section "LP64 | ILP32":
* "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes.
* Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes."
*/
# ifdef _LP64
# define U_SIZEOF_WCHAR_T 4
# else
# define U_SIZEOF_WCHAR_T 2
# endif
#elif U_PLATFORM == U_PF_OS400
# if defined(__UTF32__)
/*
* LOCALETYPE(*LOCALEUTF) is specified.
* Wide-character strings are in UTF-32,
* narrow-character strings are in UTF-8.
*/
# define U_SIZEOF_WCHAR_T 4
# elif defined(__UCS2__)
/*
* LOCALETYPE(*LOCALEUCS2) is specified.
* Wide-character strings are in UCS-2,
* narrow-character strings are in EBCDIC.
*/
# define U_SIZEOF_WCHAR_T 2
# else
/*
* LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified.
* Wide-character strings are in 16-bit EBCDIC,
* narrow-character strings are in EBCDIC.
*/
# define U_SIZEOF_WCHAR_T 2
# endif
#else
# define U_SIZEOF_WCHAR_T 4
#endif
#ifndef U_HAVE_WCSCPY
#define U_HAVE_WCSCPY U_HAVE_WCHAR_H
#endif
/** @} */
/**
* \def U_HAVE_CHAR16_T
* Defines whether the char16_t type is available for UTF-16
* and u"abc" UTF-16 string literals are supported.
* This is a new standard type and standard string literal syntax in C++0x
* but has been available in some compilers before.
* @internal
*/
#ifdef U_HAVE_CHAR16_T
/* Use the predefined value. */
#else
/*
* Notes:
* Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef
* and does not support u"abc" string literals.
* Visual Studio 2015 (_MSC_VER>=1900) and above adds support for
* both char16_t and u"abc" string literals.
* gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
* does not support u"abc" string literals.
* C++11 and C11 require support for UTF-16 literals
* TODO: Fix for plain C. Doesn't work on Mac.
*/
# if U_CPLUSPLUS_VERSION >= 11 || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
# define U_HAVE_CHAR16_T 1
# else
# define U_HAVE_CHAR16_T 0
# endif
#endif
/**
* @{
* \def U_DECLARE_UTF16
* Do not use this macro because it is not defined on all platforms.
* Use the UNICODE_STRING or U_STRING_DECL macros instead.
* @internal
*/
#ifdef U_DECLARE_UTF16
/* Use the predefined value. */
#elif U_HAVE_CHAR16_T \
|| (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
|| (defined(__HP_aCC) && __HP_aCC >= 035000) \
|| (defined(__HP_cc) && __HP_cc >= 111106) \
|| (defined(U_IN_DOXYGEN))
# define U_DECLARE_UTF16(string) u ## string
#elif U_SIZEOF_WCHAR_T == 2 \
&& (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
# define U_DECLARE_UTF16(string) L ## string
#else
/* Leave U_DECLARE_UTF16 undefined. See unistr.h. */
#endif
/** @} */
/*===========================================================================*/
/** @{ Symbol import-export control */
/*===========================================================================*/
#ifdef U_EXPORT
/* Use the predefined value. */
#elif defined(U_STATIC_IMPLEMENTATION)
# define U_EXPORT
#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \
UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport))
# define U_EXPORT __declspec(dllexport)
#elif defined(__GNUC__)
# define U_EXPORT __attribute__((visibility("default")))
#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
|| (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550)
# define U_EXPORT __global
/*#elif defined(__HP_aCC) || defined(__HP_cc)
# define U_EXPORT __declspec(dllexport)*/
#else
# define U_EXPORT
#endif
/* U_CALLCONV is related to U_EXPORT2 */
#ifdef U_EXPORT2
/* Use the predefined value. */
#elif defined(_MSC_VER)
# define U_EXPORT2 __cdecl
#else
# define U_EXPORT2
#endif
#ifdef U_IMPORT
/* Use the predefined value. */
#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \
UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport))
/* Windows needs to export/import data. */
# define U_IMPORT __declspec(dllimport)
#else
# define U_IMPORT
#endif
/**
* \def U_CALLCONV
* Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
* in callback function typedefs to make sure that the calling convention
* is compatible.
*
* This is only used for non-ICU-API functions.
* When a function is a public ICU API,
* you must use the U_CAPI and U_EXPORT2 qualifiers.
*
* Please note, you need to use U_CALLCONV after the *.
*
* NO : "static const char U_CALLCONV *func( . . . )"
* YES: "static const char* U_CALLCONV func( . . . )"
*
* @stable ICU 2.0
*/
#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
# define U_CALLCONV __cdecl
#else
# define U_CALLCONV U_EXPORT2
#endif
/**
* \def U_CALLCONV_FPTR
* Similar to U_CALLCONV, but only used on function pointers.
* @internal
*/
#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
# define U_CALLCONV_FPTR U_CALLCONV
#else
# define U_CALLCONV_FPTR
#endif
/** @} */
#endif // _PLATFORM_H

View File

@ -1,130 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1997-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : ptypes.h
*
* Date Name Description
* 05/13/98 nos Creation (content moved here from ptypes.h).
* 03/02/99 stephen Added AS400 support.
* 03/30/99 stephen Added Linux support.
* 04/13/99 stephen Reworked for autoconf.
* 09/18/08 srl Moved basic types back to ptypes.h from platform.h
******************************************************************************
*/
/**
* \file
* \brief C API: Definitions of integer types of various widths
*/
#ifndef _PTYPES_H
#define _PTYPES_H
/**
* \def __STDC_LIMIT_MACROS
* According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations
* macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested.
* We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code
* that uses such limit macros.
* @internal
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
/* NULL, size_t, wchar_t */
#include <stddef.h>
/*
* If all compilers provided all of the C99 headers and types,
* we would just unconditionally #include <stdint.h> here
* and not need any of the stuff after including platform.h.
*/
/* Find out if we have stdint.h etc. */
#include "unicode/platform.h"
/*===========================================================================*/
/* Generic data types */
/*===========================================================================*/
/* If your platform does not have the <stdint.h> header, you may
need to edit the typedefs in the #else section below.
Use #if...#else...#endif with predefined compiler macros if possible. */
#if U_HAVE_STDINT_H
/*
* We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>.
* <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc.
* which we almost never use, plus stuff like imaxabs() which we never use.
*/
#include <stdint.h>
#if U_PLATFORM == U_PF_OS390
/* The features header is needed to get (u)int64_t sometimes. */
#include <features.h>
/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */
#if !defined(__uint8_t)
#define __uint8_t 1
typedef unsigned char uint8_t;
#endif
#endif /* U_PLATFORM == U_PF_OS390 */
#elif U_HAVE_INTTYPES_H
# include <inttypes.h>
#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */
/// \cond
#if ! U_HAVE_INT8_T
typedef signed char int8_t;
#endif
#if ! U_HAVE_UINT8_T
typedef unsigned char uint8_t;
#endif
#if ! U_HAVE_INT16_T
typedef signed short int16_t;
#endif
#if ! U_HAVE_UINT16_T
typedef unsigned short uint16_t;
#endif
#if ! U_HAVE_INT32_T
typedef signed int int32_t;
#endif
#if ! U_HAVE_UINT32_T
typedef unsigned int uint32_t;
#endif
#if ! U_HAVE_INT64_T
#ifdef _MSC_VER
typedef signed __int64 int64_t;
#else
typedef signed long long int64_t;
#endif
#endif
#if ! U_HAVE_UINT64_T
#ifdef _MSC_VER
typedef unsigned __int64 uint64_t;
#else
typedef unsigned long long uint64_t;
#endif
#endif
/// \endcond
#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */
#endif /* _PTYPES_H */

View File

@ -1,183 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1997-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : putil.h
*
* Date Name Description
* 05/14/98 nos Creation (content moved here from utypes.h).
* 06/17/99 erm Added IEEE_754
* 07/22/98 stephen Added IEEEremainder, max, min, trunc
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
* 08/24/98 stephen Added longBitsFromDouble
* 03/02/99 stephen Removed openFile(). Added AS400 support.
* 04/15/99 stephen Converted to C
* 11/15/99 helena Integrated S/390 changes for IEEE support.
* 01/11/00 helena Added u_getVersion.
******************************************************************************
*/
#ifndef PUTIL_H
#define PUTIL_H
#include "unicode/utypes.h"
/**
* \file
* \brief C API: Platform Utilities
*/
/*==========================================================================*/
/* Platform utilities */
/*==========================================================================*/
/**
* Platform utilities isolates the platform dependencies of the
* library. For each platform which this code is ported to, these
* functions may have to be re-implemented.
*/
/**
* Return the ICU data directory.
* The data directory is where common format ICU data files (.dat files)
* are loaded from. Note that normal use of the built-in ICU
* facilities does not require loading of an external data file;
* unless you are adding custom data to ICU, the data directory
* does not need to be set.
*
* The data directory is determined as follows:
* If u_setDataDirectory() has been called, that is it, otherwise
* if the ICU_DATA environment variable is set, use that, otherwise
* If a data directory was specified at ICU build time
* <code>
* \code
* #define ICU_DATA_DIR "path"
* \endcode
* </code> use that,
* otherwise no data directory is available.
*
* @return the data directory, or an empty string ("") if no data directory has
* been specified.
*
* @stable ICU 2.0
*/
U_CAPI const char* U_EXPORT2 u_getDataDirectory(void);
/**
* Set the ICU data directory.
* The data directory is where common format ICU data files (.dat files)
* are loaded from. Note that normal use of the built-in ICU
* facilities does not require loading of an external data file;
* unless you are adding custom data to ICU, the data directory
* does not need to be set.
*
* This function should be called at most once in a process, before the
* first ICU operation (e.g., u_init()) that will require the loading of an
* ICU data file.
* This function is not thread-safe. Use it before calling ICU APIs from
* multiple threads.
*
* @param directory The directory to be set.
*
* @see u_init
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory);
#ifndef U_HIDE_INTERNAL_API
/**
* Return the time zone files override directory, or an empty string if
* no directory was specified. Certain time zone resources will be preferentially
* loaded from individual files in this directory.
*
* @return the time zone data override directory.
* @internal
*/
U_CAPI const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status);
/**
* Set the time zone files override directory.
* This function is not thread safe; it must not be called concurrently with
* u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions.
* This function should only be called before using any ICU service that
* will access the time zone data.
* @internal
*/
U_CAPI void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status);
#endif /* U_HIDE_INTERNAL_API */
/**
* @{
* Filesystem file and path separator characters.
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
* @stable ICU 2.0
*/
#if U_PLATFORM_USES_ONLY_WIN32_API
# define U_FILE_SEP_CHAR '\\'
# define U_FILE_ALT_SEP_CHAR '/'
# define U_PATH_SEP_CHAR ';'
# define U_FILE_SEP_STRING "\\"
# define U_FILE_ALT_SEP_STRING "/"
# define U_PATH_SEP_STRING ";"
#else
# define U_FILE_SEP_CHAR '/'
# define U_FILE_ALT_SEP_CHAR '/'
# define U_PATH_SEP_CHAR ':'
# define U_FILE_SEP_STRING "/"
# define U_FILE_ALT_SEP_STRING "/"
# define U_PATH_SEP_STRING ":"
#endif
/** @} */
/**
* Convert char characters to UChar characters.
* This utility function is useful only for "invariant characters"
* that are encoded in the platform default encoding.
* They are a small, constant subset of the encoding and include
* just the latin letters, digits, and some punctuation.
* For details, see U_CHARSET_FAMILY.
*
* @param cs Input string, points to <code>length</code>
* character bytes from a subset of the platform encoding.
* @param us Output string, points to memory for <code>length</code>
* Unicode characters.
* @param length The number of characters to convert; this may
* include the terminating <code>NUL</code>.
*
* @see U_CHARSET_FAMILY
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
u_charsToUChars(const char *cs, UChar *us, int32_t length);
/**
* Convert UChar characters to char characters.
* This utility function is useful only for "invariant characters"
* that can be encoded in the platform default encoding.
* They are a small, constant subset of the encoding and include
* just the latin letters, digits, and some punctuation.
* For details, see U_CHARSET_FAMILY.
*
* @param us Input string, points to <code>length</code>
* Unicode characters that can be encoded with the
* codepage-invariant subset of the platform encoding.
* @param cs Output string, points to memory for <code>length</code>
* character bytes.
* @param length The number of characters to convert; this may
* include the terminating <code>NUL</code>.
*
* @see U_CHARSET_FAMILY
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar *us, char *cs, int32_t length);
#endif

View File

@ -1,266 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**************************************************************************
* Copyright (C) 1999-2012, International Business Machines Corporation and
* others. All Rights Reserved.
**************************************************************************
* Date Name Description
* 11/17/99 aliu Creation. Ported from java. Modified to
* match current UnicodeString API. Forced
* to use name "handleReplaceBetween" because
* of existing methods in UnicodeString.
**************************************************************************
*/
#ifndef REP_H
#define REP_H
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/uobject.h"
/**
* \file
* \brief C++ API: Replaceable String
*/
U_NAMESPACE_BEGIN
class UnicodeString;
/**
* <code>Replaceable</code> is an abstract base class representing a
* string of characters that supports the replacement of a range of
* itself with a new string of characters. It is used by APIs that
* change a piece of text while retaining metadata. Metadata is data
* other than the Unicode characters returned by char32At(). One
* example of metadata is style attributes; another is an edit
* history, marking each character with an author and revision number.
*
* <p>An implicit aspect of the <code>Replaceable</code> API is that
* during a replace operation, new characters take on the metadata of
* the old characters. For example, if the string "the <b>bold</b>
* font" has range (4, 8) replaced with "strong", then it becomes "the
* <b>strong</b> font".
*
* <p><code>Replaceable</code> specifies ranges using a start
* offset and a limit offset. The range of characters thus specified
* includes the characters at offset start..limit-1. That is, the
* start offset is inclusive, and the limit offset is exclusive.
*
* <p><code>Replaceable</code> also includes API to access characters
* in the string: <code>length()</code>, <code>charAt()</code>,
* <code>char32At()</code>, and <code>extractBetween()</code>.
*
* <p>For a subclass to support metadata, typical behavior of
* <code>replace()</code> is the following:
* <ul>
* <li>Set the metadata of the new text to the metadata of the first
* character replaced</li>
* <li>If no characters are replaced, use the metadata of the
* previous character</li>
* <li>If there is no previous character (i.e. start == 0), use the
* following character</li>
* <li>If there is no following character (i.e. the replaceable was
* empty), use default metadata.<br>
* <li>If the code point U+FFFF is seen, it should be interpreted as
* a special marker having no metadata<li>
* </li>
* </ul>
* If this is not the behavior, the subclass should document any differences.
* @author Alan Liu
* @stable ICU 2.0
*/
class U_COMMON_API Replaceable : public UObject {
public:
/**
* Destructor.
* @stable ICU 2.0
*/
virtual ~Replaceable();
/**
* Returns the number of 16-bit code units in the text.
* @return number of 16-bit code units in text
* @stable ICU 1.8
*/
inline int32_t length() const;
/**
* Returns the 16-bit code unit at the given offset into the text.
* @param offset an integer between 0 and <code>length()</code>-1
* inclusive
* @return 16-bit code unit of text at given offset
* @stable ICU 1.8
*/
inline char16_t charAt(int32_t offset) const;
/**
* Returns the 32-bit code point at the given 16-bit offset into
* the text. This assumes the text is stored as 16-bit code units
* with surrogate pairs intermixed. If the offset of a leading or
* trailing code unit of a surrogate pair is given, return the
* code point of the surrogate pair.
*
* @param offset an integer between 0 and <code>length()</code>-1
* inclusive
* @return 32-bit code point of text at given offset
* @stable ICU 1.8
*/
inline UChar32 char32At(int32_t offset) const;
/**
* Copies characters in the range [<tt>start</tt>, <tt>limit</tt>)
* into the UnicodeString <tt>target</tt>.
* @param start offset of first character which will be copied
* @param limit offset immediately following the last character to
* be copied
* @param target UnicodeString into which to copy characters.
* @return A reference to <TT>target</TT>
* @stable ICU 2.1
*/
virtual void extractBetween(int32_t start,
int32_t limit,
UnicodeString& target) const = 0;
/**
* Replaces a substring of this object with the given text. If the
* characters being replaced have metadata, the new characters
* that replace them should be given the same metadata.
*
* <p>Subclasses must ensure that if the text between start and
* limit is equal to the replacement text, that replace has no
* effect. That is, any metadata
* should be unaffected. In addition, subclasses are encouraged to
* check for initial and trailing identical characters, and make a
* smaller replacement if possible. This will preserve as much
* metadata as possible.
* @param start the beginning index, inclusive; <code>0 <= start
* <= limit</code>.
* @param limit the ending index, exclusive; <code>start <= limit
* <= length()</code>.
* @param text the text to replace characters <code>start</code>
* to <code>limit - 1</code>
* @stable ICU 2.0
*/
virtual void handleReplaceBetween(int32_t start,
int32_t limit,
const UnicodeString& text) = 0;
// Note: All other methods in this class take the names of
// existing UnicodeString methods. This method is the exception.
// It is named differently because all replace methods of
// UnicodeString return a UnicodeString&. The 'between' is
// required in order to conform to the UnicodeString naming
// convention; API taking start/length are named <operation>, and
// those taking start/limit are named <operationBetween>. The
// 'handle' is added because 'replaceBetween' and
// 'doReplaceBetween' are already taken.
/**
* Copies a substring of this object, retaining metadata.
* This method is used to duplicate or reorder substrings.
* The destination index must not overlap the source range.
*
* @param start the beginning index, inclusive; <code>0 <= start <=
* limit</code>.
* @param limit the ending index, exclusive; <code>start <= limit <=
* length()</code>.
* @param dest the destination index. The characters from
* <code>start..limit-1</code> will be copied to <code>dest</code>.
* Implementations of this method may assume that <code>dest <= start ||
* dest >= limit</code>.
* @stable ICU 2.0
*/
virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
/**
* Returns true if this object contains metadata. If a
* Replaceable object has metadata, calls to the Replaceable API
* must be made so as to preserve metadata. If it does not, calls
* to the Replaceable API may be optimized to improve performance.
* The default implementation returns true.
* @return true if this object contains metadata
* @stable ICU 2.2
*/
virtual UBool hasMetaData() const;
/**
* Clone this object, an instance of a subclass of Replaceable.
* Clones can be used concurrently in multiple threads.
* If a subclass does not implement clone(), or if an error occurs,
* then NULL is returned.
* The caller must delete the clone.
*
* @return a clone of this object
*
* @see getDynamicClassID
* @stable ICU 2.6
*/
virtual Replaceable *clone() const;
protected:
/**
* Default constructor.
* @stable ICU 2.4
*/
inline Replaceable();
/*
* Assignment operator not declared. The compiler will provide one
* which does nothing since this class does not contain any data members.
* API/code coverage may show the assignment operator as present and
* untested - ignore.
* Subclasses need this assignment operator if they use compiler-provided
* assignment operators of their own. An alternative to not declaring one
* here would be to declare and empty-implement a protected or public one.
Replaceable &Replaceable::operator=(const Replaceable &);
*/
/**
* Virtual version of length().
* @stable ICU 2.4
*/
virtual int32_t getLength() const = 0;
/**
* Virtual version of charAt().
* @stable ICU 2.4
*/
virtual char16_t getCharAt(int32_t offset) const = 0;
/**
* Virtual version of char32At().
* @stable ICU 2.4
*/
virtual UChar32 getChar32At(int32_t offset) const = 0;
};
inline Replaceable::Replaceable() {}
inline int32_t
Replaceable::length() const {
return getLength();
}
inline char16_t
Replaceable::charAt(int32_t offset) const {
return getCharAt(offset);
}
inline UChar32
Replaceable::char32At(int32_t offset) const {
return getChar32At(offset);
}
// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif

View File

@ -1,41 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 2009-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: std_string.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009feb19
* created by: Markus W. Scherer
*/
#ifndef __STD_STRING_H__
#define __STD_STRING_H__
/**
* \file
* \brief C++ API: Central ICU header for including the C++ standard &lt;string&gt;
* header and for related definitions.
*/
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
// Workaround for a libstdc++ bug before libstdc++4.6 (2011).
// https://bugs.llvm.org/show_bug.cgi?id=13364
#if defined(__GLIBCXX__)
namespace std { class type_info; }
#endif
#include <string>
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // __STD_STRING_H__

View File

@ -1,281 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 2002-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*/
#ifndef STRENUM_H
#define STRENUM_H
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/uobject.h"
#include "unicode/unistr.h"
/**
* \file
* \brief C++ API: String Enumeration
*/
U_NAMESPACE_BEGIN
/**
* Base class for 'pure' C++ implementations of uenum api. Adds a
* method that returns the next UnicodeString since in C++ this can
* be a common storage format for strings.
*
* <p>The model is that the enumeration is over strings maintained by
* a 'service.' At any point, the service might change, invalidating
* the enumerator (though this is expected to be rare). The iterator
* returns an error if this has occurred. Lack of the error is no
* guarantee that the service didn't change immediately after the
* call, so the returned string still might not be 'valid' on
* subsequent use.</p>
*
* <p>Strings may take the form of const char*, const char16_t*, or const
* UnicodeString*. The type you get is determine by the variant of
* 'next' that you call. In general the StringEnumeration is
* optimized for one of these types, but all StringEnumerations can
* return all types. Returned strings are each terminated with a NUL.
* Depending on the service data, they might also include embedded NUL
* characters, so API is provided to optionally return the true
* length, counting the embedded NULs but not counting the terminating
* NUL.</p>
*
* <p>The pointers returned by next, unext, and snext become invalid
* upon any subsequent call to the enumeration's destructor, next,
* unext, snext, or reset.</p>
*
* ICU 2.8 adds some default implementations and helper functions
* for subclasses.
*
* @stable ICU 2.4
*/
class U_COMMON_API StringEnumeration : public UObject {
public:
/**
* Destructor.
* @stable ICU 2.4
*/
virtual ~StringEnumeration();
/**
* Clone this object, an instance of a subclass of StringEnumeration.
* Clones can be used concurrently in multiple threads.
* If a subclass does not implement clone(), or if an error occurs,
* then NULL is returned.
* The caller must delete the clone.
*
* @return a clone of this object
*
* @see getDynamicClassID
* @stable ICU 2.8
*/
virtual StringEnumeration *clone() const;
/**
* <p>Return the number of elements that the iterator traverses. If
* the iterator is out of sync with its service, status is set to
* U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
*
* <p>The return value will not change except possibly as a result of
* a subsequent call to reset, or if the iterator becomes out of sync.</p>
*
* <p>This is a convenience function. It can end up being very
* expensive as all the items might have to be pre-fetched
* (depending on the storage format of the data being
* traversed).</p>
*
* @param status the error code.
* @return number of elements in the iterator.
*
* @stable ICU 2.4 */
virtual int32_t count(UErrorCode& status) const = 0;
/**
* <p>Returns the next element as a NUL-terminated char*. If there
* are no more elements, returns NULL. If the resultLength pointer
* is not NULL, the length of the string (not counting the
* terminating NUL) is returned at that address. If an error
* status is returned, the value at resultLength is undefined.</p>
*
* <p>The returned pointer is owned by this iterator and must not be
* deleted by the caller. The pointer is valid until the next call
* to next, unext, snext, reset, or the enumerator's destructor.</p>
*
* <p>If the iterator is out of sync with its service, status is set
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
*
* <p>If the native service string is a char16_t* string, it is
* converted to char* with the invariant converter. If the
* conversion fails (because a character cannot be converted) then
* status is set to U_INVARIANT_CONVERSION_ERROR and the return
* value is undefined (though not NULL).</p>
*
* Starting with ICU 2.8, the default implementation calls snext()
* and handles the conversion.
* Either next() or snext() must be implemented differently by a subclass.
*
* @param status the error code.
* @param resultLength a pointer to receive the length, can be NULL.
* @return a pointer to the string, or NULL.
*
* @stable ICU 2.4
*/
virtual const char* next(int32_t *resultLength, UErrorCode& status);
/**
* <p>Returns the next element as a NUL-terminated char16_t*. If there
* are no more elements, returns NULL. If the resultLength pointer
* is not NULL, the length of the string (not counting the
* terminating NUL) is returned at that address. If an error
* status is returned, the value at resultLength is undefined.</p>
*
* <p>The returned pointer is owned by this iterator and must not be
* deleted by the caller. The pointer is valid until the next call
* to next, unext, snext, reset, or the enumerator's destructor.</p>
*
* <p>If the iterator is out of sync with its service, status is set
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
*
* Starting with ICU 2.8, the default implementation calls snext()
* and handles the conversion.
*
* @param status the error code.
* @param resultLength a ponter to receive the length, can be NULL.
* @return a pointer to the string, or NULL.
*
* @stable ICU 2.4
*/
virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status);
/**
* <p>Returns the next element a UnicodeString*. If there are no
* more elements, returns NULL.</p>
*
* <p>The returned pointer is owned by this iterator and must not be
* deleted by the caller. The pointer is valid until the next call
* to next, unext, snext, reset, or the enumerator's destructor.</p>
*
* <p>If the iterator is out of sync with its service, status is set
* to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
*
* Starting with ICU 2.8, the default implementation calls next()
* and handles the conversion.
* Either next() or snext() must be implemented differently by a subclass.
*
* @param status the error code.
* @return a pointer to the string, or NULL.
*
* @stable ICU 2.4
*/
virtual const UnicodeString* snext(UErrorCode& status);
/**
* <p>Resets the iterator. This re-establishes sync with the
* service and rewinds the iterator to start at the first
* element.</p>
*
* <p>Previous pointers returned by next, unext, or snext become
* invalid, and the value returned by count might change.</p>
*
* @param status the error code.
*
* @stable ICU 2.4
*/
virtual void reset(UErrorCode& status) = 0;
/**
* Compares this enumeration to other to check if both are equal
*
* @param that The other string enumeration to compare this object to
* @return true if the enumerations are equal. false if not.
* @stable ICU 3.6
*/
virtual UBool operator==(const StringEnumeration& that)const;
/**
* Compares this enumeration to other to check if both are not equal
*
* @param that The other string enumeration to compare this object to
* @return true if the enumerations are equal. false if not.
* @stable ICU 3.6
*/
virtual UBool operator!=(const StringEnumeration& that)const;
protected:
/**
* UnicodeString field for use with default implementations and subclasses.
* @stable ICU 2.8
*/
UnicodeString unistr;
/**
* char * default buffer for use with default implementations and subclasses.
* @stable ICU 2.8
*/
char charsBuffer[32];
/**
* char * buffer for use with default implementations and subclasses.
* Allocated in constructor and in ensureCharsCapacity().
* @stable ICU 2.8
*/
char *chars;
/**
* Capacity of chars, for use with default implementations and subclasses.
* @stable ICU 2.8
*/
int32_t charsCapacity;
/**
* Default constructor for use with default implementations and subclasses.
* @stable ICU 2.8
*/
StringEnumeration();
/**
* Ensures that chars is at least as large as the requested capacity.
* For use with default implementations and subclasses.
*
* @param capacity Requested capacity.
* @param status ICU in/out error code.
* @stable ICU 2.8
*/
void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
/**
* Converts s to Unicode and sets unistr to the result.
* For use with default implementations and subclasses,
* especially for implementations of snext() in terms of next().
* This is provided with a helper function instead of a default implementation
* of snext() to avoid potential infinite loops between next() and snext().
*
* For example:
* \code
* const UnicodeString* snext(UErrorCode& status) {
* int32_t resultLength=0;
* const char *s=next(&resultLength, status);
* return setChars(s, resultLength, status);
* }
* \endcode
*
* @param s String to be converted to Unicode.
* @param length Length of the string.
* @param status ICU in/out error code.
* @return A pointer to unistr.
* @stable ICU 2.8
*/
UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
};
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
/* STRENUM_H */
#endif

View File

@ -1,190 +0,0 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// stringoptions.h
// created: 2017jun08 Markus W. Scherer
#ifndef __STRINGOPTIONS_H__
#define __STRINGOPTIONS_H__
#include "unicode/utypes.h"
/**
* \file
* \brief C API: Bit set option bit constants for various string and character processing functions.
*/
/**
* Option value for case folding: Use default mappings defined in CaseFolding.txt.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_DEFAULT 0
/**
* Option value for case folding:
*
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
* and dotless i appropriately for Turkic languages (tr, az).
*
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
* are to be included for default mappings and
* excluded for the Turkic-specific mappings.
*
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
* are to be excluded for default mappings and
* included for the Turkic-specific mappings.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
/**
* Titlecase the string as a whole rather than each word.
* (Titlecase only the character at index 0, possibly adjusted.)
* Option bits value for titlecasing APIs that take an options bit set.
*
* It is an error to specify multiple titlecasing iterator options together,
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @stable ICU 60
*/
#define U_TITLECASE_WHOLE_STRING 0x20
/**
* Titlecase sentences rather than words.
* (Titlecase only the first character of each sentence, possibly adjusted.)
* Option bits value for titlecasing APIs that take an options bit set.
*
* It is an error to specify multiple titlecasing iterator options together,
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @stable ICU 60
*/
#define U_TITLECASE_SENTENCES 0x40
/**
* Do not lowercase non-initial parts of words when titlecasing.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will titlecase the character at each
* (possibly adjusted) BreakIterator index and
* lowercase all other characters up to the next iterator index.
* With this option, the other characters will not be modified.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @see UnicodeString::toTitle
* @see CaseMap::toTitle
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_LOWERCASE 0x100
/**
* Do not adjust the titlecasing BreakIterator indexes;
* titlecase exactly the characters at breaks from the iterator.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will take each break iterator index,
* adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
* and titlecase that one.
*
* Other characters are lowercased.
*
* It is an error to specify multiple titlecasing adjustment options together.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @see U_TITLECASE_NO_LOWERCASE
* @see UnicodeString::toTitle
* @see CaseMap::toTitle
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
/**
* Adjust each titlecasing BreakIterator index to the next cased character.
* (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
* Option bit for titlecasing APIs that take an options bit set.
*
* This used to be the default index adjustment in ICU.
* Since ICU 60, the default index adjustment is to the next character that is
* a letter, number, symbol, or private use code point.
* (Uncased modifier letters are skipped.)
* The difference in behavior is small for word titlecasing,
* but the new adjustment is much better for whole-string and sentence titlecasing:
* It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
*
* It is an error to specify multiple titlecasing adjustment options together.
*
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
* @stable ICU 60
*/
#define U_TITLECASE_ADJUST_TO_CASED 0x400
/**
* Option for string transformation functions to not first reset the Edits object.
* Used for example in some case-mapping and normalization functions.
*
* @see CaseMap
* @see Edits
* @see Normalizer2
* @stable ICU 60
*/
#define U_EDITS_NO_RESET 0x2000
/**
* Omit unchanged text when recording how source substrings
* relate to changed and unchanged result substrings.
* Used for example in some case-mapping and normalization functions.
*
* @see CaseMap
* @see Edits
* @see Normalizer2
* @stable ICU 60
*/
#define U_OMIT_UNCHANGED_TEXT 0x4000
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
* @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
* @stable ICU 2.2
*/
#define U_COMPARE_IGNORE_CASE 0x10000
/**
* Option bit for unorm_compare:
* Both input strings are assumed to fulfill FCD conditions.
* @stable ICU 2.2
*/
#define UNORM_INPUT_IS_FCD 0x20000
// Related definitions elsewhere.
// Options that are not meaningful in the same functions
// can share the same bits.
//
// Public:
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
//
// Internal: (may change or be removed)
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
#endif // __STRINGOPTIONS_H__

View File

@ -1,343 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// Copyright (C) 2009-2013, International Business Machines
// Corporation and others. All Rights Reserved.
//
// Copyright 2001 and onwards Google Inc.
// Author: Sanjay Ghemawat
// This code is a contribution of Google code, and the style used here is
// a compromise between the original Google code and the ICU coding guidelines.
// For example, data types are ICU-ified (size_t,int->int32_t),
// and API comments doxygen-ified, but function names and behavior are
// as in the original, if possible.
// Assertion-style error handling, not available in ICU, was changed to
// parameter "pinning" similar to UnicodeString.
//
// In addition, this is only a partial port of the original Google code,
// limited to what was needed so far. The (nearly) complete original code
// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
// (see ICU ticket 6765, r25517).
#ifndef __STRINGPIECE_H__
#define __STRINGPIECE_H__
/**
* \file
* \brief C++ API: StringPiece: Read-only byte string wrapper class.
*/
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include <cstddef>
#include <type_traits>
#include "unicode/uobject.h"
#include "unicode/std_string.h"
// Arghh! I wish C++ literals were "string".
U_NAMESPACE_BEGIN
/**
* A string-like object that points to a sized piece of memory.
*
* We provide non-explicit singleton constructors so users can pass
* in a "const char*" or a "string" wherever a "StringPiece" is
* expected.
*
* Functions or methods may use StringPiece parameters to accept either a
* "const char*" or a "string" value that will be implicitly converted to a
* StringPiece.
*
* Systematic usage of StringPiece is encouraged as it will reduce unnecessary
* conversions from "const char*" to "string" and back again.
*
* @stable ICU 4.2
*/
class U_COMMON_API StringPiece : public UMemory {
private:
const char* ptr_;
int32_t length_;
public:
/**
* Default constructor, creates an empty StringPiece.
* @stable ICU 4.2
*/
StringPiece() : ptr_(nullptr), length_(0) { }
/**
* Constructs from a NUL-terminated const char * pointer.
* @param str a NUL-terminated const char * pointer
* @stable ICU 4.2
*/
StringPiece(const char* str);
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a NUL-terminated const char8_t * pointer.
* @param str a NUL-terminated const char8_t * pointer
* @stable ICU 67
*/
StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
#endif
/**
* Constructs an empty StringPiece.
* Needed for type disambiguation from multiple other overloads.
* @param p nullptr
* @stable ICU 67
*/
StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
/**
* Constructs from a std::string.
* @stable ICU 4.2
*/
StringPiece(const std::string& str)
: ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a std::u8string.
* @stable ICU 67
*/
StringPiece(const std::u8string& str)
: ptr_(reinterpret_cast<const char*>(str.data())),
length_(static_cast<int32_t>(str.size())) { }
#endif
/**
* Constructs from some other implementation of a string piece class, from any
* C++ record type that has these two methods:
*
* \code{.cpp}
*
* struct OtherStringPieceClass {
* const char* data(); // or const char8_t*
* size_t size();
* };
*
* \endcode
*
* The other string piece class will typically be std::string_view from C++17
* or absl::string_view from Abseil.
*
* Starting with C++20, data() may also return a const char8_t* pointer,
* as from std::u8string_view.
*
* @param str the other string piece
* @stable ICU 65
*/
template <typename T,
typename = typename std::enable_if<
(std::is_same<decltype(T().data()), const char*>::value
#if defined(__cpp_char8_t)
|| std::is_same<decltype(T().data()), const char8_t*>::value
#endif
) &&
std::is_same<decltype(T().size()), size_t>::value>::type>
StringPiece(T str)
: ptr_(reinterpret_cast<const char*>(str.data())),
length_(static_cast<int32_t>(str.size())) {}
/**
* Constructs from a const char * pointer and a specified length.
* @param offset a const char * pointer (need not be terminated)
* @param len the length of the string; must be non-negative
* @stable ICU 4.2
*/
StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a const char8_t * pointer and a specified length.
* @param str a const char8_t * pointer (need not be terminated)
* @param len the length of the string; must be non-negative
* @stable ICU 67
*/
StringPiece(const char8_t* str, int32_t len) :
StringPiece(reinterpret_cast<const char*>(str), len) {}
#endif
/**
* Substring of another StringPiece.
* @param x the other StringPiece
* @param pos start position in x; must be non-negative and <= x.length().
* @stable ICU 4.2
*/
StringPiece(const StringPiece& x, int32_t pos);
/**
* Substring of another StringPiece.
* @param x the other StringPiece
* @param pos start position in x; must be non-negative and <= x.length().
* @param len length of the substring;
* must be non-negative and will be pinned to at most x.length() - pos.
* @stable ICU 4.2
*/
StringPiece(const StringPiece& x, int32_t pos, int32_t len);
/**
* Returns the string pointer. May be nullptr if it is empty.
*
* data() may return a pointer to a buffer with embedded NULs, and the
* returned buffer may or may not be null terminated. Therefore it is
* typically a mistake to pass data() to a routine that expects a NUL
* terminated string.
* @return the string pointer
* @stable ICU 4.2
*/
const char* data() const { return ptr_; }
/**
* Returns the string length. Same as length().
* @return the string length
* @stable ICU 4.2
*/
int32_t size() const { return length_; }
/**
* Returns the string length. Same as size().
* @return the string length
* @stable ICU 4.2
*/
int32_t length() const { return length_; }
/**
* Returns whether the string is empty.
* @return true if the string is empty
* @stable ICU 4.2
*/
UBool empty() const { return length_ == 0; }
/**
* Sets to an empty string.
* @stable ICU 4.2
*/
void clear() { ptr_ = nullptr; length_ = 0; }
/**
* Reset the stringpiece to refer to new data.
* @param xdata pointer the new string data. Need not be nul terminated.
* @param len the length of the new data
* @stable ICU 4.8
*/
void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
/**
* Reset the stringpiece to refer to new data.
* @param str a pointer to a NUL-terminated string.
* @stable ICU 4.8
*/
void set(const char* str);
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Resets the stringpiece to refer to new data.
* @param xdata pointer the new string data. Need not be NUL-terminated.
* @param len the length of the new data
* @stable ICU 67
*/
inline void set(const char8_t* xdata, int32_t len) {
set(reinterpret_cast<const char*>(xdata), len);
}
/**
* Resets the stringpiece to refer to new data.
* @param str a pointer to a NUL-terminated string.
* @stable ICU 67
*/
inline void set(const char8_t* str) {
set(reinterpret_cast<const char*>(str));
}
#endif
/**
* Removes the first n string units.
* @param n prefix length, must be non-negative and <=length()
* @stable ICU 4.2
*/
void remove_prefix(int32_t n) {
if (n >= 0) {
if (n > length_) {
n = length_;
}
ptr_ += n;
length_ -= n;
}
}
/**
* Removes the last n string units.
* @param n suffix length, must be non-negative and <=length()
* @stable ICU 4.2
*/
void remove_suffix(int32_t n) {
if (n >= 0) {
if (n <= length_) {
length_ -= n;
} else {
length_ = 0;
}
}
}
/**
* Searches the StringPiece for the given search string (needle);
* @param needle The string for which to search.
* @param offset Where to start searching within this string (haystack).
* @return The offset of needle in haystack, or -1 if not found.
* @stable ICU 67
*/
int32_t find(StringPiece needle, int32_t offset);
/**
* Compares this StringPiece with the other StringPiece, with semantics
* similar to std::string::compare().
* @param other The string to compare to.
* @return below zero if this < other; above zero if this > other; 0 if this == other.
* @stable ICU 67
*/
int32_t compare(StringPiece other);
/**
* Maximum integer, used as a default value for substring methods.
* @stable ICU 4.2
*/
static const int32_t npos; // = 0x7fffffff;
/**
* Returns a substring of this StringPiece.
* @param pos start position; must be non-negative and <= length().
* @param len length of the substring;
* must be non-negative and will be pinned to at most length() - pos.
* @return the substring StringPiece
* @stable ICU 4.2
*/
StringPiece substr(int32_t pos, int32_t len = npos) const {
return StringPiece(*this, pos, len);
}
};
/**
* Global operator == for StringPiece
* @param x The first StringPiece to compare.
* @param y The second StringPiece to compare.
* @return true if the string data is equal
* @stable ICU 4.8
*/
U_EXPORT UBool U_EXPORT2
operator==(const StringPiece& x, const StringPiece& y);
/**
* Global operator != for StringPiece
* @param x The first StringPiece to compare.
* @param y The second StringPiece to compare.
* @return true if the string data is not equal
* @stable ICU 4.8
*/
inline UBool operator!=(const StringPiece& x, const StringPiece& y) {
return !(x == y);
}
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif // __STRINGPIECE_H__

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,465 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1999-2009, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
*
* ucnv_err.h:
*/
/**
* \file
* \brief C UConverter predefined error callbacks
*
* <h2>Error Behaviour Functions</h2>
* Defines some error behaviour functions called by ucnv_{from,to}Unicode
* These are provided as part of ICU and many are stable, but they
* can also be considered only as an example of what can be done with
* callbacks. You may of course write your own.
*
* If you want to write your own, you may also find the functions from
* ucnv_cb.h useful when writing your own callbacks.
*
* These functions, although public, should NEVER be called directly.
* They should be used as parameters to the ucnv_setFromUCallback
* and ucnv_setToUCallback functions, to set the behaviour of a converter
* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
*
* usage example: 'STOP' doesn't need any context, but newContext
* could be set to something other than 'NULL' if needed. The available
* contexts in this header can modify the default behavior of the callback.
*
* \code
* UErrorCode err = U_ZERO_ERROR;
* UConverter *myConverter = ucnv_open("ibm-949", &err);
* const void *oldContext;
* UConverterFromUCallback oldAction;
*
*
* if (U_SUCCESS(err))
* {
* ucnv_setFromUCallBack(myConverter,
* UCNV_FROM_U_CALLBACK_STOP,
* NULL,
* &oldAction,
* &oldContext,
* &status);
* }
* \endcode
*
* The code above tells "myConverter" to stop when it encounters an
* ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
* Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
* and ucnv_setToUCallBack would need to be called in order to change
* that behavior too.
*
* Here is an example with a context:
*
* \code
* UErrorCode err = U_ZERO_ERROR;
* UConverter *myConverter = ucnv_open("ibm-949", &err);
* const void *oldContext;
* UConverterFromUCallback oldAction;
*
*
* if (U_SUCCESS(err))
* {
* ucnv_setToUCallBack(myConverter,
* UCNV_TO_U_CALLBACK_SUBSTITUTE,
* UCNV_SUB_STOP_ON_ILLEGAL,
* &oldAction,
* &oldContext,
* &status);
* }
* \endcode
*
* The code above tells "myConverter" to stop when it encounters an
* ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
* Codepage -> Unicode. Any unmapped and legal characters will be
* substituted to be the default substitution character.
*/
#ifndef UCNV_ERR_H
#define UCNV_ERR_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_CONVERSION
/** Forward declaring the UConverter structure. @stable ICU 2.0 */
struct UConverter;
/** @stable ICU 2.0 */
typedef struct UConverter UConverter;
/**
* FROM_U, TO_U context options for sub callback
* @stable ICU 2.0
*/
#define UCNV_SUB_STOP_ON_ILLEGAL "i"
/**
* FROM_U, TO_U context options for skip callback
* @stable ICU 2.0
*/
#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_ICU NULL
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_JAVA "J"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
* TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_C "C"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_XML_DEC "D"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
* TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_XML_HEX "X"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
* @stable ICU 2.0
*/
#define UCNV_ESCAPE_UNICODE "U"
/**
* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
* a backslash, 1..6 hex digits, and a space)
* @stable ICU 4.0
*/
#define UCNV_ESCAPE_CSS2 "S"
/**
* The process condition code to be used with the callbacks.
* Codes which are greater than UCNV_IRREGULAR should be
* passed on to any chained callbacks.
* @stable ICU 2.0
*/
typedef enum {
UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
The error code U_INVALID_CHAR_FOUND will be set. */
UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
\\x81\\x2E is illegal in SJIS because \\x2E
is not a valid trail byte for the \\x81
lead byte.
Also, starting with Unicode 3.0.1, non-shortest byte sequences
in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
are also illegal, not just irregular.
The error code U_ILLEGAL_CHAR_FOUND will be set. */
UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in
the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
are irregular UTF-8 byte sequences for single surrogate
code points.
The error code U_INVALID_CHAR_FOUND will be set. */
UCNV_RESET = 3, /**< The callback is called with this reason when a
'reset' has occurred. Callback should reset all
state. */
UCNV_CLOSE = 4, /**< Called when the converter is closed. The
callback should release any allocated memory.*/
UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
converter. the pointer available as the
'context' is an alias to the original converters'
context pointer. If the context must be owned
by the new converter, the callback must clone
the data and call ucnv_setFromUCallback
(or setToUCallback) with the correct pointer.
@stable ICU 2.2
*/
} UConverterCallbackReason;
/**
* The structure for the fromUnicode callback function parameter.
* @stable ICU 2.0
*/
typedef struct {
uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */
const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
} UConverterFromUnicodeArgs;
/**
* The structure for the toUnicode callback function parameter.
* @stable ICU 2.0
*/
typedef struct {
uint16_t size; /**< The size of this struct @stable ICU 2.0 */
UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */
const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
} UConverterToUnicodeArgs;
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
*
* @param context Pointer to the callback's private data
* @param fromUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
* @param reason Defines the reason the callback was invoked
* @param err This should always be set to a failure status prior to calling.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
const void *context,
UConverterFromUnicodeArgs *fromUArgs,
const UChar* codeUnits,
int32_t length,
UChar32 codePoint,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
*
* @param context Pointer to the callback's private data
* @param toUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param reason Defines the reason the callback was invoked
* @param err This should always be set to a failure status prior to calling.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This From Unicode callback skips any ILLEGAL_SEQUENCE, or
* skips only UNASSINGED_SEQUENCE depending on the context parameter
* simply ignoring those characters.
*
* @param context The function currently recognizes the callback options:
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
* NULL: Skips any ILLEGAL_SEQUENCE
* @param fromUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
const void *context,
UConverterFromUnicodeArgs *fromUArgs,
const UChar* codeUnits,
int32_t length,
UChar32 codePoint,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
* UNASSIGNED_SEQUENCE depending on context parameter, with the
* current substitution string for the converter. This is the default
* callback.
*
* @param context The function currently recognizes the callback options:
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
* NULL: Substitutes any ILLEGAL_SEQUENCE
* @param fromUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @see ucnv_setSubstChars
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
const void *context,
UConverterFromUnicodeArgs *fromUArgs,
const UChar* codeUnits,
int32_t length,
UChar32 codePoint,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
* hexadecimal representation of the illegal codepoints
*
* @param context The function currently recognizes the callback options:
* <ul>
* <li>UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal
* representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE").
* In the Event the converter doesn't support the characters {%,U}[A-F][0-9],
* it will substitute the illegal sequence with the substitution characters.
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
* %UD84D%UDC56</li>
* <li>UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal
* representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
* In the Event the converter doesn't support the characters {\,u}[A-F][0-9],
* it will substitute the illegal sequence with the substitution characters.
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
* \\uD84D\\uDC56</li>
* <li>UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal
* representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
* In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9],
* it will substitute the illegal sequence with the substitution characters.
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
* \\U00023456</li>
* <li>UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal
* representation in the format \htmlonly&amp;#DDDDDDDD;, e.g. "&amp;#65534;&amp;#172;&amp;#51454;")\endhtmlonly.
* In the Event the converter doesn't support the characters {&amp;,#}[0-9],
* it will substitute the illegal sequence with the substitution characters.
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
* &amp;#144470; and Zero padding is ignored.</li>
* <li>UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal
* representation in the format \htmlonly&amp;#xXXXX; e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;")\endhtmlonly.
* In the Event the converter doesn't support the characters {&,#,x}[0-9],
* it will substitute the illegal sequence with the substitution characters.
* Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
* \htmlonly&amp;#x23456;\endhtmlonly</li>
* </ul>
* @param fromUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
const void *context,
UConverterFromUnicodeArgs *fromUArgs,
const UChar* codeUnits,
int32_t length,
UChar32 codePoint,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This To Unicode callback skips any ILLEGAL_SEQUENCE, or
* skips only UNASSINGED_SEQUENCE depending on the context parameter
* simply ignoring those characters.
*
* @param context The function currently recognizes the callback options:
* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
* NULL: Skips any ILLEGAL_SEQUENCE
* @param toUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or
* UNASSIGNED_SEQUENCE depending on context parameter, with the
* Unicode substitution character, U+FFFD.
*
* @param context The function currently recognizes the callback options:
* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
* returning the error code back to the caller immediately.
* NULL: Substitutes any ILLEGAL_SEQUENCE
* @param toUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
/**
* DO NOT CALL THIS FUNCTION DIRECTLY!
* This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
* hexadecimal representation of the illegal bytes
* (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
*
* @param context This function currently recognizes the callback options:
* UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
* UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
* @param toUArgs Information about the conversion in progress
* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
* @param length Size (in bytes) of the concerned codepage sequence
* @param reason Defines the reason the callback was invoked
* @param err Return value will be set to success if the callback was handled,
* otherwise this value will be set to a failure status.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
const void *context,
UConverterToUnicodeArgs *toUArgs,
const char* codeUnits,
int32_t length,
UConverterCallbackReason reason,
UErrorCode * err);
#endif
#endif
/*UCNV_ERR_H*/

View File

@ -1,456 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: uconfig.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002sep19
* created by: Markus W. Scherer
*/
#ifndef __UCONFIG_H__
#define __UCONFIG_H__
/*!
* \file
* \brief User-configurable settings
*
* Miscellaneous switches:
*
* A number of macros affect a variety of minor aspects of ICU.
* Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h)
* and moved here to make them easier to find.
*
* Switches for excluding parts of ICU library code modules:
*
* Changing these macros allows building partial, smaller libraries for special purposes.
* By default, all modules are built.
* The switches are fairly coarse, controlling large modules.
* Basic services cannot be turned off.
*
* Building with any of these options does not guarantee that the
* ICU build process will completely work. It is recommended that
* the ICU libraries and data be built using the normal build.
* At that time you should remove the data used by those services.
* After building the ICU data library, you should rebuild the ICU
* libraries with these switches customized to your needs.
*
* @stable ICU 2.4
*/
/**
* If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
* prior to determining default settings for uconfig variables.
*
* @internal ICU 4.0
*/
#if defined(UCONFIG_USE_LOCAL)
#include "uconfig_local.h"
#endif
/**
* \def U_DEBUG
* Determines whether to include debugging code.
* Automatically set on Windows, but most compilers do not have
* related predefined macros.
* @internal
*/
#ifdef U_DEBUG
/* Use the predefined value. */
#elif defined(_DEBUG)
/*
* _DEBUG is defined by Visual Studio debug compilation.
* Do *not* test for its NDEBUG macro: It is an orthogonal macro
* which disables assert().
*/
# define U_DEBUG 1
# else
# define U_DEBUG 0
#endif
/**
* Determines whether to enable auto cleanup of libraries.
* @internal
*/
#ifndef UCLN_NO_AUTO_CLEANUP
#define UCLN_NO_AUTO_CLEANUP 1
#endif
/**
* \def U_DISABLE_RENAMING
* Determines whether to disable renaming or not.
* @internal
*/
#ifndef U_DISABLE_RENAMING
#define U_DISABLE_RENAMING 0
#endif
/**
* \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
* Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
* utypes.h includes those headers if this macro is defined to 0.
* Otherwise, each those headers must be included explicitly when using one of their macros.
* Defaults to 0 for backward compatibility, except inside ICU.
* @stable ICU 49
*/
#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS
/* Use the predefined value. */
#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
defined(U_TOOLUTIL_IMPLEMENTATION)
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
#else
# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
#endif
/**
* \def U_OVERRIDE_CXX_ALLOCATION
* Determines whether to override new and delete.
* ICU is normally built such that all of its C++ classes, via their UMemory base,
* override operators new and delete to use its internal, customizable,
* non-exception-throwing memory allocation functions. (Default value 1 for this macro.)
*
* This is especially important when the application and its libraries use multiple heaps.
* For example, on Windows, this allows the ICU DLL to be used by
* applications that statically link the C Runtime library.
*
* @stable ICU 2.2
*/
#ifndef U_OVERRIDE_CXX_ALLOCATION
#define U_OVERRIDE_CXX_ALLOCATION 1
#endif
/**
* \def U_ENABLE_TRACING
* Determines whether to enable tracing.
* @internal
*/
#ifndef U_ENABLE_TRACING
#define U_ENABLE_TRACING 0
#endif
/**
* \def UCONFIG_ENABLE_PLUGINS
* Determines whether to enable ICU plugins.
* @internal
*/
#ifndef UCONFIG_ENABLE_PLUGINS
#define UCONFIG_ENABLE_PLUGINS 0
#endif
/**
* \def U_ENABLE_DYLOAD
* Whether to enable Dynamic loading in ICU.
* @internal
*/
#ifndef U_ENABLE_DYLOAD
#define U_ENABLE_DYLOAD 1
#endif
/**
* \def U_CHECK_DYLOAD
* Whether to test Dynamic loading as an OS capability.
* @internal
*/
#ifndef U_CHECK_DYLOAD
#define U_CHECK_DYLOAD 1
#endif
/**
* \def U_DEFAULT_SHOW_DRAFT
* Do we allow ICU users to use the draft APIs by default?
* @internal
*/
#ifndef U_DEFAULT_SHOW_DRAFT
#define U_DEFAULT_SHOW_DRAFT 1
#endif
/*===========================================================================*/
/* Custom icu entry point renaming */
/*===========================================================================*/
/**
* \def U_HAVE_LIB_SUFFIX
* 1 if a custom library suffix is set.
* @internal
*/
#ifdef U_HAVE_LIB_SUFFIX
/* Use the predefined value. */
#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN)
# define U_HAVE_LIB_SUFFIX 1
#endif
/**
* \def U_LIB_SUFFIX_C_NAME_STRING
* Defines the library suffix as a string with C syntax.
* @internal
*/
#ifdef U_LIB_SUFFIX_C_NAME_STRING
/* Use the predefined value. */
#elif defined(U_LIB_SUFFIX_C_NAME)
# define CONVERT_TO_STRING(s) #s
# define U_LIB_SUFFIX_C_NAME_STRING CONVERT_TO_STRING(U_LIB_SUFFIX_C_NAME)
#else
# define U_LIB_SUFFIX_C_NAME_STRING ""
#endif
/* common/i18n library switches --------------------------------------------- */
/**
* \def UCONFIG_ONLY_COLLATION
* This switch turns off modules that are not needed for collation.
*
* It does not turn off legacy conversion because that is necessary
* for ICU to work on EBCDIC platforms (for the default converter).
* If you want "only collation" and do not build for EBCDIC,
* then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well.
*
* @stable ICU 2.4
*/
#ifndef UCONFIG_ONLY_COLLATION
# define UCONFIG_ONLY_COLLATION 0
#endif
#if UCONFIG_ONLY_COLLATION
/* common library */
# define UCONFIG_NO_BREAK_ITERATION 1
# define UCONFIG_NO_IDNA 1
/* i18n library */
# if UCONFIG_NO_COLLATION
# error Contradictory collation switches in uconfig.h.
# endif
# define UCONFIG_NO_FORMATTING 1
# define UCONFIG_NO_TRANSLITERATION 1
# define UCONFIG_NO_REGULAR_EXPRESSIONS 1
#endif
/* common library switches -------------------------------------------------- */
/**
* \def UCONFIG_NO_FILE_IO
* This switch turns off all file access in the common library
* where file access is only used for data loading.
* ICU data must then be provided in the form of a data DLL (or with an
* equivalent way to link to the data residing in an executable,
* as in building a combined library with both the common library's code and
* the data), or via udata_setCommonData().
* Application data must be provided via udata_setAppData() or by using
* "open" functions that take pointers to data, for example ucol_openBinary().
*
* File access is not used at all in the i18n library.
*
* File access cannot be turned off for the icuio library or for the ICU
* test suites and ICU tools.
*
* @stable ICU 3.6
*/
#ifndef UCONFIG_NO_FILE_IO
# define UCONFIG_NO_FILE_IO 0
#endif
#if UCONFIG_NO_FILE_IO && defined(U_TIMEZONE_FILES_DIR)
# error Contradictory file io switches in uconfig.h.
#endif
/**
* \def UCONFIG_NO_CONVERSION
* ICU will not completely build (compiling the tools fails) with this
* switch turned on.
* This switch turns off all converters.
*
* You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
* in utypes.h if char* strings in your environment are always in UTF-8.
*
* @stable ICU 3.2
* @see U_CHARSET_IS_UTF8
*/
#ifndef UCONFIG_NO_CONVERSION
# define UCONFIG_NO_CONVERSION 0
#endif
#if UCONFIG_NO_CONVERSION
# define UCONFIG_NO_LEGACY_CONVERSION 1
#endif
/**
* \def UCONFIG_ONLY_HTML_CONVERSION
* This switch turns off all of the converters NOT listed in
* the HTML encoding standard:
* http://www.w3.org/TR/encoding/#names-and-labels
*
* This is not possible on EBCDIC platforms
* because they need ibm-37 or ibm-1047 default converters.
*
* @stable ICU 55
*/
#ifndef UCONFIG_ONLY_HTML_CONVERSION
# define UCONFIG_ONLY_HTML_CONVERSION 0
#endif
/**
* \def UCONFIG_NO_LEGACY_CONVERSION
* This switch turns off all converters except for
* - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
* - US-ASCII
* - ISO-8859-1
*
* Turning off legacy conversion is not possible on EBCDIC platforms
* because they need ibm-37 or ibm-1047 default converters.
*
* @stable ICU 2.4
*/
#ifndef UCONFIG_NO_LEGACY_CONVERSION
# define UCONFIG_NO_LEGACY_CONVERSION 0
#endif
/**
* \def UCONFIG_NO_NORMALIZATION
* This switch turns off normalization.
* It implies turning off several other services as well, for example
* collation and IDNA.
*
* @stable ICU 2.6
*/
#ifndef UCONFIG_NO_NORMALIZATION
# define UCONFIG_NO_NORMALIZATION 0
#endif
#if UCONFIG_NO_NORMALIZATION
/* common library */
/* ICU 50 CJK dictionary BreakIterator uses normalization */
# define UCONFIG_NO_BREAK_ITERATION 1
/* IDNA (UTS #46) is implemented via normalization */
# define UCONFIG_NO_IDNA 1
/* i18n library */
# if UCONFIG_ONLY_COLLATION
# error Contradictory collation switches in uconfig.h.
# endif
# define UCONFIG_NO_COLLATION 1
# define UCONFIG_NO_TRANSLITERATION 1
#endif
/**
* \def UCONFIG_NO_BREAK_ITERATION
* This switch turns off break iteration.
*
* @stable ICU 2.4
*/
#ifndef UCONFIG_NO_BREAK_ITERATION
# define UCONFIG_NO_BREAK_ITERATION 0
#endif
/**
* \def UCONFIG_NO_IDNA
* This switch turns off IDNA.
*
* @stable ICU 2.6
*/
#ifndef UCONFIG_NO_IDNA
# define UCONFIG_NO_IDNA 0
#endif
/**
* \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
* Determines the default UMessagePatternApostropheMode.
* See the documentation for that enum.
*
* @stable ICU 4.8
*/
#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
#endif
/**
* \def UCONFIG_USE_WINDOWS_LCID_MAPPING_API
* On platforms where U_PLATFORM_HAS_WIN32_API is true, this switch determines
* if the Windows platform APIs are used for LCID<->Locale Name conversions.
* Otherwise, only the built-in ICU tables are used.
*
* @internal ICU 64
*/
#ifndef UCONFIG_USE_WINDOWS_LCID_MAPPING_API
# define UCONFIG_USE_WINDOWS_LCID_MAPPING_API 1
#endif
/* i18n library switches ---------------------------------------------------- */
/**
* \def UCONFIG_NO_COLLATION
* This switch turns off collation and collation-based string search.
*
* @stable ICU 2.4
*/
#ifndef UCONFIG_NO_COLLATION
# define UCONFIG_NO_COLLATION 0
#endif
/**
* \def UCONFIG_NO_FORMATTING
* This switch turns off formatting and calendar/timezone services.
*
* @stable ICU 2.4
*/
#ifndef UCONFIG_NO_FORMATTING
# define UCONFIG_NO_FORMATTING 0
#endif
/**
* \def UCONFIG_NO_TRANSLITERATION
* This switch turns off transliteration.
*
* @stable ICU 2.4
*/
#ifndef UCONFIG_NO_TRANSLITERATION
# define UCONFIG_NO_TRANSLITERATION 0
#endif
/**
* \def UCONFIG_NO_REGULAR_EXPRESSIONS
* This switch turns off regular expressions.
*
* @stable ICU 2.4
*/
#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
# define UCONFIG_NO_REGULAR_EXPRESSIONS 0
#endif
/**
* \def UCONFIG_NO_SERVICE
* This switch turns off service registration.
*
* @stable ICU 3.2
*/
#ifndef UCONFIG_NO_SERVICE
# define UCONFIG_NO_SERVICE 0
#endif
/**
* \def UCONFIG_HAVE_PARSEALLINPUT
* This switch turns on the "parse all input" attribute. Binary incompatible.
*
* @internal
*/
#ifndef UCONFIG_HAVE_PARSEALLINPUT
# define UCONFIG_HAVE_PARSEALLINPUT 1
#endif
/**
* \def UCONFIG_NO_FILTERED_BREAK_ITERATION
* This switch turns off filtered break iteration code.
*
* @internal
*/
#ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION
# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0
#endif
#endif // __UCONFIG_H__

View File

@ -1,159 +0,0 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// ucpmap.h
// created: 2018sep03 Markus W. Scherer
#ifndef __UCPMAP_H__
#define __UCPMAP_H__
#include "unicode/utypes.h"
U_CDECL_BEGIN
/**
* \file
*
* This file defines an abstract map from Unicode code points to integer values.
*
* @see UCPMap
* @see UCPTrie
* @see UMutableCPTrie
*/
/**
* Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
*
* @see UCPTrie
* @see UMutableCPTrie
* @stable ICU 63
*/
typedef struct UCPMap UCPMap;
/**
* Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
* Most users should use UCPMAP_RANGE_NORMAL.
*
* @see ucpmap_getRange
* @see ucptrie_getRange
* @see umutablecptrie_getRange
* @stable ICU 63
*/
enum UCPMapRangeOption {
/**
* ucpmap_getRange() enumerates all same-value ranges as stored in the map.
* Most users should use this option.
* @stable ICU 63
*/
UCPMAP_RANGE_NORMAL,
/**
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
* except that lead surrogates (U+D800..U+DBFF) are treated as having the
* surrogateValue, which is passed to getRange() as a separate parameter.
* The surrogateValue is not transformed via filter().
* See U_IS_LEAD(c).
*
* Most users should use UCPMAP_RANGE_NORMAL instead.
*
* This option is useful for maps that map surrogate code *units* to
* special values optimized for UTF-16 string processing
* or for special error behavior for unpaired surrogates,
* but those values are not to be associated with the lead surrogate code *points*.
* @stable ICU 63
*/
UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
/**
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
* except that all surrogates (U+D800..U+DFFF) are treated as having the
* surrogateValue, which is passed to getRange() as a separate parameter.
* The surrogateValue is not transformed via filter().
* See U_IS_SURROGATE(c).
*
* Most users should use UCPMAP_RANGE_NORMAL instead.
*
* This option is useful for maps that map surrogate code *units* to
* special values optimized for UTF-16 string processing
* or for special error behavior for unpaired surrogates,
* but those values are not to be associated with the lead surrogate code *points*.
* @stable ICU 63
*/
UCPMAP_RANGE_FIXED_ALL_SURROGATES
};
#ifndef U_IN_DOXYGEN
typedef enum UCPMapRangeOption UCPMapRangeOption;
#endif
/**
* Returns the value for a code point as stored in the map, with range checking.
* Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
*
* @param map the map
* @param c the code point
* @return the map value,
* or an implementation-defined error value if the code point is not in the range 0..U+10FFFF
* @stable ICU 63
*/
U_CAPI uint32_t U_EXPORT2
ucpmap_get(const UCPMap *map, UChar32 c);
/**
* Callback function type: Modifies a map value.
* Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange().
* The modified value will be returned by the getRange function.
*
* Can be used to ignore some of the value bits,
* make a filter for one of several values,
* return a value index computed from the map value, etc.
*
* @param context an opaque pointer, as passed into the getRange function
* @param value a value from the map
* @return the modified value
* @stable ICU 63
*/
typedef uint32_t U_CALLCONV
UCPMapValueFilter(const void *context, uint32_t value);
/**
* Returns the last code point such that all those from start to there have the same value.
* Can be used to efficiently iterate over all same-value ranges in a map.
* (This is normally faster than iterating over code points and get()ting each value,
* but much slower than a data structure that stores ranges directly.)
*
* If the UCPMapValueFilter function pointer is not NULL, then
* the value to be delivered is passed through that function, and the return value is the end
* of the range where all values are modified to the same actual value.
* The value is unchanged if that function pointer is NULL.
*
* Example:
* \code
* UChar32 start = 0, end;
* uint32_t value;
* while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
* NULL, NULL, &value)) >= 0) {
* // Work with the range start..end and its value.
* start = end + 1;
* }
* \endcode
*
* @param map the map
* @param start range start
* @param option defines whether surrogates are treated normally,
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
* @param filter a pointer to a function that may modify the map data value,
* or NULL if the values from the map are to be used unmodified
* @param context an opaque pointer that is passed on to the filter function
* @param pValue if not NULL, receives the value that every code point start..end has;
* may have been modified by filter(context, map value)
* if that function pointer is not NULL
* @return the range end code point, or -1 if start is not a valid code point
* @stable ICU 63
*/
U_CAPI UChar32 U_EXPORT2
ucpmap_getRange(const UCPMap *map, UChar32 start,
UCPMapRangeOption option, uint32_t surrogateValue,
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
U_CDECL_END
#endif

View File

@ -1,440 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1999-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: udata.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999oct25
* created by: Markus W. Scherer
*/
#ifndef __UDATA_H__
#define __UDATA_H__
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/localpointer.h"
#endif // U_SHOW_CPLUSPLUS_API
U_CDECL_BEGIN
/**
* \file
* \brief C API: Data loading interface
*
* <h2>Information about data loading interface</h2>
*
* This API is used to find and efficiently load data for ICU and applications
* using ICU. It provides an abstract interface that specifies a data type and
* name to find and load the data. Normally this API is used by other ICU APIs
* to load required data out of the ICU data library, but it can be used to
* load data out of other places.
*
* See the User Guide Data Management chapter.
*/
#ifndef U_HIDE_INTERNAL_API
/**
* Character used to separate package names from tree names
* @internal ICU 3.0
*/
#define U_TREE_SEPARATOR '-'
/**
* String used to separate package names from tree names
* @internal ICU 3.0
*/
#define U_TREE_SEPARATOR_STRING "-"
/**
* Character used to separate parts of entry names
* @internal ICU 3.0
*/
#define U_TREE_ENTRY_SEP_CHAR '/'
/**
* String used to separate parts of entry names
* @internal ICU 3.0
*/
#define U_TREE_ENTRY_SEP_STRING "/"
/**
* Alias for standard ICU data
* @internal ICU 3.0
*/
#define U_ICUDATA_ALIAS "ICUDATA"
#endif /* U_HIDE_INTERNAL_API */
/**
* UDataInfo contains the properties about the requested data.
* This is meta data.
*
* <p>This structure may grow in the future, indicated by the
* <code>size</code> field.</p>
*
* <p>ICU data must be at least 8-aligned, and should be 16-aligned.
* The UDataInfo struct begins 4 bytes after the start of the data item,
* so it is 4-aligned.
*
* <p>The platform data property fields help determine if a data
* file can be efficiently used on a given machine.
* The particular fields are of importance only if the data
* is affected by the properties - if there is integer data
* with word sizes > 1 byte, char* text, or UChar* text.</p>
*
* <p>The implementation for the <code>udata_open[Choice]()</code>
* functions may reject data based on the value in <code>isBigEndian</code>.
* No other field is used by the <code>udata</code> API implementation.</p>
*
* <p>The <code>dataFormat</code> may be used to identify
* the kind of data, e.g. a converter table.</p>
*
* <p>The <code>formatVersion</code> field should be used to
* make sure that the format can be interpreted.
* It may be a good idea to check only for the one or two highest
* of the version elements to allow the data memory to
* get more or somewhat rearranged contents, for as long
* as the using code can still interpret the older contents.</p>
*
* <p>The <code>dataVersion</code> field is intended to be a
* common place to store the source version of the data;
* for data from the Unicode character database, this could
* reflect the Unicode version.</p>
*
* @stable ICU 2.0
*/
typedef struct {
/** sizeof(UDataInfo)
* @stable ICU 2.0 */
uint16_t size;
/** unused, set to 0
* @stable ICU 2.0*/
uint16_t reservedWord;
/* platform data properties */
/** 0 for little-endian machine, 1 for big-endian
* @stable ICU 2.0 */
uint8_t isBigEndian;
/** see U_CHARSET_FAMILY values in utypes.h
* @stable ICU 2.0*/
uint8_t charsetFamily;
/** sizeof(UChar), one of { 1, 2, 4 }
* @stable ICU 2.0*/
uint8_t sizeofUChar;
/** unused, set to 0
* @stable ICU 2.0*/
uint8_t reservedByte;
/** data format identifier
* @stable ICU 2.0*/
uint8_t dataFormat[4];
/** versions: [0] major [1] minor [2] milli [3] micro
* @stable ICU 2.0*/
uint8_t formatVersion[4];
/** versions: [0] major [1] minor [2] milli [3] micro
* @stable ICU 2.0*/
uint8_t dataVersion[4];
} UDataInfo;
/* API for reading data -----------------------------------------------------*/
/**
* Forward declaration of the data memory type.
* @stable ICU 2.0
*/
typedef struct UDataMemory UDataMemory;
/**
* Callback function for udata_openChoice().
* @param context parameter passed into <code>udata_openChoice()</code>.
* @param type The type of the data as passed into <code>udata_openChoice()</code>.
* It may be <code>NULL</code>.
* @param name The name of the data as passed into <code>udata_openChoice()</code>.
* @param pInfo A pointer to the <code>UDataInfo</code> structure
* of data that has been loaded and will be returned
* by <code>udata_openChoice()</code> if this function
* returns <code>true</code>.
* @return true if the current data memory is acceptable
* @stable ICU 2.0
*/
typedef UBool U_CALLCONV
UDataMemoryIsAcceptable(void *context,
const char *type, const char *name,
const UDataInfo *pInfo);
/**
* Convenience function.
* This function works the same as <code>udata_openChoice</code>
* except that any data that matches the type and name
* is assumed to be acceptable.
* @param path Specifies an absolute path and/or a basename for the
* finding of the data in the file system.
* <code>NULL</code> for ICU data.
* @param type A string that specifies the type of data to be loaded.
* For example, resource bundles are loaded with type "res",
* conversion tables with type "cnv".
* This may be <code>NULL</code> or empty.
* @param name A string that specifies the name of the data.
* @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
* @return A pointer (handle) to a data memory object, or <code>NULL</code>
* if an error occurs. Call <code>udata_getMemory()</code>
* to get a pointer to the actual data.
*
* @see udata_openChoice
* @stable ICU 2.0
*/
U_CAPI UDataMemory * U_EXPORT2
udata_open(const char *path, const char *type, const char *name,
UErrorCode *pErrorCode);
/**
* Data loading function.
* This function is used to find and load efficiently data for
* ICU and applications using ICU.
* It provides an abstract interface that allows to specify a data
* type and name to find and load the data.
*
* <p>The implementation depends on platform properties and user preferences
* and may involve loading shared libraries (DLLs), mapping
* files into memory, or fopen()/fread() files.
* It may also involve using static memory or database queries etc.
* Several or all data items may be combined into one entity
* (DLL, memory-mappable file).</p>
*
* <p>The data is always preceded by a header that includes
* a <code>UDataInfo</code> structure.
* The caller's <code>isAcceptable()</code> function is called to make
* sure that the data is useful. It may be called several times if it
* rejects the data and there is more than one location with data
* matching the type and name.</p>
*
* <p>If <code>path==NULL</code>, then ICU data is loaded.
* Otherwise, it is separated into a basename and a basename-less directory string.
* The basename is used as the data package name, and the directory is
* logically prepended to the ICU data directory string.</p>
*
* <p>For details about ICU data loading see the User Guide
* Data Management chapter. (http://icu-project.org/userguide/icudata.html)</p>
*
* @param path Specifies an absolute path and/or a basename for the
* finding of the data in the file system.
* <code>NULL</code> for ICU data.
* @param type A string that specifies the type of data to be loaded.
* For example, resource bundles are loaded with type "res",
* conversion tables with type "cnv".
* This may be <code>NULL</code> or empty.
* @param name A string that specifies the name of the data.
* @param isAcceptable This function is called to verify that loaded data
* is useful for the client code. If it returns false
* for all data items, then <code>udata_openChoice()</code>
* will return with an error.
* @param context Arbitrary parameter to be passed into isAcceptable.
* @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
* @return A pointer (handle) to a data memory object, or <code>NULL</code>
* if an error occurs. Call <code>udata_getMemory()</code>
* to get a pointer to the actual data.
* @stable ICU 2.0
*/
U_CAPI UDataMemory * U_EXPORT2
udata_openChoice(const char *path, const char *type, const char *name,
UDataMemoryIsAcceptable *isAcceptable, void *context,
UErrorCode *pErrorCode);
/**
* Close the data memory.
* This function must be called to allow the system to
* release resources associated with this data memory.
* @param pData The pointer to data memory object
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
udata_close(UDataMemory *pData);
/**
* Get the pointer to the actual data inside the data memory.
* The data is read-only.
*
* ICU data must be at least 8-aligned, and should be 16-aligned.
*
* @param pData The pointer to data memory object
* @stable ICU 2.0
*/
U_CAPI const void * U_EXPORT2
udata_getMemory(UDataMemory *pData);
/**
* Get the information from the data memory header.
* This allows to get access to the header containing
* platform data properties etc. which is not part of
* the data itself and can therefore not be accessed
* via the pointer that <code>udata_getMemory()</code> returns.
*
* @param pData pointer to the data memory object
* @param pInfo pointer to a UDataInfo object;
* its <code>size</code> field must be set correctly,
* typically to <code>sizeof(UDataInfo)</code>.
*
* <code>*pInfo</code> will be filled with the UDataInfo structure
* in the data memory object. If this structure is smaller than
* <code>pInfo->size</code>, then the <code>size</code> will be
* adjusted and only part of the structure will be filled.
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
/**
* This function bypasses the normal ICU data loading process and
* allows you to force ICU's system data to come out of a user-specified
* area in memory.
*
* ICU data must be at least 8-aligned, and should be 16-aligned.
* See https://unicode-org.github.io/icu/userguide/icudata
*
* The format of this data is that of the icu common data file, as is
* generated by the pkgdata tool with mode=common or mode=dll.
* You can read in a whole common mode file and pass the address to the start of the
* data, or (with the appropriate link options) pass in the pointer to
* the data that has been loaded from a dll by the operating system,
* as shown in this code:
*
* extern const char U_IMPORT U_ICUDATA_ENTRY_POINT [];
* // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
* UErrorCode status = U_ZERO_ERROR;
*
* udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
*
* It is important that the declaration be as above. The entry point
* must not be declared as an extern void*.
*
* Starting with ICU 4.4, it is possible to set several data packages,
* one per call to this function.
* udata_open() will look for data in the multiple data packages in the order
* in which they were set.
* The position of the linked-in or default-name ICU .data package in the
* search list depends on when the first data item is loaded that is not contained
* in the already explicitly set packages.
* If data was loaded implicitly before the first call to this function
* (for example, via opening a converter, constructing a UnicodeString
* from default-codepage data, using formatting or collation APIs, etc.),
* then the default data will be first in the list.
*
* This function has no effect on application (non ICU) data. See udata_setAppData()
* for similar functionality for application data.
*
* @param data pointer to ICU common data
* @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
udata_setCommonData(const void *data, UErrorCode *err);
/**
* This function bypasses the normal ICU data loading process for application-specific
* data and allows you to force the it to come out of a user-specified
* pointer.
*
* ICU data must be at least 8-aligned, and should be 16-aligned.
* See https://unicode-org.github.io/icu/userguide/icudata
*
* The format of this data is that of the icu common data file, like 'icudt26l.dat'
* or the corresponding shared library (DLL) file.
* The application must read in or otherwise construct an image of the data and then
* pass the address of it to this function.
*
*
* Warning: setAppData will set a U_USING_DEFAULT_WARNING code if
* data with the specifed path that has already been opened, or
* if setAppData with the same path has already been called.
* Any such calls to setAppData will have no effect.
*
*
* @param packageName the package name by which the application will refer
* to (open) this data
* @param data pointer to the data
* @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
* @see udata_setCommonData
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
udata_setAppData(const char *packageName, const void *data, UErrorCode *err);
/**
* Possible settings for udata_setFileAccess()
* @see udata_setFileAccess
* @stable ICU 3.4
*/
typedef enum UDataFileAccess {
/** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */
UDATA_FILES_FIRST,
/** An alias for the default access mode. @stable ICU 3.4 */
UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
/** ICU only loads data from packages, not from single files. @stable ICU 3.4 */
UDATA_ONLY_PACKAGES,
/** ICU loads data from packages first, and only from single files
if the data cannot be found in a package. @stable ICU 3.4 */
UDATA_PACKAGES_FIRST,
/** ICU does not access the file system for data loading. @stable ICU 3.4 */
UDATA_NO_FILES,
#ifndef U_HIDE_DEPRECATED_API
/**
* Number of real UDataFileAccess values.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UDATA_FILE_ACCESS_COUNT
#endif // U_HIDE_DEPRECATED_API
} UDataFileAccess;
/**
* This function may be called to control how ICU loads data. It must be called
* before any ICU data is loaded, including application data loaded with
* ures/ResourceBundle or udata APIs. This function is not multithread safe.
* The results of calling it while other threads are loading data are undefined.
* @param access The type of file access to be used
* @param status Error code.
* @see UDataFileAccess
* @stable ICU 3.4
*/
U_CAPI void U_EXPORT2
udata_setFileAccess(UDataFileAccess access, UErrorCode *status);
U_CDECL_END
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
/**
* \class LocalUDataMemoryPointer
* "Smart pointer" class, closes a UDataMemory via udata_close().
* For most methods see the LocalPointerBase base class.
*
* @see LocalPointerBase
* @see LocalPointer
* @stable ICU 4.4
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close);
U_NAMESPACE_END
#endif // U_SHOW_CPLUSPLUS_API
#endif

View File

@ -1,209 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 2002-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uenum.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:2
*
* created on: 2002jul08
* created by: Vladimir Weinstein
*/
#ifndef __UENUM_H
#define __UENUM_H
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/localpointer.h"
U_NAMESPACE_BEGIN
class StringEnumeration;
U_NAMESPACE_END
#endif // U_SHOW_CPLUSPLUS_API
/**
* \file
* \brief C API: String Enumeration
*/
/**
* An enumeration object.
* For usage in C programs.
* @stable ICU 2.2
*/
struct UEnumeration;
/** structure representing an enumeration object instance @stable ICU 2.2 */
typedef struct UEnumeration UEnumeration;
/**
* Disposes of resources in use by the iterator. If en is NULL,
* does nothing. After this call, any char* or UChar* pointer
* returned by uenum_unext() or uenum_next() is invalid.
* @param en UEnumeration structure pointer
* @stable ICU 2.2
*/
U_CAPI void U_EXPORT2
uenum_close(UEnumeration* en);
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
/**
* \class LocalUEnumerationPointer
* "Smart pointer" class, closes a UEnumeration via uenum_close().
* For most methods see the LocalPointerBase base class.
*
* @see LocalPointerBase
* @see LocalPointer
* @stable ICU 4.4
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close);
U_NAMESPACE_END
#endif
/**
* Returns the number of elements that the iterator traverses. If
* the iterator is out-of-sync with its service, status is set to
* U_ENUM_OUT_OF_SYNC_ERROR.
* This is a convenience function. It can end up being very
* expensive as all the items might have to be pre-fetched (depending
* on the type of data being traversed). Use with caution and only
* when necessary.
* @param en UEnumeration structure pointer
* @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
* iterator is out of sync.
* @return number of elements in the iterator
* @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
uenum_count(UEnumeration* en, UErrorCode* status);
/**
* Returns the next element in the iterator's list. If there are
* no more elements, returns NULL. If the iterator is out-of-sync
* with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
* NULL is returned. If the native service string is a char* string,
* it is converted to UChar* with the invariant converter.
* The result is terminated by (UChar)0.
* @param en the iterator object
* @param resultLength pointer to receive the length of the result
* (not including the terminating \\0).
* If the pointer is NULL it is ignored.
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
* the iterator is out of sync with its service.
* @return a pointer to the string. The string will be
* zero-terminated. The return pointer is owned by this iterator
* and must not be deleted by the caller. The pointer is valid
* until the next call to any uenum_... method, including
* uenum_next() or uenum_unext(). When all strings have been
* traversed, returns NULL.
* @stable ICU 2.2
*/
U_CAPI const UChar* U_EXPORT2
uenum_unext(UEnumeration* en,
int32_t* resultLength,
UErrorCode* status);
/**
* Returns the next element in the iterator's list. If there are
* no more elements, returns NULL. If the iterator is out-of-sync
* with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
* NULL is returned. If the native service string is a UChar*
* string, it is converted to char* with the invariant converter.
* The result is terminated by (char)0. If the conversion fails
* (because a character cannot be converted) then status is set to
* U_INVARIANT_CONVERSION_ERROR and the return value is undefined
* (but non-NULL).
* @param en the iterator object
* @param resultLength pointer to receive the length of the result
* (not including the terminating \\0).
* If the pointer is NULL it is ignored.
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
* the iterator is out of sync with its service. Set to
* U_INVARIANT_CONVERSION_ERROR if the underlying native string is
* UChar* and conversion to char* with the invariant converter
* fails. This error pertains only to current string, so iteration
* might be able to continue successfully.
* @return a pointer to the string. The string will be
* zero-terminated. The return pointer is owned by this iterator
* and must not be deleted by the caller. The pointer is valid
* until the next call to any uenum_... method, including
* uenum_next() or uenum_unext(). When all strings have been
* traversed, returns NULL.
* @stable ICU 2.2
*/
U_CAPI const char* U_EXPORT2
uenum_next(UEnumeration* en,
int32_t* resultLength,
UErrorCode* status);
/**
* Resets the iterator to the current list of service IDs. This
* re-establishes sync with the service and rewinds the iterator
* to start at the first element.
* @param en the iterator object
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
* the iterator is out of sync with its service.
* @stable ICU 2.2
*/
U_CAPI void U_EXPORT2
uenum_reset(UEnumeration* en, UErrorCode* status);
#if U_SHOW_CPLUSPLUS_API
/**
* Given a StringEnumeration, wrap it in a UEnumeration. The
* StringEnumeration is adopted; after this call, the caller must not
* delete it (regardless of error status).
* @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration.
* @param ec the error code.
* @return a UEnumeration wrapping the adopted StringEnumeration.
* @stable ICU 4.2
*/
U_CAPI UEnumeration* U_EXPORT2
uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec);
#endif
/**
* Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null.
* Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
* \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration
* @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller.
* @param count length of the array
* @param ec error code
* @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
* @see uenum_close
* @stable ICU 50
*/
U_CAPI UEnumeration* U_EXPORT2
uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
UErrorCode* ec);
/**
* Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null.
* Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
* \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration
* @param strings array of char* strings (each null terminated). All storage is owned by the caller.
* @param count length of the array
* @param ec error code
* @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory
* @see uenum_close
* @stable ICU 50
*/
U_CAPI UEnumeration* U_EXPORT2
uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
UErrorCode* ec);
#endif

View File

@ -1,709 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 2002-2011 International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: uiter.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002jan18
* created by: Markus W. Scherer
*/
#ifndef __UITER_H__
#define __UITER_H__
/**
* \file
* \brief C API: Unicode Character Iteration
*
* @see UCharIterator
*/
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
class CharacterIterator;
class Replaceable;
U_NAMESPACE_END
#endif
U_CDECL_BEGIN
struct UCharIterator;
typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
/**
* Origin constants for UCharIterator.getIndex() and UCharIterator.move().
* @see UCharIteratorMove
* @see UCharIterator
* @stable ICU 2.1
*/
typedef enum UCharIteratorOrigin {
UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
} UCharIteratorOrigin;
/** Constants for UCharIterator. @stable ICU 2.6 */
enum {
/**
* Constant value that may be returned by UCharIteratorMove
* indicating that the final UTF-16 index is not known, but that the move succeeded.
* This can occur when moving relative to limit or length, or
* when moving relative to the current index after a setState()
* when the current UTF-16 index is not known.
*
* It would be very inefficient to have to count from the beginning of the text
* just to get the current/limit/length index after moving relative to it.
* The actual index can be determined with getIndex(UITER_CURRENT)
* which will count the UChars if necessary.
*
* @stable ICU 2.6
*/
UITER_UNKNOWN_INDEX=-2
};
/**
* Constant for UCharIterator getState() indicating an error or
* an unknown state.
* Returned by uiter_getState()/UCharIteratorGetState
* when an error occurs.
* Also, some UCharIterator implementations may not be able to return
* a valid state for each position. This will be clearly documented
* for each such iterator (none of the public ones here).
*
* @stable ICU 2.6
*/
#define UITER_NO_STATE ((uint32_t)0xffffffff)
/**
* Function type declaration for UCharIterator.getIndex().
*
* Gets the current position, or the start or limit of the
* iteration range.
*
* This function may perform slowly for UITER_CURRENT after setState() was called,
* or for UITER_LENGTH, because an iterator implementation may have to count
* UChars if the underlying storage is not UTF-16.
*
* @param iter the UCharIterator structure ("this pointer")
* @param origin get the 0, start, limit, length, or current index
* @return the requested index, or U_SENTINEL in an error condition
*
* @see UCharIteratorOrigin
* @see UCharIterator
* @stable ICU 2.1
*/
typedef int32_t U_CALLCONV
UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
/**
* Function type declaration for UCharIterator.move().
*
* Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
*
* Moves the current position relative to the start or limit of the
* iteration range, or relative to the current position itself.
* The movement is expressed in numbers of code units forward
* or backward by specifying a positive or negative delta.
* Out of bounds movement will be pinned to the start or limit.
*
* This function may perform slowly for moving relative to UITER_LENGTH
* because an iterator implementation may have to count the rest of the
* UChars if the native storage is not UTF-16.
*
* When moving relative to the limit or length, or
* relative to the current position after setState() was called,
* move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
* determination of the actual UTF-16 index.
* The actual index can be determined with getIndex(UITER_CURRENT)
* which will count the UChars if necessary.
* See UITER_UNKNOWN_INDEX for details.
*
* @param iter the UCharIterator structure ("this pointer")
* @param delta can be positive, zero, or negative
* @param origin move relative to the 0, start, limit, length, or current index
* @return the new index, or U_SENTINEL on an error condition,
* or UITER_UNKNOWN_INDEX when the index is not known.
*
* @see UCharIteratorOrigin
* @see UCharIterator
* @see UITER_UNKNOWN_INDEX
* @stable ICU 2.1
*/
typedef int32_t U_CALLCONV
UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
/**
* Function type declaration for UCharIterator.hasNext().
*
* Check if current() and next() can still
* return another code unit.
*
* @param iter the UCharIterator structure ("this pointer")
* @return boolean value for whether current() and next() can still return another code unit
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef UBool U_CALLCONV
UCharIteratorHasNext(UCharIterator *iter);
/**
* Function type declaration for UCharIterator.hasPrevious().
*
* Check if previous() can still return another code unit.
*
* @param iter the UCharIterator structure ("this pointer")
* @return boolean value for whether previous() can still return another code unit
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef UBool U_CALLCONV
UCharIteratorHasPrevious(UCharIterator *iter);
/**
* Function type declaration for UCharIterator.current().
*
* Return the code unit at the current position,
* or U_SENTINEL if there is none (index is at the limit).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the current code unit
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef UChar32 U_CALLCONV
UCharIteratorCurrent(UCharIterator *iter);
/**
* Function type declaration for UCharIterator.next().
*
* Return the code unit at the current index and increment
* the index (post-increment, like s[i++]),
* or return U_SENTINEL if there is none (index is at the limit).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the current code unit (and post-increment the current index)
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef UChar32 U_CALLCONV
UCharIteratorNext(UCharIterator *iter);
/**
* Function type declaration for UCharIterator.previous().
*
* Decrement the index and return the code unit from there
* (pre-decrement, like s[--i]),
* or return U_SENTINEL if there is none (index is at the start).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the previous code unit (after pre-decrementing the current index)
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef UChar32 U_CALLCONV
UCharIteratorPrevious(UCharIterator *iter);
/**
* Function type declaration for UCharIterator.reservedFn().
* Reserved for future use.
*
* @param iter the UCharIterator structure ("this pointer")
* @param something some integer argument
* @return some integer
*
* @see UCharIterator
* @stable ICU 2.1
*/
typedef int32_t U_CALLCONV
UCharIteratorReserved(UCharIterator *iter, int32_t something);
/**
* Function type declaration for UCharIterator.getState().
*
* Get the "state" of the iterator in the form of a single 32-bit word.
* It is recommended that the state value be calculated to be as small as
* is feasible. For strings with limited lengths, fewer than 32 bits may
* be sufficient.
*
* This is used together with setState()/UCharIteratorSetState
* to save and restore the iterator position more efficiently than with
* getIndex()/move().
*
* The iterator state is defined as a uint32_t value because it is designed
* for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
* of the character iterator.
*
* With some UCharIterator implementations (e.g., UTF-8),
* getting and setting the UTF-16 index with existing functions
* (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
* relatively slow because the iterator has to "walk" from a known index
* to the requested one.
* This takes more time the farther it needs to go.
*
* An opaque state value allows an iterator implementation to provide
* an internal index (UTF-8: the source byte array index) for
* fast, constant-time restoration.
*
* After calling setState(), a getIndex(UITER_CURRENT) may be slow because
* the UTF-16 index may not be restored as well, but the iterator can deliver
* the correct text contents and move relative to the current position
* without performance degradation.
*
* Some UCharIterator implementations may not be able to return
* a valid state for each position, in which case they return UITER_NO_STATE instead.
* This will be clearly documented for each such iterator (none of the public ones here).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the state word
*
* @see UCharIterator
* @see UCharIteratorSetState
* @see UITER_NO_STATE
* @stable ICU 2.6
*/
typedef uint32_t U_CALLCONV
UCharIteratorGetState(const UCharIterator *iter);
/**
* Function type declaration for UCharIterator.setState().
*
* Restore the "state" of the iterator using a state word from a getState() call.
* The iterator object need not be the same one as for which getState() was called,
* but it must be of the same type (set up using the same uiter_setXYZ function)
* and it must iterate over the same string
* (binary identical regardless of memory address).
* For more about the state word see UCharIteratorGetState.
*
* After calling setState(), a getIndex(UITER_CURRENT) may be slow because
* the UTF-16 index may not be restored as well, but the iterator can deliver
* the correct text contents and move relative to the current position
* without performance degradation.
*
* @param iter the UCharIterator structure ("this pointer")
* @param state the state word from a getState() call
* on a same-type, same-string iterator
* @param pErrorCode Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @see UCharIterator
* @see UCharIteratorGetState
* @stable ICU 2.6
*/
typedef void U_CALLCONV
UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
/**
* C API for code unit iteration.
* This can be used as a C wrapper around
* CharacterIterator, Replaceable, or implemented using simple strings, etc.
*
* There are two roles for using UCharIterator:
*
* A "provider" sets the necessary function pointers and controls the "protected"
* fields of the UCharIterator structure. A "provider" passes a UCharIterator
* into C APIs that need a UCharIterator as an abstract, flexible string interface.
*
* Implementations of such C APIs are "callers" of UCharIterator functions;
* they only use the "public" function pointers and never access the "protected"
* fields directly.
*
* The current() and next() functions only check the current index against the
* limit, and previous() only checks the current index against the start,
* to see if the iterator already reached the end of the iteration range.
*
* The assumption - in all iterators - is that the index is moved via the API,
* which means it won't go out of bounds, or the index is modified by
* user code that knows enough about the iterator implementation to set valid
* index values.
*
* UCharIterator functions return code unit values 0..0xffff,
* or U_SENTINEL if the iteration bounds are reached.
*
* @stable ICU 2.1
*/
struct UCharIterator {
/**
* (protected) Pointer to string or wrapped object or similar.
* Not used by caller.
* @stable ICU 2.1
*/
const void *context;
/**
* (protected) Length of string or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t length;
/**
* (protected) Start index or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t start;
/**
* (protected) Current index or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t index;
/**
* (protected) Limit index or similar.
* Not used by caller.
* @stable ICU 2.1
*/
int32_t limit;
/**
* (protected) Used by UTF-8 iterators and possibly others.
* @stable ICU 2.1
*/
int32_t reservedField;
/**
* (public) Returns the current position or the
* start or limit index of the iteration range.
*
* @see UCharIteratorGetIndex
* @stable ICU 2.1
*/
UCharIteratorGetIndex *getIndex;
/**
* (public) Moves the current position relative to the start or limit of the
* iteration range, or relative to the current position itself.
* The movement is expressed in numbers of code units forward
* or backward by specifying a positive or negative delta.
*
* @see UCharIteratorMove
* @stable ICU 2.1
*/
UCharIteratorMove *move;
/**
* (public) Check if current() and next() can still
* return another code unit.
*
* @see UCharIteratorHasNext
* @stable ICU 2.1
*/
UCharIteratorHasNext *hasNext;
/**
* (public) Check if previous() can still return another code unit.
*
* @see UCharIteratorHasPrevious
* @stable ICU 2.1
*/
UCharIteratorHasPrevious *hasPrevious;
/**
* (public) Return the code unit at the current position,
* or U_SENTINEL if there is none (index is at the limit).
*
* @see UCharIteratorCurrent
* @stable ICU 2.1
*/
UCharIteratorCurrent *current;
/**
* (public) Return the code unit at the current index and increment
* the index (post-increment, like s[i++]),
* or return U_SENTINEL if there is none (index is at the limit).
*
* @see UCharIteratorNext
* @stable ICU 2.1
*/
UCharIteratorNext *next;
/**
* (public) Decrement the index and return the code unit from there
* (pre-decrement, like s[--i]),
* or return U_SENTINEL if there is none (index is at the start).
*
* @see UCharIteratorPrevious
* @stable ICU 2.1
*/
UCharIteratorPrevious *previous;
/**
* (public) Reserved for future use. Currently NULL.
*
* @see UCharIteratorReserved
* @stable ICU 2.1
*/
UCharIteratorReserved *reservedFn;
/**
* (public) Return the state of the iterator, to be restored later with setState().
* This function pointer is NULL if the iterator does not implement it.
*
* @see UCharIteratorGet
* @stable ICU 2.6
*/
UCharIteratorGetState *getState;
/**
* (public) Restore the iterator state from the state word from a call
* to getState().
* This function pointer is NULL if the iterator does not implement it.
*
* @see UCharIteratorSet
* @stable ICU 2.6
*/
UCharIteratorSetState *setState;
};
/**
* Helper function for UCharIterator to get the code point
* at the current index.
*
* Return the code point that includes the code unit at the current position,
* or U_SENTINEL if there is none (index is at the limit).
* If the current code unit is a lead or trail surrogate,
* then the following or preceding surrogate is used to form
* the code point value.
*
* @param iter the UCharIterator structure ("this pointer")
* @return the current code point
*
* @see UCharIterator
* @see U16_GET
* @see UnicodeString::char32At()
* @stable ICU 2.1
*/
U_CAPI UChar32 U_EXPORT2
uiter_current32(UCharIterator *iter);
/**
* Helper function for UCharIterator to get the next code point.
*
* Return the code point at the current index and increment
* the index (post-increment, like s[i++]),
* or return U_SENTINEL if there is none (index is at the limit).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the current code point (and post-increment the current index)
*
* @see UCharIterator
* @see U16_NEXT
* @stable ICU 2.1
*/
U_CAPI UChar32 U_EXPORT2
uiter_next32(UCharIterator *iter);
/**
* Helper function for UCharIterator to get the previous code point.
*
* Decrement the index and return the code point from there
* (pre-decrement, like s[--i]),
* or return U_SENTINEL if there is none (index is at the start).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the previous code point (after pre-decrementing the current index)
*
* @see UCharIterator
* @see U16_PREV
* @stable ICU 2.1
*/
U_CAPI UChar32 U_EXPORT2
uiter_previous32(UCharIterator *iter);
/**
* Get the "state" of the iterator in the form of a single 32-bit word.
* This is a convenience function that calls iter->getState(iter)
* if iter->getState is not NULL;
* if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
*
* Some UCharIterator implementations may not be able to return
* a valid state for each position, in which case they return UITER_NO_STATE instead.
* This will be clearly documented for each such iterator (none of the public ones here).
*
* @param iter the UCharIterator structure ("this pointer")
* @return the state word
*
* @see UCharIterator
* @see UCharIteratorGetState
* @see UITER_NO_STATE
* @stable ICU 2.6
*/
U_CAPI uint32_t U_EXPORT2
uiter_getState(const UCharIterator *iter);
/**
* Restore the "state" of the iterator using a state word from a getState() call.
* This is a convenience function that calls iter->setState(iter, state, pErrorCode)
* if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
*
* @param iter the UCharIterator structure ("this pointer")
* @param state the state word from a getState() call
* on a same-type, same-string iterator
* @param pErrorCode Must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @see UCharIterator
* @see UCharIteratorSetState
* @stable ICU 2.6
*/
U_CAPI void U_EXPORT2
uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
/**
* Set up a UCharIterator to iterate over a string.
*
* Sets the UCharIterator function pointers for iteration over the string s
* with iteration boundaries start=index=0 and length=limit=string length.
* The "provider" may set the start, index, and limit values at any time
* within the range 0..length.
* The length field will be ignored.
*
* The string pointer s is set into UCharIterator.context without copying
* or reallocating the string contents.
*
* getState() simply returns the current index.
* move() will always return the final index.
*
* @param iter UCharIterator structure to be set for iteration
* @param s String to iterate over
* @param length Length of s, or -1 if NUL-terminated
*
* @see UCharIterator
* @stable ICU 2.1
*/
U_CAPI void U_EXPORT2
uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
/**
* Set up a UCharIterator to iterate over a UTF-16BE string
* (byte vector with a big-endian pair of bytes per UChar).
*
* Everything works just like with a normal UChar iterator (uiter_setString),
* except that UChars are assembled from byte pairs,
* and that the length argument here indicates an even number of bytes.
*
* getState() simply returns the current index.
* move() will always return the final index.
*
* @param iter UCharIterator structure to be set for iteration
* @param s UTF-16BE string to iterate over
* @param length Length of s as an even number of bytes, or -1 if NUL-terminated
* (NUL means pair of 0 bytes at even index from s)
*
* @see UCharIterator
* @see uiter_setString
* @stable ICU 2.6
*/
U_CAPI void U_EXPORT2
uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
/**
* Set up a UCharIterator to iterate over a UTF-8 string.
*
* Sets the UCharIterator function pointers for iteration over the UTF-8 string s
* with UTF-8 iteration boundaries 0 and length.
* The implementation counts the UTF-16 index on the fly and
* lazily evaluates the UTF-16 length of the text.
*
* The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
* When the reservedField is not 0, then it contains a supplementary code point
* and the UTF-16 index is between the two corresponding surrogates.
* At that point, the UTF-8 index is behind that code point.
*
* The UTF-8 string pointer s is set into UCharIterator.context without copying
* or reallocating the string contents.
*
* getState() returns a state value consisting of
* - the current UTF-8 source byte index (bits 31..1)
* - a flag (bit 0) that indicates whether the UChar position is in the middle
* of a surrogate pair
* (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
*
* getState() cannot also encode the UTF-16 index in the state value.
* move(relative to limit or length), or
* move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
*
* @param iter UCharIterator structure to be set for iteration
* @param s UTF-8 string to iterate over
* @param length Length of s in bytes, or -1 if NUL-terminated
*
* @see UCharIterator
* @stable ICU 2.6
*/
U_CAPI void U_EXPORT2
uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
#if U_SHOW_CPLUSPLUS_API
/**
* Set up a UCharIterator to wrap around a C++ CharacterIterator.
*
* Sets the UCharIterator function pointers for iteration using the
* CharacterIterator charIter.
*
* The CharacterIterator pointer charIter is set into UCharIterator.context
* without copying or cloning the CharacterIterator object.
* The other "protected" UCharIterator fields are set to 0 and will be ignored.
* The iteration index and boundaries are controlled by the CharacterIterator.
*
* getState() simply returns the current index.
* move() will always return the final index.
*
* @param iter UCharIterator structure to be set for iteration
* @param charIter CharacterIterator to wrap
*
* @see UCharIterator
* @stable ICU 2.1
*/
U_CAPI void U_EXPORT2
uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
/**
* Set up a UCharIterator to iterate over a C++ Replaceable.
*
* Sets the UCharIterator function pointers for iteration over the
* Replaceable rep with iteration boundaries start=index=0 and
* length=limit=rep->length().
* The "provider" may set the start, index, and limit values at any time
* within the range 0..length=rep->length().
* The length field will be ignored.
*
* The Replaceable pointer rep is set into UCharIterator.context without copying
* or cloning/reallocating the Replaceable object.
*
* getState() simply returns the current index.
* move() will always return the final index.
*
* @param iter UCharIterator structure to be set for iteration
* @param rep Replaceable to iterate over
*
* @see UCharIterator
* @stable ICU 2.1
*/
U_CAPI void U_EXPORT2
uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
#endif
U_CDECL_END
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,491 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1999-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: umachine.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep13
* created by: Markus W. Scherer
*
* This file defines basic types and constants for ICU to be
* platform-independent. umachine.h and utf.h are included into
* utypes.h to provide all the general definitions for ICU.
* All of these definitions used to be in utypes.h before
* the UTF-handling macros made this unmaintainable.
*/
#ifndef __UMACHINE_H__
#define __UMACHINE_H__
/**
* \file
* \brief Basic types and constants for UTF
*
* <h2> Basic types and constants for UTF </h2>
* This file defines basic types and constants for utf.h to be
* platform-independent. umachine.h and utf.h are included into
* utypes.h to provide all the general definitions for ICU.
* All of these definitions used to be in utypes.h before
* the UTF-handling macros made this unmaintainable.
*
*/
/*==========================================================================*/
/* Include platform-dependent definitions */
/* which are contained in the platform-specific file platform.h */
/*==========================================================================*/
#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
/*
* ANSI C headers:
* stddef.h defines wchar_t
*/
#include <stdbool.h>
#include <stddef.h>
/*==========================================================================*/
/* For C wrappers, we use the symbol U_CAPI. */
/* This works properly if the includer is C or C++. */
/* Functions are declared U_CAPI return-type U_EXPORT2 function-name()... */
/*==========================================================================*/
/**
* \def U_CFUNC
* This is used in a declaration of a library private ICU C function.
* @stable ICU 2.4
*/
/**
* \def U_CDECL_BEGIN
* This is used to begin a declaration of a library private ICU C API.
* @stable ICU 2.4
*/
/**
* \def U_CDECL_END
* This is used to end a declaration of a library private ICU C API
* @stable ICU 2.4
*/
#ifdef __cplusplus
# define U_CFUNC extern "C"
# define U_CDECL_BEGIN extern "C" {
# define U_CDECL_END }
#else
# define U_CFUNC extern
# define U_CDECL_BEGIN
# define U_CDECL_END
#endif
#ifndef U_ATTRIBUTE_DEPRECATED
/**
* \def U_ATTRIBUTE_DEPRECATED
* This is used for GCC specific attributes
* @internal
*/
#if U_GCC_MAJOR_MINOR >= 302
# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
/**
* \def U_ATTRIBUTE_DEPRECATED
* This is used for Visual C++ specific attributes
* @internal
*/
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
#else
# define U_ATTRIBUTE_DEPRECATED
#endif
#endif
/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
#define U_CAPI U_CFUNC U_EXPORT
/** Obsolete/same as U_CAPI; was used to declare a function as a stable public ICU C API*/
#define U_STABLE U_CAPI
/** Obsolete/same as U_CAPI; was used to declare a function as a draft public ICU C API */
#define U_DRAFT U_CAPI
/** This is used to declare a function as a deprecated public ICU C API */
#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
/** Obsolete/same as U_CAPI; was used to declare a function as an obsolete public ICU C API */
#define U_OBSOLETE U_CAPI
/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */
#define U_INTERNAL U_CAPI
/**
* \def U_OVERRIDE
* Defined to the C++11 "override" keyword if available.
* Denotes a class or member which is an override of the base class.
* May result in an error if it applied to something not an override.
* @internal
*/
#ifndef U_OVERRIDE
#define U_OVERRIDE override
#endif
/**
* \def U_FINAL
* Defined to the C++11 "final" keyword if available.
* Denotes a class or member which may not be overridden in subclasses.
* May result in an error if subclasses attempt to override.
* @internal
*/
#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
#define U_FINAL final
#endif
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
// series of statements wrapped in { } blocks and the caller could choose to
// either treat them as if they were actual functions and end the invocation
// with a trailing ; creating an empty statement after the block or else omit
// this trailing ; using the knowledge that the macro would expand to { }.
//
// But doing so doesn't work well with macros that look like functions and
// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
// switches to the standard solution of wrapping such macros in do { } while.
//
// This will however break existing code that depends on being able to invoke
// these macros without a trailing ; so to be able to remain compatible with
// such code the wrapper is itself defined as macros so that it's possible to
// build ICU 65 and later with the old macro behaviour, like this:
//
// export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
// runConfigureICU ...
//
/**
* \def UPRV_BLOCK_MACRO_BEGIN
* Defined as the "do" keyword by default.
* @internal
*/
#ifndef UPRV_BLOCK_MACRO_BEGIN
#define UPRV_BLOCK_MACRO_BEGIN do
#endif
/**
* \def UPRV_BLOCK_MACRO_END
* Defined as "while (false)" by default.
* @internal
*/
#ifndef UPRV_BLOCK_MACRO_END
#define UPRV_BLOCK_MACRO_END while (false)
#endif
/*==========================================================================*/
/* limits for int32_t etc., like in POSIX inttypes.h */
/*==========================================================================*/
#ifndef INT8_MIN
/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
# define INT8_MIN ((int8_t)(-128))
#endif
#ifndef INT16_MIN
/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
# define INT16_MIN ((int16_t)(-32767-1))
#endif
#ifndef INT32_MIN
/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
# define INT32_MIN ((int32_t)(-2147483647-1))
#endif
#ifndef INT8_MAX
/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
# define INT8_MAX ((int8_t)(127))
#endif
#ifndef INT16_MAX
/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
# define INT16_MAX ((int16_t)(32767))
#endif
#ifndef INT32_MAX
/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
# define INT32_MAX ((int32_t)(2147483647))
#endif
#ifndef UINT8_MAX
/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
# define UINT8_MAX ((uint8_t)(255U))
#endif
#ifndef UINT16_MAX
/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
# define UINT16_MAX ((uint16_t)(65535U))
#endif
#ifndef UINT32_MAX
/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
# define UINT32_MAX ((uint32_t)(4294967295U))
#endif
#if defined(U_INT64_T_UNAVAILABLE)
# error int64_t is required for decimal format and rule-based number format.
#else
# ifndef INT64_C
/**
* Provides a platform independent way to specify a signed 64-bit integer constant.
* note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
* @stable ICU 2.8
*/
# define INT64_C(c) c ## LL
# endif
# ifndef UINT64_C
/**
* Provides a platform independent way to specify an unsigned 64-bit integer constant.
* note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
* @stable ICU 2.8
*/
# define UINT64_C(c) c ## ULL
# endif
# ifndef U_INT64_MIN
/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
# endif
# ifndef U_INT64_MAX
/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
# endif
# ifndef U_UINT64_MAX
/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
# endif
#endif
/*==========================================================================*/
/* Boolean data type */
/*==========================================================================*/
/**
* The ICU boolean type, a signed-byte integer.
* ICU-specific for historical reasons: The C and C++ standards used to not define type bool.
* Also provides a fixed type definition, as opposed to
* type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.
*
* @stable ICU 2.0
*/
typedef int8_t UBool;
/**
* \def U_DEFINE_FALSE_AND_TRUE
* Normally turns off defining macros FALSE=0 & TRUE=1 in public ICU headers.
* These obsolete macros sometimes break compilation of other code that
* defines enum constants or similar with these names.
* C++ has long defined bool/false/true.
* C99 also added definitions for these, although as macros; see stdbool.h.
*
* You may transitionally define U_DEFINE_FALSE_AND_TRUE=1 if you need time to migrate code.
*
* @internal ICU 68
*/
#ifdef U_DEFINE_FALSE_AND_TRUE
// Use the predefined value.
#elif defined(U_COMBINED_IMPLEMENTATION) || \
defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
defined(U_TOOLUTIL_IMPLEMENTATION)
// Inside ICU: Keep FALSE & TRUE available.
# define U_DEFINE_FALSE_AND_TRUE 1
#else
// Outside ICU: Avoid collision with non-macro definitions of FALSE & TRUE.
# define U_DEFINE_FALSE_AND_TRUE 0
#endif
#if U_DEFINE_FALSE_AND_TRUE || defined(U_IN_DOXYGEN)
#ifndef TRUE
/**
* The TRUE value of a UBool.
*
* @deprecated ICU 68 Use standard "true" instead.
*/
# define TRUE 1
#endif
#ifndef FALSE
/**
* The FALSE value of a UBool.
*
* @deprecated ICU 68 Use standard "false" instead.
*/
# define FALSE 0
#endif
#endif // U_DEFINE_FALSE_AND_TRUE
/*==========================================================================*/
/* Unicode data types */
/*==========================================================================*/
/* wchar_t-related definitions -------------------------------------------- */
/*
* \def U_WCHAR_IS_UTF16
* Defined if wchar_t uses UTF-16.
*
* @stable ICU 2.0
*/
/*
* \def U_WCHAR_IS_UTF32
* Defined if wchar_t uses UTF-32.
*
* @stable ICU 2.0
*/
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
# ifdef __STDC_ISO_10646__
# if (U_SIZEOF_WCHAR_T==2)
# define U_WCHAR_IS_UTF16
# elif (U_SIZEOF_WCHAR_T==4)
# define U_WCHAR_IS_UTF32
# endif
# elif defined __UCS2__
# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
# define U_WCHAR_IS_UTF16
# endif
# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
# if (U_SIZEOF_WCHAR_T==4)
# define U_WCHAR_IS_UTF32
# endif
# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
# define U_WCHAR_IS_UTF32
# elif U_PLATFORM_HAS_WIN32_API
# define U_WCHAR_IS_UTF16
# endif
#endif
/* UChar and UChar32 definitions -------------------------------------------- */
/** Number of bytes in a UChar. @stable ICU 2.0 */
#define U_SIZEOF_UCHAR 2
/**
* \def U_CHAR16_IS_TYPEDEF
* If 1, then char16_t is a typedef and not a real type (yet)
* @internal
*/
#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
// for AIX, uchar.h needs to be included
# include <uchar.h>
# define U_CHAR16_IS_TYPEDEF 1
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
# define U_CHAR16_IS_TYPEDEF 1
#else
# define U_CHAR16_IS_TYPEDEF 0
#endif
/**
* \var UChar
*
* The base type for UTF-16 code units and pointers.
* Unsigned 16-bit integer.
* Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
*
* UChar is configurable by defining the macro UCHAR_TYPE
* on the preprocessor or compiler command line:
* -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
* (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
* This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
*
* The default is UChar=char16_t.
*
* C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
*
* In C, char16_t is a simple typedef of uint_least16_t.
* ICU requires uint_least16_t=uint16_t for data memory mapping.
* On macOS, char16_t is not available because the uchar.h standard header is missing.
*
* @stable ICU 4.4
*/
#if 1
// #if 1 is normal. UChar defaults to char16_t in C++.
// For configuration testing of UChar=uint16_t temporarily change this to #if 0.
// The intltest Makefile #defines UCHAR_TYPE=char16_t,
// so we only #define it to uint16_t if it is undefined so far.
#elif !defined(UCHAR_TYPE)
# define UCHAR_TYPE uint16_t
#endif
#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
// Inside the ICU library code, never configurable.
typedef char16_t UChar;
#elif defined(UCHAR_TYPE)
typedef UCHAR_TYPE UChar;
#elif (U_CPLUSPLUS_VERSION >= 11)
typedef char16_t UChar;
#else
typedef uint16_t UChar;
#endif
/**
* \var OldUChar
* Default ICU 58 definition of UChar.
* A base type for UTF-16 code units and pointers.
* Unsigned 16-bit integer.
*
* Define OldUChar to be wchar_t if that is 16 bits wide.
* If wchar_t is not 16 bits wide, then define UChar to be uint16_t.
*
* This makes the definition of OldUChar platform-dependent
* but allows direct string type compatibility with platforms with
* 16-bit wchar_t types.
*
* This is how UChar was defined in ICU 58, for transition convenience.
* Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
* The current UChar responds to UCHAR_TYPE but OldUChar does not.
*
* @stable ICU 59
*/
#if U_SIZEOF_WCHAR_T==2
typedef wchar_t OldUChar;
#elif defined(__CHAR16_TYPE__)
typedef __CHAR16_TYPE__ OldUChar;
#else
typedef uint16_t OldUChar;
#endif
/**
* Define UChar32 as a type for single Unicode code points.
* UChar32 is a signed 32-bit integer (same as int32_t).
*
* The Unicode code point range is 0..0x10ffff.
* All other values (negative or >=0x110000) are illegal as Unicode code points.
* They may be used as sentinel values to indicate "done", "error"
* or similar non-code point conditions.
*
* Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
* to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
* or else to be uint32_t.
* That is, the definition of UChar32 was platform-dependent.
*
* @see U_SENTINEL
* @stable ICU 2.4
*/
typedef int32_t UChar32;
/**
* This value is intended for sentinel values for APIs that
* (take or) return single code points (UChar32).
* It is outside of the Unicode code point range 0..0x10ffff.
*
* For example, a "done" or "error" value in a new API
* could be indicated with U_SENTINEL.
*
* ICU APIs designed before ICU 2.4 usually define service-specific "done"
* values, mostly 0xffff.
* Those may need to be distinguished from
* actual U+ffff text contents by calling functions like
* CharacterIterator::hasNext() or UnicodeString::length().
*
* @return -1
* @see UChar32
* @stable ICU 2.4
*/
#define U_SENTINEL (-1)
#include "unicode/urename.h"
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,324 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 2002-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: uobject.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002jun26
* created by: Markus W. Scherer
*/
#ifndef __UOBJECT_H__
#define __UOBJECT_H__
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
#include "unicode/platform.h"
/**
* \file
* \brief C++ API: Common ICU base class UObject.
*/
/**
* \def U_NO_THROW
* Since ICU 64, use U_NOEXCEPT instead.
*
* Previously, define this to define the throw() specification so
* certain functions do not throw any exceptions
*
* UMemory operator new methods should have the throw() specification
* appended to them, so that the compiler adds the additional NULL check
* before calling constructors. Without, if <code>operator new</code> returns NULL the
* constructor is still called, and if the constructor references member
* data, (which it typically does), the result is a segmentation violation.
*
* @stable ICU 4.2. Since ICU 64, Use U_NOEXCEPT instead. See ICU-20422.
*/
#ifndef U_NO_THROW
#define U_NO_THROW U_NOEXCEPT
#endif
/*===========================================================================*/
/* UClassID-based RTTI */
/*===========================================================================*/
/**
* UClassID is used to identify classes without using the compiler's RTTI.
* This was used before C++ compilers consistently supported RTTI.
* ICU 4.6 requires compiler RTTI to be turned on.
*
* Each class hierarchy which needs
* to implement polymorphic clone() or operator==() defines two methods,
* described in detail below. UClassID values can be compared using
* operator==(). Nothing else should be done with them.
*
* \par
* In class hierarchies that implement "poor man's RTTI",
* each concrete subclass implements getDynamicClassID() in the same way:
*
* \code
* class Derived {
* public:
* virtual UClassID getDynamicClassID() const
* { return Derived::getStaticClassID(); }
* }
* \endcode
*
* Each concrete class implements getStaticClassID() as well, which allows
* clients to test for a specific type.
*
* \code
* class Derived {
* public:
* static UClassID U_EXPORT2 getStaticClassID();
* private:
* static char fgClassID;
* }
*
* // In Derived.cpp:
* UClassID Derived::getStaticClassID()
* { return (UClassID)&Derived::fgClassID; }
* char Derived::fgClassID = 0; // Value is irrelevant
* \endcode
* @stable ICU 2.0
*/
typedef void* UClassID;
U_NAMESPACE_BEGIN
/**
* UMemory is the common ICU base class.
* All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
*
* This is primarily to make it possible and simple to override the
* C++ memory management by adding new/delete operators to this base class.
*
* To override ALL ICU memory management, including that from plain C code,
* replace the allocation functions declared in cmemory.h
*
* UMemory does not contain any virtual functions.
* Common "boilerplate" functions are defined in UObject.
*
* @stable ICU 2.4
*/
class U_COMMON_API UMemory {
public:
/* test versions for debugging shaper heap memory problems */
#ifdef SHAPER_MEMORY_DEBUG
static void * NewArray(int size, int count);
static void * GrowArray(void * array, int newSize );
static void FreeArray(void * array );
#endif
#if U_OVERRIDE_CXX_ALLOCATION
/**
* Override for ICU4C C++ memory management.
* simple, non-class types are allocated using the macros in common/cmemory.h
* (uprv_malloc(), uprv_free(), uprv_realloc());
* they or something else could be used here to implement C++ new/delete
* for ICU4C C++ classes
* @stable ICU 2.4
*/
static void * U_EXPORT2 operator new(size_t size) U_NOEXCEPT;
/**
* Override for ICU4C C++ memory management.
* See new().
* @stable ICU 2.4
*/
static void * U_EXPORT2 operator new[](size_t size) U_NOEXCEPT;
/**
* Override for ICU4C C++ memory management.
* simple, non-class types are allocated using the macros in common/cmemory.h
* (uprv_malloc(), uprv_free(), uprv_realloc());
* they or something else could be used here to implement C++ new/delete
* for ICU4C C++ classes
* @stable ICU 2.4
*/
static void U_EXPORT2 operator delete(void *p) U_NOEXCEPT;
/**
* Override for ICU4C C++ memory management.
* See delete().
* @stable ICU 2.4
*/
static void U_EXPORT2 operator delete[](void *p) U_NOEXCEPT;
#if U_HAVE_PLACEMENT_NEW
/**
* Override for ICU4C C++ memory management for STL.
* See new().
* @stable ICU 2.6
*/
static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NOEXCEPT { return ptr; }
/**
* Override for ICU4C C++ memory management for STL.
* See delete().
* @stable ICU 2.6
*/
static inline void U_EXPORT2 operator delete(void *, void *) U_NOEXCEPT {}
#endif /* U_HAVE_PLACEMENT_NEW */
#if U_HAVE_DEBUG_LOCATION_NEW
/**
* This method overrides the MFC debug version of the operator new
*
* @param size The requested memory size
* @param file The file where the allocation was requested
* @param line The line where the allocation was requested
*/
static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NOEXCEPT;
/**
* This method provides a matching delete for the MFC debug new
*
* @param p The pointer to the allocated memory
* @param file The file where the allocation was requested
* @param line The line where the allocation was requested
*/
static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NOEXCEPT;
#endif /* U_HAVE_DEBUG_LOCATION_NEW */
#endif /* U_OVERRIDE_CXX_ALLOCATION */
/*
* Assignment operator not declared. The compiler will provide one
* which does nothing since this class does not contain any data members.
* API/code coverage may show the assignment operator as present and
* untested - ignore.
* Subclasses need this assignment operator if they use compiler-provided
* assignment operators of their own. An alternative to not declaring one
* here would be to declare and empty-implement a protected or public one.
UMemory &UMemory::operator=(const UMemory &);
*/
};
/**
* UObject is the common ICU "boilerplate" class.
* UObject inherits UMemory (starting with ICU 2.4),
* and all other public ICU C++ classes
* are derived from UObject (starting with ICU 2.2).
*
* UObject contains common virtual functions, in particular a virtual destructor.
*
* The clone() function is not available in UObject because it is not
* implemented by all ICU classes.
* Many ICU services provide a clone() function for their class trees,
* defined on the service's C++ base class
* (which itself is a subclass of UObject).
*
* @stable ICU 2.2
*/
class U_COMMON_API UObject : public UMemory {
public:
/**
* Destructor.
*
* @stable ICU 2.2
*/
virtual ~UObject();
/**
* ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
* The base class implementation returns a dummy value.
*
* Use compiler RTTI rather than ICU's "poor man's RTTI".
* Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI".
*
* @stable ICU 2.2
*/
virtual UClassID getDynamicClassID() const;
protected:
// the following functions are protected to prevent instantiation and
// direct use of UObject itself
// default constructor
// inline UObject() {}
// copy constructor
// inline UObject(const UObject &other) {}
#if 0
// TODO Sometime in the future. Implement operator==().
// (This comment inserted in 2.2)
// some or all of the following "boilerplate" functions may be made public
// in a future ICU4C release when all subclasses implement them
// assignment operator
// (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
// commented out because the implementation is the same as a compiler's default
// UObject &operator=(const UObject &other) { return *this; }
// comparison operators
virtual inline UBool operator==(const UObject &other) const { return this==&other; }
inline UBool operator!=(const UObject &other) const { return !operator==(other); }
// clone() commented out from the base class:
// some compilers do not support co-variant return types
// (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
// see also UObject class documentation.
// virtual UObject *clone() const;
#endif
/*
* Assignment operator not declared. The compiler will provide one
* which does nothing since this class does not contain any data members.
* API/code coverage may show the assignment operator as present and
* untested - ignore.
* Subclasses need this assignment operator if they use compiler-provided
* assignment operators of their own. An alternative to not declaring one
* here would be to declare and empty-implement a protected or public one.
UObject &UObject::operator=(const UObject &);
*/
};
#ifndef U_HIDE_INTERNAL_API
/**
* This is a simple macro to add ICU RTTI to an ICU object implementation.
* This does not go into the header. This should only be used in *.cpp files.
*
* @param myClass The name of the class that needs RTTI defined.
* @internal
*/
#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
UClassID U_EXPORT2 myClass::getStaticClassID() { \
static char classID = 0; \
return (UClassID)&classID; \
} \
UClassID myClass::getDynamicClassID() const \
{ return myClass::getStaticClassID(); }
/**
* This macro adds ICU RTTI to an ICU abstract class implementation.
* This macro should be invoked in *.cpp files. The corresponding
* header should declare getStaticClassID.
*
* @param myClass The name of the class that needs RTTI defined.
* @internal
*/
#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
UClassID U_EXPORT2 myClass::getStaticClassID() { \
static char classID = 0; \
return (UClassID)&classID; \
}
#endif /* U_HIDE_INTERNAL_API */
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,734 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 1999-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utf16.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep09
* created by: Markus W. Scherer
*/
/**
* \file
* \brief C API: 16-bit Unicode handling macros
*
* This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
*
* For more information see utf.h and the ICU User Guide Strings chapter
* (https://unicode-org.github.io/icu/userguide/strings).
*
* <em>Usage:</em>
* ICU coding guidelines for if() statements should be followed when using these macros.
* Compound statements (curly braces {}) must be used for if-else-while...
* bodies and all macro statements should be terminated with semicolon.
*/
#ifndef __UTF16_H__
#define __UTF16_H__
#include <stdbool.h>
#include "unicode/umachine.h"
#ifndef __UTF_H__
# include "unicode/utf.h"
#endif
/* single-code point definitions -------------------------------------------- */
/**
* Does this code unit alone encode a code point (BMP, not a surrogate)?
* @param c 16-bit code unit
* @return true or false
* @stable ICU 2.4
*/
#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
/**
* Is this code unit a lead surrogate (U+d800..U+dbff)?
* @param c 16-bit code unit
* @return true or false
* @stable ICU 2.4
*/
#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
/**
* Is this code unit a trail surrogate (U+dc00..U+dfff)?
* @param c 16-bit code unit
* @return true or false
* @stable ICU 2.4
*/
#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
/**
* Is this code unit a surrogate (U+d800..U+dfff)?
* @param c 16-bit code unit
* @return true or false
* @stable ICU 2.4
*/
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
/**
* Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
* is it a lead surrogate?
* @param c 16-bit code unit
* @return true or false
* @stable ICU 2.4
*/
#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
/**
* Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
* is it a trail surrogate?
* @param c 16-bit code unit
* @return true or false
* @stable ICU 4.2
*/
#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
/**
* Helper constant for U16_GET_SUPPLEMENTARY.
* @internal
*/
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
/**
* Get a supplementary code point value (U+10000..U+10ffff)
* from its lead and trail surrogates.
* The result is undefined if the input values are not
* lead and trail surrogates.
*
* @param lead lead surrogate (U+d800..U+dbff)
* @param trail trail surrogate (U+dc00..U+dfff)
* @return supplementary code point (U+10000..U+10ffff)
* @stable ICU 2.4
*/
#define U16_GET_SUPPLEMENTARY(lead, trail) \
(((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
/**
* Get the lead surrogate (0xd800..0xdbff) for a
* supplementary code point (0x10000..0x10ffff).
* @param supplementary 32-bit code point (U+10000..U+10ffff)
* @return lead surrogate (U+d800..U+dbff) for supplementary
* @stable ICU 2.4
*/
#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
/**
* Get the trail surrogate (0xdc00..0xdfff) for a
* supplementary code point (0x10000..0x10ffff).
* @param supplementary 32-bit code point (U+10000..U+10ffff)
* @return trail surrogate (U+dc00..U+dfff) for supplementary
* @stable ICU 2.4
*/
#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
/**
* How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
* The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
* @param c 32-bit code point
* @return 1 or 2
* @stable ICU 2.4
*/
#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
/**
* The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
* @return 2
* @stable ICU 2.4
*/
#define U16_MAX_LENGTH 2
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
* The result is undefined if the offset points to a single, unpaired surrogate.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_GET
* @stable ICU 2.4
*/
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
if(U16_IS_SURROGATE_LEAD(c)) { \
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
} else { \
(c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to a single, unpaired surrogate, then
* c is set to that unpaired surrogate.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_GET_UNSAFE
* @stable ICU 2.4
*/
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c)) { \
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} \
} else { \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to a single, unpaired surrogate, then
* c is set to U+FFFD.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_GET_UNSAFE
* @stable ICU 60
*/
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c)) { \
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} else { \
(c)=0xfffd; \
} \
} else { \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} else { \
(c)=0xfffd; \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/* definitions with forward iteration --------------------------------------- */
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate, then that itself
* will be returned as the code point.
* The result is undefined if the offset points to a single, unpaired lead surrogate.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_NEXT
* @stable ICU 2.4
*/
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_LEAD(c)) { \
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate or
* to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_NEXT_UNSAFE
* @stable ICU 2.4
*/
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_LEAD(c)) { \
uint16_t __c2; \
if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
++(i); \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate or
* to a single, unpaired lead surrogate, then c is set to U+FFFD.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_NEXT_UNSAFE
* @stable ICU 60
*/
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
++(i); \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} else { \
(c)=0xfffd; \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 or 2 code units.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
* Otherwise, the result is undefined.
*
* @param s const UChar * string buffer
* @param i string offset
* @param c code point to append
* @see U16_APPEND
* @stable ICU 2.4
*/
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
if((uint32_t)(c)<=0xffff) { \
(s)[(i)++]=(uint16_t)(c); \
} else { \
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 or 2 code units.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Safe" macro, checks for a valid code point.
* If a surrogate pair is written, checks for sufficient space in the string.
* If the code point is not valid or a trail surrogate does not fit,
* then isError is set to true.
*
* @param s const UChar * string buffer
* @param i string offset, must be i<capacity
* @param capacity size of the string buffer
* @param c code point to append
* @param isError output UBool set to true if an error occurs, otherwise not modified
* @see U16_APPEND_UNSAFE
* @stable ICU 2.4
*/
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
if((uint32_t)(c)<=0xffff) { \
(s)[(i)++]=(uint16_t)(c); \
} else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
} else /* c>0x10ffff or not enough space */ { \
(isError)=true; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @see U16_FWD_1
* @stable ICU 2.4
*/
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_LEAD((s)[(i)++])) { \
++(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @see U16_FWD_1_UNSAFE
* @stable ICU 2.4
*/
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
++(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @param n number of code points to skip
* @see U16_FWD_N
* @stable ICU 2.4
*/
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0) { \
U16_FWD_1_UNSAFE(s, i); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const UChar * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param n number of code points to skip
* @see U16_FWD_N_UNSAFE
* @stable ICU 2.4
*/
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
U16_FWD_1(s, i, length); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to the trail surrogate of a surrogate pair,
* then the offset is decremented.
* Otherwise, it is not modified.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @see U16_SET_CP_START
* @stable ICU 2.4
*/
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[i])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to the trail surrogate of a surrogate pair,
* then the offset is decremented.
* Otherwise, it is not modified.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i
* @see U16_SET_CP_START_UNSAFE
* @stable ICU 2.4
*/
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/* definitions with backward iteration -------------------------------------- */
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate, then that itself
* will be returned as the code point.
* The result is undefined if the offset is behind a single, unpaired trail surrogate.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_PREV
* @stable ICU 2.4
*/
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_TRAIL(c)) { \
(c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate or behind a single, unpaired
* trail surrogate, then c is set to that unpaired surrogate.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @param c output UChar32 variable
* @see U16_PREV_UNSAFE
* @stable ICU 2.4
*/
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_TRAIL(c)) { \
uint16_t __c2; \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
--(i); \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate or behind a single, unpaired
* trail surrogate, then c is set to U+FFFD.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @param c output UChar32 variable
* @see U16_PREV_UNSAFE
* @stable ICU 60
*/
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
--(i); \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} else { \
(c)=0xfffd; \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @see U16_BACK_1
* @stable ICU 2.4
*/
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[--(i)])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @see U16_BACK_1_UNSAFE
* @stable ICU 2.4
*/
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @param n number of code points to skip
* @see U16_BACK_N
* @stable ICU 2.4
*/
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0) { \
U16_BACK_1_UNSAFE(s, i); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* @param s const UChar * string
* @param start start of string
* @param i string offset, must be start<i
* @param n number of code points to skip
* @see U16_BACK_N_UNSAFE
* @stable ICU 2.4
*/
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0 && (i)>(start)) { \
U16_BACK_1(s, start, i); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind the lead surrogate of a surrogate pair,
* then the offset is incremented.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @see U16_SET_CP_LIMIT
* @stable ICU 2.4
*/
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_LEAD((s)[(i)-1])) { \
++(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind the lead surrogate of a surrogate pair,
* then the offset is incremented.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const UChar * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, start<=i<=length
* @param length int32_t string length
* @see U16_SET_CP_LIMIT_UNSAFE
* @stable ICU 2.4
*/
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
++(i); \
} \
} UPRV_BLOCK_MACRO_END
#endif

View File

@ -1,882 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 1999-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utf8.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep13
* created by: Markus W. Scherer
*/
/**
* \file
* \brief C API: 8-bit Unicode handling macros
*
* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
*
* For more information see utf.h and the ICU User Guide Strings chapter
* (https://unicode-org.github.io/icu/userguide/strings).
*
* <em>Usage:</em>
* ICU coding guidelines for if() statements should be followed when using these macros.
* Compound statements (curly braces {}) must be used for if-else-while...
* bodies and all macro statements should be terminated with semicolon.
*/
#ifndef __UTF8_H__
#define __UTF8_H__
#include <stdbool.h>
#include "unicode/umachine.h"
#ifndef __UTF_H__
# include "unicode/utf.h"
#endif
/* internal definitions ----------------------------------------------------- */
/**
* Counts the trail bytes for a UTF-8 lead byte.
* Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
* leadByte might be evaluated multiple times.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
*
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
* @internal
*/
#define U8_COUNT_TRAIL_BYTES(leadByte) \
(U8_IS_LEAD(leadByte) ? \
((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0)
/**
* Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
* Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
* leadByte might be evaluated multiple times.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
*
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
* @internal
*/
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
(((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
/**
* Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
* @internal
*/
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
/**
* Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
* Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
* Lead byte E0..EF bits 3..0 are used as byte index,
* first trail byte bits 7..5 are used as bit index into that byte.
* @see U8_IS_VALID_LEAD3_AND_T1
* @internal
*/
#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
/**
* Internal 3-byte UTF-8 validity check.
* Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
* @internal
*/
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
/**
* Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
* Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
* First trail byte bits 7..4 are used as byte index,
* lead byte F0..F4 bits 2..0 are used as bit index into that byte.
* @see U8_IS_VALID_LEAD4_AND_T1
* @internal
*/
#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
/**
* Internal 4-byte UTF-8 validity check.
* Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
* @internal
*/
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
/**
* Function for handling "next code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_CAPI UChar32 U_EXPORT2
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
/**
* Function for handling "append code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_CAPI int32_t U_EXPORT2
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
/**
* Function for handling "previous code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_CAPI UChar32 U_EXPORT2
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
/**
* Function for handling "skip backward one code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_CAPI int32_t U_EXPORT2
utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
/* single-code point definitions -------------------------------------------- */
/**
* Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
* @param c 8-bit code unit (byte)
* @return true or false
* @stable ICU 2.4
*/
#define U8_IS_SINGLE(c) (((c)&0x80)==0)
/**
* Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
* @param c 8-bit code unit (byte)
* @return true or false
* @stable ICU 2.4
*/
#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32)
// 0x32=0xf4-0xc2
/**
* Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
* @param c 8-bit code unit (byte)
* @return true or false
* @stable ICU 2.4
*/
#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40)
/**
* How many code units (bytes) are used for the UTF-8 encoding
* of this Unicode code point?
* @param c 32-bit code point
* @return 1..4, or 0 if c is a surrogate or not a Unicode code point
* @stable ICU 2.4
*/
#define U8_LENGTH(c) \
((uint32_t)(c)<=0x7f ? 1 : \
((uint32_t)(c)<=0x7ff ? 2 : \
((uint32_t)(c)<=0xd7ff ? 3 : \
((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
((uint32_t)(c)<=0xffff ? 3 : 4)\
) \
) \
) \
)
/**
* The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
* @return 4
* @stable ICU 2.4
*/
#define U8_MAX_LENGTH 4
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
* The result is undefined if the offset points to an illegal UTF-8
* byte sequence.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_GET
* @stable ICU 2.4
*/
#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
int32_t _u8_get_unsafe_index=(int32_t)(i); \
U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to an illegal UTF-8 byte sequence, then
* c is set to a negative value.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset
* @param i int32_t string offset, must be start<=i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_GET_UNSAFE
* @stable ICU 2.4
*/
#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
int32_t _u8_get_index=(i); \
U8_SET_CP_START(s, start, _u8_get_index); \
U8_NEXT(s, _u8_get_index, length, c); \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to an illegal UTF-8 byte sequence, then
* c is set to U+FFFD.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_GET() if that distinction is important.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset
* @param i int32_t string offset, must be start<=i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_GET
* @stable ICU 51
*/
#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
int32_t _u8_get_index=(i); \
U8_SET_CP_START(s, start, _u8_get_index); \
U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
} UPRV_BLOCK_MACRO_END
/* definitions with forward iteration --------------------------------------- */
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* The result is undefined if the offset points to a trail byte
* or an illegal UTF-8 sequence.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_NEXT
* @stable ICU 2.4
*/
#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
if((c)<0xe0) { \
(c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
} else if((c)<0xf0) { \
/* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
(c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
(i)+=2; \
} else { \
(c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
(i)+=3; \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
* c is set to a negative value.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_NEXT_UNSAFE
* @stable ICU 2.4
*/
#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
* c is set to U+FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_NEXT() if that distinction is important.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_NEXT
* @stable ICU 51
*/
#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
/** @internal */
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
uint8_t __t = 0; \
if((i)!=(length) && \
/* fetch/validate/assemble all but last trail byte */ \
((c)>=0xe0 ? \
((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
(__t&=0x3f, 1) \
: /* U+10000..U+10FFFF */ \
((c)-=0xf0)<=4 && \
U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
(__t=(s)[i]-0x80)<=0x3f) && \
/* valid second-to-last trail byte */ \
((c)=((c)<<6)|__t, ++(i)!=(length)) \
: /* U+0080..U+07FF */ \
(c)>=0xc2 && ((c)&=0x1f, 1)) && \
/* last trail byte */ \
(__t=(s)[i]-0x80)<=0x3f && \
((c)=((c)<<6)|__t, ++(i), 1)) { \
} else { \
(c)=(sub); /* ill-formed*/ \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 to 4 bytes.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
* Otherwise, the result is undefined.
*
* @param s const uint8_t * string buffer
* @param i string offset
* @param c code point to append
* @see U8_APPEND
* @stable ICU 2.4
*/
#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
uint32_t __uc=(c); \
if(__uc<=0x7f) { \
(s)[(i)++]=(uint8_t)__uc; \
} else { \
if(__uc<=0x7ff) { \
(s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
} else { \
if(__uc<=0xffff) { \
(s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
} else { \
(s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
(s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
} \
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
} \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 to 4 bytes.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Safe" macro, checks for a valid code point.
* If a non-ASCII code point is written, checks for sufficient space in the string.
* If the code point is not valid or trail bytes do not fit,
* then isError is set to true.
*
* @param s const uint8_t * string buffer
* @param i int32_t string offset, must be i<capacity
* @param capacity int32_t size of the string buffer
* @param c UChar32 code point to append
* @param isError output UBool set to true if an error occurs, otherwise not modified
* @see U8_APPEND_UNSAFE
* @stable ICU 2.4
*/
#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
uint32_t __uc=(c); \
if(__uc<=0x7f) { \
(s)[(i)++]=(uint8_t)__uc; \
} else if(__uc<=0x7ff && (i)+1<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
(s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else { \
(isError)=true; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_FWD_1
* @stable ICU 2.4
*/
#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
(i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @see U8_FWD_1_UNSAFE
* @stable ICU 2.4
*/
#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
uint8_t __b=(s)[(i)++]; \
if(U8_IS_LEAD(__b) && (i)!=(length)) { \
uint8_t __t1=(s)[i]; \
if((0xe0<=__b && __b<0xf0)) { \
if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
++(i); \
} \
} else if(__b<0xe0) { \
if(U8_IS_TRAIL(__t1)) { \
++(i); \
} \
} else /* c>=0xf0 */ { \
if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
++(i); \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @param n number of code points to skip
* @see U8_FWD_N
* @stable ICU 2.4
*/
#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0) { \
U8_FWD_1_UNSAFE(s, i); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param n number of code points to skip
* @see U8_FWD_N_UNSAFE
* @stable ICU 2.4
*/
#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
U8_FWD_1(s, i, length); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_SET_CP_START
* @stable ICU 2.4
*/
#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
while(U8_IS_TRAIL((s)[i])) { --(i); } \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
*
* "Safe" macro, checks for illegal sequences and for string boundaries.
* Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<=i
* @see U8_SET_CP_START_UNSAFE
* @see U8_TRUNCATE_IF_INCOMPLETE
* @stable ICU 2.4
*/
#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U8_IS_TRAIL((s)[(i)])) { \
(i)=utf8_back1SafeBody(s, start, (i)); \
} \
} UPRV_BLOCK_MACRO_END
/**
* If the string ends with a UTF-8 byte sequence that is valid so far
* but incomplete, then reduce the length of the string to end before
* the lead byte of that incomplete sequence.
* For example, if the string ends with E1 80, the length is reduced by 2.
*
* In all other cases (the string ends with a complete sequence, or it is not
* possible for any further trail byte to extend the trailing sequence)
* the length remains unchanged.
*
* Useful for processing text split across multiple buffers
* (save the incomplete sequence for later)
* and for optimizing iteration
* (check for string length only once per character).
*
* "Safe" macro, checks for illegal sequences and for string boundaries.
* Unlike U8_SET_CP_START(), this macro never reads s[length].
*
* (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param length int32_t string length (usually start<=length)
* @see U8_SET_CP_START
* @stable ICU 61
*/
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
if((length)>(start)) { \
uint8_t __b1=s[(length)-1]; \
if(U8_IS_SINGLE(__b1)) { \
/* common ASCII character */ \
} else if(U8_IS_LEAD(__b1)) { \
--(length); \
} else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
uint8_t __b2=s[(length)-2]; \
if(0xe0<=__b2 && __b2<=0xf4) { \
if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
(length)-=2; \
} \
} else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
uint8_t __b3=s[(length)-3]; \
if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
(length)-=3; \
} \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/* definitions with backward iteration -------------------------------------- */
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* The result is undefined if the offset is behind an illegal UTF-8 sequence.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_PREV
* @stable ICU 2.4
*/
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[--(i)]; \
if(U8_IS_TRAIL(c)) { \
uint8_t __b, __count=1, __shift=6; \
\
/* c is a trail byte */ \
(c)&=0x3f; \
for(;;) { \
__b=(s)[--(i)]; \
if(__b>=0xc0) { \
U8_MASK_LEAD_BYTE(__b, __count); \
(c)|=(UChar32)__b<<__shift; \
break; \
} else { \
(c)|=(UChar32)(__b&0x3f)<<__shift; \
++__count; \
__shift+=6; \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_PREV_UNSAFE
* @stable ICU 2.4
*/
#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[--(i)]; \
if(!U8_IS_SINGLE(c)) { \
(c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_PREV() if that distinction is important.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_PREV
* @stable ICU 51
*/
#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[--(i)]; \
if(!U8_IS_SINGLE(c)) { \
(c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_BACK_1
* @stable ICU 2.4
*/
#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
while(U8_IS_TRAIL((s)[--(i)])) {} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @see U8_BACK_1_UNSAFE
* @stable ICU 2.4
*/
#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U8_IS_TRAIL((s)[--(i)])) { \
(i)=utf8_back1SafeBody(s, start, (i)); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @param n number of code points to skip
* @see U8_BACK_N
* @stable ICU 2.4
*/
#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0) { \
U8_BACK_1_UNSAFE(s, i); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* @param s const uint8_t * string
* @param start int32_t index of the start of the string
* @param i int32_t string offset, must be start<i
* @param n number of code points to skip
* @see U8_BACK_N_UNSAFE
* @stable ICU 2.4
*/
#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0 && (i)>(start)) { \
U8_BACK_1(s, start, i); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind a partial multi-byte sequence,
* then the offset is incremented to behind the whole sequence.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_SET_CP_LIMIT
* @stable ICU 2.4
*/
#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
U8_BACK_1_UNSAFE(s, i); \
U8_FWD_1_UNSAFE(s, i); \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind a partial multi-byte sequence,
* then the offset is incremented to behind the whole sequence.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<=i<=length
* @param length int32_t string length
* @see U8_SET_CP_LIMIT_UNSAFE
* @stable ICU 2.4
*/
#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
if((start)<(i) && ((i)<(length) || (length)<0)) { \
U8_BACK_1(s, start, i); \
U8_FWD_1(s, i, length); \
} \
} UPRV_BLOCK_MACRO_END
#endif

View File

@ -1,732 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1996-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* FILE NAME : UTYPES.H (formerly ptypes.h)
*
* Date Name Description
* 12/11/96 helena Creation.
* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
* uint8, uint16, and uint32.
* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
* well as C++.
* Modified to use memcpy() for uprv_arrayCopy() fns.
* 04/14/97 aliu Added TPlatformUtilities.
* 05/07/97 aliu Added import/export specifiers (replacing the old
* broken EXT_CLASS). Added version number for our
* code. Cleaned up header.
* 6/20/97 helena Java class name change.
* 08/11/98 stephen UErrorCode changed from typedef to enum
* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
* 04/20/99 stephen Cleaned up & reworked for autoconf.
* Renamed to utypes.h.
* 05/05/99 stephen Changed to use <inttypes.h>
* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here.
*******************************************************************************
*/
#ifndef UTYPES_H
#define UTYPES_H
#include "unicode/umachine.h"
#include "unicode/uversion.h"
#include "unicode/uconfig.h"
#include <float.h>
#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
# include "unicode/utf.h"
#endif
/*!
* \file
* \brief Basic definitions for ICU, for both C and C++ APIs
*
* This file defines basic types, constants, and enumerations directly or
* indirectly by including other header files, especially utf.h for the
* basic character and string definitions and umachine.h for consistent
* integer and other types.
*/
/**
* \def U_SHOW_CPLUSPLUS_API
* @internal
*/
#ifdef __cplusplus
# ifndef U_SHOW_CPLUSPLUS_API
# define U_SHOW_CPLUSPLUS_API 1
# endif
#else
# undef U_SHOW_CPLUSPLUS_API
# define U_SHOW_CPLUSPLUS_API 0
#endif
/** @{ API visibility control */
/**
* \def U_HIDE_DRAFT_API
* Define this to 1 to request that draft API be "hidden"
* @internal
*/
/**
* \def U_HIDE_INTERNAL_API
* Define this to 1 to request that internal API be "hidden"
* @internal
*/
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
#define U_HIDE_DRAFT_API 1
#endif
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API)
#define U_HIDE_INTERNAL_API 1
#endif
/** @} */
/*===========================================================================*/
/* ICUDATA naming scheme */
/*===========================================================================*/
/**
* \def U_ICUDATA_TYPE_LETTER
*
* This is a platform-dependent string containing one letter:
* - b for big-endian, ASCII-family platforms
* - l for little-endian, ASCII-family platforms
* - e for big-endian, EBCDIC-family platforms
* This letter is part of the common data file name.
* @stable ICU 2.0
*/
/**
* \def U_ICUDATA_TYPE_LITLETTER
* The non-string form of U_ICUDATA_TYPE_LETTER
* @stable ICU 2.0
*/
#if U_CHARSET_FAMILY
# if U_IS_BIG_ENDIAN
/* EBCDIC - should always be BE */
# define U_ICUDATA_TYPE_LETTER "e"
# define U_ICUDATA_TYPE_LITLETTER e
# else
# error "Don't know what to do with little endian EBCDIC!"
# define U_ICUDATA_TYPE_LETTER "x"
# define U_ICUDATA_TYPE_LITLETTER x
# endif
#else
# if U_IS_BIG_ENDIAN
/* Big-endian ASCII */
# define U_ICUDATA_TYPE_LETTER "b"
# define U_ICUDATA_TYPE_LITLETTER b
# else
/* Little-endian ASCII */
# define U_ICUDATA_TYPE_LETTER "l"
# define U_ICUDATA_TYPE_LITLETTER l
# endif
#endif
/**
* A single string literal containing the icudata stub name. i.e. 'icudt18e' for
* ICU 1.8.x on EBCDIC, etc..
* @stable ICU 2.0
*/
#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
#ifndef U_HIDE_INTERNAL_API
#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */
#define U_USE_USRDATA 0 /**< @internal */
#endif /* U_HIDE_INTERNAL_API */
/**
* U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
* Defined as a literal, not a string.
* Tricky Preprocessor use - ## operator replaces macro parameters with the literal string
* from the corresponding macro invocation, _before_ other macro substitutions.
* Need a nested \#defines to get the actual version numbers rather than
* the literal text U_ICU_VERSION_MAJOR_NUM into the name.
* The net result will be something of the form
* \#define U_ICU_ENTRY_POINT icudt19_dat
* @stable ICU 2.4
*/
#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)
#ifndef U_HIDE_INTERNAL_API
/**
* Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
* @internal
*/
#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)
/**
* Do not use.
* @internal
*/
#ifndef U_DEF_ICUDATA_ENTRY_POINT
/* affected by symbol renaming. See platform.h */
#ifndef U_LIB_SUFFIX_C_NAME
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
#else
#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat
#endif
#endif
#endif /* U_HIDE_INTERNAL_API */
/**
* \def NULL
* Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C.
* @stable ICU 2.0
*/
#ifndef NULL
#ifdef __cplusplus
#define NULL nullptr
#else
#define NULL ((void *)0)
#endif
#endif
/*===========================================================================*/
/* Calendar/TimeZone data types */
/*===========================================================================*/
/**
* Date and Time data type.
* This is a primitive data type that holds the date and time
* as the number of milliseconds since 1970-jan-01, 00:00 UTC.
* UTC leap seconds are ignored.
* @stable ICU 2.0
*/
typedef double UDate;
/** The number of milliseconds per second @stable ICU 2.0 */
#define U_MILLIS_PER_SECOND (1000)
/** The number of milliseconds per minute @stable ICU 2.0 */
#define U_MILLIS_PER_MINUTE (60000)
/** The number of milliseconds per hour @stable ICU 2.0 */
#define U_MILLIS_PER_HOUR (3600000)
/** The number of milliseconds per day @stable ICU 2.0 */
#define U_MILLIS_PER_DAY (86400000)
/**
* Maximum UDate value
* @stable ICU 4.8
*/
#define U_DATE_MAX DBL_MAX
/**
* Minimum UDate value
* @stable ICU 4.8
*/
#define U_DATE_MIN -U_DATE_MAX
/*===========================================================================*/
/* Shared library/DLL import-export API control */
/*===========================================================================*/
/*
* Control of symbol import/export.
* ICU is separated into three libraries.
*/
/**
* \def U_COMBINED_IMPLEMENTATION
* Set to export library symbols from inside the ICU library
* when all of ICU is in a single library.
* This can be set as a compiler option while building ICU, and it
* needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
* @stable ICU 2.0
*/
/**
* \def U_DATA_API
* Set to export library symbols from inside the stubdata library,
* and to import them from outside.
* @stable ICU 3.0
*/
/**
* \def U_COMMON_API
* Set to export library symbols from inside the common library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_I18N_API
* Set to export library symbols from inside the i18n library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_LAYOUT_API
* Set to export library symbols from inside the layout engine library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_LAYOUTEX_API
* Set to export library symbols from inside the layout extensions library,
* and to import them from outside.
* @stable ICU 2.6
*/
/**
* \def U_IO_API
* Set to export library symbols from inside the ustdio library,
* and to import them from outside.
* @stable ICU 2.0
*/
/**
* \def U_TOOLUTIL_API
* Set to export library symbols from inside the toolutil library,
* and to import them from outside.
* @stable ICU 3.4
*/
#ifdef U_IN_DOXYGEN
// This definition is required when generating the API docs.
#define U_COMBINED_IMPLEMENTATION 1
#endif
#if defined(U_COMBINED_IMPLEMENTATION)
#define U_DATA_API U_EXPORT
#define U_COMMON_API U_EXPORT
#define U_I18N_API U_EXPORT
#define U_LAYOUT_API U_EXPORT
#define U_LAYOUTEX_API U_EXPORT
#define U_IO_API U_EXPORT
#define U_TOOLUTIL_API U_EXPORT
#elif defined(U_STATIC_IMPLEMENTATION)
#define U_DATA_API
#define U_COMMON_API
#define U_I18N_API
#define U_LAYOUT_API
#define U_LAYOUTEX_API
#define U_IO_API
#define U_TOOLUTIL_API
#elif defined(U_COMMON_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_EXPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_I18N_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_EXPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_LAYOUT_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_EXPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_LAYOUTEX_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_EXPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_IO_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_EXPORT
#define U_TOOLUTIL_API U_IMPORT
#elif defined(U_TOOLUTIL_IMPLEMENTATION)
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_EXPORT
#else
#define U_DATA_API U_IMPORT
#define U_COMMON_API U_IMPORT
#define U_I18N_API U_IMPORT
#define U_LAYOUT_API U_IMPORT
#define U_LAYOUTEX_API U_IMPORT
#define U_IO_API U_IMPORT
#define U_TOOLUTIL_API U_IMPORT
#endif
/**
* \def U_STANDARD_CPP_NAMESPACE
* Control of C++ Namespace
* @stable ICU 2.0
*/
#ifdef __cplusplus
#define U_STANDARD_CPP_NAMESPACE ::
#else
#define U_STANDARD_CPP_NAMESPACE
#endif
/*===========================================================================*/
/* UErrorCode */
/*===========================================================================*/
/**
* Standard ICU4C error code type, a substitute for exceptions.
*
* Initialize the UErrorCode with U_ZERO_ERROR, and check for success or
* failure using U_SUCCESS() or U_FAILURE():
*
* UErrorCode errorCode = U_ZERO_ERROR;
* // call ICU API that needs an error code parameter.
* if (U_FAILURE(errorCode)) {
* // An error occurred. Handle it here.
* }
*
* C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a
* suitable subclass.
*
* For more information, see:
* http://icu-project.org/userguide/conventions
*
* Note: By convention, ICU functions that take a reference (C++) or a pointer
* (C) to a UErrorCode first test:
*
* if (U_FAILURE(errorCode)) { return immediately; }
*
* so that in a chain of such functions the first one that sets an error code
* causes the following ones to not perform any operations.
*
* @stable ICU 2.0
*/
typedef enum UErrorCode {
/* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
* and is that way because VC++ debugger displays first encountered constant,
* which is not the what the code is used for
*/
U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */
U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */
U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */
U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */
U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UErrorCode warning value.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_ERROR_WARNING_LIMIT,
#endif // U_HIDE_DEPRECATED_API
U_ZERO_ERROR = 0, /**< No error, no warning. */
U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */
U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */
U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */
U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */
U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */
U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */
U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */
U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */
U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
It is very possible that a circular alias definition has occurred */
U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */
U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */
U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */
#ifndef U_HIDE_DRAFT_API
/**
* The input is impractically long for an operation.
* It is rejected because it may lead to problems such as excessive
* processing time, stack depth, or heap memory requirements.
*
* @draft ICU 68
*/
U_INPUT_TOO_LONG_ERROR = 31,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest standard error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_STANDARD_ERROR_LIMIT = 32,
#endif // U_HIDE_DEPRECATED_API
/*
* Error codes in the range 0x10000 0x10100 are reserved for Transliterator.
*/
U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */
U_MALFORMED_RULE, /**< Elements of a rule are misplaced */
U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/
U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */
U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/
U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */
U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */
U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */
U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */
U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */
U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */
U_MISSING_OPERATOR, /**< A rule contains no operator */
U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */
U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */
U_MULTIPLE_CURSORS, /**< More than one cursor */
U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */
U_TRAILING_BACKSLASH, /**< A dangling backslash */
U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */
U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */
U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */
U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */
U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */
U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */
U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */
U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */
U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */
U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */
U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */
U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal Transliterator error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_PARSE_ERROR_LIMIT,
#endif // U_HIDE_DEPRECATED_API
/*
* Error codes in the range 0x10100 0x10200 are reserved for the formatting API.
*/
U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */
U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */
U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */
U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */
U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */
U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */
U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */
U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */
U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */
U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */
U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */
U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */
U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */
U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */
U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */
U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @stable ICU 61 */
U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @stable ICU 62 */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal formatting API error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_FMT_PARSE_ERROR_LIMIT = 0x10114,
#endif // U_HIDE_DEPRECATED_API
/*
* Error codes in the range 0x10200 0x102ff are reserved for BreakIterator.
*/
U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */
U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */
U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
U_BRK_UNCLOSED_SET, /**< UnicodeSet writing an RBBI rule missing a closing ']'. */
U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal BreakIterator error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_BRK_ERROR_LIMIT,
#endif // U_HIDE_DEPRECATED_API
/*
* Error codes in the range 0x10300-0x103ff are reserved for regular expression related errors.
*/
U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */
U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */
U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */
U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */
U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */
U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */
U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */
U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */
U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */
U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */
U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */
U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */
U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/
#ifndef U_HIDE_DEPRECATED_API
U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */
#endif /* U_HIDE_DEPRECATED_API */
U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */
U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */
U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @stable ICU 55 */
U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @stable ICU 55 */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal regular expression error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3,
#endif // U_HIDE_DEPRECATED_API
/*
* Error codes in the range 0x10400-0x104ff are reserved for IDNA related error codes.
*/
U_IDNA_PROHIBITED_ERROR=0x10400,
U_IDNA_ERROR_START=0x10400,
U_IDNA_UNASSIGNED_ERROR,
U_IDNA_CHECK_BIDI_ERROR,
U_IDNA_STD3_ASCII_RULES_ERROR,
U_IDNA_ACE_PREFIX_ERROR,
U_IDNA_VERIFICATION_ERROR,
U_IDNA_LABEL_TOO_LONG_ERROR,
U_IDNA_ZERO_LENGTH_LABEL_ERROR,
U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal IDNA error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_IDNA_ERROR_LIMIT,
#endif // U_HIDE_DEPRECATED_API
/*
* Aliases for StringPrep
*/
U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
/*
* Error codes in the range 0x10500-0x105ff are reserved for Plugin related error codes.
*/
U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */
U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */
U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal plug-in error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_PLUGIN_ERROR_LIMIT,
#endif // U_HIDE_DEPRECATED_API
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT
#endif // U_HIDE_DEPRECATED_API
} UErrorCode;
/* Use the following to determine if an UErrorCode represents */
/* operational success or failure. */
#ifdef __cplusplus
/**
* Does the error code indicate success?
* @stable ICU 2.0
*/
static
inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
/**
* Does the error code indicate a failure?
* @stable ICU 2.0
*/
static
inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
#else
/**
* Does the error code indicate success?
* @stable ICU 2.0
*/
# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
/**
* Does the error code indicate a failure?
* @stable ICU 2.0
*/
# define U_FAILURE(x) ((x)>U_ZERO_ERROR)
#endif
/**
* Return a string for a UErrorCode value.
* The string will be the same as the name of the error code constant
* in the UErrorCode enum above.
* @stable ICU 2.0
*/
U_CAPI const char * U_EXPORT2
u_errorName(UErrorCode code);
#endif /* _UTYPES */

View File

@ -1,198 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2000-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*
* file name: uvernum.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* Created by: Vladimir Weinstein
* Updated by: Steven R. Loomis
*
*/
/**
* \file
* \brief C API: definitions of ICU version numbers
*
* This file is included by uversion.h and other files. This file contains only
* macros and definitions. The actual version numbers are defined here.
*/
/*
* IMPORTANT: When updating version, the following things need to be done:
* source/common/unicode/uvernum.h - this file: update major, minor,
* patchlevel, suffix, version, short version constants, namespace,
* renaming macro, and copyright
*
* The following files need to be updated as well, which can be done
* by running the UNIX makefile target 'update-windows-makefiles' in icu/source.
*
*
* source/common/common_uwp.vcxproj
* source/common/common.vcxproj - update 'Output file name' on the link tab so
* that it contains the new major/minor combination
* source/i18n/i18n.vcxproj - same as for the common.vcxproj
* source/i18n/i18n_uwp.vcxproj - same as for the common_uwp.vcxproj
* source/layoutex/layoutex.vcproj - same
* source/stubdata/stubdata.vcproj - same as for the common.vcxproj
* source/io/io.vcproj - same as for the common.vcxproj
* source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
* the new major/minor combination and the Unicode version.
*/
#ifndef UVERNUM_H
#define UVERNUM_H
/** The standard copyright notice that gets compiled into each library.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_COPYRIGHT_STRING \
" Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html "
/** The current ICU major version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_MAJOR_NUM 69
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_MINOR_NUM 1
/** The current ICU patchlevel version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_PATCHLEVEL_NUM 0
/** The current ICU build level version as an integer.
* This value is for use by ICU clients. It defaults to 0.
* @stable ICU 4.0
*/
#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
#define U_ICU_VERSION_BUILDLEVEL_NUM 0
#endif
/** Glued version suffix for renamers
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SUFFIX _69
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
* @internal
*/
/**
* \def U_DEF_ICU_ENTRY_POINT_RENAME
* @internal
*/
/** Glued version suffix function for renamers
* This value will change in the subsequent releases of ICU.
* If a custom suffix (such as matching library suffixes) is desired, this can be modified.
* Note that if present, platform.h may contain an earlier definition of this macro.
* \def U_ICU_ENTRY_POINT_RENAME
* @stable ICU 4.2
*/
/**
* Disable the version suffix. Use the custom suffix if exists.
* \def U_DISABLE_VERSION_SUFFIX
* @internal
*/
#ifndef U_DISABLE_VERSION_SUFFIX
#define U_DISABLE_VERSION_SUFFIX 0
#endif
#ifndef U_ICU_ENTRY_POINT_RENAME
#ifdef U_HAVE_LIB_SUFFIX
# if !U_DISABLE_VERSION_SUFFIX
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z)
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME)
# else
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_LIB_SUFFIX_C_NAME)
# endif
#else
# if !U_DISABLE_VERSION_SUFFIX
# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX)
# else
# define U_ICU_ENTRY_POINT_RENAME(x) x
# endif
#endif
#endif
/** The current ICU library version as a dotted-decimal string. The patchlevel
* only appears in this string if it non-zero.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION "69.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
* This value will change in subsequent releases of ICU.
*
* Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers
* into one string without dots ("48").
* Since ICU 49, it is the double-digit major ICU version number.
* See https://unicode-org.github.io/icu/userguide/design#version-numbers-in-icu
*
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SHORT "69"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
#define U_ICU_DATA_VERSION "69.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================
* ICU collation framework version information
* Version info that can be obtained from a collator is affected by these
* numbers in a secret and magic way. Please use collator version as whole
*===========================================================================
*/
/**
* Collation runtime version (sort key generator, strcoll).
* If the version is different, sort keys for the same string could be different.
* This value may change in subsequent releases of ICU.
* @stable ICU 2.4
*/
#define UCOL_RUNTIME_VERSION 9
/**
* Collation builder code version.
* When this is different, the same tailoring might result
* in assigning different collation elements to code points.
* This value may change in subsequent releases of ICU.
* @stable ICU 2.4
*/
#define UCOL_BUILDER_VERSION 9
#ifndef U_HIDE_DEPRECATED_API
/**
* Constant 1.
* This was intended to be the version of collation tailorings,
* but instead the tailoring data carries a version number.
* @deprecated ICU 54
*/
#define UCOL_TAILORINGS_VERSION 1
#endif /* U_HIDE_DEPRECATED_API */
#endif

View File

@ -1,187 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2000-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*
* file name: uversion.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* Created by: Vladimir Weinstein
*
* Gets included by utypes.h and Windows .rc files
*/
/**
* \file
* \brief C API: API for accessing ICU version numbers.
*/
/*===========================================================================*/
/* Main ICU version information */
/*===========================================================================*/
#ifndef UVERSION_H
#define UVERSION_H
#include "unicode/umachine.h"
/* Actual version info lives in uvernum.h */
#include "unicode/uvernum.h"
/** Maximum length of the copyright string.
* @stable ICU 2.4
*/
#define U_COPYRIGHT_STRING_LENGTH 128
/** An ICU version consists of up to 4 numbers from 0..255.
* @stable ICU 2.4
*/
#define U_MAX_VERSION_LENGTH 4
/** In a string, ICU version fields are delimited by dots.
* @stable ICU 2.4
*/
#define U_VERSION_DELIMITER '.'
/** The maximum length of an ICU version string.
* @stable ICU 2.4
*/
#define U_MAX_VERSION_STRING_LENGTH 20
/** The binary form of a version on ICU APIs is an array of 4 uint8_t.
* To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)).
* @stable ICU 2.4
*/
typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
/*===========================================================================*/
/* C++ namespace if supported. Versioned unless versioning is disabled. */
/*===========================================================================*/
/* Define C++ namespace symbols. */
#ifdef __cplusplus
/**
* \def U_NAMESPACE_BEGIN
* This is used to begin a declaration of a public ICU C++ API within
* versioned-ICU-namespace block.
*
* @stable ICU 2.4
*/
/**
* \def U_NAMESPACE_END
* This is used to end a declaration of a public ICU C++ API.
* It ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN.
*
* @stable ICU 2.4
*/
/**
* \def U_NAMESPACE_USE
* This is used to specify that the rest of the code uses the
* public ICU C++ API namespace.
* @stable ICU 2.4
*/
/**
* \def U_NAMESPACE_QUALIFIER
* This is used to qualify that a function or class is part of
* the public ICU C++ API namespace.
*
* This macro is unnecessary since ICU 49 requires namespace support.
* You can just use "icu::" instead.
* @stable ICU 2.4
*/
# if U_DISABLE_RENAMING
# define U_ICU_NAMESPACE icu
namespace U_ICU_NAMESPACE { }
# else
# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu)
namespace U_ICU_NAMESPACE { }
namespace icu = U_ICU_NAMESPACE;
# endif
# define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE {
# define U_NAMESPACE_END }
# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE;
# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
# ifndef U_USING_ICU_NAMESPACE
# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
# define U_USING_ICU_NAMESPACE 0
# else
# define U_USING_ICU_NAMESPACE 0
# endif
# endif
# if U_USING_ICU_NAMESPACE
U_NAMESPACE_USE
# endif
#endif /* __cplusplus */
/*===========================================================================*/
/* General version helper functions. Definitions in putil.c */
/*===========================================================================*/
/**
* Parse a string with dotted-decimal version information and
* fill in a UVersionInfo structure with the result.
* Definition of this function lives in putil.c
*
* @param versionArray The destination structure for the version information.
* @param versionString A string with dotted-decimal version information,
* with up to four non-negative number fields with
* values of up to 255 each.
* @stable ICU 2.4
*/
U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray, const char *versionString);
/**
* Parse a Unicode string with dotted-decimal version information and
* fill in a UVersionInfo structure with the result.
* Definition of this function lives in putil.c
*
* @param versionArray The destination structure for the version information.
* @param versionString A Unicode string with dotted-decimal version
* information, with up to four non-negative number
* fields with values of up to 255 each.
* @stable ICU 4.2
*/
U_CAPI void U_EXPORT2
u_versionFromUString(UVersionInfo versionArray, const UChar *versionString);
/**
* Write a string with dotted-decimal version information according
* to the input UVersionInfo.
* Definition of this function lives in putil.c
*
* @param versionArray The version information to be written as a string.
* @param versionString A string buffer that will be filled in with
* a string corresponding to the numeric version
* information in versionArray.
* The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.
* @stable ICU 2.4
*/
U_CAPI void U_EXPORT2
u_versionToString(const UVersionInfo versionArray, char *versionString);
/**
* Gets the ICU release version. The version array stores the version information
* for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02.
* Definition of this function lives in putil.c
*
* @param versionArray the version # information, the result will be filled in
* @stable ICU 2.0
*/
U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray);
#endif

1
app/src/main/icu4c Submodule

@ -0,0 +1 @@
Subproject commit 30671f90a0950aad9b5c9eb570be1d779a2eee79