Lesson 35 - Get Compute Auth Token Working
This commit is contained in:
@@ -0,0 +1,622 @@
|
||||
// Copyright (C) 2007 Davis E. King (davis@dlib.net), and Nils Labugt
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_UNICODe_H_
|
||||
#define DLIB_UNICODe_H_
|
||||
|
||||
#include "../uintn.h"
|
||||
#include "../algs.h"
|
||||
#include "unicode_abstract.h"
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
|
||||
#include <fstream>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
typedef uint32 unichar;
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__ < 4 && __GNUC_MINOR__ < 4
|
||||
struct unichar_traits
|
||||
{
|
||||
typedef dlib::unichar char_type;
|
||||
typedef dlib::unichar int_type;
|
||||
typedef std::streamoff off_type;
|
||||
typedef std::streampos pos_type;
|
||||
typedef std::mbstate_t state_type;
|
||||
|
||||
static void assign(char_type& c1, const char_type& c2) { c1 = c2; }
|
||||
static bool eq(const char_type& c1, const char_type& c2) { return c1 == c2; }
|
||||
static bool lt(const char_type& c1, const char_type& c2) { return c1 < c2; }
|
||||
static int compare(const char_type* s1, const char_type* s2, size_t n)
|
||||
{
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
{
|
||||
if (s1[i] < s2[i])
|
||||
return -1;
|
||||
else if (s1[i] > s2[i])
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t length(const char_type* s)
|
||||
{
|
||||
size_t i = 0;
|
||||
while (s[i] != 0)
|
||||
++i;
|
||||
return i;
|
||||
}
|
||||
|
||||
static const char_type* find(const char_type* s, size_t n,
|
||||
const char_type& a)
|
||||
{
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
{
|
||||
if (s[i] == a)
|
||||
{
|
||||
return s+i;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static char_type* move(char_type* s1, const char_type* s2, size_t n)
|
||||
{
|
||||
return static_cast<char_type*>(std::memmove(s1, s2, sizeof(char_type)*n));
|
||||
}
|
||||
|
||||
static char_type* copy(char_type* s1, const char_type* s2, size_t n)
|
||||
{
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
s1[i] = s2[i];
|
||||
|
||||
return s1;
|
||||
}
|
||||
|
||||
static char_type* assign(char_type* s, size_t n, char_type a)
|
||||
{
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
s[i] = a;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
static int_type not_eof(const int_type& c)
|
||||
{
|
||||
if (!eq_int_type(c,eof()))
|
||||
return to_int_type(c);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static char_type to_char_type(const int_type& c) { return static_cast<char_type>(c); }
|
||||
static int_type to_int_type(const char_type& c) { return zero_extend_cast<int_type>(c); }
|
||||
|
||||
static bool eq_int_type(const int_type& c1, const int_type& c2) { return c1 == c2; }
|
||||
|
||||
static int_type eof() { return static_cast<int_type>(EOF); }
|
||||
};
|
||||
|
||||
typedef std::basic_string<unichar, unichar_traits> ustring;
|
||||
#else
|
||||
typedef std::basic_string<unichar> ustring;
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
namespace unicode_helpers
|
||||
{
|
||||
|
||||
template <
    typename charT
    >
int u8_to_u32(
    charT& result,
    std::istream& in
)
/*!
    ensures
        - if (there wasn't any more data and we hit EOF before reading a byte) then
            - returns 0
        - else if (we decoded another character without error) then
            - #result == the decoded character
            - returns the number of bytes consumed to make this character (1 to 4)
        - else
            - some error occurred: an invalid leading or continuation byte, a
              sequence truncated by EOF, an overlong encoding, a UTF-16
              surrogate, a noncharacter, or a value above U+10FFFF.
            - returns -1
            - #result may have been modified
!*/
{
    int val = in.get();
    if (val == EOF)
        return 0;

    // Bytes of the sequence being decoded.  After the EOF check each value
    // returned by get() is in [0,255], so a plain cast preserves it.
    // unsigned long is at least 32 bits wide, enough for the shifts below.
    unsigned long ch[4];
    ch[0] = static_cast<unsigned long>(val);

    // 0xxxxxxx: a single byte (ASCII)
    if ( ch[0] < 0x80 )
    {
        result = static_cast<charT>(ch[0]);
        return 1;
    }

    // 10xxxxxx: a continuation byte can never start a sequence
    if ( ( ch[0] & ~0x3F ) == 0x80 )
    {
        // invalid leading byte
        return -1;
    }

    // 110xxxxx 10xxxxxx: two byte sequence
    if ( ( ch[0] & ~0x1F ) == 0xC0 )
    {
        val = in.get();
        if ( val == EOF )
            return -1;

        ch[1] = static_cast<unsigned long>(val);
        if ( ( ch[1] & ~0x3F ) != 0x80 )
            return -1; // invalid tail
        if ( ( ch[0] & ~0x01 ) == 0xC0 )
            return -1; // overlong form (lead bytes 0xC0 and 0xC1)
        ch[0] &= 0x1F;
        ch[1] &= 0x3F;
        result = static_cast<charT>(( ch[0] << 6 ) | ch[1]);
        return 2;
    }

    // 1110xxxx 10xxxxxx 10xxxxxx: three byte sequence
    if ( ( ch[0] & ~0x0F ) == 0xE0 )
    {
        for ( unsigned n = 1; n < 3; n++ )
        {
            val = in.get();
            if ( val == EOF )
                return -1;
            ch[n] = static_cast<unsigned long>(val);
            if ( ( ch[n] & ~0x3F ) != 0x80 )
                return -1; // invalid tail
            ch[n] &= 0x3F;
        }
        ch[0] &= 0x0F;
        result = static_cast<charT>(( ch[0] << 12 ) | ( ch[1] << 6 ) | ch[2]);
        if ( result < 0x0800 )
            return -1; // overlong form
        if ( result >= 0xD800 && result < 0xE000 )
            return -1; // invalid character (UTF-16 surrogate pairs)
        if ( result >= 0xFDD0 && result <= 0xFDEF )
            return -1; // noncharacter
        if ( result >= 0xFFFE )
            return -1; // noncharacter
        return 3;
    }

    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four byte sequence
    if ( ( ch[0] & ~0x07 ) == 0xF0 )
    {
        for ( unsigned n = 1; n < 4; n++ )
        {
            val = in.get();
            if ( val == EOF )
                return -1;
            ch[n] = static_cast<unsigned long>(val);
            if ( ( ch[n] & ~0x3F ) != 0x80 )
                return -1; // invalid tail
            ch[n] &= 0x3F;
        }

        // Lead bytes 0xF5..0xF7 can only encode values above U+10FFFF.  The
        // lead byte 0xF4 is valid: it starts U+100000..U+10FFFF, so it must
        // not be rejected here.
        if ( ch[0] > 0xF4 )
            return -1;
        ch[0] &= 0x07;

        const unsigned long code =
            ( ch[0] << 18 ) | ( ch[1] << 12 ) | ( ch[2] << 6 ) | ch[3];
        if ( code > 0x10FFFF )
            return -1; // beyond the last Unicode code point (0xF4 0x90..0xBF ...)

        result = static_cast<charT>(code);
        // This also rejects every 4 byte sequence when charT is too narrow to
        // hold the value, since the truncated result is then below 0x10000.
        if ( result < 0x10000 )
            return -1; // overlong form
        if ( (result & 0xFFFF) >= 0xFFFE )
            return -1; // noncharacter
        return 4;
    }

    // 11111xxx: not a valid UTF-8 leading byte
    return -1;
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
template <typename charT>
|
||||
class basic_utf8_streambuf : public std::basic_streambuf<charT>
|
||||
{
|
||||
public:
|
||||
basic_utf8_streambuf (
|
||||
std::ifstream& fin_
|
||||
) :
|
||||
fin(fin_)
|
||||
{
|
||||
this->setg(in_buffer+max_putback,
|
||||
in_buffer+max_putback,
|
||||
in_buffer+max_putback);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
typedef typename std::basic_streambuf<charT>::int_type int_type;
|
||||
|
||||
// input functions
|
||||
int_type underflow(
|
||||
)
|
||||
{
|
||||
if (this->gptr() < this->egptr())
|
||||
{
|
||||
return zero_extend_cast<int_type>(*this->gptr());
|
||||
}
|
||||
|
||||
int num_put_back = static_cast<int>(this->gptr() - this->eback());
|
||||
if (num_put_back > max_putback)
|
||||
{
|
||||
num_put_back = max_putback;
|
||||
}
|
||||
|
||||
// copy the putback characters into the putback end of the in_buffer
|
||||
std::memmove(in_buffer+(max_putback-num_put_back), this->gptr()-num_put_back, num_put_back);
|
||||
|
||||
|
||||
// fill the buffer with characters
|
||||
int n = in_buffer_size-max_putback;
|
||||
int i;
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
charT ch;
|
||||
if (unicode_helpers::u8_to_u32(ch,fin) > 0)
|
||||
{
|
||||
(in_buffer+max_putback)[i] = ch;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == 0)
|
||||
{
|
||||
// an error occurred or we hit EOF
|
||||
return EOF;
|
||||
}
|
||||
|
||||
// reset in_buffer pointers
|
||||
this->setg (in_buffer+(max_putback-num_put_back),
|
||||
in_buffer+max_putback,
|
||||
in_buffer+max_putback+i);
|
||||
|
||||
return zero_extend_cast<int_type>(*this->gptr());
|
||||
}
|
||||
|
||||
private:
|
||||
std::ifstream& fin;
|
||||
static const int max_putback = 4;
|
||||
static const int in_buffer_size = 10;
|
||||
charT in_buffer[in_buffer_size];
|
||||
};
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
#if defined(__GNUC__) && __GNUC__ >= 6
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
|
||||
#endif
|
||||
template <typename T>
|
||||
bool is_combining_char(
|
||||
const T ch_
|
||||
)
|
||||
{
|
||||
const unichar ch = zero_extend_cast<unichar>(ch_);
|
||||
if ( ch < 0x300 ) return false;
|
||||
if ( ch < 0x370 ) return true;
|
||||
|
||||
if ( ch < 0x800 )
|
||||
{
|
||||
if ( ch < 0x483 )return false;if ( ch < 0x48A )return true;
|
||||
|
||||
if ( ch < 0x591 )return false;if ( ch < 0x5D0 )
|
||||
{
|
||||
if ( ch == 0x5C0 )return false;
|
||||
if ( ch == 0x5C3 )return false;
|
||||
if ( ch == 0x5C6 )return false;
|
||||
return true;
|
||||
}
|
||||
if ( ch < 0x610 )return false;if ( ch < 0x616 )return true;
|
||||
if ( ch < 0x64B )return false;if ( ch < 0x660 )return true;
|
||||
|
||||
if ( ch == 0x670 )return true;
|
||||
|
||||
if ( ch < 0x6D6 )return false;if ( ch < 0x6EE )
|
||||
{
|
||||
if ( ch == 0x6DD )return false;
|
||||
if ( ch == 0x6E5 )return false;
|
||||
if ( ch == 0x6E6 )return false;
|
||||
if ( ch == 0x6E9 )return false;
|
||||
return true;
|
||||
}
|
||||
if ( ch == 0x711 )return true;
|
||||
|
||||
if ( ch < 0x730 )return false;if ( ch < 0x74B )return true;
|
||||
if ( ch < 0x7A6 )return false;if ( ch < 0x7B1 )return true;
|
||||
if ( ch < 0x7EB )return false;if ( ch < 0x7F4 )return true;
|
||||
return false;
|
||||
}
|
||||
if ( ch < 0xA00 )
|
||||
{
|
||||
if ( ch < 0x901 )return false;if ( ch < 0x904 )return true;
|
||||
if ( ch < 0x93C )return false;if ( ch < 0x955 )
|
||||
{
|
||||
if ( ch == 0x93D )return false;
|
||||
if ( ch == 0x950 )return false;
|
||||
return true;
|
||||
}
|
||||
if ( ch < 0x962 )return false;if ( ch < 0x964 )return true;
|
||||
if ( ch < 0x981 )return false;if ( ch < 0x984 )return true;
|
||||
if ( ch < 0x9BC )return false;if ( ch < 0x9D8 )
|
||||
{
|
||||
if ( ch == 0x9BD )return false;
|
||||
if ( ch == 0x9CE )return false;
|
||||
return true;
|
||||
}
|
||||
if ( ch < 0x9E2 )return false;if ( ch < 0x9E4 )return true;
|
||||
return false;
|
||||
}
|
||||
if ( ch < 0xC00 )
|
||||
{
|
||||
if ( ch < 0xA01 )return false;if ( ch < 0xA04 )return true;
|
||||
if ( ch < 0xA3C )return false;if ( ch < 0xA4E )return true;
|
||||
if ( ch < 0xA70 )return false;if ( ch < 0xA72 )return true;
|
||||
if ( ch < 0xA81 )return false;if ( ch < 0xA84 )return true;
|
||||
if ( ch < 0xABC )return false;if ( ch < 0xACE )
|
||||
{
|
||||
if ( ch == 0xABD )return false;
|
||||
return true;
|
||||
}
|
||||
if ( ch < 0xAE2 )return false;if ( ch < 0xAE4 )return true;
|
||||
if ( ch < 0xB01 )return false;if ( ch < 0xB04 )return true;
|
||||
if ( ch < 0xB3C )return false;if ( ch < 0xB58 )
|
||||
{
|
||||
if ( ch == 0xB3D )return false;
|
||||
return true;
|
||||
}
|
||||
if ( ch == 0xB82 )return true;
|
||||
|
||||
if ( ch < 0xBBE )return false;if ( ch < 0xBD8 )return true;
|
||||
|
||||
if ( ch == 0xBF4 )return true;
|
||||
if ( ch == 0xBF8 )return true;
|
||||
return false;
|
||||
}
|
||||
if(ch < 0xE00)
|
||||
{
|
||||
if ( ch < 0xC01 )return false;if ( ch < 0xC04 )return true;
|
||||
if ( ch < 0xC3E )return false;if ( ch < 0xC57 )return true;
|
||||
if ( ch < 0xC82 )return false;if ( ch < 0xC84 )return true;
|
||||
if ( ch < 0xCBC )return false;if ( ch < 0xCD7 )
|
||||
{
|
||||
if ( ch == 0xCBD )return false;
|
||||
return true;
|
||||
}
|
||||
if ( ch < 0xCE2 )return false;if ( ch < 0xCE4 )return true;
|
||||
if ( ch < 0xD02 )return false;if ( ch < 0xD04 )return true;
|
||||
if ( ch < 0xD3E )return false;if ( ch < 0xD58 )return true;
|
||||
if ( ch < 0xD82 )return false;if ( ch < 0xD84 )return true;
|
||||
if ( ch < 0xDCA )return false;if ( ch < 0xDF4 )return true;
|
||||
return false;
|
||||
}
|
||||
if(ch < 0x1000)
|
||||
{
|
||||
if ( ch == 0xE31 )return true;
|
||||
|
||||
if ( ch < 0xE34 )return false;if ( ch < 0xE3B )return true;
|
||||
if ( ch < 0xE47 )return false;if ( ch < 0xE4F )return true;
|
||||
|
||||
if ( ch == 0xEB1 )return true;
|
||||
|
||||
if ( ch < 0xEB4 )return false;if ( ch < 0xEBD )return true;
|
||||
if ( ch < 0xEC8 )return false;if ( ch < 0xECE )return true;
|
||||
if ( ch < 0xF18 )return false;if ( ch < 0xF1A )return true;
|
||||
|
||||
if ( ch == 0xF35 )return true;
|
||||
if ( ch == 0xF37 )return true;
|
||||
if ( ch == 0xF39 )return true;
|
||||
|
||||
if ( ch < 0xF3E )return false;if ( ch < 0xF40 )return true;
|
||||
if ( ch < 0xF71 )return false;if ( ch < 0xF88 )
|
||||
{
|
||||
if ( ch == 0xF85 )return false;
|
||||
return true;
|
||||
}
|
||||
if ( ch < 0xF90 )return false;if ( ch < 0xFBD )return true;
|
||||
|
||||
if ( ch == 0xFC6 )return true;
|
||||
return false;
|
||||
}
|
||||
if ( ch < 0x1800 )
|
||||
{
|
||||
if ( ch < 0x102C )return false;if ( ch < 0x1040 )return true;
|
||||
if ( ch < 0x1056 )return false;if ( ch < 0x105A )return true;
|
||||
|
||||
if ( ch == 0x135F )return true;
|
||||
|
||||
if ( ch < 0x1712 )return false;if ( ch < 0x1715 )return true;
|
||||
if ( ch < 0x1732 )return false;if ( ch < 0x1735 )return true;
|
||||
if ( ch < 0x1752 )return false;if ( ch < 0x1754 )return true;
|
||||
if ( ch < 0x1772 )return false;if ( ch < 0x1774 )return true;
|
||||
if ( ch < 0x17B6 )return false;if ( ch < 0x17D4 )return true;
|
||||
|
||||
if ( ch == 0x17DD )return true;
|
||||
return false;
|
||||
}
|
||||
if(ch < 0x2000)
|
||||
{
|
||||
if ( ch < 0x180B )return false;if ( ch < 0x180E )return true;
|
||||
|
||||
if ( ch == 0x18A9 )return true;
|
||||
|
||||
if ( ch < 0x1920 )return false;if ( ch < 0x193C )return true;
|
||||
if ( ch < 0x19B0 )return false;if ( ch < 0x19C1 )return true;
|
||||
if ( ch < 0x19C8 )return false;if ( ch < 0x19CA )return true;
|
||||
if ( ch < 0x1A17 )return false;if ( ch < 0x1A1C )return true;
|
||||
if ( ch < 0x1B00 )return false;if ( ch < 0x1B05 )return true;
|
||||
if ( ch < 0x1B34 )return false;if ( ch < 0x1B45 )return true;
|
||||
if ( ch < 0x1B6B )return false;if ( ch < 0x1B74 )return true;
|
||||
if ( ch < 0x1DC0 )return false;if ( ch < 0x1E00 )return true;
|
||||
return false;
|
||||
}
|
||||
if ( ch < 0x20D0 )return false;if ( ch < 0x2100 )return true;
|
||||
if ( ch < 0x302A )return false;if ( ch < 0x3030 )return true;
|
||||
if ( ch < 0x3099 )return false;if ( ch < 0x309B )return true;
|
||||
|
||||
if ( ch == 0xA802 )return true;
|
||||
if ( ch == 0xA806 )return true;
|
||||
if ( ch == 0xA80B )return true;
|
||||
|
||||
if ( ch < 0xA823 )return false;if ( ch < 0xA828 )return true;
|
||||
|
||||
if ( ch == 0xFB1E )return true;
|
||||
|
||||
if ( ch < 0xFE00 )return false;if ( ch < 0xFE10 )return true;
|
||||
if ( ch < 0xFE20 )return false;if ( ch < 0xFE30 )return true;
|
||||
if ( ch < 0x10A01 )return false;if ( ch < 0x10A10 )return true;
|
||||
if ( ch < 0x10A38 )return false;if ( ch < 0x10A40 )return true;
|
||||
if ( ch < 0x1D165 )return false;if ( ch < 0x1D16A )return true;
|
||||
if ( ch < 0x1D16D )return false;if ( ch < 0x1D173 )return true;
|
||||
if ( ch < 0x1D17B )return false;if ( ch < 0x1D183 )return true;
|
||||
if ( ch < 0x1D185 )return false;if ( ch < 0x1D18C )return true;
|
||||
if ( ch < 0x1D1AA )return false;if ( ch < 0x1D1AE )return true;
|
||||
if ( ch < 0x1D242 )return false;if ( ch < 0x1D245 )return true;
|
||||
if ( ch < 0xE0100 )return false;if ( ch < 0xE01F0 )return true;
|
||||
return false;
|
||||
}
|
||||
#if defined(__GNUC__) && __GNUC__ >= 6
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class invalid_utf8_error : public error
|
||||
{
|
||||
public:
|
||||
invalid_utf8_error():error(EUTF8_TO_UTF32) {}
|
||||
};
|
||||
|
||||
inline const ustring convert_utf8_to_utf32 (
    const std::string& str
)
/*!
    ensures
        - if (every character in str could be decoded by
          unicode_helpers::u8_to_u32()) then
            - returns the decoded characters as a ustring
        - else
            - throws invalid_utf8_error
!*/
{
    std::istringstream sin(str);

    // A decoded string never has more characters than the input has bytes.
    ustring decoded;
    decoded.reserve(str.size());

    for (;;)
    {
        unichar code_point;
        const int consumed = unicode_helpers::u8_to_u32(code_point,sin);

        if (consumed < 0)
            throw invalid_utf8_error();   // malformed input
        if (consumed == 0)
            break;                        // clean end of input

        decoded.push_back(code_point);
    }

    return decoded;
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
bool is_surrogate(unichar ch);
|
||||
|
||||
unichar surrogate_pair_to_unichar(unichar first, unichar second);
|
||||
|
||||
void unichar_to_surrogate_pair(unichar unicode, unichar &first, unichar &second);
|
||||
|
||||
|
||||
const ustring convert_wstring_to_utf32 (
|
||||
const std::wstring &wstr
|
||||
);
|
||||
|
||||
const std::wstring convert_utf32_to_wstring (
|
||||
const ustring &src
|
||||
);
|
||||
|
||||
const std::wstring convert_mbstring_to_wstring (
|
||||
const std::string &src
|
||||
);
|
||||
|
||||
const std::string convert_wstring_to_mbstring(
|
||||
const std::wstring &src
|
||||
);
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename charT>
|
||||
class basic_utf8_ifstream : public std::basic_istream<charT>
|
||||
{
|
||||
public:
|
||||
|
||||
basic_utf8_ifstream (
|
||||
) : std::basic_istream<charT>(&buf), buf(fin) {}
|
||||
|
||||
basic_utf8_ifstream (
|
||||
const char* file_name,
|
||||
std::ios_base::openmode mode = std::ios::in
|
||||
) :
|
||||
std::basic_istream<charT>(&buf),
|
||||
buf(fin)
|
||||
{
|
||||
fin.open(file_name,mode);
|
||||
// make this have the same error state as fin
|
||||
this->clear(fin.rdstate());
|
||||
}
|
||||
|
||||
basic_utf8_ifstream (
|
||||
const std::string& file_name,
|
||||
std::ios_base::openmode mode = std::ios::in
|
||||
) :
|
||||
std::basic_istream<charT>(&buf),
|
||||
buf(fin)
|
||||
{
|
||||
fin.open(file_name.c_str(),mode);
|
||||
// make this have the same error state as fin
|
||||
this->clear(fin.rdstate());
|
||||
}
|
||||
|
||||
void open(
|
||||
const std::string& file_name,
|
||||
std::ios_base::openmode mode = std::ios::in
|
||||
)
|
||||
{
|
||||
open(file_name.c_str(),mode);
|
||||
}
|
||||
|
||||
void open (
|
||||
const char* file_name,
|
||||
std::ios_base::openmode mode = std::ios::in
|
||||
)
|
||||
{
|
||||
fin.close();
|
||||
fin.clear();
|
||||
fin.open(file_name,mode);
|
||||
// make this have the same error state as fin
|
||||
this->clear(fin.rdstate());
|
||||
}
|
||||
|
||||
void close (
|
||||
)
|
||||
{
|
||||
fin.close();
|
||||
// make this have the same error state as fin
|
||||
this->clear(fin.rdstate());
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::ifstream fin;
|
||||
unicode_helpers::basic_utf8_streambuf<charT> buf;
|
||||
};
|
||||
|
||||
typedef basic_utf8_ifstream<unichar> utf8_uifstream;
|
||||
typedef basic_utf8_ifstream<wchar_t> utf8_wifstream;
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#ifdef NO_MAKEFILE
|
||||
#include "unicode.cpp"
|
||||
#endif
|
||||
|
||||
#endif // DLIB_UNICODe_H_
|
||||
|
||||
@@ -0,0 +1,233 @@
|
||||
// Copyright (C) 2007 Davis E. King (davis@dlib.net), and Nils Labugt
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_UNICODe_ABSTRACT_H_
|
||||
#ifdef DLIB_UNICODe_ABSTRACT_H_
|
||||
|
||||
#include "../uintn.h"
|
||||
#include "../error.h"
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// a typedef for an unsigned 32bit integer to hold our UNICODE characters
|
||||
typedef uint32 unichar;
|
||||
|
||||
// a typedef for a string object to hold our UNICODE strings
|
||||
typedef std::basic_string<unichar> ustring;
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
bool is_combining_char(
|
||||
const T ch_
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- if (ch_ is a unicode combining character) then
|
||||
- returns true
|
||||
- else
|
||||
- returns false
|
||||
!*/
|
||||
|
||||
bool is_surrogate(
|
||||
unichar ch
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- if (ch is a unicode surrogate character) then
|
||||
- returns true
|
||||
- else
|
||||
- returns false
|
||||
!*/
|
||||
|
||||
unichar surrogate_pair_to_unichar(
|
||||
unichar first,
|
||||
unichar second
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0xD800 <= first < 0xDC00
|
||||
- 0xDC00 <= second < 0xE000
|
||||
- is_surrogate(first) == true
|
||||
- is_surrogate(second) == true
|
||||
ensures
|
||||
- converts two surrogates into one unicode character
|
||||
!*/
|
||||
|
||||
void unichar_to_surrogate_pair(
|
||||
unichar ch,
|
||||
unichar& first,
|
||||
unichar& second
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- ch >= 0x10000 (i.e. is not in Basic Multilingual Plane)
|
||||
ensures
|
||||
- surrogate_pair_to_unichar(#first,#second) == ch
|
||||
(i.e. converts ch into two surrogate characters)
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class invalid_utf8_error : public error
|
||||
{
|
||||
public:
|
||||
invalid_utf8_error():error(EUTF8_TO_UTF32) {}
|
||||
};
|
||||
|
||||
const ustring convert_utf8_to_utf32 (
|
||||
const std::string& str
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- if (str is a valid UTF-8 encoded string) then
|
||||
- returns a copy of str that has been converted into a
|
||||
unichar string
|
||||
- else
|
||||
- throws invalid_utf8_error
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
const ustring convert_wstring_to_utf32 (
|
||||
const std::wstring &wstr
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- wstr is a valid UTF-16 string when sizeof(wchar_t) == 2
|
||||
- wstr is a valid UTF-32 string when sizeof(wchar_t) == 4
|
||||
ensures
|
||||
- converts wstr into UTF-32 string
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
const std::wstring convert_utf32_to_wstring (
|
||||
const ustring &str
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- str is a valid UTF-32 encoded string
|
||||
ensures
|
||||
- converts str into wstring whose encoding is UTF-16 when sizeof(wchar_t) == 2
|
||||
- converts str into wstring whose encoding is UTF-32 when sizeof(wchar_t) == 4
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
const std::wstring convert_mbstring_to_wstring (
|
||||
const std::string &str
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- str is a valid multibyte string whose encoding is the same as the current locale setting
|
||||
ensures
|
||||
- converts str into wstring whose encoding is UTF-16 when sizeof(wchar_t) == 2
|
||||
- converts str into wstring whose encoding is UTF-32 when sizeof(wchar_t) == 4
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
const std::string convert_wstring_to_mbstring (
|
||||
const std::wstring &src
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- src is a valid wide character string whose encoding is the same as the current
|
||||
locale setting
|
||||
ensures
|
||||
- returns a multibyte encoded version of the given string
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename charT
|
||||
>
|
||||
class basic_utf8_ifstream : public std::basic_istream<charT>
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object represents an input file stream much like the
|
||||
normal std::ifstream except that it knows how to read UTF-8
|
||||
data. So when you read characters out of this stream it will
|
||||
automatically convert them from the UTF-8 multibyte encoding
|
||||
into a fixed width wide character encoding.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
basic_utf8_ifstream (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- constructs an input stream that isn't yet associated with
|
||||
a file.
|
||||
!*/
|
||||
|
||||
basic_utf8_ifstream (
|
||||
const char* file_name,
|
||||
std::ios_base::openmode mode = std::ios::in
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- tries to open the given file for reading by this stream
|
||||
- mode is interpreted exactly the same way as the open mode
|
||||
argument used by std::ifstream.
|
||||
!*/
|
||||
|
||||
basic_utf8_ifstream (
|
||||
const std::string& file_name,
|
||||
std::ios_base::openmode mode = std::ios::in
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- tries to open the given file for reading by this stream
|
||||
- mode is interpreted exactly the same way as the open mode
|
||||
argument used by std::ifstream.
|
||||
!*/
|
||||
|
||||
void open(
|
||||
const std::string& file_name,
|
||||
std::ios_base::openmode mode = std::ios::in
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- tries to open the given file for reading by this stream
|
||||
- mode is interpreted exactly the same way as the open mode
|
||||
argument used by std::ifstream.
|
||||
!*/
|
||||
|
||||
void open (
|
||||
const char* file_name,
|
||||
std::ios_base::openmode mode = std::ios::in
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- tries to open the given file for reading by this stream
|
||||
- mode is interpreted exactly the same way as the open mode
|
||||
argument used by std::ifstream.
|
||||
!*/
|
||||
|
||||
void close (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- any file opened by this stream has been closed
|
||||
!*/
|
||||
};
|
||||
|
||||
typedef basic_utf8_ifstream<unichar> utf8_uifstream;
|
||||
typedef basic_utf8_ifstream<wchar_t> utf8_wifstream;
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_UNICODe_ABSTRACT_H_
|
||||
|
||||
|
||||
Reference in New Issue
Block a user