8 #ifndef BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED 9 #define BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED 11 #include <boost/nowide/detail/utf.hpp> 13 #include <boost/cstdint.hpp> 14 #include <boost/static_assert.hpp> 21 BOOST_STATIC_ASSERT(
sizeof(std::mbstate_t) >= 2);
/// Copy exactly two bytes from @p src to @p dst.
///
/// The copy goes through unsigned char pointers so that the 16-bit value can be
/// stored in / loaded from an object of another type (std::mbstate_t in this file)
/// without casting that object's address to a uint16_t pointer.
///
/// @param dst Destination; must refer to at least 2 writable bytes
/// @param src Source; must refer to at least 2 readable bytes
inline void copy_uint16_t(void* dst, const void* src)
{
    unsigned char* cdst = static_cast<unsigned char*>(dst);
    const unsigned char* csrc = static_cast<const unsigned char*>(src);
    cdst[0] = csrc[0];
    cdst[1] = csrc[1];
}
31 inline boost::uint16_t read_state(
const std::mbstate_t& src)
34 copy_uint16_t(&dst, &src);
37 inline void write_state(std::mbstate_t& dst,
const boost::uint16_t src)
39 copy_uint16_t(&dst, &src);
#if defined _MSC_VER && _MSC_VER < 1700
// For these MSVC versions do_length() is declared with a const state argument;
// the implementations below then return the number of characters counted
// instead of the number of bytes consumed and do not write the state back.
#define BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
#endif

/// std::codecvt implementation converting between UTF-8 and a wide encoding
/// selected by the size of the character type.
///
/// @tparam CharType Wide character type
/// @tparam CharSize Selects the specialization: 2 for UTF-16, 4 for UTF-32
template<typename CharType, int CharSize = sizeof(CharType)>
class utf8_codecvt;
58 template<
typename CharType>
59 class BOOST_SYMBOL_VISIBLE
utf8_codecvt<CharType, 2> :
public std::codecvt<CharType, char, std::mbstate_t>
62 BOOST_STATIC_ASSERT_MSG(
sizeof(CharType) >= 2,
"CharType must be able to store UTF16 code point");
64 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
68 typedef CharType uchar;
70 virtual std::codecvt_base::result do_unshift(std::mbstate_t& s,
char* from,
char* ,
char*& next)
const 72 if(detail::read_state(s) != 0)
73 return std::codecvt_base::error;
75 return std::codecvt_base::ok;
77 virtual int do_encoding()
const throw()
81 virtual int do_max_length()
const throw()
85 virtual bool do_always_noconv()
const throw()
90 virtual int do_length(std::mbstate_t
91 #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
99 boost::uint16_t state = detail::read_state(std_state);
100 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 101 const char* save_from = from;
103 size_t save_max = max;
105 while(max > 0 && from < from_end)
107 const char* prev_from = from;
108 boost::uint32_t ch = detail::utf::utf_traits<char>::decode(from, from_end);
109 if(ch == detail::utf::illegal)
112 }
else if(ch == detail::utf::incomplete)
130 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 131 detail::write_state(std_state, state);
132 return static_cast<int>(from - save_from);
134 return static_cast<int>(save_max - max);
138 virtual std::codecvt_base::result do_in(std::mbstate_t& std_state,
140 const char* from_end,
141 const char*& from_next,
144 uchar*& to_next)
const 146 std::codecvt_base::result r = std::codecvt_base::ok;
153 boost::uint16_t state = detail::read_state(std_state);
154 while(to < to_end && from < from_end)
156 const char* from_saved = from;
158 uint32_t ch = detail::utf::utf_traits<char>::decode(from, from_end);
160 if(ch == detail::utf::illegal)
163 }
else if(ch == detail::utf::incomplete)
166 r = std::codecvt_base::partial;
172 *to++ = static_cast<CharType>(ch);
185 boost::uint16_t vh = static_cast<boost::uint16_t>(ch >> 10);
186 boost::uint16_t vl = ch & 0x3FF;
187 boost::uint16_t w1 = vh + 0xD800;
188 boost::uint16_t w2 = vl + 0xDC00;
192 *to++ = static_cast<CharType>(w1);
196 *to++ = static_cast<CharType>(w2);
203 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
204 r = std::codecvt_base::partial;
205 detail::write_state(std_state, state);
209 virtual std::codecvt_base::result do_out(std::mbstate_t& std_state,
211 const uchar* from_end,
212 const uchar*& from_next,
215 char*& to_next)
const 217 std::codecvt_base::result r = std::codecvt_base::ok;
225 boost::uint16_t state = detail::read_state(std_state);
226 while(to < to_end && from < from_end)
228 boost::uint32_t ch = 0;
234 boost::uint16_t w1 = state;
235 boost::uint16_t w2 = *from;
238 if(0xDC00 <= w2 && w2 <= 0xDFFF)
240 boost::uint16_t vh = w1 - 0xD800;
241 boost::uint16_t vl = w2 - 0xDC00;
242 ch = ((uint32_t(vh) << 10) | vl) + 0x10000;
250 if(0xD800 <= ch && ch <= 0xDBFF)
256 state = static_cast<boost::uint16_t>(ch);
259 }
else if(0xDC00 <= ch && ch <= 0xDFFF)
267 if(!detail::utf::is_valid_codepoint(ch))
269 r = std::codecvt_base::error;
272 int len = detail::utf::utf_traits<char>::width(ch);
273 if(to_end - to < len)
275 r = std::codecvt_base::partial;
278 to = detail::utf::utf_traits<char>::encode(ch, to);
284 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
285 r = std::codecvt_base::partial;
286 detail::write_state(std_state, state);
292 template<
typename CharType>
293 class BOOST_SYMBOL_VISIBLE
utf8_codecvt<CharType, 4> :
public std::codecvt<CharType, char, std::mbstate_t>
296 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
300 typedef CharType uchar;
302 virtual std::codecvt_base::result do_unshift(std::mbstate_t& ,
char* from,
char* ,
char*& next)
const 305 return std::codecvt_base::ok;
307 virtual int do_encoding()
const throw()
311 virtual int do_max_length()
const throw()
315 virtual bool do_always_noconv()
const throw()
320 virtual int do_length(std::mbstate_t
321 #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
326 const char* from_end,
329 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 330 const char* start_from = from;
332 size_t save_max = max;
335 while(max > 0 && from < from_end)
337 const char* save_from = from;
338 boost::uint32_t ch = detail::utf::utf_traits<char>::decode(from, from_end);
339 if(ch == detail::utf::incomplete)
343 }
else if(ch == detail::utf::illegal)
349 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 350 return from - start_from;
352 return save_max - max;
356 virtual std::codecvt_base::result do_in(std::mbstate_t& ,
358 const char* from_end,
359 const char*& from_next,
362 uchar*& to_next)
const 364 std::codecvt_base::result r = std::codecvt_base::ok;
366 while(to < to_end && from < from_end)
368 const char* from_saved = from;
370 uint32_t ch = detail::utf::utf_traits<char>::decode(from, from_end);
372 if(ch == detail::utf::illegal)
375 }
else if(ch == detail::utf::incomplete)
377 r = std::codecvt_base::partial;
385 if(r == std::codecvt_base::ok && from != from_end)
386 r = std::codecvt_base::partial;
390 virtual std::codecvt_base::result do_out(std::mbstate_t& ,
392 const uchar* from_end,
393 const uchar*& from_next,
396 char*& to_next)
const 398 std::codecvt_base::result r = std::codecvt_base::ok;
399 while(to < to_end && from < from_end)
401 boost::uint32_t ch = 0;
403 if(!detail::utf::is_valid_codepoint(ch))
407 int len = detail::utf::utf_traits<char>::width(ch);
408 if(to_end - to < len)
410 r = std::codecvt_base::partial;
413 to = detail::utf::utf_traits<char>::encode(ch, to);
418 if(r == std::codecvt_base::ok && from != from_end)
419 r = std::codecvt_base::partial;
// Cross-references from the documentation listing:
//   utf8_codecvt                        - declared at utf8_codecvt.hpp:55
//   BOOST_NOWIDE_REPLACEMENT_CHARACTER  - defined at replacement.hpp:16