/*******************************************************************/
/* u16ports: u16 variants of wide character string functions. */
/* Copyright (C) 2017 Z. Gilboa */
/* Released under the Standard MIT License; see COPYING.U16PORTS. */
/*******************************************************************/
#include <stdint.h>
#include <u16ports/u16ports.h>
/**
 *  scalar    nickname    utf-16     utf-8[0]   utf-8[1]   utf-8[2]   utf-8[3]
 *  ------    --------    --------   --------   --------   --------   --------
 *  00000000  7x          00000000   0xxxxxxx
 *  0xxxxxxx              0xxxxxxx
 *
 *  00000yyy  5y6x        00000yyy   110yyyyy   10xxxxxx
 *  yyxxxxxx              yyxxxxxx
 *
 *  zzzzyyyy  4z6y6x      zzzzyyyy   1110zzzz   10yyyyyy   10xxxxxx
 *  yyxxxxxx              yyxxxxxx
 *
 *  000uuuuu  5u4z6y6x    110110ww   11110uuu   10uuzzzz   10yyyyyy   10xxxxxx
 *  zzzzyyyy              wwzzzzyy
 *  yyxxxxxx              110111yy
 *                        yyxxxxxx   (where wwww = uuuuu - 1)
 *
**/
/**
 * u16_wcstombs: convert a null-terminated utf-16 string to utf-8.
 *
 * s:   destination buffer; when NULL, nothing is stored and only the
 *      length of the utf-8 form is computed (n is ignored).
 * src: null-terminated sequence of utf-16 code units.
 * n:   capacity of s, in bytes.
 *
 * return value: the number of utf-8 bytes counted (s == NULL) or stored
 * in s, not including the terminating null byte; or (size_t)-1 when:
 *
 *   - src contains a low surrogate that is not preceded by a high
 *     surrogate, or a high surrogate that is not followed by a low one;
 *   - n, converted to ssize_t, is negative;
 *   - the next character does not fit in the space remaining in s.
 *
 * a terminating null byte is stored only if space remains after the
 * last converted character; when the converted characters fill s
 * exactly, the function returns n and s is left unterminated.
**/
size_t u16_wcstombs(char * s, const uint16_t * src, size_t n)
{
	size_t		ret;
	ssize_t		len;
	char *		dst;
	uint16_t	cu;
	uint32_t	cp;

	/* counting mode: validate and measure, store nothing */
	if (!s) {
		for (ret=0; *src; src++) {
			cu = *src;

			if (cu <= 0x7F) {
				ret += 1;
			} else if (cu <= 0x7FF) {
				ret += 2;
			} else if ((cu < 0xD800) || (cu >= 0xE000)) {
				ret += 3;
			} else if ((cu >= 0xDC00)
					|| (src[1] < 0xDC00)
					|| (src[1] >= 0xE000)) {
				/* src[1] is only read after a high surrogate, */
				/* so it is at worst the null terminator.      */
				return (size_t)-1;
			} else {
				/* consume the low surrogate as well */
				src++;
				ret += 4;
			}
		}

		return ret;
	}

	/* conversion mode: len tracks the bytes still available in s */
	if ((len = (ssize_t)n) < 0)
		return (size_t)-1;

	for (dst=s; *src; src++) {
		cu = *src;

		if (cu <= 0x7F) {
			/* reserve space _before_ storing, as in all other  */
			/* branches: with n == 0 an unchecked store here     */
			/* would write past the end of s, and the negative   */
			/* len would never match the len == 0 test below.    */
			if ((len -= 1) < 0)
				return (size_t)-1;

			*dst++ = (char)cu;

		} else if (cu <= 0x7FF) {
			if ((len -= 2) < 0)
				return (size_t)-1;

			*dst++ = (char)(0xC0 | (cu >> 6));
			*dst++ = (char)(0x80 | (cu & 0x3F));

		} else if ((cu < 0xD800) || (cu >= 0xE000)) {
			if ((len -= 3) < 0)
				return (size_t)-1;

			*dst++ = (char)(0xE0 | (cu >> 12));
			*dst++ = (char)(0x80 | ((cu >> 6) & 0x3F));
			*dst++ = (char)(0x80 | (cu & 0x3F));

		} else if ((cu >= 0xDC00)
				|| (src[1] < 0xDC00)
				|| (src[1] >= 0xE000)) {
			/* unpaired low or high surrogate */
			return (size_t)-1;

		} else {
			if ((len -= 4) < 0)
				return (size_t)-1;

			/* scalar = 0x10000 + (high[9:0] << 10) + low[9:0];  */
			/* adding 0x10000 is the (uuuuu = wwww + 1) step.    */
			cp  = 0x10000;
			cp += (uint32_t)(cu & 0x3FF) << 10;
			cp += (uint32_t)(*++src & 0x3FF);

			*dst++ = (char)(0xF0 | (cp >> 18));
			*dst++ = (char)(0x80 | ((cp >> 12) & 0x3F));
			*dst++ = (char)(0x80 | ((cp >> 6) & 0x3F));
			*dst++ = (char)(0x80 | (cp & 0x3F));
		}

		/* buffer exactly full: stop here, without a terminator */
		if (len == 0)
			return (size_t)(dst - s);
	}

	/* terminate only when there is room for the null byte */
	if (len)
		*dst = 0;

	return (size_t)(dst - s);
}