Blame src/u16_wcstombs.c

587b67
/*******************************************************************/
587b67
/*  u16ports: u16 variants of wide character string functions.     */
587b67
/*  Copyright (C) 2017  Z. Gilboa                                  */
587b67
/*  Released under the Standard MIT License; see COPYING.U16PORTS. */
587b67
/*******************************************************************/
587b67
587b67
#include <stdint.h>
587b67
#include <u16ports/u16ports.h>
587b67
587b67
/**
587b67
 *  scalar	nickname	utf-16		utf-8[0]  utf-8[1]  utf-8[2]  utf-8[3]
587b67
 *  ------	--------	--------	--------  --------  --------  --------
587b67
 *  00000000	7x		00000000	0xxxxxxx
587b67
 *  0xxxxxxx			0xxxxxxx
587b67
 *
587b67
 *  00000yyy	5y6x		00000yyy	110yyyyy  10xxxxxx
587b67
 *  yyxxxxxx			yyxxxxxx
587b67
 *
587b67
 *  zzzzyyyy	4z6y6x		zzzzyyyy	1110zzzz  10yyyyyy  10xxxxxx
587b67
 *  yyxxxxxx			yyxxxxxx
587b67
 *
587b67
 *  000uuuuu	5u4z6y6x	110110ww	11110uuu  10uuzzzz  10yyyyyy  10xxxxxx
587b67
 *  zzzzyyyy			wwzzzzyy
587b67
 *  yyxxxxxx			110111yy
587b67
 *				yyxxxxxx        (where wwww = uuuuu - 1)
587b67
 *
587b67
**/
587b67
587b67
/**
 *  u16_wcstombs_scalar: decode one unicode scalar value from the
 *  utf-16 sequence at src (which must not point at the terminator).
 *
 *  returns the number of utf-16 units consumed (1, or 2 for a
 *  surrogate pair) and stores the scalar in *cp; returns 0, leaving
 *  *cp untouched, when src[0] is a lone low surrogate or when a high
 *  surrogate is not followed by a low surrogate. src[1] is read only
 *  when src[0] is a high surrogate, hence never past the terminator.
**/
static int u16_wcstombs_scalar(const uint16_t * src, uint32_t * cp)
{
	uint32_t	hi;
	uint32_t	lo;

	hi = src[0];

	/* anything outside of the surrogate range stands for itself */
	if ((hi < 0xD800) || (hi >= 0xE000)) {
		*cp = hi;
		return 1;
	}

	/* a low surrogate may not lead a pair */
	if (hi >= 0xDC00)
		return 0;

	/* a high surrogate must be followed by a low surrogate */
	lo = src[1];

	if ((lo < 0xDC00) || (lo >= 0xE000))
		return 0;

	/* ten payload bits from each unit, offset by 0x10000 (uuuuu = wwww + 1) */
	*cp = 0x10000 + ((hi - 0xD800) << 10) + (lo - 0xDC00);

	return 2;
}

/**
 *  u16_wcstombs_utf8_len: number of utf-8 bytes needed to encode cp.
**/
static size_t u16_wcstombs_utf8_len(uint32_t cp)
{
	if (cp <= 0x7F)
		return 1;

	if (cp <= 0x7FF)
		return 2;

	if (cp <= 0xFFFF)
		return 3;

	return 4;
}

/**
 *  u16_wcstombs: convert the null-terminated utf-16 string src to utf-8.
 *
 *  s == NULL:	nothing is written and n is ignored; the return value
 *		is the number of utf-8 bytes that the conversion would
 *		produce, not counting the null terminator.
 *
 *  s != NULL:	at most n bytes are written to s. the return value is
 *		the number of bytes written, not counting the null
 *		terminator; the terminator itself is written only when
 *		the entire string was converted and room for it remains.
 *
 *  the function returns (size_t)-1 when:
 *    - src contains a lone low surrogate, or a high surrogate that
 *      is not followed by a low surrogate;
 *    - s != NULL and n exceeds SIZE_MAX/2;
 *    - s != NULL and the next character does not fit in the space
 *      that remains (bytes of preceding characters have already
 *      been written at that point). note that in this case the
 *      iso c wcstombs() would stop and return the count instead.
**/
size_t u16_wcstombs(char * s, const uint16_t * src, size_t n)
{
	size_t		ret;
	size_t		left;
	size_t		need;
	int		units;
	uint32_t	cp;
	char *		dst;

	/* counting mode */
	if (!s) {
		for (ret=0; *src; src+=units) {
			if (!(units = u16_wcstombs_scalar(src,&cp)))
				return (size_t)-1;

			ret += u16_wcstombs_utf8_len(cp);
		}

		return ret;
	}

	/* reject sizes that cannot be represented as a signed byte count */
	if (n > SIZE_MAX/2)
		return (size_t)-1;

	for (dst=s, left=n; *src; src+=units) {
		if (!(units = u16_wcstombs_scalar(src,&cp)))
			return (size_t)-1;

		need = u16_wcstombs_utf8_len(cp);

		/**
		 * the space check applies to all widths, single-byte
		 * characters included: with n == 0 nothing may be written.
		**/
		if (left < need)
			return (size_t)-1;

		left -= need;

		switch (need) {
			case 1:
				/* 0xxxxxxx */
				*dst++ = (char)cp;
				break;

			case 2:
				/* 110yyyyy 10xxxxxx */
				*dst++ = (char)(0xC0 | (cp >> 6));
				*dst++ = (char)(0x80 | (cp & 0x3F));
				break;

			case 3:
				/* 1110zzzz 10yyyyyy 10xxxxxx */
				*dst++ = (char)(0xE0 | (cp >> 12));
				*dst++ = (char)(0x80 | ((cp >> 6) & 0x3F));
				*dst++ = (char)(0x80 | (cp & 0x3F));
				break;

			default:
				/* 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx */
				*dst++ = (char)(0xF0 | (cp >> 18));
				*dst++ = (char)(0x80 | ((cp >> 12) & 0x3F));
				*dst++ = (char)(0x80 | ((cp >> 6) & 0x3F));
				*dst++ = (char)(0x80 | (cp & 0x3F));
				break;
		}

		/* buffer is exactly full: no room for a terminator */
		if (left == 0)
			return (size_t)(dst - s);
	}

	if (left)
		*dst = 0;

	return (size_t)(dst - s);
}