/*******************************************************************/
/*  u16ports: u16 variants of wide character string functions.     */
/*  Copyright (C) 2017  Z. Gilboa                                  */
/*  Released under the Standard MIT License; see COPYING.U16PORTS. */
/*******************************************************************/

#include <stdint.h>
#include <u16ports/u16ports.h>

/**
 *  scalar	nickname	utf-16		utf-8[0]  utf-8[1]  utf-8[2]  utf-8[3]
 *  ------	--------	--------	--------  --------  --------  --------
 *  00000000	7x		00000000	0xxxxxxx
 *  0xxxxxxx			0xxxxxxx
 *
 *  00000yyy	5y6x		00000yyy	110yyyyy  10xxxxxx
 *  yyxxxxxx			yyxxxxxx
 *
 *  zzzzyyyy	4z6y6x		zzzzyyyy	1110zzzz  10yyyyyy  10xxxxxx
 *  yyxxxxxx			yyxxxxxx
 *
 *  000uuuuu	5u4z6y6x	110110ww	11110uuu  10uuzzzz  10yyyyyy  10xxxxxx
 *  zzzzyyyy			wwzzzzyy
 *  yyxxxxxx			110111yy
 *				yyxxxxxx        (where wwww = uuuuu - 1)
 *
**/

/**
 *  u16_wcstombs: convert a null-terminated utf-16 string to utf-8.
 *
 *  s:    destination buffer; when NULL, nothing is written and only
 *        the required length is computed (n is then ignored).
 *  src:  null-terminated sequence of utf-16 code units.
 *  n:    capacity of the destination buffer, in bytes.
 *
 *  returns the number of utf-8 bytes produced (or, when s is NULL,
 *  required), not counting the terminating null byte.
 *
 *  returns (size_t)-1 when:
 *    - src contains an unpaired or misordered surrogate;
 *    - n exceeds SIZE_MAX/2;
 *    - a multi-byte sequence does not fit in the remaining space
 *      (bytes already written to s are left in place).
 *
 *  conversion stops as soon as exactly n bytes have been written,
 *  in which case the output is _not_ null-terminated; a terminating
 *  null byte is stored only when the entire source was converted
 *  and at least one byte of space remains. when n is zero, nothing
 *  is written and zero is returned.
**/
size_t u16_wcstombs(char * s, const uint16_t * src, size_t n)
{
	size_t		ret;
	size_t		avail;
	char *		dst;
	uint16_t	hi;
	uint16_t	lo;
	uint16_t	uuuuu;

	/* length query: count the bytes, validate surrogate pairs */
	if (!s) {
		for (ret=0; *src; src++) {
			if (*src <= 0x7F) {
				ret += 1;

			} else if (*src <= 0x7FF) {
				ret += 2;

			} else if ((*src < 0xD800) || (*src >= 0xE000)) {
				ret += 3;

			} else if ((src[0] >= 0xDC00)
					|| (src[1] < 0xDC00)
					|| (src[1] >= 0xE000)) {
				/* src[1] is only read after a (non-null) high   */
				/* surrogate, so it is at worst the terminator.  */
				return (size_t)-1;

			} else {
				/* (surrogate pair) */
				ret += 4;
				src++;
			}
		}

		return ret;
	}

	/* sizes that would be negative as a signed length are rejected */
	if (n > (SIZE_MAX >> 1))
		return (size_t)-1;

	/* no room at all: must not touch the destination buffer */
	if (n == 0)
		return 0;

	/* invariant: avail is non-zero at the top of every iteration */
	for (dst=s, avail=n; *src; src++) {
		if (*src <= 0x7F) {
			/* 7x */
			*dst++ = (char)*src;
			avail -= 1;

		} else if (*src <= 0x7FF) {
			/* 5y6x */
			if (avail < 2)
				return (size_t)-1;

			*dst++ = (char)(0xC0 | (*src >> 6));
			*dst++ = (char)(0x80 | (*src & 0x3F));
			avail -= 2;

		} else if ((*src < 0xD800) || (*src >= 0xE000)) {
			/* 4z6y6x */
			if (avail < 3)
				return (size_t)-1;

			*dst++ = (char)(0xE0 | (*src >> 12));
			*dst++ = (char)(0x80 | ((*src >> 6) & 0x3F));
			*dst++ = (char)(0x80 | (*src & 0x3F));
			avail -= 3;

		} else if ((src[0] >= 0xDC00)
				|| (src[1] < 0xDC00)
				|| (src[1] >= 0xE000)) {
			/* low surrogate first, or high surrogate */
			/* not followed by a low surrogate        */
			return (size_t)-1;

		} else {
			/* 5u4z6y6x */
			if (avail < 4)
				return (size_t)-1;

			/* high surrogate: 110110ww wwzzzzyy */
			hi = *src++;

			/* low surrogate:  110111yy yyxxxxxx */
			lo = *src;

			/* uuuuu = wwww + 1 */
			uuuuu = ((hi >> 6) & 0x0F) + 1;

			*dst++ = (char)(0xF0 | (uuuuu >> 2));
			*dst++ = (char)(0x80 | ((uuuuu & 0x03) << 4) | ((hi >> 2) & 0x0F));
			*dst++ = (char)(0x80 | ((hi & 0x03) << 4) | ((lo >> 6) & 0x0F));
			*dst++ = (char)(0x80 | (lo & 0x3F));
			avail -= 4;
		}

		/* buffer exactly full: stop without a terminator */
		if (avail == 0)
			return (size_t)(dst - s);
	}

	/* the loop exits here only with avail > 0 */
	*dst = 0;

	return (size_t)(dst - s);
}