Blame src/unicode/ntapi_uc_unicode_conversion_from_utf16.c

dd89bb
/********************************************************/
dd89bb
/*  ntapi: Native API core library                      */
59d585
/*  Copyright (C) 2013--2021  Z. Gilboa                 */
dd89bb
/*  Released under GPLv2 and GPLv3; see COPYING.NTAPI.  */
dd89bb
/********************************************************/
dd89bb
dd89bb
#include <psxtypes/psxtypes.h>
dd89bb
#include <ntapi/nt_status.h>
dd89bb
#include <ntapi/nt_unicode.h>
dd89bb
#include "ntapi_impl.h"
dd89bb
dd89bb
dd89bb
/*
 * Emits one utf-8 byte for the utf-16 code unit at args->src.
 *
 * Installed in slots 0 and 1 of the callback table built by
 * __ntapi_uc_convert_unicode_stream_utf16_to_utf8().
 *
 * On success: one byte is stored at args->dst, args->src advances by
 * one code unit, args->dst by one byte, and args->bytes_written by one.
 *
 * Returns NT_STATUS_BUFFER_TOO_SMALL (without touching args) when
 * args->dst has reached args->dst_cap, NT_STATUS_SUCCESS otherwise.
 */
static int32_t __fastcall __utf16_to_utf8_handler_1byte_or_null_termination(nt_utf16_callback_args * args)
{
	/*******************************************/
	/* from: 00000000 0xxxxxxx (utf-16)        */
	/* to:   0xxxxxxx          (utf-8)         */
	/*******************************************/

	uint8_t * dst;

	/* no room left for even a single byte */
	if (args->dst >= args->dst_cap)
		return NT_STATUS_BUFFER_TOO_SMALL;

	/* read the code unit by value (as the multi-byte handlers do) */
	/* and narrow it, rather than peeking at its first byte in     */
	/* memory: same result on little endian, and no dependency on  */
	/* the host byte order.                                        */
	dst  = (uint8_t *)args->dst;
	*dst = (uint8_t)(*args->src);

	/* advance source and destination buffer */
	args->src++;
	args->dst = (void *)((uintptr_t)(args->dst) + 1);

	/* bytes_written */
	args->bytes_written++;

	return NT_STATUS_SUCCESS;
}
dd89bb
dd89bb
dd89bb
/*
 * Encodes the utf-16 code unit at args->src as a two-byte utf-8
 * sequence (slot 2 of the callback table).
 *
 * On success: two bytes are stored at args->dst, args->src advances by
 * one code unit, args->dst by two bytes, and args->bytes_written by two.
 *
 * Returns NT_STATUS_BUFFER_TOO_SMALL (without touching args) when fewer
 * than two bytes remain before args->dst_cap.
 */
static int32_t __fastcall __utf16_to_utf8_handler_2bytes(nt_utf16_callback_args * args)
{
	/*******************************************/
	/* from: 00000yyy yyxxxxxx (utf-16)        */
	/* to:   110yyyyy 10xxxxxx (utf-8)         */
	/*******************************************/

	const wchar16_t * in;
	uint8_t *	  out;

	wchar16_t	unit;
	wchar16_t	y_bits;
	wchar16_t	x_bits;

	/* both output bytes must fit below dst_cap */
	if ((uintptr_t)(args->dst) + 1 >= (uintptr_t)(args->dst_cap))
		return NT_STATUS_BUFFER_TOO_SMALL;

	in   = args->src;
	out  = (uint8_t *)args->dst;
	unit = *in;

	/* everything above the low six bits goes to the lead byte */
	y_bits = unit >> 6;

	/* the low six bits go to the continuation byte */
	x_bits = unit & 0x3F;

	out[0] = (uint8_t)(0xC0 | y_bits);
	out[1] = (uint8_t)(0x80 | x_bits);

	/* consume one code unit, produce two bytes */
	args->src++;
	args->dst = (void *)((uintptr_t)(args->dst) + 2);
	args->bytes_written += 2;

	return NT_STATUS_SUCCESS;
}
dd89bb
dd89bb
dd89bb
/*
 * Encodes the utf-16 code unit at args->src as a three-byte utf-8
 * sequence (slot 3 of the callback table).
 *
 * On success: three bytes are stored at args->dst, args->src advances
 * by one code unit, args->dst by three bytes, and args->bytes_written
 * by three.
 *
 * Returns NT_STATUS_BUFFER_TOO_SMALL (without touching args) when fewer
 * than three bytes remain before args->dst_cap.
 */
static int32_t __fastcall __utf16_to_utf8_handler_3bytes(nt_utf16_callback_args * args)
{
	/********************************************/
	/* from: zzzzyyyy yyxxxxxx (utf-16)         */
	/* to:   1110zzzz 10yyyyyy 10xxxxxx (utf-8) */
	/********************************************/

	const wchar16_t * in;
	uint8_t *	  out;

	wchar16_t	unit;
	wchar16_t	z_bits;
	wchar16_t	y_bits;
	wchar16_t	x_bits;

	/* all three output bytes must fit below dst_cap */
	if ((uintptr_t)(args->dst) + 2 >= (uintptr_t)(args->dst_cap))
		return NT_STATUS_BUFFER_TOO_SMALL;

	in   = args->src;
	out  = (uint8_t *)args->dst;
	unit = *in;

	/* split the 16-bit unit into its 4 + 6 + 6 bit fields */
	z_bits = unit >> 12;
	y_bits = (unit >> 6) & 0x3F;
	x_bits = unit & 0x3F;

	out[0] = (uint8_t)(0xE0 | z_bits);
	out[1] = (uint8_t)(0x80 | y_bits);
	out[2] = (uint8_t)(0x80 | x_bits);

	/* consume one code unit, produce three bytes */
	args->src++;
	args->dst = (void *)((uintptr_t)(args->dst) + 3);
	args->bytes_written += 3;

	return NT_STATUS_SUCCESS;
}
dd89bb
dd89bb
dd89bb
/*
 * Encodes the two utf-16 code units at args->src (a surrogate pair) as
 * a four-byte utf-8 sequence (slot 4 of the callback table).
 *
 * On success: four bytes are stored at args->dst, args->src advances by
 * two code units, args->dst by four bytes, and args->bytes_written by
 * four.
 *
 * Returns NT_STATUS_BUFFER_TOO_SMALL (without touching args) when fewer
 * than four bytes remain before args->dst_cap.
 *
 * NOTE(review): the second code unit is read without any source-bounds
 * check here; presumably the caller only dispatches to this handler
 * once a complete pair is known to be present -- confirm.
 */
static int32_t __fastcall __utf16_to_utf8_handler_4bytes(nt_utf16_callback_args * args)
{
	/****************************************************************/
	/* from: 110110ww  wwzzzzyy  110111yy  yyxxxxxx (utf-16 pair)   */
	/* to:   11110uuu  10uuzzzz  10yyyyyy  10xxxxxx (utf-8)         */
	/* where uuuuu = wwww + 1                                       */
	/****************************************************************/

	const wchar16_t * in;
	uint8_t *	  out;

	wchar16_t	lead;
	wchar16_t	trail;

	wchar16_t	w_bits;
	wchar16_t	z_bits;
	wchar16_t	y_upper;
	wchar16_t	y_lower;
	wchar16_t	x_bits;
	wchar16_t	u_bits;

	/* all four output bytes must fit below dst_cap */
	if ((uintptr_t)(args->dst) + 3 >= (uintptr_t)(args->dst_cap))
		return NT_STATUS_BUFFER_TOO_SMALL;

	in  = args->src;
	out = (uint8_t *)args->dst;

	/* first code unit: 110110ww wwzzzzyy */
	lead    = in[0];
	w_bits  = (lead >> 6) & 0x0F;
	z_bits  = (lead >> 2) & 0x0F;
	y_upper = (lead & 0x03) << 4;

	/* second code unit: 110111yy yyxxxxxx */
	trail   = in[1];
	y_lower = (trail >> 6) & 0x0F;
	x_bits  = trail & 0x3F;

	/* uuuuu */
	u_bits  = w_bits + 1;

	/* 1st byte: 11110uuu (upper three bits of uuuuu) */
	out[0] = (uint8_t)(0xF0 | (u_bits >> 2));

	/* 2nd byte: 10uuzzzz (lower two bits of uuuuu, then zzzz) */
	out[1] = (uint8_t)(0x80 | ((u_bits & 0x03) << 4) | z_bits);

	/* 3rd byte: 10yyyyyy */
	out[2] = (uint8_t)(0x80 | y_lower | y_upper);

	/* 4th byte: 10xxxxxx */
	out[3] = (uint8_t)(0x80 | x_bits);

	/* consume two code units, produce four bytes */
	args->src += 2;
	args->dst = (void *)((uintptr_t)(args->dst) + 4);
	args->bytes_written += 4;

	return NT_STATUS_SUCCESS;
}
dd89bb
dd89bb
dd89bb
/*
 * Decides whether the code unit at params->addr_failed is a dangling
 * lead surrogate sitting at the very end of the source buffer.
 *
 * If exactly two bytes remain from addr_failed to the end of the source
 * and that unit lies in [0xD800, 0xDC00), it is recorded as leftover
 * (leftover_count = 2, leftover_bytes = unit << 16) and
 * NT_STATUS_SUCCESS is returned.  In every other case the leftover
 * fields are cleared and NT_STATUS_ILLEGAL_CHARACTER is returned.
 */
static int32_t __fastcall __update_stream_leftover_info_utf16(
	__in_out	nt_unicode_conversion_params_utf16_to_utf8 *	params)
{
	ptrdiff_t		remaining;
	const wchar16_t *	tail;

	/* bytes between the failing address and the end of the source */
	remaining = (uintptr_t)params->src + (uintptr_t)params->src_size_in_bytes - (uintptr_t)params->addr_failed;
	tail      = (const wchar16_t *)params->addr_failed;

	/* the size test comes first so that tail is only dereferenced */
	/* when exactly one code unit is left                          */
	if ((remaining != 2) || (*tail < 0xD800) || (*tail >= 0xDC00)) {
		params->leftover_count = 0;
		params->leftover_bytes = 0;
		return NT_STATUS_ILLEGAL_CHARACTER;
	}

	/* possibly the lead of a surrogate pair */
	params->leftover_count = 2;

	/* store first, then shift in place: the shift is carried out */
	/* in the type of leftover_bytes rather than in a promoted    */
	/* int                                                        */
	params->leftover_bytes = *tail;
	params->leftover_bytes <<= 16;

	return NT_STATUS_SUCCESS;
}
dd89bb
dd89bb
dd89bb
/*
 * Converts the utf-16 stream described by params to utf-8 by running
 * __ntapi_uc_validate_unicode_stream_utf16() with a table of per-length
 * encoder callbacks.
 *
 * params->bytes_written is used as the starting count and is updated
 * with the callbacks' running total regardless of the outcome.
 *
 * Result handling:
 *   NT_STATUS_SUCCESS:          addr_failed and leftover fields cleared.
 *   NT_STATUS_BUFFER_TOO_SMALL: addr_failed set to the source position
 *                               reached by the callbacks; leftover
 *                               fields cleared.
 *   anything else:              the status is replaced by the result of
 *                               __update_stream_leftover_info_utf16(),
 *                               and leftover_bytes is then shifted left
 *                               by the width difference between
 *                               uintptr_t and uint32_t.
 */
int32_t __stdcall 	__ntapi_uc_convert_unicode_stream_utf16_to_utf8(
	__in_out	nt_unicode_conversion_params_utf16_to_utf8 *	params)
{
	int32_t				status;
	nt_utf16_callback_args		cb_args;

	/* slots 0 and 1 share the single-byte handler; */
	/* slots 2..4 emit 2, 3 and 4 bytes respectively */
	ntapi_uc_utf16_callback_fn *	handlers[5] = {
		(ntapi_uc_utf16_callback_fn *)__utf16_to_utf8_handler_1byte_or_null_termination,
		(ntapi_uc_utf16_callback_fn *)__utf16_to_utf8_handler_1byte_or_null_termination,
		(ntapi_uc_utf16_callback_fn *)__utf16_to_utf8_handler_2bytes,
		(ntapi_uc_utf16_callback_fn *)__utf16_to_utf8_handler_3bytes,
		(ntapi_uc_utf16_callback_fn *)__utf16_to_utf8_handler_4bytes
	};

	/* dst_cap is one past the last writable destination byte */
	cb_args.src		= params->src;
	cb_args.dst		= params->dst;
	cb_args.dst_cap		= (void *)((uintptr_t)(params->dst) + (params->dst_size_in_bytes));
	cb_args.bytes_written	= params->bytes_written;

	status = __ntapi_uc_validate_unicode_stream_utf16(
		params->src,
		params->src_size_in_bytes,
		&params->code_points,
		&params->addr_failed,
		handlers,
		&cb_args);

	params->bytes_written = cb_args.bytes_written;

	if (status == NT_STATUS_SUCCESS) {
		params->addr_failed    = 0;
		params->leftover_bytes = 0;
		params->leftover_count = 0;
		return status;
	}

	if (status == NT_STATUS_BUFFER_TOO_SMALL) {
		/* report where in the source the output ran out of room */
		params->addr_failed    = cb_args.src;
		params->leftover_bytes = 0;
		params->leftover_count = 0;
		return status;
	}

	/* any other failure: check for a dangling lead surrogate */
	status = __update_stream_leftover_info_utf16(params);

	/* the shift count is zero wherever uintptr_t is 32 bits wide */
	params->leftover_bytes <<= (8 * (sizeof(uintptr_t) - sizeof(uint32_t)));

	return status;
}
dd89bb
dd89bb
dd89bb
/*
 * Placeholder: performs no conversion.  params is neither read nor
 * written, and NT_STATUS_SUCCESS is returned unconditionally.
 */
int32_t __stdcall 	__ntapi_uc_convert_unicode_stream_utf16_to_utf32(
	__in_out	nt_unicode_conversion_params_utf16_to_utf32 *	params)
{
	int32_t status = NT_STATUS_SUCCESS;

	/* keep the parameter referenced */
	(void)params;

	return status;
}