/********************************************************/
/*  ntapi: Native API core library                      */
/*  Copyright (C) 2013--2021  Z. Gilboa                 */
/*  Released under GPLv2 and GPLv3; see COPYING.NTAPI.  */
/********************************************************/
dd89bb
dd89bb
#include <psxtypes/psxtypes.h>
dd89bb
#include <ntapi/nt_status.h>
dd89bb
#include <ntapi/nt_unicode.h>
dd89bb
#include "ntapi_impl.h"
dd89bb
dd89bb
dd89bb
/*
 * Overlay for two consecutive bytes of a UTF-8 sequence.
 * The member names follow stream (memory) order, not numeric
 * significance: `low' is the first byte, `high' the second.
 */
typedef struct ___two_bytes {
	unsigned char	low;	/* first byte in the stream  */
	unsigned char	high;	/* second byte in the stream */
} __two_bytes;
dd89bb
dd89bb
dd89bb
/*
 * Overlay for three consecutive bytes of a UTF-8 sequence.
 * The member names follow stream (memory) order, not numeric
 * significance: `low' is the first byte, `high' the last.
 */
typedef struct ___three_bytes {
	unsigned char	low;	/* first byte in the stream  */
	unsigned char	middle;	/* second byte in the stream */
	unsigned char	high;	/* third byte in the stream  */
} __three_bytes;
dd89bb
dd89bb
dd89bb
/*
 * UTF-8 to UTF-16 callback for a one-byte sequence (by its name, also
 * used for the terminating null byte): widens the byte at args->src
 * into a single UTF-16 code unit stored at args->dst.
 *
 * On success, args->src advances by one byte, args->dst advances by
 * sizeof(wchar16_t), and args->bytes_written grows by sizeof(wchar16_t).
 *
 * Returns NT_STATUS_BUFFER_TOO_SMALL, leaving args untouched, when fewer
 * than sizeof(wchar16_t) bytes remain before args->dst_cap; returns
 * NT_STATUS_SUCCESS otherwise.
 */
static int32_t __fastcall __utf8_to_utf16_handler_1byte_or_null_termination(nt_utf8_callback_args * args)
{
	/***************************/
	/* from: 0xxxxxxx          */
	/* to:   00000000 0xxxxxxx */
	/***************************/

	wchar16_t *	dst;
	uintptr_t	cur;
	uintptr_t	cap;

	cur = (uintptr_t)args->dst;
	cap = (uintptr_t)args->dst_cap;

	/* require room for a whole code unit, not merely one spare byte */
	if ((cur >= cap) || ((cap - cur) < sizeof(wchar16_t)))
		return NT_STATUS_BUFFER_TOO_SMALL;

	dst  = (wchar16_t *)args->dst;
	*dst = *(args->src);

	/* advance source and destination buffer */
	args->src++;
	args->dst = (void *)(cur + sizeof(wchar16_t));

	/* bytes_written */
	args->bytes_written += sizeof(wchar16_t);

	return NT_STATUS_SUCCESS;
}
dd89bb
dd89bb
dd89bb
/*
 * UTF-8 to UTF-16 callback for a two-byte sequence: combines the five
 * payload bits of the lead byte with the six payload bits of the
 * continuation byte into a single UTF-16 code unit stored at args->dst.
 *
 * The prefix bits are removed with XOR, so the result is only meaningful
 * for bytes that actually carry the 110xxxxx / 10xxxxxx prefixes.
 *
 * On success, args->src advances by two bytes, args->dst advances by
 * sizeof(wchar16_t), and args->bytes_written grows by sizeof(wchar16_t).
 *
 * Returns NT_STATUS_BUFFER_TOO_SMALL, leaving args untouched, when fewer
 * than sizeof(wchar16_t) bytes remain before args->dst_cap; returns
 * NT_STATUS_SUCCESS otherwise.
 */
static int32_t __fastcall __utf8_to_utf16_handler_2bytes(nt_utf8_callback_args * args)
{
	/***************************/
	/* from: 110yyyyy 10xxxxxx */
	/* to:   00000yyy yyxxxxxx */
	/***************************/

	__two_bytes *		src;	/* low: lead byte, high: continuation */
	wchar16_t *		dst;
	wchar16_t		unit;
	uintptr_t		cur;
	uintptr_t		cap;

	cur = (uintptr_t)args->dst;
	cap = (uintptr_t)args->dst_cap;

	/* require room for a whole code unit, not merely one spare byte */
	if ((cur >= cap) || ((cap - cur) < sizeof(wchar16_t)))
		return NT_STATUS_BUFFER_TOO_SMALL;

	src	= (__two_bytes *)args->src;
	dst	= (wchar16_t *)args->dst;

	/* yyyyy */
	unit   = (src->low ^ 0xC0);
	unit <<= 6;

	/* xxxxxx */
	unit  |= (src->high ^ 0x80);

	/* write */
	*dst = unit;

	/* advance source and destination buffer */
	args->src += 2;
	args->dst = (void *)(cur + sizeof(wchar16_t));

	/* bytes_written */
	args->bytes_written += sizeof(wchar16_t);

	return NT_STATUS_SUCCESS;
}
dd89bb
dd89bb
dd89bb
/*
 * UTF-8 to UTF-16 callback for a three-byte sequence: combines the four
 * payload bits of the lead byte with the six payload bits of each of the
 * two continuation bytes into a single UTF-16 code unit at args->dst.
 *
 * The prefix bits are removed with XOR, so the result is only meaningful
 * for bytes that actually carry the 1110xxxx / 10xxxxxx prefixes.
 *
 * On success, args->src advances by three bytes, args->dst advances by
 * sizeof(wchar16_t), and args->bytes_written grows by sizeof(wchar16_t).
 *
 * Returns NT_STATUS_BUFFER_TOO_SMALL, leaving args untouched, when fewer
 * than sizeof(wchar16_t) bytes remain before args->dst_cap; returns
 * NT_STATUS_SUCCESS otherwise.
 */
static int32_t __fastcall __utf8_to_utf16_handler_3bytes(nt_utf8_callback_args * args)
{
	/************************************/
	/* from: 1110zzzz 10yyyyyy 10xxxxxx */
	/* to:   zzzzyyyy yyxxxxxx          */
	/************************************/

	__three_bytes *		src;	/* low: lead byte, then two continuations */
	wchar16_t *		dst;
	wchar16_t		unit;
	wchar16_t		yyyyyy;
	uintptr_t		cur;
	uintptr_t		cap;

	cur = (uintptr_t)args->dst;
	cap = (uintptr_t)args->dst_cap;

	/* require room for a whole code unit, not merely one spare byte */
	if ((cur >= cap) || ((cap - cur) < sizeof(wchar16_t)))
		return NT_STATUS_BUFFER_TOO_SMALL;

	src	= (__three_bytes *)args->src;
	dst	= (wchar16_t *)args->dst;

	/* zzzz */
	unit   = (src->low ^ 0xE0);
	unit <<= 12;

	/* yyyyyy */
	yyyyyy   = (src->middle ^ 0x80);
	yyyyyy <<= 6;
	unit    |= yyyyyy;

	/* xxxxxx */
	unit |= (src->high ^ 0x80);

	/* write */
	*dst = unit;

	/* advance source and destination buffer */
	args->src += 3;
	args->dst = (void *)(cur + sizeof(wchar16_t));

	/* bytes_written */
	args->bytes_written += sizeof(wchar16_t);

	return NT_STATUS_SUCCESS;
}
dd89bb
dd89bb
dd89bb
/*
 * UTF-8 to UTF-16 callback for a four-byte sequence: re-packs the
 * 21 payload bits into a lead/trail surrogate pair stored as two
 * consecutive UTF-16 code units at args->dst.
 *
 * The lead surrogate carries wwww = uuuuu - 1 (see the bit layout
 * below); the prefix bits of the first three bytes are removed with
 * XOR or masked away, so the result is only meaningful for bytes that
 * actually carry the expected prefixes.
 *
 * On success, args->src advances by four bytes, args->dst advances by
 * 2 * sizeof(wchar16_t), and args->bytes_written grows by the same amount.
 *
 * Returns NT_STATUS_BUFFER_TOO_SMALL, leaving args untouched, when fewer
 * than 2 * sizeof(wchar16_t) bytes remain before args->dst_cap; returns
 * NT_STATUS_SUCCESS otherwise.
 */
static int32_t __fastcall __utf8_to_utf16_handler_4bytes(nt_utf8_callback_args * args)
{
	/*************************************************/
	/* from: 11110uuu  10uuzzzz  10yyyyyy  10xxxxxx  */
	/* to:   110110ww  wwzzzzyy  110111yy  yyxxxxxx  */
	/*************************************************/

	__two_bytes *		src_low;	/* first and second byte */
	__two_bytes *		src_high;	/* third and fourth byte */
	wchar16_t *		dst;
	uintptr_t		cur;
	uintptr_t		cap;
	wchar16_t		wwww;
	wchar16_t		lead;
	wchar16_t		trail;
	unsigned char		ulow;
	unsigned char		uhigh;
	unsigned char		yy;
	unsigned char		yyyy;
	unsigned char		zzzz;

	cur = (uintptr_t)args->dst;
	cap = (uintptr_t)args->dst_cap;

	/* both surrogates must fit entirely below dst_cap */
	if ((cur >= cap) || ((cap - cur) < (2 * sizeof(wchar16_t))))
		return NT_STATUS_BUFFER_TOO_SMALL;

	dst      = (wchar16_t *)args->dst;
	src_low  = (__two_bytes *)args->src;
	src_high = src_low + 1;

	/* uuuuu: three bits from the first byte, two from the second */
	ulow	= src_low->low  ^ 0xF0;
	uhigh	= src_low->high ^ 0x80;

	ulow  <<= 2;
	uhigh >>= 4;

	/* wwww = uuuuu - 1, placed at bits 6..9 of the lead surrogate */
	wwww   = (ulow | uhigh) - 1;
	wwww <<= 6;

	/* 110110ww wwzzzzyy */
	yy     = src_high->low ^ 0x80;
	yy   >>= 4;

	zzzz   = src_low->high & 0x0F;
	zzzz <<= 2;

	lead   = 0xD800;
	lead  |= wwww;
	lead  |= zzzz;
	lead  |= yy;

	/* 110111yy  yyxxxxxx */
	yyyy   = src_high->low & 0x0F;
	trail  = yyyy << 6;
	trail |= src_high->high ^ 0x80;
	trail |= 0xDC00;

	/* write */
	dst[0] = lead;
	dst[1] = trail;

	/* advance source and destination buffer */
	args->src += 4;
	args->dst = (void *)(cur + (2 * sizeof(wchar16_t)));

	/* bytes_written */
	args->bytes_written += 2 * sizeof(wchar16_t);

	return NT_STATUS_SUCCESS;
}
dd89bb
dd89bb
dd89bb
/* non-zero when c has the form 10xxxxxx (UTF-8 continuation byte) */
static int __utf8_is_continuation_byte(unsigned char c)
{
	return (c >= 0x80) && (c <= 0xBF);
}

/* non-zero when utf8[0..1] can begin a well-formed three-byte sequence */
static int __utf8_is_3byte_prefix(const unsigned char * utf8)
{
	return (((utf8[0] == 0xE0) &&                      (utf8[1] >= 0xA0) && (utf8[1] <= 0xBF))
		|| ((utf8[0] >= 0xE1) && (utf8[0] <= 0xEC) && (utf8[1] >= 0x80) && (utf8[1] <= 0xBF))
		|| ((utf8[0] == 0xED) &&                      (utf8[1] >= 0x80) && (utf8[1] <= 0x9F))
		|| ((utf8[0] >= 0xEE) && (utf8[0] <= 0xEF) && (utf8[1] >= 0x80) && (utf8[1] <= 0xBF)));
}

/* non-zero when utf8[0..1] can begin a well-formed four-byte sequence */
static int __utf8_is_4byte_prefix(const unsigned char * utf8)
{
	return (((utf8[0] == 0xF0) &&                      (utf8[1] >= 0x90) && (utf8[1] <= 0xBF))
		|| ((utf8[0] >= 0xF1) && (utf8[0] <= 0xF3) && (utf8[1] >= 0x80) && (utf8[1] <= 0xBF))
		|| ((utf8[0] == 0xF4) &&                      (utf8[1] >= 0x80) && (utf8[1] <= 0x8F)));
}

/*
 * Decides whether the bytes from params->addr_failed to the end of the
 * source buffer are the beginning of a well-formed multi-byte sequence
 * that was merely cut short by the end of the buffer.
 *
 * If so (one to three trailing bytes forming a valid prefix), stores
 * their count in params->leftover_count, packs them into
 * params->leftover_bytes with the first byte in bits 24..31, the second
 * in bits 16..23 and the third in bits 8..15, and returns
 * NT_STATUS_SUCCESS.
 *
 * Otherwise clears both leftover fields and returns
 * NT_STATUS_ILLEGAL_CHARACTER.
 */
static int32_t __fastcall __update_stream_leftover_info_utf8(
	__in_out	nt_unicode_conversion_params_utf8_to_utf16 *	params)
{
	int32_t		status;
	ptrdiff_t	offset;
	unsigned char *	utf8;

	/* number of source bytes from the failure address to the end */
	offset	= (uintptr_t)params->src + (uintptr_t)params->src_size_in_bytes - (uintptr_t)params->addr_failed;
	utf8	= (unsigned char *)params->addr_failed;

	/* default status */
	status	= NT_STATUS_ILLEGAL_CHARACTER;

	if (offset == 1) {
		/* any lead byte of a two-, three- or four-byte sequence */
		if ((utf8[0] >= 0xC2) && (utf8[0] <= 0xF4)) {
			/* one leftover byte */
			params->leftover_count = 1;
			params->leftover_bytes = utf8[0];
			params->leftover_bytes <<= 24;
			status = NT_STATUS_SUCCESS;
		}
	} else if (offset == 2) {
		if (__utf8_is_3byte_prefix(utf8) || __utf8_is_4byte_prefix(utf8)) {
			/* two leftover bytes */
			params->leftover_count = 2;
			params->leftover_bytes = utf8[0];
			params->leftover_bytes <<= 8;
			params->leftover_bytes += utf8[1];
			params->leftover_bytes <<= 16;
			status = NT_STATUS_SUCCESS;
		}
	} else if (offset == 3) {
		/* the third byte must be a continuation byte as well */
		if (__utf8_is_4byte_prefix(utf8) && __utf8_is_continuation_byte(utf8[2])) {
			/* three leftover bytes */
			params->leftover_count = 3;
			params->leftover_bytes = utf8[0];
			params->leftover_bytes <<= 8;
			params->leftover_bytes += utf8[1];
			params->leftover_bytes <<= 8;
			params->leftover_bytes += utf8[2];
			params->leftover_bytes <<= 8;
			status = NT_STATUS_SUCCESS;
		}
	}

	if (status != NT_STATUS_SUCCESS) {
		params->leftover_count = 0;
		params->leftover_bytes = 0;
	}

	return status;
}
dd89bb
dd89bb
/*
 * Converts the UTF-8 stream described by params into UTF-16.
 *
 * The work is driven by __ntapi_uc_validate_unicode_stream_utf8(), which
 * receives a table of per-sequence-length callbacks (indices 0 and 1 both
 * map to the one-byte/null handler, indices 2..4 to the two-, three- and
 * four-byte handlers) together with the callback state: source cursor,
 * destination cursor, destination end, and the running byte count, which
 * starts from the caller-supplied params->bytes_written.
 *
 * Results, by validator status:
 *   NT_STATUS_SUCCESS:          addr_failed and the leftover fields are
 *                               cleared.
 *   NT_STATUS_BUFFER_TOO_SMALL: addr_failed is set to the source cursor
 *                               held by the callback state; the leftover
 *                               fields are cleared.
 *   anything else:              the tail of the source buffer is examined
 *                               for a truncated multi-byte sequence; the
 *                               returned status and the leftover fields
 *                               reflect the outcome of that examination.
 *
 * params->bytes_written is updated from the callback state in all cases.
 */
int32_t __stdcall 	__ntapi_uc_convert_unicode_stream_utf8_to_utf16(
	__in_out	nt_unicode_conversion_params_utf8_to_utf16 *	params)
{
	int32_t 			status;
	nt_utf8_callback_args		args;
	ntapi_uc_utf8_callback_fn *	callback_fn[5];

	/* callback table, indexed by sequence length */
	callback_fn[0] = (ntapi_uc_utf8_callback_fn *)__utf8_to_utf16_handler_1byte_or_null_termination;
	callback_fn[1] = (ntapi_uc_utf8_callback_fn *)__utf8_to_utf16_handler_1byte_or_null_termination;
	callback_fn[2] = (ntapi_uc_utf8_callback_fn *)__utf8_to_utf16_handler_2bytes;
	callback_fn[3] = (ntapi_uc_utf8_callback_fn *)__utf8_to_utf16_handler_3bytes;
	callback_fn[4] = (ntapi_uc_utf8_callback_fn *)__utf8_to_utf16_handler_4bytes;

	/* callback state */
	args.src		= params->src;
	args.dst		= params->dst;
	args.dst_cap		= (void *)((uintptr_t)(params->dst) + (params->dst_size_in_bytes));
	args.bytes_written	= params->bytes_written;

	status = __ntapi_uc_validate_unicode_stream_utf8(
		params->src,
		params->src_size_in_bytes,
		&params->code_points,
		&params->addr_failed,
		callback_fn,
		&args);

	params->bytes_written = args.bytes_written;

	switch (status) {
		case NT_STATUS_SUCCESS:
			params->addr_failed    = 0;
			params->leftover_bytes = 0;
			params->leftover_count = 0;
			return status;

		case NT_STATUS_BUFFER_TOO_SMALL:
			/* report where conversion stopped so that it can be resumed */
			params->addr_failed    = args.src;
			params->leftover_bytes = 0;
			params->leftover_count = 0;
			return status;

		default:
			status = __update_stream_leftover_info_utf8(params);
			break;
	}

	/* move the packed bytes from the low 32 bits to the top of a */
	/* pointer-sized value (optimized out on 32-bit architectures) */
	params->leftover_bytes <<= (8 * (sizeof(uintptr_t) - sizeof(uint32_t)));

	return status;
}
dd89bb
dd89bb
dd89bb
/*
 * UTF-8 to UTF-32 stream conversion: placeholder.
 *
 * The function currently performs no conversion: params is neither
 * read nor written, and NT_STATUS_SUCCESS is returned unconditionally.
 */
int32_t __stdcall 	__ntapi_uc_convert_unicode_stream_utf8_to_utf32(
	__in_out	nt_unicode_conversion_params_utf8_to_utf32 *	params)
{
	/* not implemented; silence the unused-parameter warning */
	(void)params;

	return NT_STATUS_SUCCESS;
}