Blame src/argv/ntapi_tt_array_utf16.c

dd89bb
/********************************************************/
dd89bb
/*  ntapi: Native API core library                      */
59d585
/*  Copyright (C) 2013--2021  Z. Gilboa                 */
dd89bb
/*  Released under GPLv2 and GPLv3; see COPYING.NTAPI.  */
dd89bb
/********************************************************/
dd89bb
dd89bb
#include <psxtypes/psxtypes.h>
dd89bb
#include <pemagine/pemagine.h>
dd89bb
#include <ntapi/nt_argv.h>
dd89bb
#include <ntapi/ntapi.h>
dd89bb
#include "ntapi_impl.h"
dd89bb
dd89bb
/**
 * scenario: program -e app [arg1 arg2 ... argn]
 * input:    a utf-16 argument vector and a utf-16 environment vector,
 *           optionally preceded by interp, optarg, and script
 * output:   one contiguous block in buffer, laid out as
 *           [argument pointers][null][environment pointers][null][strings]
 * example:  tty_pipe_create_child_process
 *
 * argc:     receives the number of argument pointers written, that is
 *           interp (if set), optarg (if set), script or wargv[0]
 *           (if either is set), and wargv[1] through the last argument.
 * wargv:    null-terminated argument vector; null is treated as empty.
 *           wargv[1] is read even when wargv[0] is null, so a non-null
 *           vector must provide at least two readable entries.
 * wenvp:    null-terminated environment vector; null is treated as empty.
 * interp:   optional string, copied first.
 * optarg:   optional string, copied after interp.
 * script:   optional string; when set it is copied in place of wargv[0].
 * base:     every stored pointer is the address of the copied string
 *           lowered by (uintptr_t)base bytes (computed in wchar16_t
 *           units); pass null to store absolute addresses.
 * buffer:   destination block.
 * buflen:   size of buffer in bytes.
 * blklen:   optional; receives the required block size in bytes, also
 *           when the buffer is too small.
 *
 * returns:  NT_STATUS_BUFFER_TOO_SMALL when buflen is smaller than the
 *           required size (nothing is written to buffer in that case),
 *           NT_STATUS_SUCCESS otherwise.
**/

int32_t __stdcall __ntapi_tt_array_copy_utf16(
	__out	int *			argc,
	__in	const wchar16_t **	wargv,
	__in	const wchar16_t **	wenvp,
	__in	const wchar16_t *	interp,
	__in	const wchar16_t *	optarg,
	__in	const wchar16_t *	script,
	__in	void *			base,
	__out	void *			buffer,
	__in	size_t			buflen,
	__out	size_t *		blklen)
{
	const wchar16_t **	parg;
	const wchar16_t *	warg;
	const wchar16_t *	mark;
	wchar16_t *		wch;
	ptrdiff_t		diff;
	ptrdiff_t		ptrs;
	size_t			needed;
	const wchar16_t *	dummy[2] = {0,0};

	/* fallback: a missing vector is treated as an empty one; */
	/* two entries, since wargv[1] is read unconditionally    */
	wargv = wargv ? wargv : dummy;
	wenvp = wenvp ? wenvp : dummy;

	/**
	 * first pass: count the pointers (ptrs) and the bytes (needed).
	 * each string costs one pointer slot, its characters, and one
	 * terminating wchar16_t; the result of tt_string_null_offset_short
	 * is added to byte counts here, so it is taken to be the byte
	 * offset of the terminating null -- TODO confirm.
	**/
	ptrs   = 0;
	needed = 0;

	/* interp */
	if (interp) {
		ptrs++;
		needed += sizeof(wchar16_t *)
			+ __ntapi->tt_string_null_offset_short((const int16_t *)interp)
			+ sizeof(wchar16_t);
	}

	/* optarg (the guard must match the one used by the copy pass below) */
	if (optarg) {
		ptrs++;
		needed += sizeof(wchar16_t *)
			+ __ntapi->tt_string_null_offset_short((const int16_t *)optarg)
			+ sizeof(wchar16_t);
	}

	/* script / wargv[0] */
	if ((mark = script ? script : wargv[0])) {
		ptrs++;
		needed += sizeof(wchar16_t *)
			+ __ntapi->tt_string_null_offset_short((const int16_t *)mark)
			+ sizeof(wchar16_t);
	}

	/* wargv[1] .. wargv[n] */
	for (parg=&wargv[1]; *parg; parg++) {
		needed += sizeof(wchar16_t *)
			+ __ntapi->tt_string_null_offset_short((const int16_t *)*parg)
			+ sizeof(wchar16_t);
	}

	ptrs += (parg - &wargv[1]);
	*argc = (int)ptrs;

	/* wenvp */
	for (parg=wenvp; *parg; parg++) {
		needed += sizeof(wchar16_t *)
			+ __ntapi->tt_string_null_offset_short((const int16_t *)*parg)
			+ sizeof(wchar16_t);
	}

	ptrs += (parg - wenvp);

	/* the two null pointers that terminate the argument */
	/* and environment pointer tables                    */
	ptrs   += 2;
	needed += 2*sizeof(wchar16_t *);

	/* report the required size, whether or not the buffer suffices */
	if (blklen)
		*blklen = needed;

	if (buflen < needed)
		return NT_STATUS_BUFFER_TOO_SMALL;

	/* second pass: the pointer table starts at buffer, */
	/* the string data immediately follows the table    */
	parg = (const wchar16_t **)buffer;
	wch  = (wchar16_t *)(parg+ptrs);
	diff = (uintptr_t)base / sizeof(wchar16_t);

	/* interp */
	if (interp) {
		*parg++ = wch-diff;
		for (warg=interp; *warg; warg++,wch++)
			*wch = *warg;
		*wch++ = '\0';
	}

	/* optarg */
	if (optarg) {
		*parg++ = wch-diff;
		for (warg=optarg; *warg; warg++,wch++)
			*wch = *warg;
		*wch++ = '\0';
	}

	/* script / wargv[0] */
	if ((mark = script ? script : wargv[0])) {
		*parg++ = wch-diff;
		for (warg=mark; *warg; warg++,wch++)
			*wch = *warg;
		*wch++ = '\0';
	}

	/* wargv[1] .. wargv[n] */
	for (++wargv; *wargv; wargv++) {
		*parg++ = wch-diff;
		for (warg=*wargv; *warg; warg++,wch++)
			*wch = *warg;
		*wch++ = '\0';
	}

	/* end of the argument pointers */
	*parg++ = 0;

	/* wenvp */
	for (; *wenvp; wenvp++) {
		*parg++ = wch-diff;
		for (warg=*wenvp; *warg; warg++,wch++)
			*wch = *warg;
		*wch++ = '\0';
	}

	/* end of the environment pointers */
	*parg++ = 0;

	return NT_STATUS_SUCCESS;
}
dd89bb
dd89bb
/**
 * input:    warrv, a null-terminated array of utf-16 strings
 * output:   arrv, a null-terminated array of utf-8 strings whose
 *           characters are stored back to back in buffer
 *
 * warrv:    source array; each entry is advanced by (uintptr_t)base
 *           bytes (computed in wchar16_t units) before it is read.
 *           a null warrv is treated as an empty array.
 * arrv:     destination array; must have room for one entry per
 *           source string plus the terminating null entry. each entry
 *           is the address of the converted string lowered by
 *           (uintptr_t)base bytes.
 * base:     see warrv and arrv; pass null to use absolute addresses.
 * buffer:   receives the converted, null-terminated utf-8 strings.
 * buffer_len:    size of buffer in bytes.
 * bytes_written: receives the number of bytes stored in buffer,
 *                terminators included; only set on success.
 *
 * returns:  NT_STATUS_ILLEGAL_CHARACTER on an unpaired surrogate,
 *           NT_STATUS_BUFFER_TOO_SMALL when buffer cannot hold the
 *           output, NT_STATUS_SUCCESS otherwise.
**/
int32_t __stdcall __ntapi_tt_array_convert_utf16_to_utf8(
	__in		wchar16_t **			warrv,
	__in		char **				arrv,
	__in		void *				base,
	__in		char *				buffer,
	__in		size_t				buffer_len,
	__out		size_t *			bytes_written)
{
	uint8_t *	limit;
	uint8_t *	ubound;
	uint8_t *	ch;
	wchar16_t *	wch;
	wchar16_t	wx;
	wchar16_t	wy;
	wchar16_t	wz;
	wchar16_t	wy_low;
	wchar16_t	wy_high;
	wchar16_t	ww;
	wchar16_t	uuuuu;
	wchar16_t	u_low;
	wchar16_t	u_high;
	ptrdiff_t	diff;

	ch	= (uint8_t *)buffer;
	limit	= (uint8_t *)buffer + buffer_len;

	/* reserve room for the longest sequence (four bytes) plus the   */
	/* null terminator; clamp so that a buffer shorter than that     */
	/* never yields a pointer that precedes the start of the buffer  */
	ubound	= (buffer_len >= 5) ? limit - 5 : ch;
	diff	= (uintptr_t)base / sizeof(wchar16_t);

	for (; warrv && *warrv; arrv++,warrv++) {
		*arrv	= (char *)(ch-(uintptr_t)base);
		wch	= *warrv + diff;

		/* ubound already accounts for null termination, see above; */
		/* ch and wch are advanced at the bottom of the loop body   */
		for (; *wch && (ch < ubound); ) {
			if (*wch <= 0x7F) {
				/* from: 00000000 0xxxxxxx (little endian) */
				/* to:   0xxxxxxx          (utf-8)    */
				*ch = (char)(*wch);
			} else if (*wch <= 0x7FF) {
				/* from: 00000yyy yyxxxxxx (little endian) */
				/* to:   110yyyyy 10xxxxxx (utf-8)    */
				wy  = *wch;
				wy >>= 6;

				wx  = *wch;
				wx <<= 10;
				wx >>= 10;

				/* write the y part */
				*ch = (char)(0xC0 | wy);
				ch++;

				/* write the x part */
				*ch = (char)(0x80 | wx);
			} else if ((*wch < 0xD800) || (*wch >= 0xE000)) {
				/* from: zzzzyyyy yyxxxxxx (little endian)  */
				/* to:   1110zzzz 10yyyyyy 10xxxxxx (utf-8) */
				wz  = *wch;
				wz >>= 12;

				wy  = *wch;
				wy <<= 4;
				wy >>= 10;

				wx  = *wch;
				wx <<= 10;
				wx >>= 10;

				/* write the z part */
				*ch = (char)(0xE0 | wz);
				ch++;

				/* write the y part */
				*ch = (char)(0x80 | wy);
				ch++;

				/* write the x part */
				*ch = (char)(0x80 | wx);
			} else if (wch[0] >= 0xDC00) {
				/* trailing surrogate without a leading one */
				return NT_STATUS_ILLEGAL_CHARACTER;
			} else if (wch[1] < 0xDC00) {
				/* leading surrogate not followed by a trailing one */
				/* (this also covers the end of the string)         */
				return NT_STATUS_ILLEGAL_CHARACTER;
			} else if (wch[1] >= 0xE000) {
				/* leading surrogate not followed by a trailing one */
				return NT_STATUS_ILLEGAL_CHARACTER;
			} else {
				/* from: 110110ww  wwzzzzyy  110111yy  yyxxxxxx (little endian) */
				/* to:   11110uuu  10uuzzzz  10yyyyyy  10xxxxxx (utf-8)         */

				/* leading surrogate: ww ww, zzzz, and the top two y bits */
				wy_high   = *wch;
				wy_high <<= 14;
				wy_high >>= 10;

				wz   = *wch;
				wz <<= 10;
				wz >>= 12;

				ww   = *wch;
				ww <<= 6;
				ww >>= 12;

				/* (surrogate pair) */
				wch++;

				/* trailing surrogate: the low four y bits and xxxxxx */
				wx   = *wch;
				wx <<= 10;
				wx >>= 10;

				wy_low   = *wch;
				wy_low <<= 6;
				wy_low >>= 12;

				/* uuuuu = wwww + 1 */
				uuuuu    = ww + 1;
				u_low    = uuuuu;
				u_low  >>= 2;

				u_high  = uuuuu;
				u_high <<= 14;
				u_high >>= 10;

				/* 1st byte: 11110uuu */
				*ch++ = (char)(0xF0 | u_low);

				/* 2nd byte: 10uuzzzz */
				*ch++ = (char)(0x80 | u_high | wz);

				/* 3rd byte: 10yyyyyy */
				*ch++ = (char)(0x80 | wy_low | wy_high);

				/* 4th byte: 10xxxxxx */
				*ch = (char)(0x80 | wx);
			}

			ch++;
			wch++;
		}

		/* the source string was not fully consumed */
		if (*wch)
			return NT_STATUS_BUFFER_TOO_SMALL;

		/* no room left for the terminator (e.g. a run of empty strings) */
		if (ch >= limit)
			return NT_STATUS_BUFFER_TOO_SMALL;

		*ch++ = 0;
	}

	*arrv = 0;
	*bytes_written = (size_t)(ch - (uint8_t *)buffer);

	return NT_STATUS_SUCCESS;
}