Blame src/argv/ntapi_tt_array_utf16.c

dd89bb
/********************************************************/
dd89bb
/*  ntapi: Native API core library                      */
4256e2
/*  Copyright (C) 2013--2016  Z. Gilboa                 */
dd89bb
/*  Released under GPLv2 and GPLv3; see COPYING.NTAPI.  */
dd89bb
/********************************************************/
dd89bb
dd89bb
#include <psxtypes/psxtypes.h>
dd89bb
#include <pemagine/pemagine.h>
dd89bb
#include <ntapi/nt_argv.h>
dd89bb
#include <ntapi/ntapi.h>
dd89bb
#include "ntapi_impl.h"
dd89bb
dd89bb
/**
 * scenario: program -e app [arg1 arg2 ... argn]
 * input:    a utf-16 argument vector and a utf-16 environment vector
 * output:   one contiguous block, written to buffer, laid out as:
 *           [image_name ptr (optional)] [argv ptrs] [null]
 *           [envp ptrs] [null] [utf-16 strings, each null-terminated]
 * example:  tty_pipe_create_child_process
 *
 * argc:        receives the number of argument pointers written
 *              (image_name, when present, plus the entries of wargv).
 * wargv:       null-terminated vector; a null pointer means "empty".
 * wenvp:       null-terminated vector; a null pointer means "empty".
 * image_name:  optional; when present it becomes the first argument.
 * interpreter: not referenced by this function.
 * optarg:      not referenced by this function.
 * base:        every stored string pointer is reduced by
 *              (base / sizeof(wchar16_t)) elements.
 * buffer:      destination block.
 * buflen:      size of the destination block, in bytes.
 * blklen:      optional; receives the number of bytes required,
 *              also when the buffer turns out to be too small.
 *
 * returns NT_STATUS_BUFFER_TOO_SMALL when buflen is less than the
 * required size (nothing is written to buffer in that case), and
 * NT_STATUS_SUCCESS otherwise.
**/

int32_t __stdcall __ntapi_tt_array_copy_utf16(
	__out	int *			argc,
	__in	const wchar16_t **	wargv,
	__in	const wchar16_t **	wenvp,
	__in	const wchar16_t *	image_name	__optional,
	__in	const wchar16_t *	interpreter	__optional,
	__in	const wchar16_t *	optarg		__optional,
	__in	void *			base,
	__out	void *			buffer,
	__in	size_t			buflen,
	__out	size_t *		blklen)
{
	const wchar16_t *	empty;
	const wchar16_t **	vec;
	const wchar16_t **	slot;
	const wchar16_t *	src;
	wchar16_t *		dst;
	ptrdiff_t		rebase;
	ptrdiff_t		nslots;
	size_t			total;

	/* a missing vector is handled as an empty one */
	empty = 0;

	if (!wargv)
		wargv = &empty;

	if (!wenvp)
		wenvp = &empty;

	/* the two null pointer terminators are always part of the block */
	nslots = 2;
	total  = 2*sizeof(wchar16_t *);

	/* size: image_name (pointer slot, characters, terminator) */
	if (image_name) {
		nslots++;
		total += sizeof(wchar16_t *)
			+ __ntapi->tt_string_null_offset_short((const int16_t *)image_name)
			+ sizeof(wchar16_t);
	}

	/* size: argv */
	vec = wargv;

	while (*vec) {
		total += sizeof(wchar16_t *)
			+ __ntapi->tt_string_null_offset_short((const int16_t *)*vec)
			+ sizeof(wchar16_t);
		vec++;
	}

	nslots += (vec - wargv);

	/* everything counted so far, less the terminators, is an argument */
	*argc = (int)(nslots - 2);

	/* size: envp */
	vec = wenvp;

	while (*vec) {
		total += sizeof(wchar16_t *)
			+ __ntapi->tt_string_null_offset_short((const int16_t *)*vec)
			+ sizeof(wchar16_t);
		vec++;
	}

	nslots += (vec - wenvp);

	/* report the required size before validating the buffer */
	if (blklen)
		*blklen = total;

	if (total > buflen)
		return NT_STATUS_BUFFER_TOO_SMALL;

	/* pointer slots first, string data right after the last slot */
	slot   = (const wchar16_t **)buffer;
	dst    = (wchar16_t *)(slot + nslots);
	rebase = (uintptr_t)base / sizeof(wchar16_t);

	/* copy: image_name */
	if (image_name) {
		*slot++ = dst - rebase;

		src = image_name;

		while (*src)
			*dst++ = *src++;

		*dst++ = 0;
	}

	/* copy: argv */
	for (vec=wargv; *vec; vec++) {
		*slot++ = dst - rebase;

		src = *vec;

		while (*src)
			*dst++ = *src++;

		*dst++ = 0;
	}

	*slot++ = 0;

	/* copy: envp */
	for (vec=wenvp; *vec; vec++) {
		*slot++ = dst - rebase;

		src = *vec;

		while (*src)
			*dst++ = *src++;

		*dst++ = 0;
	}

	*slot = 0;

	return NT_STATUS_SUCCESS;
}
dd89bb
dd89bb
/**
 * converts every string of a null-terminated utf-16 string vector
 * to utf-8, writing the converted strings back-to-back into buffer
 * and one pointer per string into arrv.
 *
 * warrv:         null-terminated vector of utf-16 strings; a null
 *                pointer is handled as an empty vector. each entry
 *                is advanced by (base / sizeof(wchar16_t)) elements
 *                before it is read.
 * arrv:          receives one pointer per converted string; each
 *                pointer is the string's address in buffer, reduced
 *                by base bytes. no terminating null pointer is
 *                written by this function.
 * base:          see warrv and arrv.
 * buffer:        destination for the utf-8 strings.
 * buffer_len:    size of buffer, in bytes.
 * bytes_written: optional; on success, receives the number of bytes
 *                used in buffer, string terminators included.
 *
 * returns NT_STATUS_BUFFER_TOO_SMALL when buffer cannot hold the
 * output, and NT_STATUS_SUCCESS otherwise.
 *
 * space rule: a code point is only converted while more than
 * __UTF8_MAX_CODE_POINT_BYTES bytes remain, which always leaves room
 * for the longest encoding plus the string's null terminator.
 *
 * malformed input: the streams are expected to have been validated;
 * as a precaution, a surrogate that is not part of a well-formed
 * pair is emitted as U+FFFD rather than consumed together with the
 * code unit (possibly the terminator) that follows it.
**/

int32_t __stdcall __ntapi_tt_array_convert_utf16_to_utf8(
	__in		wchar16_t **			warrv,
	__in		char **				arrv,
	__in		void *				base,
	__in		char *				buffer,
	__in		size_t				buffer_len,
	__out		size_t *			bytes_written)
{
	uint8_t *	ch;
	uint8_t *	cap;
	wchar16_t *	wch;
	uint32_t	cp;
	ptrdiff_t	diff;

	#define __UTF8_MAX_CODE_POINT_BYTES	(4)

	ch	= (uint8_t *)buffer;
	cap	= (uint8_t *)buffer + buffer_len;
	diff	= (uintptr_t)base / sizeof(wchar16_t);

	while (warrv && *warrv) {
		*arrv	= (char *)(ch-(uintptr_t)base);
		wch	= *warrv + diff;

		while (*wch) {
			/* sizes are compared (rather than pointers) so that */
			/* a buffer shorter than four bytes is handled, too  */
			if ((size_t)(cap - ch) <= __UTF8_MAX_CODE_POINT_BYTES)
				return NT_STATUS_BUFFER_TOO_SMALL;

			cp = *wch++;

			if ((cp >= 0xD800) && (cp <= 0xDBFF)
					&& (*wch >= 0xDC00)
					&& (*wch <= 0xDFFF)) {
				/* from: 110110ww wwzzzzyy (high surrogate, first) */
				/*       110111yy yyxxxxxx (low surrogate, second) */
				/* to:   uuuuu zzzzyyyy yyxxxxxx, uuuuu = wwww + 1 */
				cp  = 0x10000 + ((cp - 0xD800) << 10);
				cp += (uint32_t)(*wch - 0xDC00);
				wch++;
			} else if ((cp >= 0xD800) && (cp <= 0xDFFF)) {
				/* unpaired surrogate: substitute, consume one unit */
				cp = 0xFFFD;
			}

			if (cp <= 0x7F) {
				/* to: 0xxxxxxx */
				*ch++ = (uint8_t)cp;
			} else if (cp <= 0x7FF) {
				/* to: 110yyyyy 10xxxxxx */
				*ch++ = (uint8_t)(0xC0 | (cp >> 6));
				*ch++ = (uint8_t)(0x80 | (cp & 0x3F));
			} else if (cp <= 0xFFFF) {
				/* to: 1110zzzz 10yyyyyy 10xxxxxx */
				*ch++ = (uint8_t)(0xE0 | (cp >> 12));
				*ch++ = (uint8_t)(0x80 | ((cp >> 6) & 0x3F));
				*ch++ = (uint8_t)(0x80 | (cp & 0x3F));
			} else {
				/* to: 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx */
				*ch++ = (uint8_t)(0xF0 | (cp >> 18));
				*ch++ = (uint8_t)(0x80 | ((cp >> 12) & 0x3F));
				*ch++ = (uint8_t)(0x80 | ((cp >> 6) & 0x3F));
				*ch++ = (uint8_t)(0x80 | (cp & 0x3F));
			}
		}

		/* only an empty string can get here without a spare byte */
		if (ch >= cap)
			return NT_STATUS_BUFFER_TOO_SMALL;

		/* terminate explicitly: buffer need not be zero-filled */
		*ch++ = 0;

		arrv++;
		warrv++;
	}

	if (bytes_written)
		*bytes_written = (size_t)(ch - (uint8_t *)buffer);

	return NT_STATUS_SUCCESS;
}