/********************************************************/
/* ntapi: Native API core library */
/* Copyright (C) 2013--2021 SysDeer Technologies, LLC */
/* Released under GPLv2 and GPLv3; see COPYING.NTAPI. */
/********************************************************/
#include <psxtypes/psxtypes.h>
#include <pemagine/pemagine.h>
#include <ntapi/nt_argv.h>
#include <ntapi/ntapi.h>
#include "ntapi_impl.h"
/* two consecutive bytes, named in memory order: */
/* low is the byte at the lower address.         */
struct ___two_bytes {
	unsigned char	low;	/* first byte in memory  */
	unsigned char	high;	/* second byte in memory */
};

typedef struct ___two_bytes __two_bytes;
/* three consecutive bytes, named in memory order: */
/* low is the byte at the lowest address.          */
struct ___three_bytes {
	unsigned char	low;	/* first byte in memory  */
	unsigned char	middle;	/* second byte in memory */
	unsigned char	high;	/* third byte in memory  */
};

typedef struct ___three_bytes __three_bytes;
/* single-byte sequence (or the terminating null): the byte value */
/* is stored unchanged, widened to one utf-16 code unit.          */
static void __utf8_to_utf16_handler_1byte_or_null_termination(wchar16_t * dst, const unsigned char * ch)
{
	/***************************/
	/* from: 0xxxxxxx          */
	/* to:   00000000 0xxxxxxx */
	/***************************/

	dst[0] = ch[0];
}
/* two-byte sequence: strips the marker bits from both bytes and */
/* packs the remaining payload into one utf-16 code unit.        */
static void __utf8_to_utf16_handler_2bytes(wchar16_t * dst, const unsigned char * ch)
{
	/***************************/
	/* from: 110yyyyy 10xxxxxx */
	/* to:   00000yyy yyxxxxxx */
	/***************************/

	/* lead byte without its 110 marker */
	const wchar16_t	yyyyy  = ch[0] ^ 0xC0;

	/* continuation byte without its 10 marker */
	const wchar16_t	xxxxxx = ch[1] ^ 0x80;

	*dst = (wchar16_t)((yyyyy << 6) | xxxxxx);
}
/* three-byte sequence: strips the marker bits from all three bytes */
/* and packs the remaining payload into one utf-16 code unit.       */
static void __utf8_to_utf16_handler_3bytes(wchar16_t * dst, const unsigned char * ch)
{
	/************************************/
	/* from: 1110zzzz 10yyyyyy 10xxxxxx */
	/* to:   zzzzyyyy yyxxxxxx          */
	/************************************/

	/* each byte with its marker bits toggled off */
	const int	zzzz   = ch[0] ^ 0xE0;
	const int	yyyyyy = ch[1] ^ 0x80;
	const int	xxxxxx = ch[2] ^ 0x80;

	/* the cast keeps only the low 16 bits of the combined value */
	*dst = (wchar16_t)((zzzz << 12) | (yyyyyy << 6) | xxxxxx);
}
/* four-byte sequence: decodes the scalar value and stores it as a   */
/* utf-16 surrogate pair in dst[0] (lead) and dst[1] (trail).        */
/*                                                                   */
/* ch must point at a four-byte sequence that the caller has already */
/* validated (lead byte 0xF0..0xF4 followed by three continuation    */
/* bytes); the bytes are read directly, without casting away const   */
/* or overlaying a struct on the input.                              */
static void __utf8_to_utf16_handler_4bytes(wchar16_t * dst, const unsigned char * ch)
{
	/*************************************************/
	/* from: 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx     */
	/* to:   110110ww wwzzzzyy 110111yy yyxxxxxx     */
	/*       (where wwww = uuuuu - 1)                */
	/*************************************************/

	uint32_t	cp;

	/* scalar value: uuuuu zzzz yyyyyy xxxxxx (marker bits toggled off) */
	cp  = (uint32_t)(ch[0] ^ 0xF0) << 18;
	cp |= (uint32_t)(ch[1] ^ 0x80) << 12;
	cp |= (uint32_t)(ch[2] ^ 0x80) << 6;
	cp |= (uint32_t)(ch[3] ^ 0x80);

	/* subtracting 0x10000 is what turns uuuuu into wwww */
	cp -= 0x10000;

	/* 110110ww wwzzzzyy: upper ten bits */
	dst[0] = (wchar16_t)(0xD800 | (cp >> 10));

	/* 110111yy yyxxxxxx: lower ten bits */
	dst[1] = (wchar16_t)(0xDC00 | (cp & 0x3FF));
}
int32_t __stdcall __ntapi_tt_array_copy_utf8(
__out int * argc,
__in const char ** argv,
__in const char ** envp,
__in const char * interp,
__in const char * optarg,
__in const char * script,
__in void * base,
__out void * buffer,
__in size_t buflen,
__out size_t * blklen)
{
const char ** parg;
const char * arg;
const char * mark;
char * ch;
ptrdiff_t diff;
ptrdiff_t ptrs;
size_t needed;
const char * dummy[2] = {0,0};
/* fallback */
argv = argv ? argv : dummy;
envp = envp ? envp : dummy;
/* ptrs, needed */
ptrs = 0;
needed = 0;
/* interpr */
if (interp) {
ptrs++;
needed += sizeof(char *)
+ __ntapi->tt_string_null_offset_multibyte(interp)
+ sizeof(char);
}
/* optarg */
if (optarg) {
ptrs++;
needed += sizeof(char *)
+ __ntapi->tt_string_null_offset_multibyte(optarg)
+ sizeof(char);
}
/* script / argv[0] */
if ((mark = script ? script : argv[0])) {
ptrs++;
needed += sizeof(char *)
+ __ntapi->tt_string_null_offset_multibyte(mark)
+ sizeof(char);
}
/* argv */
for (parg=&argv[1]; *parg; parg++)
needed += sizeof(char *)
+ __ntapi->tt_string_null_offset_multibyte(*parg)
+ sizeof(char);
ptrs += (parg - &argv[1]);
*argc = (int)ptrs;
/* envp */
for (parg=envp; *parg; parg++)
needed += sizeof(char *)
+ __ntapi->tt_string_null_offset_multibyte(*parg)
+ sizeof(char);
ptrs += (parg - envp);
ptrs += 2;
needed += 2*sizeof(char *);
blklen = blklen ? blklen : &needed;
*blklen = needed;
if (buflen < needed)
return NT_STATUS_BUFFER_TOO_SMALL;
/* init */
parg = (const char **)buffer;
ch = (char *)(parg+ptrs);
diff = (ptrdiff_t)base;
/* interp */
if (interp) {
*parg++ = ch-diff;
for (arg=interp; *arg; arg++,ch++)
*ch = *arg;
*ch++ = '\0';
}
/* optarg */
if (optarg) {
*parg++ = ch-diff;
for (arg=optarg; *arg; arg++,ch++)
*ch = *arg;
*ch++ = '\0';
}
/* script / argv[0] */
if ((mark = script ? script : argv[0])) {
*parg++ = ch-diff;
for (arg=mark; *arg; arg++,ch++)
*ch = *arg;
*ch++ = '\0';
}
/* argv */
for (++argv; *argv; argv++) {
*parg++=ch-diff;
for (arg=*argv; *arg; arg++,ch++)
*ch = *arg;
*ch++ = '\0';
}
*parg++ = 0;
/* envp */
for (; *envp; envp++) {
*parg++=ch-diff;
for (arg=*envp; *arg; arg++,ch++)
*ch = *arg;
*ch++ = '\0';
}
*parg++ = 0;
return NT_STATUS_SUCCESS;
}
/* conversion handlers, indexed by the number of bytes in the utf-8 */
/* sequence to convert; index 0 holds no handler.                   */
static void (*__utf8_to_utf16_handlers[5])(wchar16_t *, const unsigned char *) = {
	[0] = 0,
	[1] = __utf8_to_utf16_handler_1byte_or_null_termination,
	[2] = __utf8_to_utf16_handler_2bytes,
	[3] = __utf8_to_utf16_handler_3bytes,
	[4] = __utf8_to_utf16_handler_4bytes,
};
/* returns the length in bytes (1..4) of the well-formed utf-8 sequence */
/* that starts at utf8, or 0 when the sequence is ill-formed.           */
/*                                                                      */
/* a byte is only read after the preceding byte was found to be         */
/* non-null, so the terminating null of the string is never passed.     */
static uint8_t __utf8_sequence_length(const uint8_t * utf8)
{
	uint8_t	lead;
	uint8_t	count;
	uint8_t	lo;
	uint8_t	hi;
	uint8_t	i;

	lead = utf8[0];

	/* one byte */
	if (lead <= 0x7F)
		return 1;

	/* 0x80..0xC1 and 0xF5..0xFF never start a sequence */
	if ((lead < 0xC2) || (lead > 0xF4))
		return 0;

	/* default range of the second byte; narrowed below for some lead bytes */
	lo = 0x80;
	hi = 0xBF;

	if (lead <= 0xDF) {
		/* two bytes */
		count = 2;
	} else if (lead <= 0xEF) {
		/* three bytes */
		count = 3;

		if (lead == 0xE0)
			lo = 0xA0;
		else if (lead == 0xED)
			hi = 0x9F;
	} else {
		/* four bytes */
		count = 4;

		if (lead == 0xF0)
			lo = 0x90;
		else if (lead == 0xF4)
			hi = 0x8F;
	}

	if ((utf8[1] < lo) || (utf8[1] > hi))
		return 0;

	/* all remaining bytes are plain continuation bytes */
	for (i=2; i<count; i++)
		if ((utf8[i] < 0x80) || (utf8[i] > 0xBF))
			return 0;

	return count;
}

/* __ntapi_tt_array_convert_utf8_to_utf16:                                */
/* converts a null-terminated array of utf-8 strings to utf-16.           */
/*                                                                        */
/* arrv:          null-terminated array (may be null); the integer value  */
/*                of base is added to each entry to obtain the address    */
/*                of the string.                                          */
/* warrv:         receives one pointer per converted string, followed by  */
/*                a null pointer; each stored pointer is the address of   */
/*                the string inside buffer minus the integer value of     */
/*                base.                                                   */
/* base:          must be a multiple of sizeof(wchar16_t).                */
/* buffer:        receives the null-terminated utf-16 strings, followed   */
/*                by one additional null code unit.                       */
/* buffer_len:    size of buffer in bytes.                                */
/* bytes_written: on success, the number of bytes stored in buffer,       */
/*                terminators included; not written on failure.           */
/*                                                                        */
/* returns NT_STATUS_INVALID_PARAMETER_3 for a misaligned base,           */
/* NT_STATUS_ILLEGAL_CHARACTER for ill-formed utf-8 input,                */
/* NT_STATUS_BUFFER_TOO_SMALL when any part of the input could not be     */
/* converted for lack of space, and NT_STATUS_SUCCESS otherwise.          */
int32_t __stdcall __ntapi_tt_array_convert_utf8_to_utf16(
	__in	char **		arrv,
	__out	wchar16_t **	warrv,
	__out	void *		base,
	__out	wchar16_t *	buffer,
	__in	size_t		buffer_len,
	__out	size_t *	bytes_written)
{
	wchar16_t *	ubound;
	wchar16_t *	wch;
	ptrdiff_t	diff;
	ptrdiff_t	wdiff;
	size_t		wcap;
	const uint8_t *	utf8;
	uint8_t		byte_count;

	if ((uintptr_t)base % sizeof(wchar16_t))
		return NT_STATUS_INVALID_PARAMETER_3;

	/* room for the final null code unit is always required */
	wcap = buffer_len / sizeof(wchar16_t);

	if (wcap == 0)
		return NT_STATUS_BUFFER_TOO_SMALL;

	wch   = buffer;
	diff  = (ptrdiff_t)base;
	wdiff = (ptrdiff_t)base / sizeof(wchar16_t);

	/* a code point is only converted while wch is below ubound; the   */
	/* three reserved units cover the second half of a surrogate pair, */
	/* the string's terminator, and the final terminator. the bound is */
	/* clamped so that it never precedes the buffer.                   */
	ubound = buffer + ((wcap > 3) ? (wcap - 3) : 0);

	for (; arrv && *arrv && (wch<ubound); arrv++,warrv++) {
		*warrv = wch - wdiff;
		utf8   = (const uint8_t *)(*arrv + diff);

		/* ubound already accounts for null termination, see above */
		while (*utf8 && (wch<ubound)) {
			if (!(byte_count = __utf8_sequence_length(utf8)))
				return NT_STATUS_ILLEGAL_CHARACTER;

			__utf8_to_utf16_handlers[byte_count](wch,utf8);

			/* four-byte sequences produce two code units, all others one */
			wch  += 1 + (byte_count >> 2);
			utf8 += byte_count;
		}

		*wch++ = 0;

		/* out of space before the end of the string: do not report */
		/* a truncated string as a successful conversion.           */
		if (*utf8)
			return NT_STATUS_BUFFER_TOO_SMALL;
	}

	/* out of space before the end of the array */
	if (arrv && *arrv)
		return NT_STATUS_BUFFER_TOO_SMALL;

	*wch++ = 0;
	*warrv = 0;

	*bytes_written = sizeof(wchar16_t) * (wch - buffer);

	return NT_STATUS_SUCCESS;
}