|
|
dd89bb |
/********************************************************/
|
|
|
dd89bb |
/* ntapi: Native API core library */
|
|
|
64e606 |
/* Copyright (C) 2013--2021 SysDeer Technologies, LLC */
|
|
|
dd89bb |
/* Released under GPLv2 and GPLv3; see COPYING.NTAPI. */
|
|
|
dd89bb |
/********************************************************/
|
|
|
dd89bb |
|
|
|
dd89bb |
#include <psxtypes/psxtypes.h>
|
|
|
dd89bb |
#include <pemagine/pemagine.h>
|
|
|
dd89bb |
#include <ntapi/nt_argv.h>
|
|
|
dd89bb |
#include <ntapi/ntapi.h>
|
|
|
dd89bb |
#include "ntapi_impl.h"
|
|
|
dd89bb |
|
|
|
4cd401 |
/* overlay for two consecutive bytes of a utf-8 sequence, in memory order */
typedef struct ___two_bytes {
	unsigned char	low;	/* byte at the lower address (comes first)  */
	unsigned char	high;	/* byte at the higher address (comes second) */
} __two_bytes;
|
|
|
4cd401 |
|
|
|
4cd401 |
|
|
|
4cd401 |
/* overlay for three consecutive bytes of a utf-8 sequence, in memory order */
typedef struct ___three_bytes {
	unsigned char	low;	/* byte at the lowest address (comes first)  */
	unsigned char	middle;	/* byte that follows `low'                    */
	unsigned char	high;	/* byte at the highest address (comes last)   */
} __three_bytes;
|
|
|
4cd401 |
|
|
|
4cd401 |
/*********************************************************/
/* single-byte sequence (also used for the terminator)   */
/*                                                       */
/* utf-8:   0xxxxxxx                                     */
/* utf-16:  00000000 0xxxxxxx                            */
/*                                                       */
/* dst: receives exactly one utf-16 code unit            */
/* ch:  address of the byte to widen                     */
/*********************************************************/
static void __utf8_to_utf16_handler_1byte_or_null_termination(wchar16_t * dst, const unsigned char * ch)
{
	/* the byte value is the code unit value; widening is all it takes */
	dst[0] = ch[0];
}
|
|
|
4cd401 |
|
|
|
4cd401 |
|
|
|
4cd401 |
/*********************************************************/
/* two-byte sequence                                     */
/*                                                       */
/* utf-8:   110yyyyy 10xxxxxx                            */
/* utf-16:  00000yyy yyxxxxxx                            */
/*                                                       */
/* dst: receives exactly one utf-16 code unit            */
/* ch:  address of the first of the two source bytes     */
/*********************************************************/
static void __utf8_to_utf16_handler_2bytes(wchar16_t * dst, const unsigned char * ch)
{
	/* flip the marker bits (110..... and 10......) to isolate the payload */
	const unsigned char	upper = ch[0] ^ 0xC0;	/* yyyyy  */
	const unsigned char	lower = ch[1] ^ 0x80;	/* xxxxxx */

	/* yyyyy goes above the six x bits */
	*dst = (wchar16_t)((upper << 6) | lower);
}
|
|
|
4cd401 |
|
|
|
4cd401 |
|
|
|
4cd401 |
/*********************************************************/
/* three-byte sequence                                   */
/*                                                       */
/* utf-8:   1110zzzz 10yyyyyy 10xxxxxx                   */
/* utf-16:  zzzzyyyy yyxxxxxx                            */
/*                                                       */
/* dst: receives exactly one utf-16 code unit            */
/* ch:  address of the first of the three source bytes   */
/*********************************************************/
static void __utf8_to_utf16_handler_3bytes(wchar16_t * dst, const unsigned char * ch)
{
	/* flip the marker bits of each byte to isolate its payload */
	const unsigned char	top    = ch[0] ^ 0xE0;	/* zzzz   */
	const unsigned char	center = ch[1] ^ 0x80;	/* yyyyyy */
	const unsigned char	bottom = ch[2] ^ 0x80;	/* xxxxxx */

	/* zzzz -> bits 15..12, yyyyyy -> bits 11..6, xxxxxx -> bits 5..0; */
	/* anything shifted beyond the width of wchar16_t is discarded.    */
	*dst = (wchar16_t)((top << 12) | (center << 6) | bottom);
}
|
|
|
4cd401 |
|
|
|
4cd401 |
|
|
|
4cd401 |
/*********************************************************/
/* four-byte sequence (encoded as a surrogate pair)      */
/*                                                       */
/* utf-8:   11110uuu 10uuzzzz 10yyyyyy 10xxxxxx          */
/* utf-16:  110110ww wwzzzzyy 110111yy yyxxxxxx          */
/*          where wwww = uuuuu - 1                       */
/*                                                       */
/* dst: receives TWO utf-16 code units (lead, trail)     */
/* ch:  address of the first of the four source bytes    */
/*********************************************************/
static void __utf8_to_utf16_handler_4bytes(wchar16_t * dst, const unsigned char * ch)
{
	unsigned char	u_upper;	/* uuu.. : three high bits of uuuuu */
	unsigned char	u_lower;	/* ...uu : two low bits of uuuuu    */
	unsigned char	z_bits;		/* zzzz                             */
	unsigned char	y_upper;	/* yy.... : two high bits of yyyyyy */
	unsigned char	y_lower;	/* ..yyyy : four low bits of yyyyyy */
	wchar16_t	w_bits;		/* wwww                             */
	wchar16_t	lead_unit;
	wchar16_t	trail_unit;

	/* uuuuu is split across byte 0 (three bits) and byte 1 (two bits) */
	u_upper   = ch[0] ^ 0xF0;
	u_upper <<= 2;

	u_lower   = ch[1] ^ 0x80;
	u_lower >>= 4;

	/* wwww = uuuuu - 1, then moved to bits 9..6 of the lead unit */
	w_bits   = (u_upper | u_lower) - 1;
	w_bits <<= 6;

	/* zzzz: the 8-bit shift drops the `10uu' prefix, leaving 00zzzz00 */
	z_bits   = ch[1];
	z_bits <<= 4;
	z_bits >>= 2;

	/* top two y bits land in bits 1..0 of the lead unit */
	y_upper   = ch[2] ^ 0x80;
	y_upper >>= 4;

	/* the remaining four y bits: the 8-bit shift drops `10yy' -> yyyy0000 */
	y_lower = ch[2] << 4;

	/* 110110ww wwzzzzyy */
	lead_unit = 0xD800 | w_bits | z_bits | y_upper;

	/* 110111yy yyxxxxxx */
	trail_unit = 0xDC00 | (y_lower << 2) | (ch[3] ^ 0x80);

	/* write */
	dst[0] = lead_unit;
	dst[1] = trail_unit;
}
|
|
|
4cd401 |
|
|
|
dd89bb |
/* copy the null-terminated string at src to dst, terminator included; */
/* returns the address right after the terminator that was written.     */
static char * __ntapi_tt_array_append_string(char * dst, const char * src)
{
	while (*src)
		*dst++ = *src++;

	*dst++ = '\0';

	return dst;
}

/**
 * __ntapi_tt_array_copy_utf8: pack an argument vector and an environment
 * vector into one caller-provided block.
 *
 * block layout: [arg pointers][null][env pointers][null][string bytes];
 * every stored pointer is the string's address inside buffer minus the
 * integer value of base.
 *
 * the argument vector is assembled from: interp (if any), optarg (if any),
 * script if non-null or else argv[0] (if any), and then argv[1] onwards.
 *
 * argc:    receives the number of argument pointers (set even when the
 *          buffer turns out to be too small).
 * argv:    null-terminated vector; null is treated as an empty vector.
 * envp:    null-terminated vector; null is treated as an empty vector.
 * interp:  optional first argument.
 * optarg:  optional argument that follows interp.
 * script:  optional replacement for argv[0].
 * base:    value subtracted from each stored string address.
 * buffer:  destination block.
 * buflen:  size of buffer in bytes.
 * blklen:  optional; receives the number of bytes required.
 *
 * returns NT_STATUS_BUFFER_TOO_SMALL when buflen is less than the required
 * size (nothing is written to buffer), NT_STATUS_SUCCESS otherwise.
 *
 * NOTE(review): a non-null argv is always read at argv[1], even when
 * argv[0] is null; callers apparently must provide a vector with at
 * least two readable slots -- confirm.
 */
int32_t __stdcall __ntapi_tt_array_copy_utf8(
	__out	int *		argc,
	__in	const char **	argv,
	__in	const char **	envp,
	__in	const char *	interp,
	__in	const char *	optarg,
	__in	const char *	script,
	__in	void *		base,
	__out	void *		buffer,
	__in	size_t		buflen,
	__out	size_t *	blklen)
{
	const char *	lead[3];
	const char *	empty[2] = {0,0};
	const char **	vec;
	const char **	slot;
	char *		cursor;
	ptrdiff_t	delta;
	ptrdiff_t	nptrs;
	size_t		total;
	int		i;

	/* fallback: a missing vector behaves like an empty one */
	if (!argv)
		argv = empty;

	if (!envp)
		envp = empty;

	/* strings that precede argv[1], in output order */
	lead[0] = interp;
	lead[1] = optarg;
	lead[2] = script ? script : argv[0];

	/* pass one: count pointers and bytes */
	nptrs = 0;
	total = 0;

	for (i=0; i<3; i++) {
		if (lead[i]) {
			nptrs++;
			total += sizeof(char *)
				+ __ntapi->tt_string_null_offset_multibyte(lead[i])
				+ sizeof(char);
		}
	}

	for (vec=&argv[1]; *vec; vec++) {
		nptrs++;
		total += sizeof(char *)
			+ __ntapi->tt_string_null_offset_multibyte(*vec)
			+ sizeof(char);
	}

	/* the environment does not count towards argc */
	*argc = (int)nptrs;

	for (vec=envp; *vec; vec++) {
		nptrs++;
		total += sizeof(char *)
			+ __ntapi->tt_string_null_offset_multibyte(*vec)
			+ sizeof(char);
	}

	/* the two null pointers that terminate the vectors */
	nptrs += 2;
	total += 2*sizeof(char *);

	if (blklen)
		*blklen = total;

	if (buflen < total)
		return NT_STATUS_BUFFER_TOO_SMALL;

	/* pass two: pointer table first, string bytes right after it */
	slot   = (const char **)buffer;
	cursor = (char *)(slot + nptrs);
	delta  = (ptrdiff_t)base;

	/* interp, optarg, script / argv[0] */
	for (i=0; i<3; i++) {
		if (lead[i]) {
			*slot++ = cursor - delta;
			cursor  = __ntapi_tt_array_append_string(cursor,lead[i]);
		}
	}

	/* argv[1] onwards */
	for (vec=&argv[1]; *vec; vec++) {
		*slot++ = cursor - delta;
		cursor  = __ntapi_tt_array_append_string(cursor,*vec);
	}

	*slot++ = 0;

	/* envp */
	for (vec=envp; *vec; vec++) {
		*slot++ = cursor - delta;
		cursor  = __ntapi_tt_array_append_string(cursor,*vec);
	}

	*slot = 0;

	return NT_STATUS_SUCCESS;
}
|
|
|
dd89bb |
|
|
|
4cd401 |
static void (*__utf8_to_utf16_handlers[5])(wchar16_t *, const unsigned char *) = {
|
|
|
4cd401 |
0,
|
|
|
4cd401 |
__utf8_to_utf16_handler_1byte_or_null_termination,
|
|
|
4cd401 |
__utf8_to_utf16_handler_2bytes,
|
|
|
4cd401 |
__utf8_to_utf16_handler_3bytes,
|
|
|
4cd401 |
__utf8_to_utf16_handler_4bytes};
|
|
|
4cd401 |
|
|
|
dd89bb |
/* classify the utf-8 sequence that starts at utf8[0].                     */
/* returns its length in bytes (1..4), or 0 if it is not well-formed.      */
/* bytes are examined strictly left to right and examination stops at the  */
/* first mismatch, so a terminating null is never read past.               */
static uint8_t __utf8_sequence_length(const uint8_t * utf8)
{
	uint8_t	lead;
	uint8_t	len;
	uint8_t	lo;
	uint8_t	hi;
	uint8_t	i;

	lead = utf8[0];

	/* one byte (the null terminator included) */
	if (lead <= 0x7F)
		return 1;

	/* 0x80..0xC1 and 0xF5..0xFF never begin a well-formed sequence */
	if ((lead < 0xC2) || (lead > 0xF4))
		return 0;

	/* valid range of the second byte; narrowed for special lead bytes */
	lo = 0x80;
	hi = 0xBF;

	if (lead <= 0xDF) {
		len = 2;
	} else if (lead <= 0xEF) {
		len = 3;

		if (lead == 0xE0)
			lo = 0xA0;	/* smaller values are overlong forms */
		else if (lead == 0xED)
			hi = 0x9F;	/* larger values are utf-16 surrogates */
	} else {
		len = 4;

		if (lead == 0xF0)
			lo = 0x90;	/* smaller values are overlong forms */
		else if (lead == 0xF4)
			hi = 0x8F;	/* larger values exceed U+10FFFF */
	}

	if ((utf8[1] < lo) || (utf8[1] > hi))
		return 0;

	/* all remaining bytes are plain continuation bytes */
	for (i=2; i<len; i++)
		if ((utf8[i] < 0x80) || (utf8[i] > 0xBF))
			return 0;

	return len;
}

/**
 * __ntapi_tt_array_convert_utf8_to_utf16: convert a null-terminated vector
 * of utf-8 strings to a vector of null-terminated utf-16 strings.
 *
 * arrv:          null-terminated vector of utf-8 strings; each entry is
 *                stored relative to base (the integer value of base is
 *                added to obtain the string address). may be null.
 * warrv:         receives one entry per converted string, stored relative
 *                to base (address inside buffer minus base), followed by
 *                a null entry.
 * base:          must be a multiple of sizeof(wchar16_t).
 * buffer:        receives the utf-16 strings back to back, followed by one
 *                additional null code unit.
 * buffer_len:    size of buffer in bytes.
 * bytes_written: on success, the number of bytes stored in buffer.
 *
 * returns:
 *   NT_STATUS_INVALID_PARAMETER_3  base is misaligned
 *   NT_STATUS_ILLEGAL_CHARACTER    a string is not well-formed utf-8
 *   NT_STATUS_BUFFER_TOO_SMALL     buffer cannot hold the entire output;
 *                                  buffer and warrv may have been partially
 *                                  written, bytes_written is left untouched
 *   NT_STATUS_SUCCESS              every string was converted in full
 */
int32_t __stdcall __ntapi_tt_array_convert_utf8_to_utf16(
	__in	char **		arrv,
	__out	wchar16_t **	warrv,
	__out	void *		base,
	__out	wchar16_t *	buffer,
	__in	size_t		buffer_len,
	__out	size_t *	bytes_written)
{
	wchar16_t *	ubound;
	wchar16_t *	wch;
	ptrdiff_t	diff;
	ptrdiff_t	wdiff;
	const uint8_t *	utf8;
	uint8_t		byte_count;

	if ((uintptr_t)base % sizeof(wchar16_t))
		return NT_STATUS_INVALID_PARAMETER_3;

	/* fewer than three code units: ubound would precede the buffer */
	if (buffer_len / sizeof(wchar16_t) < 3)
		return NT_STATUS_BUFFER_TOO_SMALL;

	wch   = buffer;
	diff  = (ptrdiff_t)base;
	wdiff = (ptrdiff_t)base / sizeof(wchar16_t);

	/* while wch < ubound, at least four code units remain available: */
	/* enough for a surrogate pair, the string terminator, and the    */
	/* terminator that follows the last string.                       */
	ubound  = buffer;
	ubound += buffer_len / sizeof(wchar16_t);
	ubound -= 3;

	for (; arrv && *arrv; arrv++,warrv++) {
		/* input remains but the reserve is exhausted */
		if (wch >= ubound)
			return NT_STATUS_BUFFER_TOO_SMALL;

		*warrv = wch - wdiff;
		utf8   = (const uint8_t *)(*arrv + diff);

		while (*utf8) {
			/* never report success for a truncated string */
			if (wch >= ubound)
				return NT_STATUS_BUFFER_TOO_SMALL;

			if (!(byte_count = __utf8_sequence_length(utf8)))
				return NT_STATUS_ILLEGAL_CHARACTER;

			__utf8_to_utf16_handlers[byte_count](wch,utf8);

			/* four-byte sequences produce two code units, all others one */
			wch  += 1 + (byte_count >> 2);
			utf8 += byte_count;
		}

		/* ubound already accounts for null termination, see above */
		*wch++ = 0;
	}

	/* terminate the string block and the pointer vector */
	*wch++ = 0;
	*warrv = 0;

	*bytes_written = sizeof(wchar16_t) * (wch - buffer);

	return NT_STATUS_SUCCESS;
}
|