Blame src/multibyte/utf8bench.c

nsz 6cc52f
#include <stdio.h>
nsz 6cc52f
#include <stdlib.h>
nsz 6cc52f
#include <string.h>
nsz 6cc52f
#include <wchar.h>
nsz 6cc52f
#include <locale.h>
nsz 6cc52f
#include <langinfo.h>
nsz 6cc52f
nsz 6cc52f
/* Size in bytes of the UTF-8 input buffer allocated by initbuf(); also used
 * as the wide-character capacity of the mbstowcs() destination buffer. */
#define LEN 500000
nsz 6cc52f
nsz 6cc52f
static char *initbuf() {
nsz 6cc52f
	char *buf;
nsz 6cc52f
	int i, j, k, l;
nsz 6cc52f
nsz 6cc52f
	setlocale(LC_CTYPE, "C.UTF-8")
nsz 6cc52f
	|| setlocale(LC_CTYPE, "en_US.UTF-8")
nsz 6cc52f
	|| setlocale(LC_CTYPE, "en_GB.UTF-8")
nsz 6cc52f
	|| setlocale(LC_CTYPE, "en.UTF-8")
nsz 6cc52f
	|| setlocale(LC_CTYPE, "de_DE-8")
nsz 6cc52f
	|| setlocale(LC_CTYPE, "fr_FR-8");
nsz 6cc52f
	if (strcmp(nl_langinfo(CODESET), "UTF-8")) exit(1);
nsz 6cc52f
nsz 6cc52f
	buf = malloc(LEN);
nsz 6cc52f
	l = 0;
nsz 6cc52f
	for (i=0xc3; i<0xe0; i++)
nsz 6cc52f
		for (j=0x80; j<0xc0; j++)
nsz 6cc52f
			buf[l++] = i, buf[l++] = j;
nsz 6cc52f
	for (i=0xe1; i<0xed; i++)
nsz 6cc52f
		for (j=0x80; j<0xc0; j++)
nsz 6cc52f
			for (k=0x80; k<0xc0; k++)
nsz 6cc52f
				buf[l++] = i, buf[l++] = j, buf[l++] = k;
nsz 6cc52f
	for (i=0xf1; i<0xf4; i++)
nsz 6cc52f
		for (j=0x80; j<0xc0; j++)
nsz 6cc52f
			for (k=0x80; k<0xc0; k++)
nsz 6cc52f
				buf[l++] = i, buf[l++] = j, buf[l++] = 0x80, buf[l++] = k;
nsz 6cc52f
	buf[l++] = 0;
nsz 6cc52f
	return buf;
nsz 6cc52f
}
nsz 6cc52f
nsz 6cc52f
void bench_utf8_bigbuf(int N) {
nsz 6cc52f
	char *buf;
nsz 6cc52f
	wchar_t *wbuf;
nsz 6cc52f
	int i;
nsz 6cc52f
	int cs;
nsz 6cc52f
nsz 6cc52f
	buf = initbuf();
nsz 6cc52f
	wbuf = malloc(LEN*sizeof(wchar_t));
nsz 6cc52f
	for (i=0; i
nsz 6cc52f
		cs ^= mbstowcs(wbuf, buf, LEN);
nsz 6cc52f
	buf[0] = cs;
nsz 6cc52f
	free(wbuf);
nsz 6cc52f
	free(buf);
nsz 6cc52f
}
nsz 6cc52f
nsz 6cc52f
/*
 * Benchmark character-at-a-time conversion: walk the UTF-8 string produced
 * by initbuf() with mbrtowc(), one multibyte character per call, and repeat
 * the full walk N times.
 *
 * N: number of passes over the buffer.
 */
void bench_utf8_onebyone(int N) {
	char *buf;
	wchar_t wc;
	int i;
	size_t j;
	mbstate_t st = {0};

	buf = initbuf();
	for (i=0; i<N; i++) {
		/* Advance by the length mbrtowc() reports for each character.
		 * No error check: the loop relies on the buffer holding only
		 * complete, valid sequences, so the return value is the
		 * sequence length rather than (size_t)-1 or (size_t)-2. */
		for (j=0; buf[j]; j+=mbrtowc(&wc, buf+j, 4, &st))
			;
	}
	free(buf);
}