Blame src/regex/tbnf_regex.c

c25718
/**************************************************************/
c25718
/*  treebnf: a tree oriented bnf library                      */
c25718
/*  Copyright (C) 2024  SysDeer Technologies, LLC             */
c25718
/*  Released under GPLv2 and GPLv3; see COPYING.TREEBNF.      */
c25718
/**************************************************************/
c25718
c25718
#include <treebnf/treebnf.h>
c25718
c25718
#include "treebnf_regex_impl.h"
c25718
#include "treebnf_visibility_impl.h"
c25718
c25718
#include "tbnf_regex_defs.h"
c25718
#include "tbnf_regex_scanfns.h"
c25718
c25718
#define TBNF_STATE_STACK_SIZE   (512)
c25718
c25718
/* init state scan table */
c25718
static struct tbnf_scan_tbl tbnf_regex_scan_tbl__init[TBNF_REGEX_TOK_CAP] = {
c25718
	/* --> brace */
c25718
	[TBNF_REGEX_TOK_LBRACE] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_lbrace,
c25718
		.tok_state_op   = TBNF_STATE_PUSH,
c25718
		.tok_state_next = TBNF_REGEX_STATE_BRACE,
c25718
	},
c25718
c25718
c25718
	/* --> bracket */
c25718
	[TBNF_REGEX_TOK_LBRACKET_CIRCUMFLEX_RBRACKET] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_lbracket_circumflex_rbracket,
c25718
		.tok_state_op   = TBNF_STATE_PUSH,
c25718
		.tok_state_next = TBNF_REGEX_STATE_BRACKET,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_LBRACKET_CIRCUMFLEX] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_lbracket_circumflex,
c25718
		.tok_state_op   = TBNF_STATE_PUSH,
c25718
		.tok_state_next = TBNF_REGEX_STATE_BRACKET,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_LBRACKET_RBRACKET] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_lbracket_rbracket,
c25718
		.tok_state_op   = TBNF_STATE_PUSH,
c25718
		.tok_state_next = TBNF_REGEX_STATE_BRACKET,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_LBRACKET] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_lbracket,
c25718
		.tok_state_op   = TBNF_STATE_PUSH,
c25718
		.tok_state_next = TBNF_REGEX_STATE_BRACKET,
c25718
	},
c25718
c25718
c25718
	/* (expression) */
c25718
	[TBNF_REGEX_TOK_ESCAPED_CHAR] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_escaped_char,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_CIRCUMFLEX_ASTERISK] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_circumflex_asterisk,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_CIRCUMFLEX] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_circumflex,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_LPAREN] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_lparen,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_RPAREN] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_rparen,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_ASTERISK] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_asterisk,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_PERIOD] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_period,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_DOLLAR] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_dollar,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_VLINE] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_vline,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_QMARK] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_qmark,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_PLUS] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_plus,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_CHAR] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_char,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
};
c25718
c25718
c25718
/* brace state scan table */
c25718
static struct tbnf_scan_tbl tbnf_regex_scan_tbl__brace[TBNF_REGEX_TOK_CAP] = {
c25718
	[TBNF_REGEX_TOK_BRACE_RBRACE] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_brace_rbrace,
c25718
		.tok_state_op   = TBNF_STATE_POP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_BRACE_DIGIT] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_brace_digit,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_BRACE_COMMA] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_brace_comma,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
};
c25718
c25718
c25718
/* bracket state scan table */
c25718
static struct tbnf_scan_tbl tbnf_regex_scan_tbl__bracket[TBNF_REGEX_TOK_CAP] = {
c25718
	[TBNF_REGEX_TOK_BRACKET_RBRACKET] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_bracket_rbracket,
c25718
		.tok_state_op   = TBNF_STATE_POP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_BRACKET_ESCAPED_CHAR] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_bracket_escaped_char,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_BRACKET_CHARACTER_CLASS] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_bracket_character_class,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_BRACKET_COLLATION_SYMBOL] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_bracket_collation_symbol,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_BRACKET_EQUIVALENCE_CLASS] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_bracket_equivalence_class,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_BRACKET_HYPHEN] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_bracket_hyphen,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_BRACKET_ERROR] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_bracket_error,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
c25718
	[TBNF_REGEX_TOK_BRACKET_CHAR] = {
c25718
		.tok_scan_fn    = tbnf_regex_scan_char,
c25718
		.tok_state_op   = TBNF_STATE_KEEP,
c25718
	},
c25718
};
c25718
c25718
c25718
/* extended regular expression token scan tables */
c25718
tbnf_hidden struct tbnf_scan_tbl * tbnf_regex_scan_tbl[] = {
c25718
	[TBNF_REGEX_STATE_INIT]    = tbnf_regex_scan_tbl__init,
c25718
	[TBNF_REGEX_STATE_BRACE]   = tbnf_regex_scan_tbl__brace,
c25718
	[TBNF_REGEX_STATE_BRACKET] = tbnf_regex_scan_tbl__bracket,
c25718
	[TBNF_REGEX_STATE_CAP]     = 0,
c25718
};