/**************************************************************/
/*  treebnf: a tree oriented bnf library                      */
/*  Copyright (C) 2024  SysDeer Technologies, LLC             */
/*  Released under GPLv2 and GPLv3; see COPYING.TREEBNF.      */
/**************************************************************/

#include <treebnf/treebnf.h>

#include "treebnf_regex_impl.h"
#include "treebnf_visibility_impl.h"

#include "tbnf_regex_defs.h"
#include "tbnf_regex_scanfns.h"

/* NOTE(review): presumably the capacity of the scanner's state stack; */
/* it is not referenced in the visible part of this file -- confirm    */
/* where (or whether) it is consumed.                                  */
#define TBNF_STATE_STACK_SIZE   (512)

/*
 * init state scan table, indexed by token id.
 *
 * Every populated slot names the token's scan function and a state
 * operation. The tokens that open a brace or a bracket expression are
 * tagged TBNF_STATE_PUSH and additionally set tok_state_next to the
 * BRACE or BRACKET state; all remaining tokens are tagged
 * TBNF_STATE_KEEP and leave tok_state_next zero-initialized. Slots that
 * are not listed below stay zero-initialized.
 *
 * The two helper macros only spell out the designated initializer of a
 * single slot; they are local to this table and undefined right after.
 */
#define TBNF_SCAN_TBL_INIT_PUSH(tok, scan_fn, next_state)  \
	[tok] = {                                          \
		.tok_scan_fn    = scan_fn,                 \
		.tok_state_op   = TBNF_STATE_PUSH,         \
		.tok_state_next = next_state,              \
	}

#define TBNF_SCAN_TBL_INIT_KEEP(tok, scan_fn)              \
	[tok] = {                                          \
		.tok_scan_fn    = scan_fn,                 \
		.tok_state_op   = TBNF_STATE_KEEP,         \
	}

static struct tbnf_scan_tbl tbnf_regex_scan_tbl__init[TBNF_REGEX_TOK_CAP] = {
	/* --> brace */
	TBNF_SCAN_TBL_INIT_PUSH(
		TBNF_REGEX_TOK_LBRACE,
		tbnf_regex_scan_lbrace,
		TBNF_REGEX_STATE_BRACE),

	/* --> bracket */
	TBNF_SCAN_TBL_INIT_PUSH(
		TBNF_REGEX_TOK_LBRACKET_CIRCUMFLEX_RBRACKET,
		tbnf_regex_scan_lbracket_circumflex_rbracket,
		TBNF_REGEX_STATE_BRACKET),

	TBNF_SCAN_TBL_INIT_PUSH(
		TBNF_REGEX_TOK_LBRACKET_CIRCUMFLEX,
		tbnf_regex_scan_lbracket_circumflex,
		TBNF_REGEX_STATE_BRACKET),

	TBNF_SCAN_TBL_INIT_PUSH(
		TBNF_REGEX_TOK_LBRACKET_RBRACKET,
		tbnf_regex_scan_lbracket_rbracket,
		TBNF_REGEX_STATE_BRACKET),

	TBNF_SCAN_TBL_INIT_PUSH(
		TBNF_REGEX_TOK_LBRACKET,
		tbnf_regex_scan_lbracket,
		TBNF_REGEX_STATE_BRACKET),

	/* (expression) */
	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_ESCAPED_CHAR,
		tbnf_regex_scan_escaped_char),

	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_CIRCUMFLEX_ASTERISK,
		tbnf_regex_scan_circumflex_asterisk),

	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_CIRCUMFLEX,
		tbnf_regex_scan_circumflex),

	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_LPAREN,
		tbnf_regex_scan_lparen),

	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_RPAREN,
		tbnf_regex_scan_rparen),

	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_ASTERISK,
		tbnf_regex_scan_asterisk),

	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_PERIOD,
		tbnf_regex_scan_period),

	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_DOLLAR,
		tbnf_regex_scan_dollar),

	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_VLINE,
		tbnf_regex_scan_vline),

	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_QMARK,
		tbnf_regex_scan_qmark),

	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_PLUS,
		tbnf_regex_scan_plus),

	TBNF_SCAN_TBL_INIT_KEEP(
		TBNF_REGEX_TOK_CHAR,
		tbnf_regex_scan_char),
};

#undef TBNF_SCAN_TBL_INIT_KEEP
#undef TBNF_SCAN_TBL_INIT_PUSH


/*
 * brace state scan table, indexed by token id.
 *
 * Only the three brace tokens are populated: the closing brace is
 * tagged TBNF_STATE_POP, whereas digits and the comma are tagged
 * TBNF_STATE_KEEP. No slot sets tok_state_next, and all slots that
 * are not listed below stay zero-initialized.
 *
 * The helper macro spells out the designated initializer of a single
 * slot; it is local to this table and undefined right after.
 */
#define TBNF_SCAN_TBL_BRACE_ENTRY(tok, scan_fn, state_op)  \
	[tok] = {                                          \
		.tok_scan_fn    = scan_fn,                 \
		.tok_state_op   = state_op,                \
	}

static struct tbnf_scan_tbl tbnf_regex_scan_tbl__brace[TBNF_REGEX_TOK_CAP] = {
	TBNF_SCAN_TBL_BRACE_ENTRY(
		TBNF_REGEX_TOK_BRACE_RBRACE,
		tbnf_regex_scan_brace_rbrace,
		TBNF_STATE_POP),

	TBNF_SCAN_TBL_BRACE_ENTRY(
		TBNF_REGEX_TOK_BRACE_DIGIT,
		tbnf_regex_scan_brace_digit,
		TBNF_STATE_KEEP),

	TBNF_SCAN_TBL_BRACE_ENTRY(
		TBNF_REGEX_TOK_BRACE_COMMA,
		tbnf_regex_scan_brace_comma,
		TBNF_STATE_KEEP),
};

#undef TBNF_SCAN_TBL_BRACE_ENTRY


/*
 * bracket state scan table, indexed by token id.
 *
 * The closing bracket is tagged TBNF_STATE_POP; every other bracket
 * token is tagged TBNF_STATE_KEEP. No slot sets tok_state_next, and
 * all slots that are not listed below stay zero-initialized.
 *
 * The helper macro spells out the designated initializer of a single
 * slot; it is local to this table and undefined right after.
 */
#define TBNF_SCAN_TBL_BRACKET_ENTRY(tok, scan_fn, state_op)  \
	[tok] = {                                            \
		.tok_scan_fn    = scan_fn,                   \
		.tok_state_op   = state_op,                  \
	}

static struct tbnf_scan_tbl tbnf_regex_scan_tbl__bracket[TBNF_REGEX_TOK_CAP] = {
	TBNF_SCAN_TBL_BRACKET_ENTRY(
		TBNF_REGEX_TOK_BRACKET_RBRACKET,
		tbnf_regex_scan_bracket_rbracket,
		TBNF_STATE_POP),

	TBNF_SCAN_TBL_BRACKET_ENTRY(
		TBNF_REGEX_TOK_BRACKET_ESCAPED_CHAR,
		tbnf_regex_scan_bracket_escaped_char,
		TBNF_STATE_KEEP),

	TBNF_SCAN_TBL_BRACKET_ENTRY(
		TBNF_REGEX_TOK_BRACKET_CHARACTER_CLASS,
		tbnf_regex_scan_bracket_character_class,
		TBNF_STATE_KEEP),

	TBNF_SCAN_TBL_BRACKET_ENTRY(
		TBNF_REGEX_TOK_BRACKET_COLLATION_SYMBOL,
		tbnf_regex_scan_bracket_collation_symbol,
		TBNF_STATE_KEEP),

	TBNF_SCAN_TBL_BRACKET_ENTRY(
		TBNF_REGEX_TOK_BRACKET_EQUIVALENCE_CLASS,
		tbnf_regex_scan_bracket_equivalence_class,
		TBNF_STATE_KEEP),

	TBNF_SCAN_TBL_BRACKET_ENTRY(
		TBNF_REGEX_TOK_BRACKET_HYPHEN,
		tbnf_regex_scan_bracket_hyphen,
		TBNF_STATE_KEEP),

	TBNF_SCAN_TBL_BRACKET_ENTRY(
		TBNF_REGEX_TOK_BRACKET_ERROR,
		tbnf_regex_scan_bracket_error,
		TBNF_STATE_KEEP),

	/* NOTE(review): unlike the other slots of this table, this one  */
	/* uses the scan function that the init table also uses for      */
	/* TBNF_REGEX_TOK_CHAR rather than a bracket-specific function;  */
	/* this looks deliberate, but confirm.                           */
	TBNF_SCAN_TBL_BRACKET_ENTRY(
		TBNF_REGEX_TOK_BRACKET_CHAR,
		tbnf_regex_scan_char,
		TBNF_STATE_KEEP),
};

#undef TBNF_SCAN_TBL_BRACKET_ENTRY


/*
 * extended regex expression token scan table:
 * maps each scanner state to the first element of that state's
 * token scan table; the slot at TBNF_REGEX_STATE_CAP is a null pointer.
 */
tbnf_hidden struct tbnf_scan_tbl * tbnf_regex_scan_tbl[] = {
	[TBNF_REGEX_STATE_INIT]    = &tbnf_regex_scan_tbl__init[0],
	[TBNF_REGEX_STATE_BRACE]   = &tbnf_regex_scan_tbl__brace[0],
	[TBNF_REGEX_STATE_BRACKET] = &tbnf_regex_scan_tbl__bracket[0],

	/* explicit null slot; keeping it preserves the array's length */
	[TBNF_REGEX_STATE_CAP]     = 0,
};