|
|
c25718 |
/**************************************************************/
|
|
|
c25718 |
/* treebnf: a tree oriented bnf library */
|
|
|
c25718 |
/* Copyright (C) 2024 SysDeer Technologies, LLC */
|
|
|
c25718 |
/* Released under GPLv2 and GPLv3; see COPYING.TREEBNF. */
|
|
|
c25718 |
/**************************************************************/
|
|
|
c25718 |
|
|
|
c25718 |
#include <treebnf/treebnf.h>
|
|
|
c25718 |
|
|
|
c25718 |
#include "treebnf_regex_impl.h"
|
|
|
c25718 |
#include "treebnf_visibility_impl.h"
|
|
|
c25718 |
|
|
|
c25718 |
#include "tbnf_regex_defs.h"
|
|
|
c25718 |
#include "tbnf_regex_scanfns.h"
|
|
|
c25718 |
|
|
|
c25718 |
/* NOTE(review): not referenced in the visible part of this file; presumably */
/* the capacity of the scanner's state stack (see TBNF_STATE_PUSH/POP) -- confirm. */
#define TBNF_STATE_STACK_SIZE (512)
|
|
|
c25718 |
|
|
|
c25718 |
/* init state scan table*/
|
|
|
c25718 |
static struct tbnf_scan_tbl tbnf_regex_scan_tbl__init[TBNF_REGEX_TOK_CAP] = {
|
|
|
c25718 |
/* --> brace */
|
|
|
c25718 |
[TBNF_REGEX_TOK_LBRACE] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_lbrace,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_PUSH,
|
|
|
c25718 |
.tok_state_next = TBNF_REGEX_STATE_BRACE,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
|
|
|
c25718 |
/* --> bracket */
|
|
|
c25718 |
[TBNF_REGEX_TOK_LBRACKET_CIRCUMFLEX_RBRACKET] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_lbracket_circumflex_rbracket,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_PUSH,
|
|
|
c25718 |
.tok_state_next = TBNF_REGEX_STATE_BRACKET,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_LBRACKET_CIRCUMFLEX] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_lbracket_circumflex,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_PUSH,
|
|
|
c25718 |
.tok_state_next = TBNF_REGEX_STATE_BRACKET,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_LBRACKET_RBRACKET] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_lbracket_rbracket,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_PUSH,
|
|
|
c25718 |
.tok_state_next = TBNF_REGEX_STATE_BRACKET,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_LBRACKET] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_lbracket,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_PUSH,
|
|
|
c25718 |
.tok_state_next = TBNF_REGEX_STATE_BRACKET,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
|
|
|
c25718 |
/* (expression) */
|
|
|
c25718 |
[TBNF_REGEX_TOK_ESCAPED_CHAR] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_escaped_char,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_CIRCUMFLEX_ASTERISK] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_circumflex_asterisk,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_CIRCUMFLEX] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_circumflex,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_LPAREN] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_lparen,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_RPAREN] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_rparen,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_ASTERISK] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_asterisk,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_PERIOD] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_period,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_DOLLAR] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_dollar,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_VLINE] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_vline,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_QMARK] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_qmark,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_PLUS] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_plus,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_CHAR] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_char,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
};
|
|
|
c25718 |
|
|
|
c25718 |
|
|
|
c25718 |
/* brace state scan table*/
|
|
|
c25718 |
static struct tbnf_scan_tbl tbnf_regex_scan_tbl__brace[TBNF_REGEX_TOK_CAP] = {
|
|
|
c25718 |
[TBNF_REGEX_TOK_BRACE_RBRACE] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_brace_rbrace,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_POP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_BRACE_DIGIT] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_brace_digit,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_BRACE_COMMA] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_brace_comma,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
};
|
|
|
c25718 |
|
|
|
c25718 |
|
|
|
c25718 |
/* bracket state scan table*/
|
|
|
c25718 |
static struct tbnf_scan_tbl tbnf_regex_scan_tbl__bracket[TBNF_REGEX_TOK_CAP] = {
|
|
|
c25718 |
[TBNF_REGEX_TOK_BRACKET_RBRACKET] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_bracket_rbracket,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_POP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_BRACKET_ESCAPED_CHAR] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_bracket_escaped_char,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_BRACKET_CHARACTER_CLASS] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_bracket_character_class,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_BRACKET_COLLATION_SYMBOL] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_bracket_collation_symbol,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_BRACKET_EQUIVALENCE_CLASS] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_bracket_equivalence_class,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_BRACKET_HYPHEN] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_bracket_hyphen,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_BRACKET_ERROR] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_bracket_error,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
|
|
|
c25718 |
[TBNF_REGEX_TOK_BRACKET_CHAR] = {
|
|
|
c25718 |
.tok_scan_fn = tbnf_regex_scan_char,
|
|
|
c25718 |
.tok_state_op = TBNF_STATE_KEEP,
|
|
|
c25718 |
},
|
|
|
c25718 |
};
|
|
|
c25718 |
|
|
|
c25718 |
|
|
|
c25718 |
/* extended regex expression token scan table */
|
|
|
c25718 |
tbnf_hidden struct tbnf_scan_tbl * tbnf_regex_scan_tbl[] = {
|
|
|
c25718 |
[TBNF_REGEX_STATE_INIT] = tbnf_regex_scan_tbl__init,
|
|
|
c25718 |
[TBNF_REGEX_STATE_BRACE] = tbnf_regex_scan_tbl__brace,
|
|
|
c25718 |
[TBNF_REGEX_STATE_BRACKET] = tbnf_regex_scan_tbl__bracket,
|
|
|
c25718 |
[TBNF_REGEX_STATE_CAP] = 0,
|
|
|
c25718 |
};
|