diff --git a/include/treebnf/treebnf.h b/include/treebnf/treebnf.h index 1a8db27..a5c0359 100644 --- a/include/treebnf/treebnf.h +++ b/include/treebnf/treebnf.h @@ -163,6 +163,10 @@ tbnf_api int tbnf_lib_map_raw_input (const struct tbnf_driver_ctx *, tbnf_api int tbnf_lib_unmap_raw_input (struct tbnf_raw_input *); +/* table-based token scanner api */ +tbnf_api int tbnf_scan_token (const struct tbnf_scan_ctx *, struct tbnf_token *); +tbnf_api int tbnf_scan_tokens (struct tbnf_scan_ctx *, size_t ntoks, struct tbnf_token *, int any); + /* utility api */ tbnf_api int tbnf_main (char **, char **, const struct tbnf_fd_ctx *); diff --git a/project/common.mk b/project/common.mk index 7a77195..c9523bd 100644 --- a/project/common.mk +++ b/project/common.mk @@ -5,6 +5,7 @@ API_SRCS = \ src/driver/tbnf_unit_ctx.c \ src/output/tbnf_output_error.c \ src/skin/tbnf_skin_default.c \ + src/tokscan/tbnf_scan_token.c \ INTERNAL_SRCS = \ src/internal/$(PACKAGE)_dprintf_impl.c \ diff --git a/project/tree.mk b/project/tree.mk index f3ae848..0c51986 100644 --- a/project/tree.mk +++ b/project/tree.mk @@ -2,6 +2,7 @@ TREE_DIRS = src \ src/driver \ src/output \ src/skin \ + src/tokscan \ src/internal \ tree.tag: diff --git a/src/tokscan/tbnf_scan_token.c b/src/tokscan/tbnf_scan_token.c new file mode 100644 index 0000000..b546959 --- /dev/null +++ b/src/tokscan/tbnf_scan_token.c @@ -0,0 +1,102 @@ +/**************************************************************/ +/* treebnf: a tree oriented bnf library */ +/* Copyright (C) 2024 SysDeer Technologies, LLC */ +/* Released under GPLv2 and GPLv3; see COPYING.TREEBNF. */ +/**************************************************************/ + +#include + +#define TBNF_STATE_STACK_SIZE (512) + +/* single token, read-only context */ +int tbnf_scan_token(const struct tbnf_scan_ctx * sctx, struct tbnf_token * tok) +{ + int ret = 0; + int len = 0; + int type = 0; + + int tidx = 0; + int sidx = sctx->tok_scan_state; + + for (; tidx < sctx->tok_scan_nents; ) { + if (sctx->tok_scan_tbls[sidx][tidx].tok_scan_fn) + ret = sctx->tok_scan_tbls[sidx][tidx].tok_scan_fn(sctx); + + if (ret > len) { + len = ret; + type = tidx; + } + + tidx++; + } + + tok->tok_type = type; + tok->tok_len = len; + tok->tok_off = sctx->tok_scan_mark - sctx->tok_scan_base; + + return (len > 0) ? 0 : -1; +} + +/* scan up to ntoks tokens, read-write context */ +int tbnf_scan_tokens(struct tbnf_scan_ctx * sctx, size_t ntoks, struct tbnf_token * tokv, int any) +{ + int ret; + int * state; + int * stcap; + int ststk[TBNF_STATE_STACK_SIZE]; + struct tbnf_scan_tbl * pentry; + + ret = 0; + ntoks = (ntoks > INT32_MAX) ? INT32_MAX : ntoks; + + state = ststk; + state[0] = sctx->tok_scan_state; + + stcap = &state[TBNF_STATE_STACK_SIZE]; + stcap--; + + /*******************************************************************/ + /* a positive return value that's smaller than the original ntoks, */ + /* in combination with mark < cap, indicates an error while trying */ + /* to obtain the next token. */ + /*******************************************************************/ + + for (; ntoks && (sctx->tok_scan_mark < sctx->tok_scan_cap); ) { + if (tbnf_scan_token(sctx,tokv) < 0) + return (ret > 0) ? ret : (-1); + + pentry = &sctx->tok_scan_tbls[*state][tokv->tok_type]; + + switch (pentry->tok_state_op) { + case TBNF_STATE_POP: + if (state == ststk) + return (-1); + + state--; + sctx->tok_scan_state = *state; + break; + + case TBNF_STATE_KEEP: + break; + + case TBNF_STATE_PUSH: + if (state == stcap) + return (-1); + + sctx->tok_scan_state = pentry->tok_state_next; + *++state = sctx->tok_scan_state; + break; + } + + sctx->tok_scan_type = tokv->tok_type; + sctx->tok_scan_mark += tokv->tok_len; + + tokv->tok_any = any; + tokv++; + + ntoks--; + ret++; + } + + return ret; +}