diff --git a/include/tpax/tpax.h b/include/tpax/tpax.h index 00271cb..fa685c3 100644 --- a/include/tpax/tpax.h +++ b/include/tpax/tpax.h @@ -1,6 +1,7 @@ #ifndef TPAX_H #define TPAX_H +#include #include #include #include @@ -172,6 +173,9 @@ tpax_api int tpax_archive_seal (const struct tpax_driver_ctx *); tpax_api int tpax_util_path_copy (char *, const char *, size_t, uint32_t, size_t *); tpax_api int tpax_util_stat_compare (const struct stat *, const struct stat *); +tpax_api int tpax_util_path_replstr (char * dstpath, const char * srcpath, const char * replstr, + const regex_t * regex, size_t buflen, int flags); + /* utility api */ tpax_api int tpax_main (char **, char **, const struct tpax_fd_ctx *); diff --git a/project/common.mk b/project/common.mk index 26e171c..03e35ee 100644 --- a/project/common.mk +++ b/project/common.mk @@ -12,6 +12,7 @@ API_SRCS = \ src/output/tpax_output_error.c \ src/skin/tpax_skin_default.c \ src/util/tpax_path_copy.c \ + src/util/tpax_path_replstr.c \ src/util/tpax_stat_compare.c \ INTERNAL_SRCS = \ diff --git a/src/driver/tpax_driver_ctx.c b/src/driver/tpax_driver_ctx.c index dff6ef6..5593ec6 100644 --- a/src/driver/tpax_driver_ctx.c +++ b/src/driver/tpax_driver_ctx.c @@ -6,6 +6,7 @@ #define _DEFAULT_SOURCE 1 +#include #include #include #include @@ -322,6 +323,109 @@ static void tpax_set_archive_block_size(struct tpax_common_ctx * cctx) cctx->blksize = TPAX_USTAR_BLOCK_SIZE; } +static int tpax_add_replstr( + struct argv_entry * entry, + struct tpax_replstr * replstr, + char ** mark) +{ + const char * src; + char * dst; + char sep; + int nsep; + + /* parse one -s argument (sep regexp sep replstr sep [gp]): the first character is the separator and may not be null */ + if (!(sep = entry->arg[0])) + return -1; + + /* exactly three separators; a backslash-escaped separator is literal and is not counted */ + for (nsep=1,src=&entry->arg[1]; *src; src++) { + if ((src[0] == '\\') && (src[1] == sep)) { + src++; + + } else if (src[0] == sep) { + nsep++; + } + } + + if (nsep != 3) + return -1; + + /* regexp: copied to *mark, dropping the backslash of an escaped separator */ + for (src=&entry->arg[1],dst=*mark; (*src != sep); src++) { + if 
((src[0] == '\\') && (src[1] == sep)) + src++; + + *dst++ = *src; + } + + replstr->replarg = entry->arg; + replstr->replstr = ++dst; + replstr->regexp = *mark; + + /* replstr: copied likewise; the byte skipped between the two strings is never written to */ + for (++src; (*src != sep); src++) { + if ((src[0] == '\\') && (src[1] == sep)) + src++; + + *dst++ = *src; + } + + src++; + dst++; + + *mark = dst; + + /* regex: compiled ahead of the flags validation, so that a non-null regexp always denotes a compiled regex (the cleanup loop passes every such entry to regfree()) */ + if (regcomp(&replstr->regex,replstr->regexp,0)) { + replstr->regexp = 0; + return -1; + } + + /* flags: none, or at most two distinct characters out of 'g' and 'p' */ + if (src[0] && src[1] && src[2]) + return -1; + + if (src[0] && (src[0] == src[1])) + return -1; + + if (src[0] && (src[0] != 'g') && (src[0] != 'p')) + return -1; + + if (src[0] && src[1] && (src[1] != 'g') && (src[1] != 'p')) + return -1; + + if (src[0] && ((src[0] == 'g') || (src[1] == 'g'))) + replstr->flags |= TPAX_REPL_GLOBAL; + + if (src[0] && ((src[0] == 'p') || (src[1] == 'p'))) + replstr->flags |= TPAX_REPL_PRINT; + + return 0; +} + +static int tpax_init_replstr_vector( + struct tpax_driver_ctx_impl * ctx, + struct argv_meta * meta) +{ + struct argv_entry * entry; + struct tpax_replstr * replstr; + char * mark; + + if (!(replstr = ctx->replstrv)) + return 0; + + for (entry=meta->entries,mark=ctx->replstrs; entry->fopt || entry->arg; entry++) { + if (entry->tag == TAG_REPLSTR) { + if (tpax_add_replstr(entry,replstr,&mark) < 0) + return -1; + + replstr++; + } + } + + return 0; +} + static int tpax_driver_is_valid_keyval(struct argv_keyval * keyval) { (void)keyval; @@ -332,7 +436,9 @@ static struct tpax_driver_ctx_impl * tpax_driver_ctx_alloc( struct argv_meta * meta, const struct tpax_fd_ctx * fdctx, const struct tpax_common_ctx * cctx, - size_t nunits) + size_t nunits, + size_t nreplstr, + size_t sreplstr) { struct tpax_driver_ctx_alloc * ictx; size_t size; @@ -379,6 +485,19 @@ static struct tpax_driver_ctx_impl * tpax_driver_ctx_alloc( return 0; } + if (nreplstr && !(ictx->ctx.replstrv = calloc(++nreplstr,sizeof(*ictx->ctx.replstrv)))) { + free(ictx->ctx.keyvalv); + free(ictx); + return 0; + } + + if (sreplstr 
&& !(ictx->ctx.replstrs = calloc(sreplstr,1))) { + free(ictx->ctx.replstrv); + free(ictx->ctx.keyvalv); + free(ictx); + return 0; + } + if ((pkeyval = ictx->ctx.keyvalv)) for (entry=meta->entries; entry->fopt || entry->arg; entry++) if (entry->keyv) @@ -474,6 +593,8 @@ int tpax_lib_get_driver_ctx( struct argv_keyval ** pkeyval; struct tpax_fd_ctx lfdctx; size_t nunits; + size_t nreplstr; + size_t sreplstr; const char * program; int fddst; const char * ch; @@ -497,6 +618,9 @@ int tpax_lib_get_driver_ctx( program = argv_program_name(argv[0]); memset(&cctx,0,sizeof(cctx)); + nreplstr = 0; + sreplstr = 0; + cctx.drvflags = flags; fddst = fdctx->fddst; @@ -581,6 +705,12 @@ int tpax_lib_get_driver_ctx( meta); break; + case TAG_REPLSTR: /* string buffer: the argument's length plus one byte; vector: one more entry */ + sreplstr += strlen(entry->arg); + sreplstr++; + nreplstr++; + break; + case TAG_RECURSE: cctx.drvflags |= TPAX_DRIVER_DIR_MEMBER_RECURSE; break; @@ -751,13 +881,19 @@ int tpax_lib_get_driver_ctx( } /* driver ctx */ - if (!(ctx = tpax_driver_ctx_alloc(meta,fdctx,&cctx,nunits))) { + if (!(ctx = tpax_driver_ctx_alloc(meta,fdctx,&cctx,nunits,nreplstr,sreplstr))) { if (cctx.drvflags & TPAX_DRIVER_EXEC_MODE_COPY) close(fddst); return tpax_get_driver_ctx_fail(meta); } + /* replstr validation and vector initialization: parses each TAG_REPLSTR argument and compiles its regex */ + if (tpax_init_replstr_vector(ctx,meta) < 0) { + tpax_lib_free_driver_ctx(&ctx->ctx); + return TPAX_ERROR; + } + /* keyval validation */ for (pkeyval=ctx->keyvalv; pkeyval && *pkeyval; pkeyval++) if (!tpax_driver_is_valid_keyval(*pkeyval)) @@ -781,6 +917,8 @@ static void tpax_free_driver_ctx_impl(struct tpax_driver_ctx_alloc * ictx) size_t size; char ** ppref; + struct tpax_replstr * replstrv; + for (; ictx->ctx.dirents; ) { next = ictx->ctx.dirents->next; size = ictx->ctx.dirents->size; @@ -789,6 +927,15 @@ static void tpax_free_driver_ctx_impl(struct tpax_driver_ctx_alloc * ictx) ictx->ctx.dirents = (struct tpax_dirent_buffer *)next; } + for (replstrv=ictx->ctx.replstrv; replstrv && replstrv->regexp; replstrv++) /* stops at the first entry whose regexp is null */ + 
regfree(&replstrv->regex); + + if (ictx->ctx.replstrv) + free(ictx->ctx.replstrv); + + if (ictx->ctx.replstrs) + free(ictx->ctx.replstrs); + if (ictx->ctx.keyvalv) free(ictx->ctx.keyvalv); diff --git a/src/internal/tpax_driver_impl.h b/src/internal/tpax_driver_impl.h index d340748..3df8244 100644 --- a/src/internal/tpax_driver_impl.h +++ b/src/internal/tpax_driver_impl.h @@ -7,6 +7,7 @@ #ifndef TPAX_DRIVER_IMPL_H #define TPAX_DRIVER_IMPL_H +#include #include #include #include @@ -31,6 +32,9 @@ #define TPAX_ITEM_SYMLINK 0X4 #define TPAX_ITEM_NAMEREF 0x8 +#define TPAX_REPL_GLOBAL 0x01 /* set by the 'g' flag of a replstr argument */ +#define TPAX_REPL_PRINT 0x02 /* set by the 'p' flag of a replstr argument */ + extern const struct argv_option tpax_default_options[]; enum app_tags { @@ -45,6 +49,7 @@ enum app_tags { TAG_FORMAT, TAG_BLKSIZE, TAG_OPTIONS, + TAG_REPLSTR, TAG_RECURSE, TAG_NORECURSE, TAG_STRICT_PATH, @@ -74,6 +79,14 @@ struct tpax_dirent_buffer { struct tpax_dirent dbuf[]; }; +struct tpax_replstr { + const char * replarg; /* the option argument (entry->arg), unmodified */ + const char * replstr; /* replacement string, stored in the replstrs buffer */ + const char * regexp; /* pattern text; null in an unused (terminating) entry */ + regex_t regex; /* regexp as compiled by regcomp() */ + uint32_t flags; /* TPAX_REPL_* bits */ +}; + struct tpax_driver_ctx_impl { const char * file; struct tpax_common_ctx cctx; @@ -82,6 +95,8 @@ struct tpax_driver_ctx_impl { const struct tpax_unit_ctx * euctx; const char * eunit; struct argv_keyval ** keyvalv; + struct tpax_replstr * replstrv; /* one entry per replstr argument, followed by a zeroed entry */ + char * replstrs; /* string buffer backing the entries' regexp and replstr */ struct tpax_error_info ** errinfp; struct tpax_error_info ** erricap; struct tpax_error_info * erriptr[64]; diff --git a/src/logic/tpax_archive_write.c b/src/logic/tpax_archive_write.c index 32a5f2c..a881bd9 100644 --- a/src/logic/tpax_archive_write.c +++ b/src/logic/tpax_archive_write.c @@ -75,6 +75,35 @@ static int tpax_archive_write_ret( return ret; } +static int tpax_apply_string_replacement( + const struct tpax_driver_ctx * dctx, + const char * path, + char * replbuf, + size_t buflen) +{ + int ret; + struct tpax_driver_ctx_impl * ictx; + struct tpax_replstr * replstrv; + + ictx = tpax_get_driver_ictx(dctx); + /* no replstr vector: report zero, leaving replbuf untouched */ + if (!(replstrv = ictx->replstrv)) + return 0; + /* try the entries in order, stopping at the first non-zero result */ + for (ret=0; !ret && 
replstrv->regexp; replstrv++) { + ret = tpax_util_path_replstr( + replbuf,path, + replstrv->replstr, + &replstrv->regex, + buflen,replstrv->flags); + /* TPAX_REPL_PRINT: report the original path and the content of replbuf via tpax_driver_fderr() */ + if ((ret > 0) && (replstrv->flags & TPAX_REPL_PRINT)) + tpax_dprintf(tpax_driver_fderr(dctx),"%s >> %s\n",path,replbuf); + } + + return ret; +} + static int tpax_archive_write_impl( const struct tpax_driver_ctx * dctx, const struct tpax_dirent * cdent, @@ -85,12 +114,14 @@ static int tpax_archive_write_impl( struct tpax_ustar_header uhdr; const struct stat * st; struct stat stbuf; + const char * apath; const char * path; const char * slnk; const char * mlnk; off_t hpos; off_t dpos; int fdtmp; + int slen; ssize_t nread; ssize_t nbytes; void * buf; @@ -98,6 +129,7 @@ static int tpax_archive_write_impl( size_t cmplen; void * membuf; char * ch; + char replbuf[PATH_MAX]; char pathbuf[PATH_MAX]; /* followed symlink? */ @@ -110,9 +142,17 @@ static int tpax_archive_write_impl( dctx, TPAX_ERR_FLOW_ERROR); + /* regex matching and pattern substitution: a negative result is an error, a non-zero result selects replbuf as apath */ + if ((slen = tpax_apply_string_replacement(dctx,path,replbuf,PATH_MAX)) < 0) + return TPAX_CUSTOM_ERROR( + dctx, + TPAX_ERR_FLOW_ERROR); + + apath = slen ? 
replbuf : path; + /* verbose mode */ if (dctx->cctx->drvflags & TPAX_DRIVER_VERBOSE) - tpax_dprintf(tpax_driver_fderr(dctx),"%s",path); + tpax_dprintf(tpax_driver_fderr(dctx),"%s",apath); /* uctx */ if (tpax_lib_get_unit_ctx(dctx,fdcwd,path,&uctx) < 0) @@ -171,7 +211,7 @@ static int tpax_archive_write_impl( /* header */ if (tpax_meta_init_ustar_header( - dctx,path,st, + dctx,apath,st, slnk,&uhdr) < 0) return tpax_archive_write_ret( TPAX_NESTED_ERROR(dctx), diff --git a/src/skin/tpax_skin_default.c b/src/skin/tpax_skin_default.c index a618645..dd62b86 100644 --- a/src/skin/tpax_skin_default.c +++ b/src/skin/tpax_skin_default.c @@ -93,6 +93,21 @@ const tpax_hidden struct argv_option tpax_default_options[] = { "a user-provided, format-specific keyval array of the form " "keyword[[:]=value][,keyword[[:]=value], ...]"}, + {"Wreplstr", 's',TAG_REPLSTR,ARGV_OPTARG_REQUIRED, /* -s: path substitution; the format described below is the one parsed at driver context creation */ + ARGV_OPTION_HYBRID_ONLY|ARGV_OPTION_HYBRID_SPACE,0,0, + "rename files and archive members as they are being added to " + "or extracted from the archive according to the specified " + "ed(1) style replacement string, which should be in the format " + "<sep><regex><sep><replstr><sep>[gp]; as an example, " + "-s ',^/git/tpax/,tpax-1.2.3/,' uses <comma> as the separator " + "character, and instructs pax to prefix all files rooted in " + "'/git/tpax/' with 'tpax-1.2.3/' while leaving the names of files which " + "do not match the regex expression unchanged. 
" + "When this option is repeated, pax shall attempt to match each file or " + "member name against all of the provided replacement-string arguments " + "in the order of appearance on the command line until the first " + "successful match."}, + {"Wstrict-device-id", 'X',TAG_STRICT_DEVICE_ID,ARGV_OPTARG_NONE, ARGV_OPTION_HYBRID_ONLY,0,0, diff --git a/src/util/tpax_path_replstr.c b/src/util/tpax_path_replstr.c new file mode 100644 index 0000000..1935628 --- /dev/null +++ b/src/util/tpax_path_replstr.c @@ -0,0 +1,120 @@ +/**************************************************************/ +/* tpax: a topological pax implementation */ +/* Copyright (C) 2020--2024 SysDeer Technologies, LLC */ +/* Released under GPLv2 and GPLv3; see COPYING.TPAX. */ +/**************************************************************/ + +#include +#include +#include +#include + +#include +#include "tpax_driver_impl.h" + +static int tpax_backref_idx(const char c) +{ + return ((c >= '1') && (c <= '9')) ? c - '0' : 0; +} + +int tpax_util_path_replstr( + char * dstpath, + const char * srcpath, + const char * replstr, + const regex_t * regex, + size_t buflen, + int flags) +{ + int ret; + int idx; + regoff_t ro; + const char * ch; + char * dst; + size_t explen; + regmatch_t pmatch[11]; + + /* attempt to match */ + switch (regexec(regex,srcpath,11,pmatch,0)) { + case 0: + break; + + case REG_NOMATCH: + return 0; + + default: + return -1; + } + + /* copy bytes leading up to match */ + if (buflen <= (explen = pmatch[0].rm_so)) { + errno = ENOBUFS; + return -1; + } + + for (ro=0,dst=dstpath; ro stands for the entire matched string */ + if (ch[0] == '&') { + idx = 0; + + /* back-reference semantics: a matched subexpression or an empty string */ + } else if ((ch[0] == '\\') && (idx = tpax_backref_idx(ch[1]))) { + if (pmatch[idx].rm_so < 0) + idx = -1; + + ch++; + + /* all other escaped characters */ + } else if (ch[0] == '\\') { + *dst++ = *++ch; + idx = -1; + buflen--; + + /* all other characters */ + } 
else { + *dst++ = *ch; + idx = -1; + buflen--; + } + + /* copy matched string or matched subexpression, if any */ + if (idx >= 0) { + if (buflen <= (explen = (pmatch[idx].rm_eo - pmatch[idx].rm_so))) { + errno = ENOBUFS; + return -1; + } + + for (ro=pmatch[idx].rm_so; ro