commit 4ba42acfea72bbb378808bbf033396cd6a0e3d22 Author: Konstantin Demin Date: Tue May 27 11:36:06 2025 +0300 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..78660df --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +## local copy +xxhash.h + +## build objects +*.o + +## build artefact +overlaydirs diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6d98eba --- /dev/null +++ b/Makefile @@ -0,0 +1,40 @@ +#!/usr/bin/make -f +SHELL :=/bin/sh +.SHELLFLAGS :=-efc +MAKEFLAGS +=--no-print-directory + +SRC = main.cc coreutils-sort.cc overlay.cc print.cc sort.cc xxhash.cc + +CROSS ?= +CC =$(CROSS)gcc +CXX =$(CROSS)g++ +STRIP =$(CROSS)strip + +CFLAGS_LTO ?=-flto=2 -fuse-linker-plugin -ffat-lto-objects -flto-partition=none +CFLAGS_COMMON ?=-O2 -g -fPIE -fstack-protector-strong +CFLAGS ?=$(CFLAGS_COMMON) $(CFLAGS_LTO) +CPPFLAGS ?=-Wall -Wextra -Werror=format-security -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 +CXXFLAGS ?=$(CFLAGS_COMMON) $(CFLAGS_LTO) -std=gnu++20 +LDFLAGS ?=-Wl,-z,relro -Wl,-z,now -pie + +NO_WARN = attributes class-memaccess unused-function unused-result +CPPFLAGS += $(foreach w,$(NO_WARN),-Wno-$(w)) + +NO_CXX = rtti exceptions +CXXFLAGS +=$(foreach f,$(NO_CXX),-fno-$(f)) + +OBJ = $(SRC:.cc=.cc.o) + +.DEFAULT: all +.PHONY: all build clean +all build: overlaydirs + +%.cc.o: %.cc + $(CXX) -c $(CXXFLAGS) $(CPPFLAGS) -o $@ $^ + +overlaydirs: $(OBJ) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $^ \ + $(if $(filter $(RELMODE),1),&& $(STRIP) --strip-debug --strip-unneeded $@) + +clean: + rm -f overlaydirs $(OBJ) diff --git a/coreutils-sort.cc b/coreutils-sort.cc new file mode 100644 index 0000000..3bda749 --- /dev/null +++ b/coreutils-sort.cc @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later + Origin: coreutils-9.7/lib/filevercmp.c + Copyright (C) 1995 Ian Jackson + Copyright (C) 2001 Anthony Towns + Copyright (C) 2008-2025 Free Software Foundation, Inc. 
+*/ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include + +#include "coreutils-sort.hh" + +/* Return the length of a prefix of S that corresponds to the suffix + defined by this extended regular expression in the C locale: + (\.[A-Za-z~][A-Za-z0-9~]*)*$ + Use the longest suffix matching this regular expression, + except do not use all of S as a suffix if S is nonempty. + If *LEN is -1, S is a string; set *LEN to S's length. + Otherwise, *LEN should be nonnegative, S is a char array, + and *LEN does not change. */ +static +ptrdiff_t file_prefixlen(char const * s, ptrdiff_t * len) +{ + size_t n = *len; /* SIZE_MAX if N == -1. */ + ptrdiff_t prefixlen = 0; + + for (ptrdiff_t i = 0; ; ) { + if ((*len < 0) ? !s[i] : (i == n)) { + *len = i; + return prefixlen; + } + + i++; + prefixlen = i; + while ((i + 1 < n) && (s[i] == '.') && (isalpha(s[i + 1]) || (s[i + 1] == '~'))) { + for (i += 2; (i < n) && (isalnum(s[i]) || (s[i] == '~')); i++) { + continue; + } + } + } +} + +/* Return a version sort comparison value for S's byte at position POS. + S has length LEN. If POS == LEN, sort before all non-'~' bytes. */ +static +int order(char const * s, ptrdiff_t pos, ptrdiff_t len) +{ + if (pos == len) return -1; + + unsigned char c = s[pos]; + if (isdigit(c)) return 0; + else if (isalpha(c)) return c; + else if (c == '~') return -2; + else + { + static_assert (UCHAR_MAX <= (INT_MAX - 1 - 2) / 2); + return c + UCHAR_MAX + 1; + } +} + +/* slightly modified verrevcmp function from dpkg + S1, S2 - compared char array + S1_LEN, S2_LEN - length of arrays to be scanned + + This implements the algorithm for comparison of version strings + specified by Debian and now widely adopted. The detailed + specification can be found in the Debian Policy Manual in the + section on the 'Version' control field. 
This version of the code + implements that from s5.6.12 of Debian Policy v3.8.0.1 + https://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version */ +static +int verrevcmp(const char * s1, ptrdiff_t s1_len, const char * s2, ptrdiff_t s2_len) +{ + ptrdiff_t s1_pos = 0; + ptrdiff_t s2_pos = 0; + + while (s1_pos < s1_len || s2_pos < s2_len) { + int first_diff = 0; + while (((s1_pos < s1_len) && !isdigit(s1[s1_pos])) || ((s2_pos < s2_len) && !isdigit(s2[s2_pos]))) { + int s1_c = order(s1, s1_pos, s1_len); + int s2_c = order(s2, s2_pos, s2_len); + if (s1_c != s2_c) return s1_c - s2_c; + s1_pos++; + s2_pos++; + } + + while ((s1_pos < s1_len) && (s1[s1_pos] == '0')) { + s1_pos++; + } + while ((s2_pos < s2_len) && (s2[s2_pos] == '0')) { + s2_pos++; + } + while ((s1_pos < s1_len) && (s2_pos < s2_len) && isdigit(s1[s1_pos]) && isdigit(s2[s2_pos])) { + if (!first_diff) first_diff = s1[s1_pos] - s2[s2_pos]; + s1_pos++; + s2_pos++; + } + + if ((s1_pos < s1_len) && isdigit(s1[s1_pos])) return 1; + if ((s2_pos < s2_len) && isdigit(s2[s2_pos])) return -1; + if (first_diff) return first_diff; + } + return 0; +} + +static +int filenvercmp (char const * a, ptrdiff_t alen, char const * b, ptrdiff_t blen) +{ + /* Special case for empty versions. */ + bool aempty = (alen < 0) ? !a[0] : !alen; + bool bempty = (blen < 0) ? !b[0] : !blen; + if (aempty) return -!bempty; + if (bempty) return 1; + + /* Special cases for leading ".": "." sorts first, then "..", then + other names with leading ".", then other names. */ + if (a[0] == '.') { + if (b[0] != '.') return -1; + + bool adot = alen < 0 ? !a[1] : alen == 1; + bool bdot = blen < 0 ? !b[1] : blen == 1; + if (adot) return -!bdot; + if (bdot) return 1; + + bool adotdot = (a[1] == '.') && ((alen < 0) ? !a[2] : (alen == 2)); + bool bdotdot = (b[1] == '.') && ((blen < 0) ? !b[2] : (blen == 2)); + if (adotdot) return -!bdotdot; + if (bdotdot) return 1; + } + else if (b[0] == '.') { + return 1; + } + + /* Cut file suffixes. 
*/ + ptrdiff_t aprefixlen = file_prefixlen(a, &alen); + ptrdiff_t bprefixlen = file_prefixlen(b, &blen); + + /* If both suffixes are empty, a second pass would return the same thing. */ + bool one_pass_only = (aprefixlen == alen) && (bprefixlen == blen); + + int result = verrevcmp(a, aprefixlen, b, bprefixlen); + + /* Return the initial result if nonzero, or if no second pass is needed. + Otherwise, restore the suffixes and try again. */ + return (result || one_pass_only) ? result : verrevcmp(a, alen, b, blen); +} + +static inline +int filevercmp(const char * s1, const char * s2) +{ + return filenvercmp(s1, -1, s2, -1); +} + +int coreutils_version_sort(char const * a, char const * b) +{ + return filevercmp(a, b); +} diff --git a/coreutils-sort.hh b/coreutils-sort.hh new file mode 100644 index 0000000..4fb287f --- /dev/null +++ b/coreutils-sort.hh @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: Apache-2.0 + * (c) 2025, Konstantin Demin + */ + +#ifndef INCLUDE_COREUTILS_SORT_HH +#define INCLUDE_COREUTILS_SORT_HH 1 + +int coreutils_version_sort(char const * a, char const * b); + +#endif /* INCLUDE_COREUTILS_SORT_HH */ diff --git a/main.cc b/main.cc new file mode 100644 index 0000000..10c04f8 --- /dev/null +++ b/main.cc @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: Apache-2.0 + * (c) 2025, Konstantin Demin + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include + +extern "C" { + #include +} + +#include "overlay.hh" + +static +void usage(int retcode = 0) +{ + static const char usage_msg[] = + "overlaydirs 0.0.1\n" + "Usage:\n" + " overlaydirs --help\n" + " show this message\n" + " overlaydirs --list [--no-sort|--zero] [..]\n" + // TODO: shell-escape mode + /* " overlaydirs --list [--no-sort|--zero|--escape] [..]\n" */ + " list entries\n" + " overlaydirs --merge [..]\n" + " symlinks entries into \n" + "\n" + " --no-sort - don't sort entries\n" + " --zero - separate entries with NUL instead of LF\n" + // TODO: shell-escape mode + /* + " 
--escape - shell-escape strings\n" + "\n" + "Notes:\n" + " - flags \"--zero\" and \"--escape\" are mutually exclusive.\n" + */ + ; + + (void) write(STDERR_FILENO, usage_msg, sizeof(usage_msg)); + + exit(retcode); +} + +static int main_list(int argc, char * argv[]); +static int main_merge(int argc, char * argv[]); + +int main(int argc, char * argv[]) +{ + if (argc < 2) usage(0); + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) + usage(0); + else if (strcmp(argv[1], "--list") == 0) + return main_list(argc, argv); + else if (strcmp(argv[1], "--merge") == 0) + return main_merge(argc, argv); + else + usage(EINVAL); + + return 0; +} + +static int main_list(int argc, char * argv[]) +{ + if (argc < 3) usage(EINVAL); + + print_mode print_m = print_mode::_default; + sort_mode sort_m = sort_mode::_default; + + int arg_start = 2; + while (arg_start < argc) { + char * _arg = argv[arg_start]; + + if (strncmp(_arg, "--", 2) != 0) break; + + if (strcmp(_arg, "--") == 0) { + arg_start++; + break; + } + else if (strcmp(_arg, "--no-sort") == 0) { + arg_start++; + if (sort_m != sort_mode::_default) { + (void) fprintf(stderr, "overlaydirs: no-sort mode already set\n"); + } + sort_m = sort_mode::none; + } + else if (strcmp(_arg, "--zero") == 0) { + arg_start++; + if (print_m != print_mode::_default) { + (void) fprintf(stderr, "overlaydirs: output mode already set\n"); + } + print_m = print_mode::zero; + } + // TODO + /* + else if (strcmp(_arg, "--escape") == 0) { + arg_start++; + if (print_m != print_mode::_default) { + (void) fprintf(stderr, "overlaydirs: output mode already set\n"); + } + print_m = print_mode::shell_escape; + } + */ + else { + (void) fprintf(stderr, "overlaydirs: unknown option \"%s\"\n", _arg); + // nevertheless, continue and try argument as a directory + break; + } + } + + auto roots = std::vector(); + for (int i = arg_start; i < argc; i++) { + ovl_dirspec_t t; + if (!arg_to_rootspec(argv[i], &t, ovl_rootspec_flags::relative)) continue; + 
roots.push_back(t); + } + if (roots.empty()) { + (void) fprintf(stderr, "overlaydirs: no usable sources found\n"); + return EINVAL; + } + + auto ovl = process_overlay(roots, sort_m); + // not needed anymore + roots.clear(); + roots.shrink_to_fit(); + + list_overlay(ovl, print_m); + return 0; +} + +static int main_merge(int argc, char * argv[]) +{ + if (argc < 4) usage(EINVAL); + + ovl_dirspec_t target; + if (!arg_to_rootspec(argv[2], &target, ovl_rootspec_flags::relative)) { + (void) fprintf(stderr, "overlaydirs: not a directory: \"%s\"\n", argv[2]); + return EINVAL; + } + if (!is_empty_dir(target.fd)) { + (void) fprintf(stderr, "overlaydirs: target directory is not empty\n"); + return EEXIST; + } + + auto roots = std::vector(); + for (int i = 3; i < argc; i++) { + ovl_dirspec_t t; + if (!arg_to_rootspec(argv[i], &t)) continue; + roots.push_back(t); + } + if (roots.empty()) { + (void) fprintf(stderr, "overlaydirs: no usable sources found\n"); + return EINVAL; + } + + auto ovl = process_overlay(roots); + // not needed anymore + roots.clear(); + roots.shrink_to_fit(); + + int rv = merge_overlay(ovl, target); + if (rv == -1) { + (void) fprintf(stderr, "overlaydirs: target directory is not empty\n"); + return EEXIST; + } + + return rv; +} diff --git a/overlay-common.hh b/overlay-common.hh new file mode 100644 index 0000000..53b61e1 --- /dev/null +++ b/overlay-common.hh @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: Apache-2.0 + * (c) 2025, Konstantin Demin + */ + +#ifndef INCLUDE_OVERLAY_COMMON_HH +#define INCLUDE_OVERLAY_COMMON_HH 1 + +#include + +extern "C" { + #include +} + +constexpr unsigned int ovl_name_max = sizeof(dirent::d_name); +constexpr unsigned int ovl_path_max = (PATH_MAX); +/* +constexpr unsigned int ovl_path_max = +#ifdef PATH_MAX + #if (PATH_MAX > 0) && (PATH_MAX <= 8192) + (PATH_MAX); + #else + 8192; + #endif +#else + 4096; +#endif +*/ + +constexpr unsigned int ovl_path_name_ratio = ovl_path_max / ovl_name_max; +constexpr unsigned int ovl_depth_max = 
ovl_path_name_ratio - 1; + +struct ovl_name_t { + size_t len; + char str[ovl_name_max]; +}; + +struct ovl_path_t { + size_t len; + char str[ovl_path_max]; +}; +typedef std::vector ovl_path_vec; + +#endif /* INCLUDE_OVERLAY_COMMON_HH */ diff --git a/overlay.cc b/overlay.cc new file mode 100644 index 0000000..2db9d41 --- /dev/null +++ b/overlay.cc @@ -0,0 +1,842 @@ +/* SPDX-License-Identifier: Apache-2.0 + * (c) 2025, Konstantin Demin + */ + +#include "overlay.hh" + +#include +#include +#include +#include +#include + +extern "C" { + #include + #include + #include + + #include + #include + + #include +} + +struct x_dirspec_t { + dev_t dev; + ino_t ino; + int fd; +}; +typedef std::vector dirspec_vec; + +typedef std::set> dev_ino_set; + +struct x_entry_t { + int32_t root_id; + uint32_t type; + xxhash_t name_hash; + dev_t dev; + ino_t ino; + ovl_name_t name; +}; + +static +size_t procfs_fd2name(int fd, char * buffer, size_t size); + +template +static inline +bool set_contains(const std::set & set, const T & needle); + +static inline +bool filter_out_dots(const char * name); + +static inline +bool handle_file_type(int type, bool allow_symlinks = false); + +static inline +ovl_entry_kind dirent_to_entry_kind(typeof(dirent::d_type) d_type); + +bool arg_to_rootspec(const char * arg, ovl_dirspec_t * rootspec, uint32_t flags) +{ + if ((!arg) || (!rootspec)) return false; + if (!(arg[0])) return false; + + int fd, fd_real; + fd = open(arg, O_DIRECTORY | O_RDONLY); + // here is relatively small window for race/TOCTOU + fd_real = open(arg, O_PATH | O_NOFOLLOW | O_RDONLY); + + if (fd < 0) { + if (fd_real >= 0) (void) close(fd_real); + return false; + } + + (void) memset(rootspec, 0, sizeof(ovl_dirspec_t)); + + do { + struct stat st; + (void) memset(&st, 0, sizeof(st)); + if (fstat(fd, &st) != 0) break; + + if (!S_ISDIR(st.st_mode)) break; + + rootspec->dev = st.st_dev; + rootspec->ino = st.st_ino; + + rootspec->fd = fd; + rootspec->root.len = procfs_fd2name((fd_real < 0) ? 
fd : fd_real, rootspec->root.str, ovl_path_max); + } while (false); + + if ((fd_real >= 0) && (fd_real != fd)) { + (void) close(fd_real); + } + + // basic sanity checks: + // - path length is non-zero + // - either dev_t or ino_t are non-zero + if ((!rootspec->root.len) || ((!rootspec->dev) && (!rootspec->ino))) { + // not needed anymore + (void) close(fd); + + (void) memset(rootspec, 0, sizeof(ovl_dirspec_t)); + return false; + } + + // try to adjust/shorten path against current working directory + do { + bool relative = !!(flags & ovl_rootspec_flags::relative); + if (!relative) break; + + ovl_path_t cwd; + (void) memset(&cwd, 0, sizeof(cwd)); + + if (!getcwd(cwd.str, ovl_path_max)) break; + cwd.len = strnlen(cwd.str, ovl_path_max); + if ((!cwd.len) || (cwd.len == ovl_path_max)) break; + + // cwd is root - nothing to do here + if ((cwd.len == 1) && (cwd.str[0] == '/')) break; + + // ensure that rootpath has prefix of cwd + "/" + if (cwd.len >= rootspec->root.len) break; + if (strncmp(rootspec->root.str, cwd.str, cwd.len) != 0) break; + if (rootspec->root.str[cwd.len] != '/') break; + + // "cwd" isn't needed anymore so reuse it as temporary container + (void) memset(cwd.str, 0, cwd.len); + size_t prefix_len = cwd.len + 1; + cwd.len = rootspec->root.len - prefix_len; + (void) memcpy(cwd.str, &rootspec->root.str[prefix_len], cwd.len); + // finally replace with adjusted value + (void) memcpy(&rootspec->root, &cwd, sizeof(ovl_path_t)); + } while (false); + + return true; +} + +bool is_empty_dir(int dirfd) +{ + // internal method, parameters are already checked + /* if (dirfd < 0) return false; */ + + // reset handle to beginning + (void) lseek(dirfd, 0, SEEK_SET); + + int fd_dir = dup(dirfd); + if (fd_dir < 0) return false; + + DIR * p_dir = fdopendir(fd_dir); + if (!p_dir) { + (void) close(fd_dir); + return false; + } + + // read current directory entries + bool empty = true; + for (struct dirent * dent = nullptr; (dent = readdir(p_dir)) != nullptr; ) { + if 
(!filter_out_dots(dent->d_name)) continue; + empty = false; + break; + } + + // rewind directory + rewinddir(p_dir); + (void) lseek(fd_dir, 0, SEEK_SET); + + // close handles + (void) closedir(p_dir); + // handle was reowned [by libc] via fdopendir() but why not to try to close it [again] + (void) close(fd_dir); + + return empty; +} + +class overlay_processor { + sort_mode _sort; + unsigned int _depth; + ovl_dirspec_vec _roots; + + // we should report problems when they arise but there's common problem: + // name is relative to almost unknown path and we don't know the full path. + // furthermore we won't track path due to performance reasons. + ovl_entry_vec _impl(const dirspec_vec & paths, unsigned int depth) { + ovl_entry_vec rv = ovl_entry_vec(); + + if (!depth) return rv; + + int n_usable_paths = 0; + for (const auto & p : paths) { + if (p.fd < 0) continue; + n_usable_paths++; + } + if (!n_usable_paths) return rv; + + bool multiple_layers = n_usable_paths > 1; + + auto seen_paths = dev_ino_set(); + auto dirs = std::list(); + auto nondirs = std::list(); + + for (size_t i = 0; i < paths.size(); i++) { + const auto & p = paths[i]; + if (p.fd < 0) continue; + + if (multiple_layers) { + auto needle = std::make_tuple(p.dev, p.ino); + if (set_contains(seen_paths, needle)) { + continue; + } + (void) seen_paths.emplace(needle); + } + + // reset handle to beginning + (void) lseek(p.fd, 0, SEEK_SET); + + int fd_dir = dup(p.fd); + if (fd_dir < 0) { + (void) fprintf(stderr, "overlaydirs: dup(%d): %s\n", p.fd, strerror(errno)); + continue; + } + + DIR * p_dir = fdopendir(fd_dir); + if (!p_dir) { + (void) fprintf(stderr, "overlaydirs: fdopendir(%d): %s\n", fd_dir, strerror(errno)); + (void) close(fd_dir); + continue; + } + + // read current directory entries + auto ent_all = std::vector(); + for (struct dirent * dent = nullptr; (dent = readdir(p_dir)) != nullptr; ) { + if (!filter_out_dots(dent->d_name)) continue; + if (!handle_file_type(dent->d_type)) continue; + + 
x_entry_t t; + (void) memset(&t, 0, sizeof(t)); + + // 1. only basic info is filled - name and type + // 2. directories and symlinks are handled later + // 3. dev_t and ino_t are relevant only for directories + + t.name.len = strnlen(dent->d_name, sizeof(dirent::d_name)); + if (t.name.len == sizeof(dirent::d_name)) { + // TODO: report warning about too long name + // (see function' comment) + continue; + } + + t.type = dent->d_type; + (void) memcpy(t.name.str, dent->d_name, t.name.len); + + ent_all.push_back(t); + } + + // rewind directory + rewinddir(p_dir); + (void) lseek(fd_dir, 0, SEEK_SET); + + // close handles + (void) closedir(p_dir); /* p_dir = nullptr; */ + // handle was reowned [by libc] via fdopendir() but why not to try to close it [again] + (void) close(fd_dir); /* fd_dir = -1; */ + + // process entries + auto hash_skip = std::set(); + auto ent_dirs = std::list(); + auto ent_nondirs = std::list(); + for (auto & x : ent_all) { + bool is_skip = false; + do { + if (x.name.len < 2) break; + /* if (strncmp(&x.name.name[x.name.len - 2], ".-", 2) != 0) break; */ + if (x.name.str[x.name.len - 2] != '.') break; + if (x.name.str[x.name.len - 1] != '-') break; + + // current entry is "skip" entry + is_skip = true; + } while (false); + if (is_skip) { + // skip file is found but we're not using them + if (!multiple_layers) continue; + + // don't use "bare skip" entries (e.g. 
named exactly ".-") + if (x.name.len == 2) continue; + + (void) hash_skip.emplace(xxhash(x.name.str, x.name.len - 2)); + continue; + } + + if ((x.type == DT_DIR) || (x.type == DT_LNK)) { + struct stat st; + (void) memset(&st, 0, sizeof(st)); + + if (fstatat(p.fd, x.name.str, &st, 0) != 0) { + // TODO: report warning about unavailable entry + // (see function' comment) + continue; + } + + // handle (new) entry type + typeof(x_entry_t::type) new_type = IFTODT(st.st_mode); + if ((x.type == DT_DIR) && (new_type != DT_DIR)) { + // TODO: report warning about inconsistent entry type + // (see function' comment) + // PS: this is probably a bug + continue; + } + + // skip dangling symlinks along with unknown file types + if (!handle_file_type(new_type, true)) { + // TODO: report warning about unapplicable entry type + // (see function' comment) + continue; + } + + // adjust entry type + x.type = new_type; + + if (x.type == DT_DIR) { + if (depth > 1) { + // we should track precise dev_t and ino_t for directories + x.dev = st.st_dev; + x.ino = st.st_ino; + } + else { + // end-of-depth directories are handled as regular files + x.type = DT_REG; + } + } + } + + x.root_id = i; + x.name_hash = xxhash(x.name.str, x.name.len); + + if (x.type == DT_DIR) { + ent_dirs.push_back(x); + } else { + ent_nondirs.push_back(x); + } + } + // not needed anymore + ent_all.clear(); + ent_all.shrink_to_fit(); + + // remove "skip" entries + if (!hash_skip.empty()) { + nondirs.remove_if( + [&hash_skip](const x_entry_t & x) -> bool { + return set_contains(hash_skip, x.name_hash); + } + ); + + dirs.remove_if( + [&hash_skip](const x_entry_t & x) -> bool { + return set_contains(hash_skip, x.name_hash); + } + ); + + // not needed anymore + hash_skip.clear(); + } + + // "hash_skip" is now empty and doesn't contain exactly "skip" entries + // but is going to be reused further + + // override all entries with current "non-dir" entries + if (!ent_nondirs.empty()) { + for (const auto & x : ent_nondirs) { + 
(void) hash_skip.emplace(x.name_hash); + } + + nondirs.remove_if( + [&hash_skip](const x_entry_t & x) -> bool { + return set_contains(hash_skip, x.name_hash); + } + ); + + dirs.remove_if( + [&hash_skip](const x_entry_t & x) -> bool { + return set_contains(hash_skip, x.name_hash); + } + ); + + // not needed anymore + hash_skip.clear(); + } + + // override "non-dir" entries with "dir" entries + if (!ent_dirs.empty()) { + for (const auto & x : ent_dirs) { + (void) hash_skip.emplace(x.name_hash); + } + + nondirs.remove_if( + [&hash_skip](const x_entry_t & x) -> bool { + return set_contains(hash_skip, x.name_hash); + } + ); + + // not needed anymore + hash_skip.clear(); + } + + // merge current entries with accumulated "all" entries + for (const auto & x : ent_nondirs) { + nondirs.push_back(x); + } + ent_nondirs.clear(); + for (const auto & x : ent_dirs) { + dirs.push_back(x); + } + ent_dirs.clear(); + } + + // sort "dir" entries + if (_sort != sort_mode::none) { + dirs.sort( + [this](const x_entry_t & a, const x_entry_t & b) -> bool { + if (a.name_hash == b.name_hash) return a.root_id < b.root_id; + + return custom_sort(a.name.str, b.name.str, _sort) < 0; + } + ); + } + + // merge entries + auto ent_all = std::list(); + for (const auto & x : dirs) { + ent_all.push_back(x); + } + for (const auto & x : nondirs) { + ent_all.push_back(x); + } + // not needed anymore + nondirs.clear(); + + // sort entries + if (_sort != sort_mode::none) { + ent_all.sort( + [this](const x_entry_t & a, const x_entry_t & b) -> bool { + // in case of directory entries + if (a.name_hash == b.name_hash) return a.root_id < b.root_id; + + return custom_sort(a.name.str, b.name.str, _sort) < 0; + } + ); + } + + auto seen_dirs = std::set(); + for (const auto & x : ent_all) { + if (x.root_id < 0) continue; + + ovl_entry_kind etype = dirent_to_entry_kind(x.type); + if (etype == ovl_entry_kind::unknown) continue; + if (etype == ovl_entry_kind::directory) { + // skip if directory with the same name is 
already handled + if (set_contains(seen_dirs, x.name_hash)) continue; + (void) seen_dirs.emplace(x.name_hash); + } + + ovl_entry_t t; + t.type = etype; + t.root_id = x.root_id; + t.name_hash = x.name_hash; + t.name.len = x.name.len; + (void) memcpy(t.name.str, x.name.str, t.name.len); + + if (t.type == ovl_entry_kind::directory) { + // root_id is meaningless for directories + t.root_id = 0; + + auto child_paths = dirspec_vec(); + for (size_t i = 0; i < paths.size(); i++) { + child_paths.push_back(x_dirspec_t{ .dev = 0, .ino = 0, .fd = -1 }); + } + + for (const auto & x : dirs) { + if (x.name_hash != t.name_hash) continue; + + int fd = openat(paths[x.root_id].fd, t.name.str, O_DIRECTORY | O_RDONLY); + if (fd < 0) { + // TODO: report warning about unsuccessful openat() + // (see function' comment) + continue; + } + + child_paths[x.root_id].dev = x.dev; + child_paths[x.root_id].ino = x.ino; + child_paths[x.root_id].fd = fd; + } + + t.children = _impl(child_paths, depth - 1); + + for (const auto & x : child_paths) { + if (x.fd < 0) continue; + (void) close(x.fd); + } + child_paths.clear(); + child_paths.shrink_to_fit(); + } + + rv.push_back(t); + } + // not needed anymore + dirs.clear(); + + return rv; + } + +public: + overlay_processor(const ovl_dirspec_vec & roots, unsigned int depth, sort_mode sort) { + _roots = ovl_dirspec_vec(); + for (const auto & r : roots) { + _roots.push_back(r); + } + _sort = sort; + _depth = (depth > ovl_depth_max) ? 
ovl_depth_max : depth; + } + + ovl_result run(void) { + auto rv = ovl_result(); + + auto paths = dirspec_vec(); + for (const auto & r : _roots) { + rv.roots.push_back(r.root); + paths.push_back(x_dirspec_t{ .dev = r.dev, .ino = r.ino, .fd = r.fd }); + } + rv.entries = _impl(paths, _depth); + return rv; + } +}; + +ovl_result process_overlay(ovl_dirspec_vec & roots, sort_mode sort, unsigned int depth) +{ + auto rv = ovl_result(); + if (!depth) return rv; + if (roots.empty()) return rv; + + rv = overlay_processor(roots, depth, sort).run(); + + // close all open file descriptors + for (auto & r : roots) { + (void) close(r.fd); + r.fd = -1; + } + + return rv; +} + +class overlay_printer { + print_mode _print; + ovl_result _ovl; + + void _impl(const ovl_entry_vec & entries, const ovl_path_t * subpath = nullptr) { + for (const auto & e : entries) { + switch (e.type) { + case ovl_entry_kind::file: + print_path(_print, &(_ovl.roots[e.root_id]), subpath, &(e.name)); + break; + + case ovl_entry_kind::directory: + // NOT printing directory name + + ovl_path_t n_subpath; + (void) memset(&n_subpath, 0, sizeof(n_subpath)); + if (subpath) { + n_subpath.len = subpath->len + 1 + e.name.len; + if (n_subpath.len >= ovl_path_max) { + (void) fprintf(stderr, "subpath too long: /%s/%s/\n", subpath->str, e.name.str); + continue; + } + + /* (void) snprintf(n_subpath.str, ovl_path_max, "%s/%s", subpath->str, e.name.str); */ + size_t off = 0; + (void) memcpy(n_subpath.str, subpath->str, subpath->len); + off = subpath->len; + n_subpath.str[off++] = '/'; + (void) memcpy(n_subpath.str + off, e.name.str, e.name.len); + } else { + // ovl_name_max is definitely smaller than ovl_path_max + + n_subpath.len = e.name.len; + (void) memcpy(n_subpath.str, e.name.str, e.name.len); + } + + _impl(e.children, &n_subpath); + break; + + default: + break; + } + } + } + +public: + overlay_printer(const ovl_result & ovl, print_mode print) { + _ovl = ovl; + _print = print; + } + + void run(void) { + 
_impl(_ovl.entries); + } +}; + +void list_overlay(const ovl_result & overlay, print_mode print) +{ + overlay_printer(overlay, print).run(); +} + +class overlay_merger { + ovl_result _ovl; + ovl_dirspec_t _target; + + int _impl(const ovl_entry_vec & entries, int dirfd, const ovl_path_t * subpath = nullptr) { + int rv = 0; + + int new_fd; + ovl_path_t t; + for (const auto & e : entries) { + switch (e.type) { + case ovl_entry_kind::file: + (void) memset(&t, 0, sizeof(t)); + + if (subpath) { + t.len = _ovl.roots[e.root_id].len + 1 + subpath->len + 1 + e.name.len; + if (t.len >= ovl_path_max) { + (void) fprintf(stderr, "name too long: %s/%s/%s\n", _ovl.roots[e.root_id].str, subpath->str, e.name.str); + return ENAMETOOLONG; + } + + /* (void) snprintf(t.str, ovl_path_max, "%s/%s/%s\n", _ovl.roots[e.root_id].str, subpath->str, e.name.str); */ + size_t off = 0; + (void) memcpy(t.str, _ovl.roots[e.root_id].str, _ovl.roots[e.root_id].len); + off = _ovl.roots[e.root_id].len; + t.str[off++] = '/'; + (void) memcpy(t.str + off, subpath->str, subpath->len); + off += subpath->len; + t.str[off++] = '/'; + (void) memcpy(t.str + off, e.name.str, e.name.len); + } else { + t.len = _ovl.roots[e.root_id].len + 1 + e.name.len; + if (t.len >= ovl_path_max) { + (void) fprintf(stderr, "name too long: %s/%s\n", _ovl.roots[e.root_id].str, e.name.str); + return ENAMETOOLONG; + } + + /* (void) snprintf(t.str, ovl_path_max, "%s/%s\n", _ovl.roots[e.root_id].str, e.name.str); */ + size_t off = 0; + (void) memcpy(t.str, _ovl.roots[e.root_id].str, _ovl.roots[e.root_id].len); + off = _ovl.roots[e.root_id].len; + t.str[off++] = '/'; + (void) memcpy(t.str + off, e.name.str, e.name.len); + } + + if (symlinkat(t.str, dirfd, e.name.str) < 0) { + rv = errno; + if (subpath) { + (void) fprintf(stderr, "symlinkat() error: \"%s\" with %s/%s/%s -> %s\n", strerror(rv), _target.root.str, subpath->str, e.name.str, t.str); + } else { + (void) fprintf(stderr, "symlinkat() error: \"%s\" with %s/%s -> %s\n", 
strerror(rv), _target.root.str, e.name.str, t.str); + } + return rv; + } + break; + + case ovl_entry_kind::directory: + (void) memset(&t, 0, sizeof(t)); + + if (subpath) { + t.len = subpath->len + 1 /* "/" */ + e.name.len; + if (t.len >= ovl_path_max) { + (void) fprintf(stderr, "subpath too long: /%s/%s/\n", subpath->str, e.name.str); + return ENAMETOOLONG; + } + + /* (void) snprintf(t.str, ovl_path_max, "%s/%s", subpath->str, e.name.str); */ + (void) memcpy(t.str, subpath->str, subpath->len); + t.str[subpath->len] = '/'; + (void) memcpy(t.str + subpath->len + 1, e.name.str, e.name.len); + } else { + // ovl_name_max is definitely smaller than ovl_path_max + + t.len = e.name.len; + (void) memcpy(t.str, e.name.str, e.name.len); + } + + if (mkdirat(dirfd, e.name.str, 0755) < 0) { + rv = errno; + if (subpath) { + (void) fprintf(stderr, "mkdirat() error: \"%s\" with path %s/%s/%s\n", strerror(rv), _target.root.str, subpath->str, e.name.str); + } else { + (void) fprintf(stderr, "mkdirat() error: \"%s\" with path %s/%s\n", strerror(rv), _target.root.str, e.name.str); + } + return rv; + } + + new_fd = openat(dirfd, e.name.str, O_DIRECTORY | O_RDONLY); + if (new_fd < 0) { + rv = errno; + if (subpath) { + (void) fprintf(stderr, "openat() error: \"%s\" with path %s/%s/%s\n", strerror(rv), _target.root.str, subpath->str, e.name.str); + } else { + (void) fprintf(stderr, "openat() error: \"%s\" with path %s/%s\n", strerror(rv), _target.root.str, e.name.str); + } + return rv; + } + + rv = _impl(e.children, new_fd, &t); + (void) close(new_fd); + + if (rv != 0) return rv; + + break; + + default: + break; + } + } + + return rv; + } + +public: + overlay_merger(const ovl_result & ovl, const ovl_dirspec_t & target) { + _ovl = ovl; + _target = target; + } + + int run(void) { + return _impl(_ovl.entries, _target.fd); + } +}; + +int merge_overlay(const ovl_result & overlay, const ovl_dirspec_t & target) +{ + if (!is_empty_dir(target.fd)) return -1; + + return overlay_merger(overlay, 
target).run(); +} + +static +size_t procfs_fd2name(int fd, char * buffer, size_t size) +{ + // internal method, parameters are already checked + /* if ((fd < 0) || (!buffer) || (!size)) return 0; */ + + // "/proc/self/fd/" - 14, "%d" - up to 10 + char procfs_link[32]; + (void) memset(procfs_link, 0, sizeof(procfs_link)); + (void) snprintf(procfs_link, sizeof(procfs_link) - 1, "/proc/self/fd/%d", fd); + + (void) memset(buffer, 0, size); + ssize_t x = readlink(procfs_link, buffer, size - 1); + x = (x > 0) ? x : 0; + // not really needed + buffer[x] = 0; + + // already clamped + return (size_t) x; +} + +#ifndef HAVE_CPP_SET_CONTAINS + #ifdef __cplusplus + #if __cplusplus >= 202002L + #define HAVE_CPP_SET_CONTAINS 1 + #endif + #endif +#endif + +#ifndef HAVE_CPP_SET_CONTAINS + #define HAVE_CPP_SET_CONTAINS 0 +#endif + +template +static inline +bool set_contains(const std::set & set, const T & needle) +{ +#if HAVE_CPP_SET_CONTAINS + return set.contains(needle); +#else + return (set.find(needle) != set.cend()); +#endif +} + +static inline +bool filter_out_dots(const char * name) +{ + /* + if (strcmp(name, ".") == 0) return false; + if (strcmp(name, "..") == 0) return false; + return true; + */ + + if (name[0] != '.') return true; + switch (name[1]) { + case 0: return false; + case '.': return (name[2] != 0); + } + return true; +} + +static inline +bool handle_file_type(int type, bool allow_symlinks) +{ + switch (type) { + // common file types + case DT_REG: /* -fallthrough */ + case DT_DIR: /* -fallthrough */ + + // less common types (but also allowed) + case DT_BLK: /* -fallthrough */ + case DT_CHR: /* -fallthrough */ + case DT_FIFO: /* -fallthrough */ + case DT_SOCK: + return true; + + // symlinks should be handled separately! 
+    case DT_LNK:
+        return !allow_symlinks;
+
+    default:
+        return false;
+    }
+}
+
+static inline
+ovl_entry_kind dirent_to_entry_kind(decltype(dirent::d_type) d_type) {
+    switch (d_type) {
+    case DT_DIR:
+        return ovl_entry_kind::directory;
+
+    case DT_REG: /* -fallthrough */
+    case DT_BLK: /* -fallthrough */
+    case DT_CHR: /* -fallthrough */
+    case DT_FIFO: /* -fallthrough */
+    case DT_SOCK:
+        return ovl_entry_kind::file;
+
+    // symlinks should be already handled! (every other d_type value maps to "unknown" below)
+
+    }
+    return ovl_entry_kind::unknown;
+}
diff --git a/overlay.hh b/overlay.hh
new file mode 100644
index 0000000..103160f
--- /dev/null
+++ b/overlay.hh
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * (c) 2025, Konstantin Demin
+ */
+
+#ifndef INCLUDE_OVERLAY_HH
+#define INCLUDE_OVERLAY_HH 1
+
+#include <vector>  // NOTE(review): header name lost in extraction; <vector> assumed from the std::vector typedefs
+
+extern "C" {
+  #include <sys/types.h>  // NOTE(review): header name lost in extraction; assumed from dev_t / ino_t - confirm
+}
+
+#include "overlay-common.hh"
+
+#include "print.hh"
+#include "sort.hh"
+#include "xxhash.hh"
+
+struct ovl_dirspec_t {
+    dev_t dev;
+    ino_t ino;
+    int fd;
+    ovl_path_t root;
+};
+typedef std::vector<ovl_dirspec_t> ovl_dirspec_vec;
+
+enum ovl_rootspec_flags : uint32_t {
+    none = 0,
+
+    // relative path (if possible)
+    relative = (1 << 0),
+};
+
+// forward declaration
+struct ovl_entry_t;
+typedef std::vector<ovl_entry_t> ovl_entry_vec;
+
+enum struct ovl_entry_kind : uint32_t {
+    unknown = 0,
+
+    // entry may have its own entries
+    directory,
+
+    // entry may NOT have its own entries
+    // this applies to:
+    // - regular files
+    // - directories at maximum depth (!)
+    // - block and character devices
+    // - pipes (FIFO)
+    // - sockets
+    file,
+};
+
+struct ovl_entry_t {
+    ovl_entry_kind type;
+    int32_t root_id;
+    xxhash_t name_hash;
+    ovl_entry_vec children;
+    ovl_name_t name;
+
+    // a fresh entry: kind "unknown", root_id -1, zero hash, no children, zeroed name
+    ovl_entry_t()
+        : type(ovl_entry_kind::unknown), root_id(-1), name_hash(0), children()
+    {
+        // "name" is cleared as raw bytes
+        (void) memset(&name, 0, sizeof(name));
+    }
+};
+
+struct ovl_result {
+    ovl_path_vec roots;
+    ovl_entry_vec entries;
+
+    // both containers start out empty
+    ovl_result()
+        : roots(), entries()
+    {}
+};
+
+bool arg_to_rootspec(const char * arg, ovl_dirspec_t * path, uint32_t flags = ovl_rootspec_flags::none);
+
+bool is_empty_dir(int dirfd);
+
+ovl_result process_overlay(ovl_dirspec_vec & roots, sort_mode sort = sort_mode::none, unsigned int depth = ovl_depth_max);
+
+void list_overlay(const ovl_result & overlay, print_mode print = print_mode::normal);
+int merge_overlay(const ovl_result & overlay, const ovl_dirspec_t & target);
+
+#endif /* INCLUDE_OVERLAY_HH */
diff --git a/print.cc b/print.cc
new file mode 100644
index 0000000..75a136a
--- /dev/null
+++ b/print.cc
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * (c) 2025, Konstantin Demin
+ */
+
+#include "print.hh"
+
+#include <cstdio>  // NOTE(review): header name lost in extraction; <cstdio> assumed from fprintf()/stdout use
+#include  // NOTE(review): header name lost in extraction; restore from upstream
+
+extern "C" {
+  #include  // NOTE(review): header name lost in extraction; restore from upstream
+}
+
+static
+void print_simple(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name);
+
+// TODO: shell-escape mode
+/*
+static
+void print_escape(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name);
+*/
+
+void print_path(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name)
+{
+    // "root" and "name" are mandatory ("subpath" may be NULL); dispatches on "mode"
+    /* if ((!root) || (!name)) return; */
+
+    switch (mode) {
+    case print_mode::normal: /* -fallthrough */
+    case print_mode::zero:
+        print_simple(mode, root, subpath, name);
+        return;
+    // TODO: shell-escape mode
+    /*
+    case print_mode::shell_escape:
+        print_escape(mode, root,
subpath, name);
+        return;
+    */
+    }
+}
+
+static
+void print_simple(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name)
+{
+    // internal method, parameters are already checked ("root" and "name" are non-NULL)
+    /* if ((!root) || (!name)) return; */
+
+    char eol = (mode == print_mode::zero) ? 0 : '\n';  // "zero" mode terminates each path with a NUL byte
+    if (subpath) {
+        (void) fprintf(stdout, "%s/%s/%s%c", root->str, subpath->str, name->str, eol);
+    } else {
+        (void) fprintf(stdout, "%s/%s%c", root->str, name->str, eol);
+    }
+}
+
+// TODO: shell-escape mode
+/*
+static
+void print_escape(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name)
+{
+}
+*/
diff --git a/print.hh b/print.hh
new file mode 100644
index 0000000..b985684
--- /dev/null
+++ b/print.hh
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * (c) 2025, Konstantin Demin
+ */
+
+#ifndef INCLUDE_LIST_MODE_HH
+#define INCLUDE_LIST_MODE_HH 1
+
+#include "overlay-common.hh"
+
+enum struct print_mode : unsigned int {
+    normal = 0,
+    zero,
+
+    // TODO
+    /* shell_escape, */
+
+    _default = normal,
+};
+
+void print_path(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name);
+
+#endif /* INCLUDE_LIST_MODE_HH */
diff --git a/sort.cc b/sort.cc
new file mode 100644
index 0000000..f3c1865
--- /dev/null
+++ b/sort.cc
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * (c) 2025, Konstantin Demin
+ */
+
+#include "sort.hh"
+
+#include <cstring>  // NOTE(review): header name lost in extraction; <cstring> assumed from strcmp()/strverscmp() use
+
+#include "coreutils-sort.hh"
+
+int custom_sort(const char * a, const char * b, sort_mode mode)
+{
+    switch (mode) {
+    case sort_mode::glibc_version_sort:
+        return strverscmp(a, b);
+    case sort_mode::coreutils_version_sort:
+        return coreutils_version_sort(a, b);
+
+    // not an actual version sort: every remaining mode falls back to strcmp()
+
+    /*
+    case sort_mode::simple_sort:
+        return strcmp(a, b);
+    */
+
+    default:
+        return strcmp(a, b);
+    }
+}
diff --git a/sort.hh b/sort.hh
new file mode 100644
index 0000000..1b7d665
--- /dev/null
+++ b/sort.hh
@@ -0,0 +1,26 @@
+/*
SPDX-License-Identifier: Apache-2.0
+ * (c) 2025, Konstantin Demin
+ */
+
+#ifndef INCLUDE_SORT_MODE_HH
+#define INCLUDE_SORT_MODE_HH 1
+
+enum struct sort_mode : unsigned int {
+    // don't sort entries at all
+    none = 0,
+
+    // sort entries using version sort from coreutils
+    coreutils_version_sort,
+
+    // sort entries using version sort from glibc
+    glibc_version_sort,
+
+    // simplest sort via strcmp()
+    simple_sort,
+
+    _default = coreutils_version_sort,
+};
+
+int custom_sort(const char * a, const char * b, sort_mode mode);
+
+#endif /* INCLUDE_SORT_MODE_HH */
diff --git a/xxhash.cc b/xxhash.cc
new file mode 100644
index 0000000..e69f39f
--- /dev/null
+++ b/xxhash.cc
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * (c) 2025, Konstantin Demin
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#define XXH_IMPLEMENTATION
+
+#include "xxhash.hh"
+
+xxhash_t xxhash(const void * input, size_t len, xxhash_t seed)
+{
+    // Hash "len" bytes at "input" with "seed"; the XXH64 variant is used
+    // unless XXH_NO_LONG_LONG is defined, in which case XXH32 is used.
+    //
+    // The one-shot API returns the same digest as reset/update/digest over
+    // a single buffer and needs no streaming state.
+
+#ifndef XXH_NO_LONG_LONG
+    return XXH64(input, len, seed);
+#else
+    return XXH32(input, len, seed);
+#endif
+}
diff --git a/xxhash.hh b/xxhash.hh
new file mode 100644
index 0000000..b0c6f3c
--- /dev/null
+++ b/xxhash.hh
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * (c) 2025, Konstantin Demin
+ */
+
+#ifndef INCLUDE_XXHASH_HH
+#define INCLUDE_XXHASH_HH 1
+
+#include <climits>  // NOTE(review): header name lost in extraction; <climits> assumed from LONG_MAX / INT_MAX use
+#if LONG_MAX == INT_MAX
+  #define XXH_NO_LONG_LONG
+#endif
+
+// #define XXH_NO_EXTERNC_GUARD
+
+#define XXH_NO_STDLIB
+#define XXH_STATIC_LINKING_ONLY
+
+#define XXH_NO_XXH3
+
+#define XXH_NO_INLINE_HINTS 1
+#define XXH_SIZE_OPT 2
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
+  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    #define XXH_CPU_LITTLE_ENDIAN 1
+  #endif
+#endif
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+  #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+    #define
XXH_CPU_LITTLE_ENDIAN 0
+  #endif
+#endif
+
+#ifdef USE_BUNDLED_XXHASH
+  #include "xxhash.h"
+#else /* !USE_BUNDLED_XXHASH */
+  #include <xxhash.h>  // NOTE(review): header name lost in extraction; system <xxhash.h> assumed as the non-bundled counterpart
+#endif /* USE_BUNDLED_XXHASH */
+
+#ifndef XXH_NO_LONG_LONG
+  typedef XXH64_hash_t xxhash_t;  // a real type alias instead of a macro: scoped and visible to debuggers
+#else
+  typedef XXH32_hash_t xxhash_t;
+#endif
+
+xxhash_t xxhash(const void * input, size_t len, xxhash_t seed = 0);
+
+#endif /* INCLUDE_XXHASH_HH */