1
0

initial commit

This commit is contained in:
2025-05-27 11:36:06 +03:00
commit 4ba42acfea
14 changed files with 1584 additions and 0 deletions

8
.gitignore vendored Normal file
View File

@@ -0,0 +1,8 @@
## local copy
xxhash.h
## build objects
*.o
## build artefact
overlaydirs

40
Makefile Normal file
View File

@@ -0,0 +1,40 @@
#!/usr/bin/make -f
SHELL :=/bin/sh
.SHELLFLAGS :=-efc
MAKEFLAGS +=--no-print-directory
SRC = main.cc coreutils-sort.cc overlay.cc print.cc sort.cc xxhash.cc
CROSS ?=
CC =$(CROSS)gcc
CXX =$(CROSS)g++
STRIP =$(CROSS)strip
CFLAGS_LTO ?=-flto=2 -fuse-linker-plugin -ffat-lto-objects -flto-partition=none
CFLAGS_COMMON ?=-O2 -g -fPIE -fstack-protector-strong
CFLAGS ?=$(CFLAGS_COMMON) $(CFLAGS_LTO)
CPPFLAGS ?=-Wall -Wextra -Werror=format-security -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
CXXFLAGS ?=$(CFLAGS_COMMON) $(CFLAGS_LTO) -std=gnu++20
LDFLAGS ?=-Wl,-z,relro -Wl,-z,now -pie
NO_WARN = attributes class-memaccess unused-function unused-result
CPPFLAGS += $(foreach w,$(NO_WARN),-Wno-$(w))
NO_CXX = rtti exceptions
CXXFLAGS +=$(foreach f,$(NO_CXX),-fno-$(f))
OBJ = $(SRC:.cc=.cc.o)
.DEFAULT: all
.PHONY: all build clean
all build: overlaydirs
%.cc.o: %.cc
$(CXX) -c $(CXXFLAGS) $(CPPFLAGS) -o $@ $^
overlaydirs: $(OBJ)
$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $^ \
$(if $(filter $(RELMODE),1),&& $(STRIP) --strip-debug --strip-unneeded $@)
clean:
rm -f overlaydirs $(OBJ)

163
coreutils-sort.cc Normal file
View File

@@ -0,0 +1,163 @@
/* SPDX-License-Identifier: GPL-3.0-or-later
Origin: coreutils-9.7/lib/filevercmp.c
Copyright (C) 1995 Ian Jackson <iwj10@cus.cam.ac.uk>
Copyright (C) 2001 Anthony Towns <aj@azure.humbug.org.au>
Copyright (C) 2008-2025 Free Software Foundation, Inc.
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <cctype>
#include <climits>
#include <cstddef>
#include <cstdlib>
#include "coreutils-sort.hh"
/* Return the length of a prefix of S that corresponds to the suffix
defined by this extended regular expression in the C locale:
(\.[A-Za-z~][A-Za-z0-9~]*)*$
Use the longest suffix matching this regular expression,
except do not use all of S as a suffix if S is nonempty.
If *LEN is -1, S is a string; set *LEN to S's length.
Otherwise, *LEN should be nonnegative, S is a char array,
and *LEN does not change. */
static
ptrdiff_t file_prefixlen(char const * s, ptrdiff_t * len)
{
size_t n = *len; /* SIZE_MAX if N == -1. */
ptrdiff_t prefixlen = 0;
for (ptrdiff_t i = 0; ; ) {
if ((*len < 0) ? !s[i] : (i == n)) {
*len = i;
return prefixlen;
}
i++;
prefixlen = i;
while ((i + 1 < n) && (s[i] == '.') && (isalpha(s[i + 1]) || (s[i + 1] == '~'))) {
for (i += 2; (i < n) && (isalnum(s[i]) || (s[i] == '~')); i++) {
continue;
}
}
}
}
/* Return a version sort comparison value for S's byte at position POS.
S has length LEN. If POS == LEN, sort before all non-'~' bytes. */
static
int order(char const * s, ptrdiff_t pos, ptrdiff_t len)
{
if (pos == len) return -1;
unsigned char c = s[pos];
if (isdigit(c)) return 0;
else if (isalpha(c)) return c;
else if (c == '~') return -2;
else
{
static_assert (UCHAR_MAX <= (INT_MAX - 1 - 2) / 2);
return c + UCHAR_MAX + 1;
}
}
/* slightly modified verrevcmp function from dpkg
S1, S2 - compared char array
S1_LEN, S2_LEN - length of arrays to be scanned
This implements the algorithm for comparison of version strings
specified by Debian and now widely adopted. The detailed
specification can be found in the Debian Policy Manual in the
section on the 'Version' control field. This version of the code
implements that from s5.6.12 of Debian Policy v3.8.0.1
https://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version */
static
int verrevcmp(const char * s1, ptrdiff_t s1_len, const char * s2, ptrdiff_t s2_len)
{
ptrdiff_t s1_pos = 0;
ptrdiff_t s2_pos = 0;
while (s1_pos < s1_len || s2_pos < s2_len) {
int first_diff = 0;
while (((s1_pos < s1_len) && !isdigit(s1[s1_pos])) || ((s2_pos < s2_len) && !isdigit(s2[s2_pos]))) {
int s1_c = order(s1, s1_pos, s1_len);
int s2_c = order(s2, s2_pos, s2_len);
if (s1_c != s2_c) return s1_c - s2_c;
s1_pos++;
s2_pos++;
}
while ((s1_pos < s1_len) && (s1[s1_pos] == '0')) {
s1_pos++;
}
while ((s2_pos < s2_len) && (s2[s2_pos] == '0')) {
s2_pos++;
}
while ((s1_pos < s1_len) && (s2_pos < s2_len) && isdigit(s1[s1_pos]) && isdigit(s2[s2_pos])) {
if (!first_diff) first_diff = s1[s1_pos] - s2[s2_pos];
s1_pos++;
s2_pos++;
}
if ((s1_pos < s1_len) && isdigit(s1[s1_pos])) return 1;
if ((s2_pos < s2_len) && isdigit(s2[s2_pos])) return -1;
if (first_diff) return first_diff;
}
return 0;
}
static
int filenvercmp (char const * a, ptrdiff_t alen, char const * b, ptrdiff_t blen)
{
/* Special case for empty versions. */
bool aempty = (alen < 0) ? !a[0] : !alen;
bool bempty = (blen < 0) ? !b[0] : !blen;
if (aempty) return -!bempty;
if (bempty) return 1;
/* Special cases for leading ".": "." sorts first, then "..", then
other names with leading ".", then other names. */
if (a[0] == '.') {
if (b[0] != '.') return -1;
bool adot = alen < 0 ? !a[1] : alen == 1;
bool bdot = blen < 0 ? !b[1] : blen == 1;
if (adot) return -!bdot;
if (bdot) return 1;
bool adotdot = (a[1] == '.') && ((alen < 0) ? !a[2] : (alen == 2));
bool bdotdot = (b[1] == '.') && ((blen < 0) ? !b[2] : (blen == 2));
if (adotdot) return -!bdotdot;
if (bdotdot) return 1;
}
else if (b[0] == '.') {
return 1;
}
/* Cut file suffixes. */
ptrdiff_t aprefixlen = file_prefixlen(a, &alen);
ptrdiff_t bprefixlen = file_prefixlen(b, &blen);
/* If both suffixes are empty, a second pass would return the same thing. */
bool one_pass_only = (aprefixlen == alen) && (bprefixlen == blen);
int result = verrevcmp(a, aprefixlen, b, bprefixlen);
/* Return the initial result if nonzero, or if no second pass is needed.
Otherwise, restore the suffixes and try again. */
return (result || one_pass_only) ? result : verrevcmp(a, alen, b, blen);
}
static inline
int filevercmp(const char * s1, const char * s2)
{
return filenvercmp(s1, -1, s2, -1);
}
int coreutils_version_sort(char const * a, char const * b)
{
return filevercmp(a, b);
}

10
coreutils-sort.hh Normal file
View File

@@ -0,0 +1,10 @@
/* SPDX-License-Identifier: Apache-2.0
* (c) 2025, Konstantin Demin
*/
#ifndef INCLUDE_COREUTILS_SORT_HH
#define INCLUDE_COREUTILS_SORT_HH 1
int coreutils_version_sort(char const * a, char const * b);
#endif /* INCLUDE_COREUTILS_SORT_HH */

174
main.cc Normal file
View File

@@ -0,0 +1,174 @@
/* SPDX-License-Identifier: Apache-2.0
* (c) 2025, Konstantin Demin
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <cerrno>
#include <cstdio>
#include <cstdlib>
extern "C" {
#include <unistd.h>
}
#include "overlay.hh"
static
void usage(int retcode = 0)
{
static const char usage_msg[] =
"overlaydirs 0.0.1\n"
"Usage:\n"
" overlaydirs --help\n"
" show this message\n"
" overlaydirs --list [--no-sort|--zero] <source directory> [..<source directory>]\n"
// TODO: shell-escape mode
/* " overlaydirs --list [--no-sort|--zero|--escape] <source directory> [..<source directory>]\n" */
" list entries\n"
" overlaydirs --merge <target directory> <source directory> [..<source directory>]\n"
" symlinks entries into <target directory>\n"
"\n"
" --no-sort - don't sort entries\n"
" --zero - separate entries with NUL instead of LF\n"
// TODO: shell-escape mode
/*
" --escape - shell-escape strings\n"
"\n"
"Notes:\n"
" - flags \"--zero\" and \"--escape\" are mutually exclusive.\n"
*/
;
(void) write(STDERR_FILENO, usage_msg, sizeof(usage_msg));
exit(retcode);
}
static int main_list(int argc, char * argv[]);
static int main_merge(int argc, char * argv[]);
int main(int argc, char * argv[])
{
if (argc < 2) usage(0);
if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0))
usage(0);
else if (strcmp(argv[1], "--list") == 0)
return main_list(argc, argv);
else if (strcmp(argv[1], "--merge") == 0)
return main_merge(argc, argv);
else
usage(EINVAL);
return 0;
}
static int main_list(int argc, char * argv[])
{
if (argc < 3) usage(EINVAL);
print_mode print_m = print_mode::_default;
sort_mode sort_m = sort_mode::_default;
int arg_start = 2;
while (arg_start < argc) {
char * _arg = argv[arg_start];
if (strncmp(_arg, "--", 2) != 0) break;
if (strcmp(_arg, "--") == 0) {
arg_start++;
break;
}
else if (strcmp(_arg, "--no-sort") == 0) {
arg_start++;
if (sort_m != sort_mode::_default) {
(void) fprintf(stderr, "overlaydirs: no-sort mode already set\n");
}
sort_m = sort_mode::none;
}
else if (strcmp(_arg, "--zero") == 0) {
arg_start++;
if (print_m != print_mode::_default) {
(void) fprintf(stderr, "overlaydirs: output mode already set\n");
}
print_m = print_mode::zero;
}
// TODO
/*
else if (strcmp(_arg, "--escape") == 0) {
arg_start++;
if (print_m != print_mode::_default) {
(void) fprintf(stderr, "overlaydirs: output mode already set\n");
}
print_m = print_mode::shell_escape;
}
*/
else {
(void) fprintf(stderr, "overlaydirs: unknown option \"%s\"\n", _arg);
// nevertheless, continue and try argument as a directory
break;
}
}
auto roots = std::vector<ovl_dirspec_t>();
for (int i = arg_start; i < argc; i++) {
ovl_dirspec_t t;
if (!arg_to_rootspec(argv[i], &t, ovl_rootspec_flags::relative)) continue;
roots.push_back(t);
}
if (roots.empty()) {
(void) fprintf(stderr, "overlaydirs: no usable sources found\n");
return EINVAL;
}
auto ovl = process_overlay(roots, sort_m);
// not needed anymore
roots.clear();
roots.shrink_to_fit();
list_overlay(ovl, print_m);
return 0;
}
static int main_merge(int argc, char * argv[])
{
if (argc < 4) usage(EINVAL);
ovl_dirspec_t target;
if (!arg_to_rootspec(argv[2], &target, ovl_rootspec_flags::relative)) {
(void) fprintf(stderr, "overlaydirs: not a directory: \"%s\"\n", argv[2]);
return EINVAL;
}
if (!is_empty_dir(target.fd)) {
(void) fprintf(stderr, "overlaydirs: target directory is not empty\n");
return EEXIST;
}
auto roots = std::vector<ovl_dirspec_t>();
for (int i = 3; i < argc; i++) {
ovl_dirspec_t t;
if (!arg_to_rootspec(argv[i], &t)) continue;
roots.push_back(t);
}
if (roots.empty()) {
(void) fprintf(stderr, "overlaydirs: no usable sources found\n");
return EINVAL;
}
auto ovl = process_overlay(roots);
// not needed anymore
roots.clear();
roots.shrink_to_fit();
int rv = merge_overlay(ovl, target);
if (rv == -1) {
(void) fprintf(stderr, "overlaydirs: target directory is not empty\n");
return EEXIST;
}
return rv;
}

43
overlay-common.hh Normal file
View File

@@ -0,0 +1,43 @@
/* SPDX-License-Identifier: Apache-2.0
* (c) 2025, Konstantin Demin
*/
#ifndef INCLUDE_OVERLAY_COMMON_HH
#define INCLUDE_OVERLAY_COMMON_HH 1
#include <vector>
extern "C" {
#include <dirent.h>
}
constexpr unsigned int ovl_name_max = sizeof(dirent::d_name);
constexpr unsigned int ovl_path_max = (PATH_MAX);
/*
constexpr unsigned int ovl_path_max =
#ifdef PATH_MAX
#if (PATH_MAX > 0) && (PATH_MAX <= 8192)
(PATH_MAX);
#else
8192;
#endif
#else
4096;
#endif
*/
constexpr unsigned int ovl_path_name_ratio = ovl_path_max / ovl_name_max;
constexpr unsigned int ovl_depth_max = ovl_path_name_ratio - 1;
struct ovl_name_t {
size_t len;
char str[ovl_name_max];
};
struct ovl_path_t {
size_t len;
char str[ovl_path_max];
};
typedef std::vector<ovl_path_t> ovl_path_vec;
#endif /* INCLUDE_OVERLAY_COMMON_HH */

842
overlay.cc Normal file
View File

@@ -0,0 +1,842 @@
/* SPDX-License-Identifier: Apache-2.0
* (c) 2025, Konstantin Demin
*/
#include "overlay.hh"
#include <cerrno>
#include <cstdio>
#include <list>
#include <set>
#include <tuple>
extern "C" {
#include <dirent.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <linux/limits.h>
}
struct x_dirspec_t {
dev_t dev;
ino_t ino;
int fd;
};
typedef std::vector<x_dirspec_t> dirspec_vec;
typedef std::set<std::tuple<dev_t, ino_t>> dev_ino_set;
struct x_entry_t {
int32_t root_id;
uint32_t type;
xxhash_t name_hash;
dev_t dev;
ino_t ino;
ovl_name_t name;
};
static
size_t procfs_fd2name(int fd, char * buffer, size_t size);
template <typename T>
static inline
bool set_contains(const std::set<T> & set, const T & needle);
static inline
bool filter_out_dots(const char * name);
static inline
bool handle_file_type(int type, bool allow_symlinks = false);
static inline
ovl_entry_kind dirent_to_entry_kind(typeof(dirent::d_type) d_type);
bool arg_to_rootspec(const char * arg, ovl_dirspec_t * rootspec, uint32_t flags)
{
if ((!arg) || (!rootspec)) return false;
if (!(arg[0])) return false;
int fd, fd_real;
fd = open(arg, O_DIRECTORY | O_RDONLY);
// here is relatively small window for race/TOCTOU
fd_real = open(arg, O_PATH | O_NOFOLLOW | O_RDONLY);
if (fd < 0) {
if (fd_real >= 0) (void) close(fd_real);
return false;
}
(void) memset(rootspec, 0, sizeof(ovl_dirspec_t));
do {
struct stat st;
(void) memset(&st, 0, sizeof(st));
if (fstat(fd, &st) != 0) break;
if (!S_ISDIR(st.st_mode)) break;
rootspec->dev = st.st_dev;
rootspec->ino = st.st_ino;
rootspec->fd = fd;
rootspec->root.len = procfs_fd2name((fd_real < 0) ? fd : fd_real, rootspec->root.str, ovl_path_max);
} while (false);
if ((fd_real >= 0) && (fd_real != fd)) {
(void) close(fd_real);
}
// basic sanity checks:
// - path length is non-zero
// - either dev_t or ino_t are non-zero
if ((!rootspec->root.len) || ((!rootspec->dev) && (!rootspec->ino))) {
// not needed anymore
(void) close(fd);
(void) memset(rootspec, 0, sizeof(ovl_dirspec_t));
return false;
}
// try to adjust/shorten path against current working directory
do {
bool relative = !!(flags & ovl_rootspec_flags::relative);
if (!relative) break;
ovl_path_t cwd;
(void) memset(&cwd, 0, sizeof(cwd));
if (!getcwd(cwd.str, ovl_path_max)) break;
cwd.len = strnlen(cwd.str, ovl_path_max);
if ((!cwd.len) || (cwd.len == ovl_path_max)) break;
// cwd is root - nothing to do here
if ((cwd.len == 1) && (cwd.str[0] == '/')) break;
// ensure that rootpath has prefix of cwd + "/"
if (cwd.len >= rootspec->root.len) break;
if (strncmp(rootspec->root.str, cwd.str, cwd.len) != 0) break;
if (rootspec->root.str[cwd.len] != '/') break;
// "cwd" isn't needed anymore so reuse it as temporary container
(void) memset(cwd.str, 0, cwd.len);
size_t prefix_len = cwd.len + 1;
cwd.len = rootspec->root.len - prefix_len;
(void) memcpy(cwd.str, &rootspec->root.str[prefix_len], cwd.len);
// finally replace with adjusted value
(void) memcpy(&rootspec->root, &cwd, sizeof(ovl_path_t));
} while (false);
return true;
}
bool is_empty_dir(int dirfd)
{
// internal method, parameters are already checked
/* if (dirfd < 0) return false; */
// reset handle to beginning
(void) lseek(dirfd, 0, SEEK_SET);
int fd_dir = dup(dirfd);
if (fd_dir < 0) return false;
DIR * p_dir = fdopendir(fd_dir);
if (!p_dir) {
(void) close(fd_dir);
return false;
}
// read current directory entries
bool empty = true;
for (struct dirent * dent = nullptr; (dent = readdir(p_dir)) != nullptr; ) {
if (!filter_out_dots(dent->d_name)) continue;
empty = false;
break;
}
// rewind directory
rewinddir(p_dir);
(void) lseek(fd_dir, 0, SEEK_SET);
// close handles
(void) closedir(p_dir);
// handle was reowned [by libc] via fdopendir() but why not to try to close it [again]
(void) close(fd_dir);
return empty;
}
class overlay_processor {
sort_mode _sort;
unsigned int _depth;
ovl_dirspec_vec _roots;
// we should report problems when they arise but there's common problem:
// name is relative to almost unknown path and we don't know the full path.
// furthermore we won't track path due to performance reasons.
ovl_entry_vec _impl(const dirspec_vec & paths, unsigned int depth) {
ovl_entry_vec rv = ovl_entry_vec();
if (!depth) return rv;
int n_usable_paths = 0;
for (const auto & p : paths) {
if (p.fd < 0) continue;
n_usable_paths++;
}
if (!n_usable_paths) return rv;
bool multiple_layers = n_usable_paths > 1;
auto seen_paths = dev_ino_set();
auto dirs = std::list<x_entry_t>();
auto nondirs = std::list<x_entry_t>();
for (size_t i = 0; i < paths.size(); i++) {
const auto & p = paths[i];
if (p.fd < 0) continue;
if (multiple_layers) {
auto needle = std::make_tuple(p.dev, p.ino);
if (set_contains(seen_paths, needle)) {
continue;
}
(void) seen_paths.emplace(needle);
}
// reset handle to beginning
(void) lseek(p.fd, 0, SEEK_SET);
int fd_dir = dup(p.fd);
if (fd_dir < 0) {
(void) fprintf(stderr, "overlaydirs: dup(%d): %s\n", p.fd, strerror(errno));
continue;
}
DIR * p_dir = fdopendir(fd_dir);
if (!p_dir) {
(void) fprintf(stderr, "overlaydirs: fdopendir(%d): %s\n", fd_dir, strerror(errno));
(void) close(fd_dir);
continue;
}
// read current directory entries
auto ent_all = std::vector<x_entry_t>();
for (struct dirent * dent = nullptr; (dent = readdir(p_dir)) != nullptr; ) {
if (!filter_out_dots(dent->d_name)) continue;
if (!handle_file_type(dent->d_type)) continue;
x_entry_t t;
(void) memset(&t, 0, sizeof(t));
// 1. only basic info is filled - name and type
// 2. directories and symlinks are handled later
// 3. dev_t and ino_t are relevant only for directories
t.name.len = strnlen(dent->d_name, sizeof(dirent::d_name));
if (t.name.len == sizeof(dirent::d_name)) {
// TODO: report warning about too long name
// (see function' comment)
continue;
}
t.type = dent->d_type;
(void) memcpy(t.name.str, dent->d_name, t.name.len);
ent_all.push_back(t);
}
// rewind directory
rewinddir(p_dir);
(void) lseek(fd_dir, 0, SEEK_SET);
// close handles
(void) closedir(p_dir); /* p_dir = nullptr; */
// handle was reowned [by libc] via fdopendir() but why not to try to close it [again]
(void) close(fd_dir); /* fd_dir = -1; */
// process entries
auto hash_skip = std::set<xxhash_t>();
auto ent_dirs = std::list<x_entry_t>();
auto ent_nondirs = std::list<x_entry_t>();
for (auto & x : ent_all) {
bool is_skip = false;
do {
if (x.name.len < 2) break;
/* if (strncmp(&x.name.name[x.name.len - 2], ".-", 2) != 0) break; */
if (x.name.str[x.name.len - 2] != '.') break;
if (x.name.str[x.name.len - 1] != '-') break;
// current entry is "skip" entry
is_skip = true;
} while (false);
if (is_skip) {
// skip file is found but we're not using them
if (!multiple_layers) continue;
// don't use "bare skip" entries (e.g. named exactly ".-")
if (x.name.len == 2) continue;
(void) hash_skip.emplace(xxhash(x.name.str, x.name.len - 2));
continue;
}
if ((x.type == DT_DIR) || (x.type == DT_LNK)) {
struct stat st;
(void) memset(&st, 0, sizeof(st));
if (fstatat(p.fd, x.name.str, &st, 0) != 0) {
// TODO: report warning about unavailable entry
// (see function' comment)
continue;
}
// handle (new) entry type
typeof(x_entry_t::type) new_type = IFTODT(st.st_mode);
if ((x.type == DT_DIR) && (new_type != DT_DIR)) {
// TODO: report warning about inconsistent entry type
// (see function' comment)
// PS: this is probably a bug
continue;
}
// skip dangling symlinks along with unknown file types
if (!handle_file_type(new_type, true)) {
// TODO: report warning about unapplicable entry type
// (see function' comment)
continue;
}
// adjust entry type
x.type = new_type;
if (x.type == DT_DIR) {
if (depth > 1) {
// we should track precise dev_t and ino_t for directories
x.dev = st.st_dev;
x.ino = st.st_ino;
}
else {
// end-of-depth directories are handled as regular files
x.type = DT_REG;
}
}
}
x.root_id = i;
x.name_hash = xxhash(x.name.str, x.name.len);
if (x.type == DT_DIR) {
ent_dirs.push_back(x);
} else {
ent_nondirs.push_back(x);
}
}
// not needed anymore
ent_all.clear();
ent_all.shrink_to_fit();
// remove "skip" entries
if (!hash_skip.empty()) {
nondirs.remove_if(
[&hash_skip](const x_entry_t & x) -> bool {
return set_contains(hash_skip, x.name_hash);
}
);
dirs.remove_if(
[&hash_skip](const x_entry_t & x) -> bool {
return set_contains(hash_skip, x.name_hash);
}
);
// not needed anymore
hash_skip.clear();
}
// "hash_skip" is now empty and doesn't contain exactly "skip" entries
// but is going to be reused further
// override all entries with current "non-dir" entries
if (!ent_nondirs.empty()) {
for (const auto & x : ent_nondirs) {
(void) hash_skip.emplace(x.name_hash);
}
nondirs.remove_if(
[&hash_skip](const x_entry_t & x) -> bool {
return set_contains(hash_skip, x.name_hash);
}
);
dirs.remove_if(
[&hash_skip](const x_entry_t & x) -> bool {
return set_contains(hash_skip, x.name_hash);
}
);
// not needed anymore
hash_skip.clear();
}
// override "non-dir" entries with "dir" entries
if (!ent_dirs.empty()) {
for (const auto & x : ent_dirs) {
(void) hash_skip.emplace(x.name_hash);
}
nondirs.remove_if(
[&hash_skip](const x_entry_t & x) -> bool {
return set_contains(hash_skip, x.name_hash);
}
);
// not needed anymore
hash_skip.clear();
}
// merge current entries with accumulated "all" entries
for (const auto & x : ent_nondirs) {
nondirs.push_back(x);
}
ent_nondirs.clear();
for (const auto & x : ent_dirs) {
dirs.push_back(x);
}
ent_dirs.clear();
}
// sort "dir" entries
if (_sort != sort_mode::none) {
dirs.sort(
[this](const x_entry_t & a, const x_entry_t & b) -> bool {
if (a.name_hash == b.name_hash) return a.root_id < b.root_id;
return custom_sort(a.name.str, b.name.str, _sort) < 0;
}
);
}
// merge entries
auto ent_all = std::list<x_entry_t>();
for (const auto & x : dirs) {
ent_all.push_back(x);
}
for (const auto & x : nondirs) {
ent_all.push_back(x);
}
// not needed anymore
nondirs.clear();
// sort entries
if (_sort != sort_mode::none) {
ent_all.sort(
[this](const x_entry_t & a, const x_entry_t & b) -> bool {
// in case of directory entries
if (a.name_hash == b.name_hash) return a.root_id < b.root_id;
return custom_sort(a.name.str, b.name.str, _sort) < 0;
}
);
}
auto seen_dirs = std::set<xxhash_t>();
for (const auto & x : ent_all) {
if (x.root_id < 0) continue;
ovl_entry_kind etype = dirent_to_entry_kind(x.type);
if (etype == ovl_entry_kind::unknown) continue;
if (etype == ovl_entry_kind::directory) {
// skip if directory with the same name is already handled
if (set_contains(seen_dirs, x.name_hash)) continue;
(void) seen_dirs.emplace(x.name_hash);
}
ovl_entry_t t;
t.type = etype;
t.root_id = x.root_id;
t.name_hash = x.name_hash;
t.name.len = x.name.len;
(void) memcpy(t.name.str, x.name.str, t.name.len);
if (t.type == ovl_entry_kind::directory) {
// root_id is meaningless for directories
t.root_id = 0;
auto child_paths = dirspec_vec();
for (size_t i = 0; i < paths.size(); i++) {
child_paths.push_back(x_dirspec_t{ .dev = 0, .ino = 0, .fd = -1 });
}
for (const auto & x : dirs) {
if (x.name_hash != t.name_hash) continue;
int fd = openat(paths[x.root_id].fd, t.name.str, O_DIRECTORY | O_RDONLY);
if (fd < 0) {
// TODO: report warning about unsuccessful openat()
// (see function' comment)
continue;
}
child_paths[x.root_id].dev = x.dev;
child_paths[x.root_id].ino = x.ino;
child_paths[x.root_id].fd = fd;
}
t.children = _impl(child_paths, depth - 1);
for (const auto & x : child_paths) {
if (x.fd < 0) continue;
(void) close(x.fd);
}
child_paths.clear();
child_paths.shrink_to_fit();
}
rv.push_back(t);
}
// not needed anymore
dirs.clear();
return rv;
}
public:
overlay_processor(const ovl_dirspec_vec & roots, unsigned int depth, sort_mode sort) {
_roots = ovl_dirspec_vec();
for (const auto & r : roots) {
_roots.push_back(r);
}
_sort = sort;
_depth = (depth > ovl_depth_max) ? ovl_depth_max : depth;
}
ovl_result run(void) {
auto rv = ovl_result();
auto paths = dirspec_vec();
for (const auto & r : _roots) {
rv.roots.push_back(r.root);
paths.push_back(x_dirspec_t{ .dev = r.dev, .ino = r.ino, .fd = r.fd });
}
rv.entries = _impl(paths, _depth);
return rv;
}
};
ovl_result process_overlay(ovl_dirspec_vec & roots, sort_mode sort, unsigned int depth)
{
auto rv = ovl_result();
if (!depth) return rv;
if (roots.empty()) return rv;
rv = overlay_processor(roots, depth, sort).run();
// close all open file descriptors
for (auto & r : roots) {
(void) close(r.fd);
r.fd = -1;
}
return rv;
}
class overlay_printer {
print_mode _print;
ovl_result _ovl;
void _impl(const ovl_entry_vec & entries, const ovl_path_t * subpath = nullptr) {
for (const auto & e : entries) {
switch (e.type) {
case ovl_entry_kind::file:
print_path(_print, &(_ovl.roots[e.root_id]), subpath, &(e.name));
break;
case ovl_entry_kind::directory:
// NOT printing directory name
ovl_path_t n_subpath;
(void) memset(&n_subpath, 0, sizeof(n_subpath));
if (subpath) {
n_subpath.len = subpath->len + 1 + e.name.len;
if (n_subpath.len >= ovl_path_max) {
(void) fprintf(stderr, "subpath too long: /%s/%s/\n", subpath->str, e.name.str);
continue;
}
/* (void) snprintf(n_subpath.str, ovl_path_max, "%s/%s", subpath->str, e.name.str); */
size_t off = 0;
(void) memcpy(n_subpath.str, subpath->str, subpath->len);
off = subpath->len;
n_subpath.str[off++] = '/';
(void) memcpy(n_subpath.str + off, e.name.str, e.name.len);
} else {
// ovl_name_max is definitely smaller than ovl_path_max
n_subpath.len = e.name.len;
(void) memcpy(n_subpath.str, e.name.str, e.name.len);
}
_impl(e.children, &n_subpath);
break;
default:
break;
}
}
}
public:
overlay_printer(const ovl_result & ovl, print_mode print) {
_ovl = ovl;
_print = print;
}
void run(void) {
_impl(_ovl.entries);
}
};
void list_overlay(const ovl_result & overlay, print_mode print)
{
overlay_printer(overlay, print).run();
}
class overlay_merger {
ovl_result _ovl;
ovl_dirspec_t _target;
int _impl(const ovl_entry_vec & entries, int dirfd, const ovl_path_t * subpath = nullptr) {
int rv = 0;
int new_fd;
ovl_path_t t;
for (const auto & e : entries) {
switch (e.type) {
case ovl_entry_kind::file:
(void) memset(&t, 0, sizeof(t));
if (subpath) {
t.len = _ovl.roots[e.root_id].len + 1 + subpath->len + 1 + e.name.len;
if (t.len >= ovl_path_max) {
(void) fprintf(stderr, "name too long: %s/%s/%s\n", _ovl.roots[e.root_id].str, subpath->str, e.name.str);
return ENAMETOOLONG;
}
/* (void) snprintf(t.str, ovl_path_max, "%s/%s/%s\n", _ovl.roots[e.root_id].str, subpath->str, e.name.str); */
size_t off = 0;
(void) memcpy(t.str, _ovl.roots[e.root_id].str, _ovl.roots[e.root_id].len);
off = _ovl.roots[e.root_id].len;
t.str[off++] = '/';
(void) memcpy(t.str + off, subpath->str, subpath->len);
off += subpath->len;
t.str[off++] = '/';
(void) memcpy(t.str + off, e.name.str, e.name.len);
} else {
t.len = _ovl.roots[e.root_id].len + 1 + e.name.len;
if (t.len >= ovl_path_max) {
(void) fprintf(stderr, "name too long: %s/%s\n", _ovl.roots[e.root_id].str, e.name.str);
return ENAMETOOLONG;
}
/* (void) snprintf(t.str, ovl_path_max, "%s/%s\n", _ovl.roots[e.root_id].str, e.name.str); */
size_t off = 0;
(void) memcpy(t.str, _ovl.roots[e.root_id].str, _ovl.roots[e.root_id].len);
off = _ovl.roots[e.root_id].len;
t.str[off++] = '/';
(void) memcpy(t.str + off, e.name.str, e.name.len);
}
if (symlinkat(t.str, dirfd, e.name.str) < 0) {
rv = errno;
if (subpath) {
(void) fprintf(stderr, "symlinkat() error: \"%s\" with %s/%s/%s -> %s\n", strerror(rv), _target.root.str, subpath->str, e.name.str, t.str);
} else {
(void) fprintf(stderr, "symlinkat() error: \"%s\" with %s/%s -> %s\n", strerror(rv), _target.root.str, e.name.str, t.str);
}
return rv;
}
break;
case ovl_entry_kind::directory:
(void) memset(&t, 0, sizeof(t));
if (subpath) {
t.len = subpath->len + 1 /* "/" */ + e.name.len;
if (t.len >= ovl_path_max) {
(void) fprintf(stderr, "subpath too long: /%s/%s/\n", subpath->str, e.name.str);
return ENAMETOOLONG;
}
/* (void) snprintf(t.str, ovl_path_max, "%s/%s", subpath->str, e.name.str); */
(void) memcpy(t.str, subpath->str, subpath->len);
t.str[subpath->len] = '/';
(void) memcpy(t.str + subpath->len + 1, e.name.str, e.name.len);
} else {
// ovl_name_max is definitely smaller than ovl_path_max
t.len = e.name.len;
(void) memcpy(t.str, e.name.str, e.name.len);
}
if (mkdirat(dirfd, e.name.str, 0755) < 0) {
rv = errno;
if (subpath) {
(void) fprintf(stderr, "mkdirat() error: \"%s\" with path %s/%s/%s\n", strerror(rv), _target.root.str, subpath->str, e.name.str);
} else {
(void) fprintf(stderr, "mkdirat() error: \"%s\" with path %s/%s\n", strerror(rv), _target.root.str, e.name.str);
}
return rv;
}
new_fd = openat(dirfd, e.name.str, O_DIRECTORY | O_RDONLY);
if (new_fd < 0) {
rv = errno;
if (subpath) {
(void) fprintf(stderr, "openat() error: \"%s\" with path %s/%s/%s\n", strerror(rv), _target.root.str, subpath->str, e.name.str);
} else {
(void) fprintf(stderr, "openat() error: \"%s\" with path %s/%s\n", strerror(rv), _target.root.str, e.name.str);
}
return rv;
}
rv = _impl(e.children, new_fd, &t);
(void) close(new_fd);
if (rv != 0) return rv;
break;
default:
break;
}
}
return rv;
}
public:
overlay_merger(const ovl_result & ovl, const ovl_dirspec_t & target) {
_ovl = ovl;
_target = target;
}
int run(void) {
return _impl(_ovl.entries, _target.fd);
}
};
int merge_overlay(const ovl_result & overlay, const ovl_dirspec_t & target)
{
if (!is_empty_dir(target.fd)) return -1;
return overlay_merger(overlay, target).run();
}
static
size_t procfs_fd2name(int fd, char * buffer, size_t size)
{
// internal method, parameters are already checked
/* if ((fd < 0) || (!buffer) || (!size)) return 0; */
// "/proc/self/fd/" - 14, "%d" - up to 10
char procfs_link[32];
(void) memset(procfs_link, 0, sizeof(procfs_link));
(void) snprintf(procfs_link, sizeof(procfs_link) - 1, "/proc/self/fd/%d", fd);
(void) memset(buffer, 0, size);
ssize_t x = readlink(procfs_link, buffer, size - 1);
x = (x > 0) ? x : 0;
// not really needed
buffer[x] = 0;
// already clamped
return (size_t) x;
}
#ifndef HAVE_CPP_SET_CONTAINS
#ifdef __cplusplus
#if __cplusplus >= 202002L
#define HAVE_CPP_SET_CONTAINS 1
#endif
#endif
#endif
#ifndef HAVE_CPP_SET_CONTAINS
#define HAVE_CPP_SET_CONTAINS 0
#endif
template <typename T>
static inline
bool set_contains(const std::set<T> & set, const T & needle)
{
#if HAVE_CPP_SET_CONTAINS
return set.contains(needle);
#else
return (set.find(needle) != set.cend());
#endif
}
static inline
bool filter_out_dots(const char * name)
{
/*
if (strcmp(name, ".") == 0) return false;
if (strcmp(name, "..") == 0) return false;
return true;
*/
if (name[0] != '.') return true;
switch (name[1]) {
case 0: return false;
case '.': return (name[2] != 0);
}
return true;
}
static inline
bool handle_file_type(int type, bool allow_symlinks)
{
switch (type) {
// common file types
case DT_REG: /* -fallthrough */
case DT_DIR: /* -fallthrough */
// less common types (but also allowed)
case DT_BLK: /* -fallthrough */
case DT_CHR: /* -fallthrough */
case DT_FIFO: /* -fallthrough */
case DT_SOCK:
return true;
// symlinks should be handled separately!
case DT_LNK:
return !allow_symlinks;
default:
return false;
}
}
static inline
ovl_entry_kind dirent_to_entry_kind(typeof(dirent::d_type) d_type) {
switch (d_type) {
case DT_DIR:
return ovl_entry_kind::directory;
case DT_REG: /* -fallthrough */
case DT_BLK: /* -fallthrough */
case DT_CHR: /* -fallthrough */
case DT_FIFO: /* -fallthrough */
case DT_SOCK:
return ovl_entry_kind::file;
// symlinks should be already handled!
}
return ovl_entry_kind::unknown;
}

90
overlay.hh Normal file
View File

@@ -0,0 +1,90 @@
/* SPDX-License-Identifier: Apache-2.0
* (c) 2025, Konstantin Demin
*/
#ifndef INCLUDE_OVERLAY_HH
#define INCLUDE_OVERLAY_HH 1
#include <cstring>
extern "C" {
#include <sys/types.h>
}
#include "overlay-common.hh"
#include "print.hh"
#include "sort.hh"
#include "xxhash.hh"
struct ovl_dirspec_t {
dev_t dev;
ino_t ino;
int fd;
ovl_path_t root;
};
typedef std::vector<ovl_dirspec_t> ovl_dirspec_vec;
enum ovl_rootspec_flags : uint32_t {
none = 0,
// relative path (if possible)
relative = (1 << 0),
};
// forward declaration
struct ovl_entry_t;
typedef std::vector<ovl_entry_t> ovl_entry_vec;
enum struct ovl_entry_kind : uint32_t {
unknown = 0,
// entry may have it's own entries
directory,
// entry may NOT have it's own entries
// this applies to:
// - regular files
// - directories at maximum depth (!)
// - block and character devices
// - pipes (FIFO)
// - sockets
file,
};
struct ovl_entry_t {
ovl_entry_kind type;
int32_t root_id;
xxhash_t name_hash;
ovl_entry_vec children;
ovl_name_t name;
ovl_entry_t() {
root_id = -1;
type = ovl_entry_kind::unknown;
name_hash = 0;
children = ovl_entry_vec();
(void) memset(&name, 0, sizeof(name));
}
};
struct ovl_result {
ovl_path_vec roots;
ovl_entry_vec entries;
ovl_result() {
roots = ovl_path_vec();
entries = ovl_entry_vec();
}
};
bool arg_to_rootspec(const char * arg, ovl_dirspec_t * path, uint32_t flags = ovl_rootspec_flags::none);
bool is_empty_dir(int dirfd);
ovl_result process_overlay(ovl_dirspec_vec & roots, sort_mode sort = sort_mode::none, unsigned int depth = ovl_depth_max);
void list_overlay(const ovl_result & overlay, print_mode print = print_mode::normal);
int merge_overlay(const ovl_result & overlay, const ovl_dirspec_t & target);
#endif /* INCLUDE_OVERLAY_HH */

62
print.cc Normal file
View File

@@ -0,0 +1,62 @@
/* SPDX-License-Identifier: Apache-2.0
* (c) 2025, Konstantin Demin
*/
#include "print.hh"
#include <cstdio>
#include <cstring>
extern "C" {
#include <unistd.h>
}
static
void print_simple(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name);
// TODO: shell-escape mode
/*
static
void print_escape(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name);
*/
void print_path(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name)
{
// "root" and "name" are mandatory
/* if ((!root) || (!name)) return; */
switch (mode) {
case print_mode::normal: /* -fallthrough */
case print_mode::zero:
print_simple(mode, root, subpath, name);
return;
// TODO: shell-escape mode
/*
case print_mode::shell_escape:
print_escape(mode, root, subpath, name);
return;
*/
}
}
static
void print_simple(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name)
{
// internal method, parameters are already checked
/* if ((!root) || (!name)) return 0; */
char eol = (mode == print_mode::zero) ? 0 : '\n';
if (subpath) {
(void) fprintf(stdout, "%s/%s/%s%c", root->str, subpath->str, name->str, eol);
} else {
(void) fprintf(stdout, "%s/%s%c", root->str, name->str, eol);
}
}
// TODO: shell-escape mode
/*
static
void print_escape(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name)
{
}
*/

22
print.hh Normal file
View File

@@ -0,0 +1,22 @@
/* SPDX-License-Identifier: Apache-2.0
* (c) 2025, Konstantin Demin
*/
#ifndef INCLUDE_LIST_MODE_HH
#define INCLUDE_LIST_MODE_HH 1
#include "overlay-common.hh"
enum struct print_mode : unsigned int {
normal = 0,
zero,
// TODO
/* shell_escape, */
_default = normal,
};
void print_path(print_mode mode, const ovl_path_t * root, const ovl_path_t * subpath, const ovl_name_t * name);
#endif /* INCLUDE_LIST_MODE_HH */

29
sort.cc Normal file
View File

@@ -0,0 +1,29 @@
/* SPDX-License-Identifier: Apache-2.0
* (c) 2025, Konstantin Demin
*/
#include "sort.hh"
#include <cstring>
#include "coreutils-sort.hh"
int custom_sort(const char * a, const char * b, sort_mode mode)
{
switch (mode) {
case sort_mode::glibc_version_sort:
return strverscmp(a, b);
case sort_mode::coreutils_version_sort:
return coreutils_version_sort(a, b);
// not a actual version sort
/*
case sort_mode::simple_sort:
return strcmp(a, b);
*/
default:
return strcmp(a, b);
}
}

26
sort.hh Normal file
View File

@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: Apache-2.0
* (c) 2025, Konstantin Demin
*/
#ifndef INCLUDE_SORT_MODE_HH
#define INCLUDE_SORT_MODE_HH 1
enum struct sort_mode : unsigned int {
// don't sort entries at all
none = 0,
// sort entries using version sort from coreutils
coreutils_version_sort,
// sort entries using version sort from glibc
glibc_version_sort,
// simpliest sort via strcmp()
simple_sort,
_default = coreutils_version_sort,
};
int custom_sort(const char * a, const char * b, sort_mode mode);
#endif /* INCLUDE_SORT_MODE_HH */

26
xxhash.cc Normal file
View File

@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: Apache-2.0
* (c) 2025, Konstantin Demin
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#define XXH_IMPLEMENTATION
#include "xxhash.hh"
xxhash_t xxhash(const void * input, size_t len, xxhash_t seed)
{
#ifndef XXH_NO_LONG_LONG
XXH64_state_t st;
XXH64_reset(&st, seed);
XXH64_update(&st, input, len);
return XXH64_digest(&st);
#else
XXH32_state_t st;
XXH32_reset(&st, seed);
XXH32_update(&st, input, len);
return XXH32_digest(&st);
#endif
}

49
xxhash.hh Normal file
View File

@@ -0,0 +1,49 @@
/* SPDX-License-Identifier: Apache-2.0
* (c) 2025, Konstantin Demin
*/
#ifndef INCLUDE_XXHASH_HH
#define INCLUDE_XXHASH_HH 1
#include <limits.h>
#if LONG_MAX == INT_MAX
#define XXH_NO_LONG_LONG
#endif
// #define XXH_NO_EXTERNC_GUARD
#define XXH_NO_STDLIB
#define XXH_STATIC_LINKING_ONLY
#define XXH_NO_XXH3
#define XXH_NO_INLINE_HINTS 1
#define XXH_SIZE_OPT 2
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define XXH_CPU_LITTLE_ENDIAN 1
#endif
#endif
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define XXH_CPU_LITTLE_ENDIAN 0
#endif
#endif
#ifdef USE_BUNDLED_XXHASH
#include "xxhash.h"
#else /* !USE_BUNDLED_XXHASH */
#include <xxhash.h>
#endif /* USE_BUNDLED_XXHASH */
#ifndef XXH_NO_LONG_LONG
#define xxhash_t XXH64_hash_t
#else
#define xxhash_t XXH32_hash_t
#endif
xxhash_t xxhash(const void * input, size_t len, xxhash_t seed = 0);
#endif /* INCLUDE_XXHASH_HH */