2 * unix_capture.c: Capture a directory tree on UNIX.
6 * Copyright (C) 2012-2016 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see http://www.gnu.org/licenses/.
31 #include <limits.h> /* for PATH_MAX */
33 #include <sys/types.h>
34 #ifdef HAVE_SYS_XATTR_H
35 # include <sys/xattr.h>
39 #include "wimlib/blob_table.h"
40 #include "wimlib/dentry.h"
41 #include "wimlib/error.h"
42 #include "wimlib/reparse.h"
43 #include "wimlib/scan.h"
44 #include "wimlib/timestamp.h"
45 #include "wimlib/unix_data.h"
46 #include "wimlib/xattr.h"
/*
 * my_fdopendir(): get a directory stream for the descriptor stored in
 * *dirfd_p.  Two variants are visible here: a macro that forwards straight to
 * fdopendir(), and a fallback function that opens the current directory into
 * old_pwd, switches to the target directory with fchdir(), and writes
 * dirfd(dir) back through dirfd_p.
 * NOTE(review): the preprocessor test that selects a variant and most of the
 * fallback body are not visible in this excerpt; presumably old_pwd is used to
 * restore the working directory afterwards -- confirm.
 */
49 # define my_fdopendir(dirfd_p) fdopendir(*(dirfd_p))
52 my_fdopendir(int *dirfd_p)
57 old_pwd = open(".", O_RDONLY);
59 if (!fchdir(*dirfd_p)) {
63 *dirfd_p = dirfd(dir);
/*
 * my_openat(): open a file given both its full path and its path relative to
 * an open directory.  One definition forwards @dirfd, @relpath and @flags to
 * openat(); the other ignores them in favour of open() on @full_path.  Each
 * expansion uses only the arguments it needs.
 * NOTE(review): the conditional-compilation lines that choose between the two
 * definitions are not visible in this excerpt.
 */
74 # define my_openat(full_path, dirfd, relpath, flags) \
75 openat((dirfd), (relpath), (flags))
77 # define my_openat(full_path, dirfd, relpath, flags) \
78 open((full_path), (flags))
/*
 * my_readlinkat(): read a symbolic link target into @buf (at most @bufsize
 * bytes).  When HAVE_READLINKAT is defined the call goes to readlinkat() with
 * @dirfd and @relpath; the second definition uses readlink() on @full_path.
 * NOTE(review): the directive separating the two definitions is not visible
 * in this excerpt; presumably the second one is the non-HAVE_READLINKAT case.
 */
81 #ifdef HAVE_READLINKAT
82 # define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
83 readlinkat((dirfd), (relpath), (buf), (bufsize))
85 # define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
86 readlink((full_path), (buf), (bufsize))
/*
 * my_fstatat(), fstatat() variant: stat @relpath relative to @dirfd, passing
 * @stbuf and @flags through unchanged.  @full_path is unused by this
 * expansion.
 * NOTE(review): the preprocessor condition guarding this definition is not
 * visible in this excerpt.
 */
90 # define my_fstatat(full_path, dirfd, relpath, stbuf, flags) \
91 fstatat((dirfd), (relpath), (stbuf), (flags))
/*
 * my_fstatat(), variant that does not use fstatat(): @dirfd and @relpath are
 * ignored and @full_path is examined instead, with lstat() when @flags
 * contains AT_SYMLINK_NOFOLLOW and stat() otherwise.  Evaluates to the return
 * value of whichever call was made.
 *
 * The whole conditional is parenthesized so the expansion acts as a single
 * operand.  Without the outer parentheses, an expression such as
 * my_fstatat(...) == 0 would attach the comparison to the stat() arm only.
 */
# define my_fstatat(full_path, dirfd, relpath, stbuf, flags) \
	(((flags) & AT_SYMLINK_NOFOLLOW) ? \
		lstat((full_path), (stbuf)) : \
		stat((full_path), (stbuf)))
/*
 * Fallback values for the *at() constants used in this file: AT_FDCWD is
 * passed as the starting directory by unix_build_dentry_tree(), and
 * AT_SYMLINK_NOFOLLOW is tested by the my_fstatat() fallback.  The latter is
 * only defined here when the system headers have not already provided it.
 * NOTE(review): the guard around the AT_FDCWD definition is not visible in
 * this excerpt.
 */
100 # define AT_FDCWD -100
103 #ifndef AT_SYMLINK_NOFOLLOW
104 # define AT_SYMLINK_NOFOLLOW 0x100
107 #ifdef HAVE_XATTR_SUPPORT
109 * Retrieves the values of the xattrs named by the null-terminated @names of the
110 * file at @path and serializes the xattr names and values into @entries. If
111 * successful, returns the number of bytes used in @entries. If unsuccessful,
112 * returns -1 and sets errno (ERANGE if @entries was too small).
/*
 * gather_xattr_entries(): walk the NUL-separated list @names (@names_size
 * bytes) and append one struct wimlib_xattr_entry per name to @entries.
 *
 * For each name the visible code:
 *   1. validates the name length;
 *   2. checks that the entry header plus name leave room in @entries;
 *   3. stores the name length as little-endian 16-bit and copies the name;
 *   4. reads the value with lgetxattr() directly into the space after the
 *      name, bounded by the end of @entries;
 *   5. checks that the value length fits in 32 bits and stores it as
 *      little-endian 32-bit;
 *   6. pads up to a 4-byte boundary and advances to the next name.
 *
 * The return value is the distance from @entries to the final entry cursor,
 * i.e. the number of bytes used.  The pointer arithmetic on void pointers
 * relies on the GNU C extension that treats them as byte pointers.
 * NOTE(review): the return type, the loop opener, the error returns and the
 * statements that advance the entry cursor are not visible in this excerpt.
 */
115 gather_xattr_entries(const char *path, const char *names, size_t names_size,
116 void *entries, size_t entries_size)
118 const char * const names_end = names + names_size;
119 void * const entries_end = entries + entries_size;
120 const char *name = names;
121 struct wimlib_xattr_entry *entry = entries;
123 wimlib_assert((uintptr_t)entries % 4 == 0 &&
124 entries_size % 4 == 0 && names_size != 0);
/*
 * strnlen() yields the remaining list length when no NUL terminator is found;
 * the check below rejects that case together with empty names and lengths
 * that do not fit in 16 bits.
 */
126 size_t name_len = strnlen(name, names_end - name);
130 if (name_len == 0 || name_len >= names_end - name ||
131 (u16)name_len != name_len) {
132 ERROR("\"%s\": malformed extended attribute names list",
139 * Note: we take care to always call lgetxattr() with a nonzero
140 * size, since zero size means to return the value length only.
142 if (entries_end - (void *)entry <= sizeof(*entry) + name_len) {
147 entry->name_len = cpu_to_le16(name_len);
/*
 * mempcpy() returns the address just past the copied name, which is where the
 * attribute value is read to.
 */
149 value = mempcpy(entry->name, name, name_len);
151 value_len = lgetxattr(path, name, value, entries_end - value);
153 if (errno != ERANGE) {
154 ERROR_WITH_ERRNO("\"%s\": unable to read extended attribute \"%s\"",
159 if ((u32)value_len != value_len) {
160 ERROR("\"%s\": value of extended attribute \"%s\" is too large",
165 entry->value_len = cpu_to_le32(value_len);
168 * Zero-pad the entry to the next 4-byte boundary.
169 * Note: because we've guaranteed that @entries_size is a
170 * multiple of 4, this cannot overflow the @entries buffer.
173 while ((uintptr_t)value & 3) {
179 name += name_len + 1;
180 } while (name < names_end);
182 return (void *)entry - entries;
/*
 * create_xattr_item(): serialize the extended attributes listed in @names for
 * the file at @path and attach the result to @inode.
 *
 * @path:        file whose xattr values are read by gather_xattr_entries().
 * @inode:       inode that receives the serialized data through
 *               inode_set_linux_xattrs().
 * @names:       xattr name list, passed through to gather_xattr_entries().
 * @names_size:  size of @names in bytes.
 *
 * Serialization starts in a 1024-byte, 4-byte-aligned stack buffer; when that
 * is too small, a buffer from MALLOC() is used instead.  The result codes
 * assigned in the visible lines are WIMLIB_ERR_STAT (gathering failed, or the
 * serialized size does not fit in 32 bits) and WIMLIB_ERR_NOMEM.
 * NOTE(review): the return type, the retry loop, the rule for growing
 * entries_avail and the success path are not visible in this excerpt --
 * confirm before relying on this summary.
 */
186 create_xattr_item(const char *path, struct wim_inode *inode,
187 const char *names, size_t names_size)
189 char _entries[1024] _aligned_attribute(4);
190 char *entries = _entries;
191 size_t entries_avail = ARRAY_LEN(_entries);
192 ssize_t entries_size;
196 /* Serialize the xattrs into @entries */
197 entries_size = gather_xattr_entries(path, names, names_size,
198 entries, entries_avail);
199 if (entries_size < 0) {
200 ret = WIMLIB_ERR_STAT;
203 /* Not enough space in @entries. Reallocate it. */
204 if (entries != _entries)
206 ret = WIMLIB_ERR_NOMEM;
208 entries = MALLOC(entries_avail);
214 /* Copy @entries into an xattr item associated with @inode */
215 if ((u32)entries_size != entries_size) {
216 ERROR("\"%s\": too much xattr data!", path);
217 ret = WIMLIB_ERR_STAT;
220 ret = WIMLIB_ERR_NOMEM;
221 if (!inode_set_linux_xattrs(inode, entries, entries_size))
226 if (entries != _entries)
232 * If the file at @path has Linux-style extended attributes, read them into
233 * memory and add them to @inode as a tagged item.
/*
 * scan_linux_xattrs(): list the extended attribute names of the file at @path
 * with llistxattr() and, when the list is nonempty, hand it to
 * create_xattr_item() so that the attributes are stored on @inode.
 *
 * Handling of the llistxattr() result in the visible lines:
 *   - 0: the file has no xattrs;
 *   - negative with errno ENOTSUP or ENOSYS: xattrs unsupported or disabled;
 *   - negative with errno ERANGE: the initial names buffer was too small, so
 *     the required size is queried with a NULL buffer and size 0, and a
 *     buffer of that size is requested from MALLOC() (WIMLIB_ERR_NOMEM is the
 *     code assigned next to that allocation);
 *   - any other error: logged, and ret is set to WIMLIB_ERR_STAT.
 * NOTE(review): the declaration of the initial buffer, the retry jump and the
 * values returned for the first two cases (presumably success) are not
 * visible in this excerpt -- confirm.
 */
235 static noinline_for_stack int
236 scan_linux_xattrs(const char *path, struct wim_inode *inode)
239 char *names = _names;
240 ssize_t names_size = ARRAY_LEN(_names);
244 /* Gather the names of the xattrs of the file at @path */
245 names_size = llistxattr(path, names, names_size);
246 if (names_size == 0) /* No xattrs? */
248 if (names_size < 0) {
249 /* xattrs unsupported or disabled? */
250 if (errno == ENOTSUP || errno == ENOSYS)
252 if (errno == ERANGE) {
254 * Not enough space in @names. Ask for how much space
255 * we need, then try again.
257 names_size = llistxattr(path, NULL, 0);
260 if (names_size > 0) {
263 names = MALLOC(names_size);
265 ret = WIMLIB_ERR_NOMEM;
271 /* Some other error occurred. */
272 ERROR_WITH_ERRNO("\"%s\": unable to list extended attributes",
274 ret = WIMLIB_ERR_STAT;
279 * We have a nonempty list of xattr names. Gather the xattr values and
280 * add them as a tagged item.
282 ret = create_xattr_item(path, inode, names, names_size);
288 #endif /* HAVE_XATTR_SUPPORT */
/*
 * unix_scan_regular_file(): record a regular file on @inode.
 *
 * @path:            path of the file; a STRDUP() copy is stored in the blob
 *                   descriptor as file_on_disk.
 * @blocks, @size:   the st_blocks and st_size values supplied by the caller;
 *                   the sparse test compares @blocks against @size rounded up
 *                   to 512-byte units.
 * @inode:           inode whose i_attributes and data stream are set.
 * @unhashed_blobs:  list handed to prepare_unhashed_blob().
 *
 * The attribute is FILE_ATTRIBUTE_SPARSE_FILE when fewer blocks are used than
 * the size implies; FILE_ATTRIBUTE_NORMAL is the other value assigned.  A new
 * blob descriptor is marked BLOB_IN_FILE_ON_DISK, linked to @inode, attached
 * as an unnamed STREAM_TYPE_DATA stream and queued via
 * prepare_unhashed_blob().  The failure path frees the descriptor and returns
 * WIMLIB_ERR_NOMEM.
 * NOTE(review): the return type, the condition under which a blob is created
 * and the success return are not visible in this excerpt.
 */
291 unix_scan_regular_file(const char *path, u64 blocks, u64 size,
292 struct wim_inode *inode,
293 struct list_head *unhashed_blobs)
295 struct blob_descriptor *blob = NULL;
296 struct wim_inode_stream *strm;
299 * Set FILE_ATTRIBUTE_SPARSE_FILE if the file uses less disk space than
300 * expected given its size.
302 if (blocks < DIV_ROUND_UP(size, 512))
303 inode->i_attributes = FILE_ATTRIBUTE_SPARSE_FILE;
305 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
308 blob = new_blob_descriptor();
311 blob->file_on_disk = STRDUP(path);
312 if (unlikely(!blob->file_on_disk))
314 blob->blob_location = BLOB_IN_FILE_ON_DISK;
316 blob->file_inode = inode;
319 strm = inode_add_stream(inode, STREAM_TYPE_DATA, NO_STREAM_NAME, blob);
323 prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs);
327 free_blob_descriptor(blob);
328 return WIMLIB_ERR_NOMEM;
/*
 * Forward declaration: unix_scan_directory() calls this function for every
 * directory entry, and this function in turn calls unix_scan_directory() for
 * directories, so it must be declared before either definition.
 */
332 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
333 char *path, size_t path_len,
334 int dirfd, const char *relpath,
335 struct scan_params *params);
/*
 * unix_scan_directory(): scan the children of a directory and attach them to
 * @dir_dentry.
 *
 * @dir_dentry:     dentry of the directory; its inode is marked
 *                  FILE_ATTRIBUTE_DIRECTORY.
 * @full_path:      writable path buffer holding the directory path; child
 *                  names are appended in place for the duration of each
 *                  recursive call.
 * @full_path_len:  length of the directory path within @full_path.
 * @parent_dirfd, @dir_relpath:
 *                  location of the directory for my_openat().
 * @params:         scan parameters, passed on to the recursive scan.
 *
 * The directory is opened with my_openat() and wrapped with my_fdopendir();
 * failure of either yields WIMLIB_ERR_OPENDIR.  Entries come from readdir();
 * a read failure is reported as WIMLIB_ERR_READ.  Names for which
 * should_ignore_filename() is true are tested before the path is extended.
 * For the others, a slash and the entry name are written after the directory
 * path, unix_build_dentry_tree_recursive() is called with the name as the
 * relative path, the terminator is restored at @full_path_len, and the
 * resulting child is passed to attach_scanned_tree().
 * NOTE(review): no check that @full_path has room for the appended name is
 * visible in this excerpt -- confirm that the lines not shown bound the copy.
 * The return type, the loop and the cleanup of the directory stream are not
 * visible either.
 */
338 unix_scan_directory(struct wim_dentry *dir_dentry,
339 char *full_path, size_t full_path_len,
340 int parent_dirfd, const char *dir_relpath,
341 struct scan_params *params)
348 dirfd = my_openat(full_path, parent_dirfd, dir_relpath, O_RDONLY);
350 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
351 return WIMLIB_ERR_OPENDIR;
354 dir_dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
355 dir = my_fdopendir(&dirfd);
357 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
359 return WIMLIB_ERR_OPENDIR;
364 struct dirent *entry;
365 struct wim_dentry *child;
369 entry = readdir(dir);
372 ret = WIMLIB_ERR_READ;
373 ERROR_WITH_ERRNO("\"%s\": Error reading directory",
379 name_len = strlen(entry->d_name);
381 if (should_ignore_filename(entry->d_name, name_len))
384 full_path[full_path_len] = '/';
385 memcpy(&full_path[full_path_len + 1], entry->d_name, name_len + 1);
386 ret = unix_build_dentry_tree_recursive(&child,
388 full_path_len + 1 + name_len,
390 &full_path[full_path_len + 1],
392 full_path[full_path_len] = '\0';
395 attach_scanned_tree(dir_dentry, child, params->blob_table);
402 * Given an absolute symbolic link target (UNIX-style, beginning with '/'),
403 * determine whether it points into the directory identified by @ino and @dev.
404 * If yes, return the suffix of @target which is relative to this directory, but
405 * retaining leading slashes. If no, return @target.
407 * Here are some examples, assuming that the @ino/@dev directory is "/home/e":
409 * Original target New target
410 * --------------- ----------
412 * /home/e/test/ /test/
413 * //home//e//test// //test//
414 * /home/e (empty string)
416 * /usr/lib /usr/lib (external link)
418 * Because of the possibility of other links into the @ino/@dev directory and/or
419 * multiple path separators, we can't simply do a string comparison; instead we
420 * need to stat() each ancestor directory.
422 * If the link points directly to the @ino/@dev directory with no trailing
423 * slashes, then the new target will be an empty string. This is not a valid
424 * UNIX symlink target, but we store this in the archive anyway since the target
425 * is intended to be de-relativized when the link is extracted.
/*
 * Implementation outline for unix_relativize_link_target(), as far as the
 * visible lines show: @target is walked one path component at a time
 * (slashes, then non-slashes).  After each component the prefix seen so far
 * is passed to stat() and its st_ino/st_dev are compared with @ino/@dev.  A
 * match means the link points inside the captured tree and the abbreviated
 * path is returned; a stat() failure is treated as pointing outside the tree.
 * NOTE(review): the return type, the cursor handling and the way the prefix
 * is temporarily terminated for stat() are not visible in this excerpt.
 */
428 unix_relativize_link_target(char *target, u64 ino, u64 dev)
437 /* Skip slashes (guaranteed to be at least one here) */
446 /* Skip non-slashes (guaranteed to be at least one here) */
449 } while (*p && *p != '/');
451 /* Get the inode and device numbers for this prefix. */
454 ret = stat(target, &stbuf);
458 /* stat() failed. Assume the link points outside the
459 * directory tree being captured. */
463 if (stbuf.st_ino == ino && stbuf.st_dev == dev) {
464 /* Link points inside directory tree being captured.
465 * Return abbreviated path. */
470 /* Link does not point inside directory tree being captured. */
/*
 * unix_scan_symlink(): read a UNIX symbolic link and store it on @inode as a
 * reparse point.
 *
 * @full_path:        path of the link, used for messages and for the
 *                    path-based fallbacks of the my_*at() wrappers.
 * @dirfd, @relpath:  location of the link for my_readlinkat()/my_fstatat().
 * @inode:            inode that receives the symlink data and attributes.
 * @params:           scan parameters (add_flags, capture root identity,
 *                    progress data, blob table).
 *
 * The target is read into a stack buffer of REPARSE_POINT_MAX_SIZE bytes.  A
 * read failure, or a result that fills the whole buffer, returns
 * WIMLIB_ERR_READLINK.  When the target starts with a slash and
 * WIMLIB_ADD_FLAG_RPFIX is set, unix_relativize_link_target() is applied
 * against the capture root.  If it returns a different pointer the link
 * counts as fixed: WIM_RP_FLAG_NOT_FIXED is cleared and
 * WIMLIB_SCAN_DENTRY_FIXED_SYMLINK is reported.  Otherwise
 * WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK is reported.  The target is then
 * handed to wim_inode_set_symlink().  Finally the link is followed with
 * my_fstatat() (flags 0); if it resolves to a directory,
 * FILE_ATTRIBUTE_DIRECTORY is added.
 * NOTE(review): termination of the target buffer after the read and the
 * final return are not visible in this excerpt.
 */
474 static noinline_for_stack int
475 unix_scan_symlink(const char *full_path, int dirfd, const char *relpath,
476 struct wim_inode *inode, struct scan_params *params)
478 char orig_target[REPARSE_POINT_MAX_SIZE];
479 char *target = orig_target;
482 /* Read the UNIX symbolic link target. */
483 ret = my_readlinkat(full_path, dirfd, relpath, target,
484 sizeof(orig_target));
485 if (unlikely(ret < 0)) {
486 ERROR_WITH_ERRNO("\"%s\": Can't read target of symbolic link",
488 return WIMLIB_ERR_READLINK;
490 if (unlikely(ret >= sizeof(orig_target))) {
491 ERROR("\"%s\": target of symbolic link is too long", full_path);
492 return WIMLIB_ERR_READLINK;
496 /* If the link is absolute and reparse point fixups are enabled, then
497 * change it to be "absolute" relative to the tree being captured. */
498 if (target[0] == '/' && (params->add_flags & WIMLIB_ADD_FLAG_RPFIX)) {
499 int status = WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK;
501 params->progress.scan.cur_path = full_path;
502 params->progress.scan.symlink_target = target;
504 target = unix_relativize_link_target(target,
505 params->capture_root_ino,
506 params->capture_root_dev);
507 if (target != orig_target) {
508 /* Link target was fixed. */
509 inode->i_rp_flags &= ~WIM_RP_FLAG_NOT_FIXED;
510 status = WIMLIB_SCAN_DENTRY_FIXED_SYMLINK;
512 ret = do_scan_progress(params, status, NULL);
517 /* Translate the UNIX symlink target into a Windows reparse point. */
518 ret = wim_inode_set_symlink(inode, target, params->blob_table);
520 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
521 ERROR("\"%s\": target of symbolic link is not valid "
522 "UTF-8. This is not supported.", full_path);
527 /* On Windows, a reparse point can be set on both directory and
528 * non-directory files. Usually, a link that is intended to point to a
529 * (non-)directory is stored as a reparse point on a (non-)directory
530 * file. Replicate this behavior by examining the target file. */
532 if (my_fstatat(full_path, dirfd, relpath, &stbuf, 0) == 0 &&
533 S_ISDIR(stbuf.st_mode))
534 inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
/*
 * unix_build_dentry_tree_recursive(): scan one file, directory or symlink
 * and, for directories, everything beneath it.
 *
 * @tree_ret:         receives the resulting dentry tree.
 * @full_path:        writable buffer holding the path of the file; also used
 *                    for messages and progress reports.
 * @full_path_len:    length of the path in @full_path.
 * @dirfd, @relpath:  location of the file for the my_*at() wrappers;
 *                    @relpath is also the name given to the new dentry.
 * @params:           scan parameters; add_flags and the capture root fields
 *                    are read and updated here.
 *
 * Steps visible in this excerpt:
 *   1. try_exclude(): negative means excluded, positive means error.
 *   2. my_fstatat() reads the metadata; the flags depend on whether
 *      WIMLIB_ADD_FLAG_DEREFERENCE or WIMLIB_ADD_FLAG_ROOT is set
 *      (presumably links are followed in that case and AT_SYMLINK_NOFOLLOW
 *      is used otherwise -- confirm).  Failure gives WIMLIB_ERR_STAT.
 *   3. Without WIMLIB_ADD_FLAG_UNIX_DATA, anything that is not a regular
 *      file, directory or symlink is unsupported.  The visible lines show
 *      both an error path (WIMLIB_ERR_UNSUPPORTED_FILE) tied to
 *      WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE and a
 *      WIMLIB_SCAN_DENTRY_UNSUPPORTED progress report.
 *   4. inode_table_new_dentry() creates the dentry keyed by st_ino/st_dev;
 *      an inode with i_nlink > 1 has been seen before.
 *   5. Timestamps: creation and last-write time both come from the
 *      modification time, last-access time from the access time, with
 *      nanosecond precision when HAVE_STAT_NANOSECOND_PRECISION is defined.
 *   6. With WIMLIB_ADD_FLAG_UNIX_DATA: uid, gid, mode and rdev are stored,
 *      and xattrs are scanned when HAVE_XATTR_SUPPORT is defined.
 *   7. With WIMLIB_ADD_FLAG_ROOT: the inode and device numbers are recorded
 *      as the capture root and the flag is cleared, so it applies only to
 *      the first file scanned.
 *   8. Dispatch on file type to unix_scan_regular_file(),
 *      unix_scan_directory() or unix_scan_symlink().
 *   9. Progress is reported as WIMLIB_SCAN_DENTRY_OK or
 *      WIMLIB_SCAN_DENTRY_EXCLUDED; the error path frees the tree and passes
 *      the code through report_scan_error().
 * NOTE(review): the return type, labels, jumps and the final assignment to
 * @tree_ret are not visible in this excerpt.
 */
539 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
540 char *full_path, size_t full_path_len,
541 int dirfd, const char *relpath,
542 struct scan_params *params)
544 struct wim_dentry *tree = NULL;
545 struct wim_inode *inode = NULL;
550 ret = try_exclude(full_path, params);
551 if (unlikely(ret < 0)) /* Excluded? */
553 if (unlikely(ret > 0)) /* Error? */
556 if (params->add_flags & (WIMLIB_ADD_FLAG_DEREFERENCE |
557 WIMLIB_ADD_FLAG_ROOT))
560 stat_flags = AT_SYMLINK_NOFOLLOW;
562 ret = my_fstatat(full_path, dirfd, relpath, &stbuf, stat_flags);
565 ERROR_WITH_ERRNO("\"%s\": Can't read metadata", full_path);
566 ret = WIMLIB_ERR_STAT;
570 if (!(params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA)) {
571 if (unlikely(!S_ISREG(stbuf.st_mode) &&
572 !S_ISDIR(stbuf.st_mode) &&
573 !S_ISLNK(stbuf.st_mode)))
575 if (params->add_flags &
576 WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE)
578 ERROR("\"%s\": File type is unsupported",
580 ret = WIMLIB_ERR_UNSUPPORTED_FILE;
583 params->progress.scan.cur_path = full_path;
584 ret = do_scan_progress(params,
585 WIMLIB_SCAN_DENTRY_UNSUPPORTED,
591 ret = inode_table_new_dentry(params->inode_table, relpath,
592 stbuf.st_ino, stbuf.st_dev, false, &tree);
594 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
595 ERROR("\"%s\": filename is not valid UTF-8. "
596 "This is not supported.", full_path);
601 inode = tree->d_inode;
603 /* Already seen this inode? */
604 if (inode->i_nlink > 1)
607 #ifdef HAVE_STAT_NANOSECOND_PRECISION
608 inode->i_creation_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
609 inode->i_last_write_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
610 inode->i_last_access_time = timespec_to_wim_timestamp(&stbuf.st_atim);
612 inode->i_creation_time = time_t_to_wim_timestamp(stbuf.st_mtime);
613 inode->i_last_write_time = time_t_to_wim_timestamp(stbuf.st_mtime);
614 inode->i_last_access_time = time_t_to_wim_timestamp(stbuf.st_atime);
616 if (params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA) {
617 struct wimlib_unix_data unix_data;
619 unix_data.uid = stbuf.st_uid;
620 unix_data.gid = stbuf.st_gid;
621 unix_data.mode = stbuf.st_mode;
622 unix_data.rdev = stbuf.st_rdev;
623 if (!inode_set_unix_data(inode, &unix_data, UNIX_DATA_ALL)) {
624 ret = WIMLIB_ERR_NOMEM;
627 #ifdef HAVE_XATTR_SUPPORT
628 ret = scan_linux_xattrs(full_path, inode);
634 if (params->add_flags & WIMLIB_ADD_FLAG_ROOT) {
635 params->capture_root_ino = stbuf.st_ino;
636 params->capture_root_dev = stbuf.st_dev;
637 params->add_flags &= ~WIMLIB_ADD_FLAG_ROOT;
640 if (S_ISREG(stbuf.st_mode)) {
641 ret = unix_scan_regular_file(full_path, stbuf.st_blocks,
642 stbuf.st_size, inode,
643 params->unhashed_blobs);
644 } else if (S_ISDIR(stbuf.st_mode)) {
645 ret = unix_scan_directory(tree, full_path, full_path_len,
646 dirfd, relpath, params);
647 } else if (S_ISLNK(stbuf.st_mode)) {
648 ret = unix_scan_symlink(full_path, dirfd, relpath,
656 params->progress.scan.cur_path = full_path;
658 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_OK, inode);
660 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
663 free_dentry_tree(tree, params->blob_table);
665 ret = report_scan_error(params, ret, full_path);
672 * unix_build_dentry_tree():
673 * Builds a tree of WIM dentries from an on-disk directory tree (UNIX
674 * version; no NTFS-specific data is captured).
676 * @root_ret: Place to return a pointer to the root of the dentry tree. Set
677 * to NULL if the file or directory was excluded from capture.
679 * @root_disk_path: The path to the root of the directory tree on disk.
681 * @params: See doc for `struct scan_params'.
683 * @return: 0 on success, nonzero on failure. It is a failure if any of
684 * the files cannot be `stat'ed, or if any of the needed
685 * directories cannot be opened or read. Failure to add the files
686 * to the WIM may still occur later when trying to actually read
687 * the on-disk files during a call to wimlib_write() or
688 * wimlib_overwrite().
/*
 * Implementation notes for unix_build_dentry_tree(), as far as the visible
 * lines show: a path buffer of min(32790, PATH_MAX + 1) bytes is requested
 * from MALLOC().  A root path that does not fit, terminator included, is
 * rejected with WIMLIB_ERR_INVALID_PARAM, and the allocation has a
 * WIMLIB_ERR_NOMEM return.  The root path is copied into the buffer and its
 * length recorded in params->capture_root_nchars.  The recursive scan is then
 * started with AT_FDCWD as the directory descriptor and the buffer serving as
 * both the full path and the relative path.
 * NOTE(review): the return type, the release of path_buf and the final return
 * are not visible in this excerpt.
 */
691 unix_build_dentry_tree(struct wim_dentry **root_ret,
692 const char *root_disk_path, struct scan_params *params)
699 path_len = strlen(root_disk_path);
700 path_bufsz = min(32790, PATH_MAX + 1);
702 if (path_len >= path_bufsz)
703 return WIMLIB_ERR_INVALID_PARAM;
705 path_buf = MALLOC(path_bufsz);
707 return WIMLIB_ERR_NOMEM;
708 memcpy(path_buf, root_disk_path, path_len + 1);
710 params->capture_root_nchars = path_len;
712 ret = unix_build_dentry_tree_recursive(root_ret, path_buf, path_len,
713 AT_FDCWD, path_buf, params);
718 #endif /* !__WIN32__ */