2 * unix_capture.c: Capture a directory tree on UNIX.
6 * Copyright (C) 2012-2016 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see http://www.gnu.org/licenses/.
31 #include <limits.h> /* for PATH_MAX */
33 #include <sys/types.h>
34 #ifdef HAVE_SYS_XATTR_H
35 # include <sys/xattr.h>
39 #include "wimlib/blob_table.h"
40 #include "wimlib/dentry.h"
41 #include "wimlib/error.h"
42 #include "wimlib/reparse.h"
43 #include "wimlib/scan.h"
44 #include "wimlib/timestamp.h"
45 #include "wimlib/unix_data.h"
46 #include "wimlib/xattr.h"
/* Macro variant: call fdopendir() on the descriptor that @dirfd_p points to. */
49 # define my_fdopendir(dirfd_p) fdopendir(*(dirfd_p))
/*
 * Function variant of my_fdopendir().
 * NOTE(review): only some of its statements are visible in this excerpt.
 * Presumably it emulates fdopendir() by changing into the directory and
 * afterwards returning to the previous working directory -- confirm against
 * the full source.
 */
52 my_fdopendir(int *dirfd_p)
/* Keep a descriptor for the current working directory in old_pwd. */
57 old_pwd = open(".", O_RDONLY);
/* fchdir() returns 0 on success, so this branch runs with the working
 * directory set to the directory behind *dirfd_p. */
59 if (!fchdir(*dirfd_p)) {
/* Hand the descriptor owned by the directory stream @dir back to the caller. */
63 *dirfd_p = dirfd(dir);
/* Variant that opens @relpath relative to @dirfd with openat(); @full_path is
 * not used. */
74 # define my_openat(full_path, dirfd, relpath, flags) \
75 openat((dirfd), (relpath), (flags))
/* Variant that ignores @dirfd and @relpath and opens @full_path with open().
 * NOTE(review): the preprocessor conditional selecting between the two
 * variants is not visible in this excerpt. */
77 # define my_openat(full_path, dirfd, relpath, flags) \
78 open((full_path), (flags))
/* With HAVE_READLINKAT, read the link target of @relpath relative to @dirfd;
 * @full_path is not used. */
81 #ifdef HAVE_READLINKAT
82 # define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
83 readlinkat((dirfd), (relpath), (buf), (bufsize))
/* Alternative that ignores @dirfd and @relpath and calls readlink() on
 * @full_path.  NOTE(review): presumably the #else branch of the conditional
 * above; the #else/#endif lines are not visible in this excerpt. */
85 # define my_readlinkat(full_path, dirfd, relpath, buf, bufsize) \
86 readlink((full_path), (buf), (bufsize))
/* Variant that forwards to fstatat() using @dirfd, @relpath and @flags;
 * @full_path is not used. */
90 # define my_fstatat(full_path, dirfd, relpath, stbuf, flags) \
91 fstatat((dirfd), (relpath), (stbuf), (flags))
/*
 * Fallback my_fstatat() built from stat()/lstat(): @dirfd and @relpath are
 * ignored and @full_path is examined instead.  lstat() is used when @flags
 * contains AT_SYMLINK_NOFOLLOW, stat() otherwise.  Evaluates to the return
 * value of whichever call was made.
 *
 * The whole expansion is parenthesized so the macro behaves like a single
 * function call inside larger expressions.  Without the outer parentheses,
 * `my_fstatat(...) == 0 && cond` would attach `== 0 && cond` to the stat()
 * arm of the conditional only.
 */
#  define my_fstatat(full_path, dirfd, relpath, stbuf, flags)	\
	(((flags) & AT_SYMLINK_NOFOLLOW) ?			\
		lstat((full_path), (stbuf)) :			\
		stat((full_path), (stbuf)))
/* Local definition of AT_FDCWD.  NOTE(review): the conditional guarding this
 * definition is not visible in this excerpt. */
100 # define AT_FDCWD -100
/* Provide AT_SYMLINK_NOFOLLOW only when nothing included earlier defined it. */
103 #ifndef AT_SYMLINK_NOFOLLOW
104 # define AT_SYMLINK_NOFOLLOW 0x100
107 #ifdef HAVE_XATTR_SUPPORT
109 * Retrieves the values of the xattrs named by the null-terminated @names of the
110 * file at @path and serializes the xattr names and values into @entries. If
111 * successful, returns the number of bytes used in @entries. If unsuccessful,
112 * returns -1 and sets errno (ERANGE if @entries was too small).
/*
 * @path:         file whose attribute values are read with lgetxattr().
 * @names:        buffer of names, each followed by a null terminator.
 * @names_size:   number of bytes in @names; must be nonzero.
 * @entries:      output buffer; must be 4-byte aligned.
 * @entries_size: size of @entries in bytes; must be a multiple of 4.
 */
115 gather_xattr_entries(const char *path, const char *names, size_t names_size,
116 void *entries, size_t entries_size)
/* One-past-the-end pointers used by the bounds checks below. */
118 const char * const names_end = names + names_size;
119 void * const entries_end = entries + entries_size;
120 const char *name = names;
121 struct wimlib_xattr_entry *entry = entries;
/* Check the alignment, size, and non-emptiness preconditions. */
123 wimlib_assert((uintptr_t)entries % 4 == 0 &&
124 entries_size % 4 == 0 && names_size != 0);
/* Length of the current name, bounded by the bytes left in the name list. */
126 size_t name_len = strnlen(name, names_end - name);
/* Reject an empty name, a name with no terminator inside the list, and a
 * name whose length does not survive a round trip through 16 bits. */
130 if (name_len == 0 || name_len >= names_end - name ||
131 (u16)name_len != name_len)
135 * Note: we take care to always call lgetxattr() with a nonzero
136 * size, since zero size means to return the value length only.
138 if (entries_end - (void *)entry <= sizeof(*entry) + name_len) {
/* Fill in the little-endian header fields and copy the name; mempcpy()
 * returns the position just past the copied name, where the value goes. */
143 entry->name_len = cpu_to_le16(name_len);
144 entry->reserved = cpu_to_le16(0);
145 value = mempcpy(entry->name, name, name_len);
/* Read the attribute value straight into the remaining output space. */
147 value_len = lgetxattr(path, name, value, entries_end - value);
/* Errors other than ERANGE are logged here; ERANGE is not. */
149 if (errno != ERANGE) {
150 ERROR_WITH_ERRNO("\"%s\": unable to read "
151 "extended attribute \"%s\"",
/* The value length must be representable in 32 bits to be stored below. */
156 if ((u32)value_len != value_len)
158 entry->value_len = cpu_to_le32(value_len);
161 /* pad value to next 4-byte boundary */
162 memset(value, 0, -(uintptr_t)value & 3);
163 value += -(uintptr_t)value & 3;
/* Step over this name and its null terminator. */
166 name += name_len + 1;
167 } while (name < names_end);
/* Number of bytes between the start of @entries and the current entry
 * position. */
169 return (void *)entry - entries;
173 ERROR("\"%s\": malformed extended attribute name list", path);
/*
 * Serializes the xattrs of the file at @path whose names are listed in @names
 * (@names_size bytes) and adds them to @inode as a stream of type
 * STREAM_TYPE_LINUX_XATTR.  Serialization starts in a 1024-byte local buffer;
 * a MALLOC()ed buffer is used when that is too small.
 *
 * NOTE(review): the return statements and several branches are not visible
 * in this excerpt.  The visible failure paths set ret to WIMLIB_ERR_STAT or
 * WIMLIB_ERR_NOMEM.
 */
178 create_xattr_stream(const char *path, struct wim_inode *inode,
179 struct blob_table *blob_table,
180 const char *names, size_t names_size)
/* Local buffer, declared 4-byte aligned; gather_xattr_entries() asserts that
 * its output buffer has this alignment. */
182 char _entries[1024] _aligned_attribute(4);
183 char *entries = _entries;
184 size_t entries_avail = ARRAY_LEN(_entries);
185 ssize_t entries_size;
186 struct wim_inode_stream *strm;
190 /* Serialize the xattrs into @entries */
191 entries_size = gather_xattr_entries(path, names, names_size,
192 entries, entries_avail);
193 if (entries_size < 0) {
194 ret = WIMLIB_ERR_STAT;
197 /* Not enough space in @entries. Reallocate it. */
/* @entries differs from @_entries only after it has been replaced by a
 * MALLOC()ed buffer. */
198 if (entries != _entries)
200 ret = WIMLIB_ERR_NOMEM;
202 entries = MALLOC(entries_avail);
208 /* Add the xattr stream from the serialized xattrs in @entries */
/* ret is preset to WIMLIB_ERR_NOMEM before the calls below.
 * NOTE(review): presumably the hidden failure branches leave with this
 * value -- confirm. */
209 ret = WIMLIB_ERR_NOMEM;
210 strm = inode_add_stream_with_data(inode, STREAM_TYPE_LINUX_XATTR,
211 NO_STREAM_NAME, entries, entries_size,
/* If the stream's hash cannot be recorded on the inode, take the stream
 * back out again. */
216 if (!inode_set_linux_xattr_hash(inode, stream_hash(strm))) {
217 inode_remove_stream(inode, strm, blob_table);
/* Only a buffer other than the local array needs special handling here.
 * NOTE(review): the statement controlled by this test is not visible;
 * presumably it frees the heap buffer. */
223 if (entries != _entries)
229 * If the file at @path has Linux-style extended attributes, read them into
230 * memory and add them to @inode as a stream (deduplicated in @blob_table).
232 static noinline_for_stack int
233 scan_linux_xattrs(const char *path, struct wim_inode *inode,
234 struct blob_table *blob_table)
/* @names starts out pointing at the local array _names (its declaration is
 * not visible in this excerpt), with names_size set to its element count. */
237 char *names = _names;
238 ssize_t names_size = ARRAY_LEN(_names);
242 /* Gather the names of the xattrs of the file at @path */
244 names_size = llistxattr(path, names, names_size);
245 if (names_size == 0) /* No xattrs? */
/* A negative result means llistxattr() failed; errno decides what happens. */
247 if (names_size < 0) {
249 if (errno == ENOTSUP) /* No xattrs (unsupported or disabled)? */
251 if (errno == ERANGE) {
253 * Not enough space in @names. Ask for how much space
254 * we need, then try again.
256 names_size = llistxattr(path, NULL, 0);
257 if (names_size >= 0) {
/* ret is set to WIMLIB_ERR_NOMEM before the larger name buffer is
 * allocated.  NOTE(review): the check of the MALLOC() result is not
 * visible in this excerpt. */
260 ret = WIMLIB_ERR_NOMEM;
261 names = MALLOC(names_size);
/* A listing failure that reaches this point is reported as a warning and
 * turned into WIMLIB_ERR_STAT. */
267 WARNING_WITH_ERRNO("\"%s\": unable to list extended attributes",
269 ret = WIMLIB_ERR_STAT;
274 * We have a nonempty list of xattr names. Gather the xattr values and
275 * add the xattr stream.
277 ret = create_xattr_stream(path, inode, blob_table, names, names_size);
283 #endif /* HAVE_XATTR_SUPPORT */
/*
 * Sets the attributes of @inode for a regular file and attaches a blob that
 * refers to the file's data at @path.
 *
 * @path:           path of the file; a copy is stored in the blob.
 * @blocks, @size:  block count and byte size of the file, used for the
 *                  sparse-file check (the caller visible in this file passes
 *                  st_blocks and st_size).
 * @inode:          inode that receives the attributes and the data stream.
 * @unhashed_blobs: list passed on to prepare_unhashed_blob().
 *
 * The visible failure path returns WIMLIB_ERR_NOMEM.  NOTE(review): the
 * success return and some conditions are not visible in this excerpt.
 */
286 unix_scan_regular_file(const char *path, u64 blocks, u64 size,
287 struct wim_inode *inode,
288 struct list_head *unhashed_blobs)
290 struct blob_descriptor *blob = NULL;
291 struct wim_inode_stream *strm;
294 * Set FILE_ATTRIBUTE_SPARSE_FILE if the file uses less disk space than
295 * expected given its size.
297 if (blocks < DIV_ROUND_UP(size, 512))
298 inode->i_attributes = FILE_ATTRIBUTE_SPARSE_FILE;
300 inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
/* Build a blob descriptor located in the on-disk file: it stores a
 * duplicated copy of @path and a pointer back to @inode.
 * NOTE(review): any condition guarding the blob creation is not visible. */
303 blob = new_blob_descriptor();
306 blob->file_on_disk = STRDUP(path);
307 if (unlikely(!blob->file_on_disk))
309 blob->blob_location = BLOB_IN_FILE_ON_DISK;
311 blob->file_inode = inode;
/* Attach the blob (possibly still NULL) as the unnamed data stream. */
314 strm = inode_add_stream(inode, STREAM_TYPE_DATA, NO_STREAM_NAME, blob);
/* NOTE(review): presumably registers the blob on @unhashed_blobs so it can
 * be hashed later -- confirm against prepare_unhashed_blob(). */
318 prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs);
/* Failure path: release the blob descriptor and report out-of-memory. */
322 free_blob_descriptor(blob);
323 return WIMLIB_ERR_NOMEM;
/*
 * Forward declaration: unix_scan_directory() calls this function for each
 * directory entry, and its definition appears later in the file.
 */
327 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
328 char *path, size_t path_len,
329 int dirfd, const char *relpath,
330 struct scan_params *params);
/*
 * Scans the directory behind @dir_dentry: marks its inode as a directory,
 * reads its entries, and builds a subtree for each entry that is not ignored.
 *
 * @full_path / @full_path_len: mutable path buffer holding the directory's
 *      path and that path's length; child names are appended in place and
 *      removed again after each child.
 * @parent_dirfd / @dir_relpath: descriptor and relative name used to open the
 *      directory via my_openat().
 *
 * Returns WIMLIB_ERR_OPENDIR when the directory cannot be opened; a
 * readdir() failure sets WIMLIB_ERR_READ.  NOTE(review): loop structure,
 * cleanup and the final return are not visible in this excerpt.
 */
333 unix_scan_directory(struct wim_dentry *dir_dentry,
334 char *full_path, size_t full_path_len,
335 int parent_dirfd, const char *dir_relpath,
336 struct scan_params *params)
/* Open the directory read-only. */
343 dirfd = my_openat(full_path, parent_dirfd, dir_relpath, O_RDONLY);
345 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
346 return WIMLIB_ERR_OPENDIR;
/* Mark the inode as a directory, then obtain a directory stream for the
 * descriptor (my_fdopendir() may update @dirfd). */
349 dir_dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
350 dir = my_fdopendir(&dirfd);
352 ERROR_WITH_ERRNO("\"%s\": Can't open directory", full_path);
354 return WIMLIB_ERR_OPENDIR;
359 struct dirent *entry;
360 struct wim_dentry *child;
364 entry = readdir(dir);
367 ret = WIMLIB_ERR_READ;
368 ERROR_WITH_ERRNO("\"%s\": Error reading directory",
374 name_len = strlen(entry->d_name);
376 if (should_ignore_filename(entry->d_name, name_len))
/* Append "/" and the entry name (with its terminator) to @full_path in place,
 * then scan the child; its relative path is the part after the slash. */
379 full_path[full_path_len] = '/';
380 memcpy(&full_path[full_path_len + 1], entry->d_name, name_len + 1);
381 ret = unix_build_dentry_tree_recursive(&child,
383 full_path_len + 1 + name_len,
385 &full_path[full_path_len + 1],
/* Cut @full_path back to the directory's own path. */
387 full_path[full_path_len] = '\0';
/* NOTE(review): presumably links @child into @dir_dentry -- confirm against
 * attach_scanned_tree(). */
390 attach_scanned_tree(dir_dentry, child, params->blob_table);
397 * Given an absolute symbolic link target (UNIX-style, beginning with '/'),
398 * determine whether it points into the directory identified by @ino and @dev.
399 * If yes, return the suffix of @target which is relative to this directory, but
400 * retaining leading slashes. If no, return @target.
402 * Here are some examples, assuming that the @ino/@dev directory is "/home/e":
404 * Original target New target
405 * --------------- ----------
407 * /home/e/test/ /test/
408 * //home//e//test// //test//
409 * /home/e (empty string)
411 * /usr/lib /usr/lib (external link)
413 * Because of the possibility of other links into the @ino/@dev directory and/or
414 * multiple path separators, we can't simply do a string comparison; instead we
415 * need to stat() each ancestor directory.
417 * If the link points directly to the @ino/@dev directory with no trailing
418 * slashes, then the new target will be an empty string. This is not a valid
419 * UNIX symlink target, but we store this in the archive anyway since the target
420 * is intended to be de-relativized when the link is extracted.
/*
 * @target is taken as a non-const pointer.  NOTE(review): most of the body is
 * not visible in this excerpt; presumably @target is terminated temporarily
 * at each prefix so that stat() can be called on it -- confirm against the
 * full source.
 */
423 unix_relativize_link_target(char *target, u64 ino, u64 dev)
432 /* Skip slashes (guaranteed to be at least one here) */
441 /* Skip non-slashes (guaranteed to be at least one here) */
444 } while (*p && *p != '/');
446 /* Get the inode and device numbers for this prefix. */
449 ret = stat(target, &stbuf);
453 /* stat() failed. Assume the link points outside the
454 * directory tree being captured. */
/* A prefix matches when both its inode number and device number equal the
 * ones supplied by the caller. */
458 if (stbuf.st_ino == ino && stbuf.st_dev == dev) {
459 /* Link points inside directory tree being captured.
460 * Return abbreviated path. */
465 /* Link does not point inside directory tree being captured. */
/*
 * Reads the target of the symbolic link at @full_path (or @relpath relative
 * to @dirfd), optionally rewrites an absolute target relative to the capture
 * root, and stores the result on @inode via wim_inode_set_symlink().
 *
 * Returns WIMLIB_ERR_READLINK when the target cannot be read or is too long.
 * NOTE(review): the remaining return statements are not visible in this
 * excerpt.
 */
469 static noinline_for_stack int
470 unix_scan_symlink(const char *full_path, int dirfd, const char *relpath,
471 struct wim_inode *inode, struct scan_params *params)
473 char orig_target[REPARSE_POINT_MAX_SIZE];
474 char *target = orig_target;
477 /* Read the UNIX symbolic link target. */
478 ret = my_readlinkat(full_path, dirfd, relpath, target,
479 sizeof(orig_target));
480 if (unlikely(ret < 0)) {
481 ERROR_WITH_ERRNO("\"%s\": Can't read target of symbolic link",
483 return WIMLIB_ERR_READLINK;
/* A length that reaches the buffer size is rejected as too long. */
485 if (unlikely(ret >= sizeof(orig_target))) {
486 ERROR("\"%s\": target of symbolic link is too long", full_path);
487 return WIMLIB_ERR_READLINK;
491 /* If the link is absolute and reparse point fixups are enabled, then
492 * change it to be "absolute" relative to the tree being captured. */
493 if (target[0] == '/' && (params->add_flags & WIMLIB_ADD_FLAG_RPFIX)) {
494 int status = WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK;
/* Record the path and the unmodified target in the progress data before the
 * target pointer is possibly advanced. */
496 params->progress.scan.cur_path = full_path;
497 params->progress.scan.symlink_target = target;
499 target = unix_relativize_link_target(target,
500 params->capture_root_ino,
501 params->capture_root_dev);
/* A returned pointer different from the buffer start means a prefix was
 * stripped. */
502 if (target != orig_target) {
503 /* Link target was fixed. */
504 inode->i_rp_flags &= ~WIM_RP_FLAG_NOT_FIXED;
505 status = WIMLIB_SCAN_DENTRY_FIXED_SYMLINK;
/* Report the fixed / not-fixed status through the progress callback. */
507 ret = do_scan_progress(params, status, NULL);
512 /* Translate the UNIX symlink target into a Windows reparse point. */
513 ret = wim_inode_set_symlink(inode, target, params->blob_table);
515 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
516 ERROR("\"%s\": target of symbolic link is not valid "
517 "UTF-8. This is not supported.", full_path);
522 /* On Windows, a reparse point can be set on both directory and
523 * non-directory files. Usually, a link that is intended to point to a
524 * (non-)directory is stored as a reparse point on a (non-)directory
525 * file. Replicate this behavior by examining the target file. */
/* Flags of 0 (no AT_SYMLINK_NOFOLLOW) make the stat follow the link, so
 * stbuf describes the file the link points to.  A failed stat leaves the
 * attributes unchanged. */
527 if (my_fstatat(full_path, dirfd, relpath, &stbuf, 0) == 0 &&
528 S_ISDIR(stbuf.st_mode))
529 inode->i_attributes |= FILE_ATTRIBUTE_DIRECTORY;
/*
 * Scans one file at @full_path (also addressable as @relpath relative to
 * @dirfd): applies exclusion, stats it, creates a dentry and inode for it,
 * fills in timestamps and optional UNIX data, and then dispatches on the file
 * type to scan a regular file, a directory, or a symbolic link.
 *
 * @tree_ret:       location for the resulting tree.  NOTE(review): the
 *                  assignment to it is not visible in this excerpt.
 * @full_path:      mutable path buffer; @full_path_len is the path's length.
 * @params:         scan parameters; add_flags, capture_root_ino/dev and the
 *                  progress fields are read and updated here.
 *
 * NOTE(review): labels, gotos and return statements are not visible in this
 * excerpt, so the exact control flow between the steps below must be checked
 * against the full source.
 */
534 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
535 char *full_path, size_t full_path_len,
536 int dirfd, const char *relpath,
537 struct scan_params *params)
539 struct wim_dentry *tree = NULL;
540 struct wim_inode *inode = NULL;
/* Exclusion check: per the inline notes below, a negative result means the
 * path is excluded and a positive result is an error. */
545 ret = try_exclude(full_path, params);
546 if (unlikely(ret < 0)) /* Excluded? */
548 if (unlikely(ret > 0)) /* Error? */
/* Choose whether the stat below follows symbolic links.
 * NOTE(review): the statement taken when DEREFERENCE or ROOT is set is not
 * visible; presumably it clears stat_flags so that links are followed. */
551 if (params->add_flags & (WIMLIB_ADD_FLAG_DEREFERENCE |
552 WIMLIB_ADD_FLAG_ROOT))
555 stat_flags = AT_SYMLINK_NOFOLLOW;
557 ret = my_fstatat(full_path, dirfd, relpath, &stbuf, stat_flags);
560 ERROR_WITH_ERRNO("\"%s\": Can't read metadata", full_path);
561 ret = WIMLIB_ERR_STAT;
/* Without WIMLIB_ADD_FLAG_UNIX_DATA, anything that is not a regular file,
 * directory or symlink is unsupported: with NO_UNSUPPORTED_EXCLUDE it is an
 * error, otherwise it is reported through the progress callback. */
565 if (!(params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA)) {
566 if (unlikely(!S_ISREG(stbuf.st_mode) &&
567 !S_ISDIR(stbuf.st_mode) &&
568 !S_ISLNK(stbuf.st_mode)))
570 if (params->add_flags &
571 WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE)
573 ERROR("\"%s\": File type is unsupported",
575 ret = WIMLIB_ERR_UNSUPPORTED_FILE;
578 params->progress.scan.cur_path = full_path;
579 ret = do_scan_progress(params,
580 WIMLIB_SCAN_DENTRY_UNSUPPORTED,
/* Create the dentry, keyed in the inode table by inode and device number. */
586 ret = inode_table_new_dentry(params->inode_table, relpath,
587 stbuf.st_ino, stbuf.st_dev, false, &tree);
589 if (ret == WIMLIB_ERR_INVALID_UTF8_STRING) {
590 ERROR("\"%s\": filename is not valid UTF-8. "
591 "This is not supported.", full_path);
596 inode = tree->d_inode;
598 /* Already seen this inode? */
599 if (inode->i_nlink > 1)
/* The modification time is used for both the creation and last-write
 * timestamps; the access time is stored as the last-access timestamp. */
602 #ifdef HAVE_STAT_NANOSECOND_PRECISION
603 inode->i_creation_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
604 inode->i_last_write_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
605 inode->i_last_access_time = timespec_to_wim_timestamp(&stbuf.st_atim);
607 inode->i_creation_time = time_t_to_wim_timestamp(stbuf.st_mtime);
608 inode->i_last_write_time = time_t_to_wim_timestamp(stbuf.st_mtime);
609 inode->i_last_access_time = time_t_to_wim_timestamp(stbuf.st_atime);
/* When UNIX data was requested, store owner, group, mode and device number
 * on the inode; failure to store them is treated as out-of-memory. */
611 if (params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA) {
612 struct wimlib_unix_data unix_data;
614 unix_data.uid = stbuf.st_uid;
615 unix_data.gid = stbuf.st_gid;
616 unix_data.mode = stbuf.st_mode;
617 unix_data.rdev = stbuf.st_rdev;
618 if (!inode_set_unix_data(inode, &unix_data, UNIX_DATA_ALL)) {
619 ret = WIMLIB_ERR_NOMEM;
622 #ifdef HAVE_XATTR_SUPPORT
623 ret = scan_linux_xattrs(full_path, inode, params->blob_table);
/* The file scanned while WIMLIB_ADD_FLAG_ROOT is set is the capture root:
 * remember its inode/device numbers and clear the flag so that files scanned
 * afterwards are not treated as the root. */
629 if (params->add_flags & WIMLIB_ADD_FLAG_ROOT) {
630 params->capture_root_ino = stbuf.st_ino;
631 params->capture_root_dev = stbuf.st_dev;
632 params->add_flags &= ~WIMLIB_ADD_FLAG_ROOT;
/* Dispatch on the file type reported by the stat above. */
635 if (S_ISREG(stbuf.st_mode)) {
636 ret = unix_scan_regular_file(full_path, stbuf.st_blocks,
637 stbuf.st_size, inode,
638 params->unhashed_blobs);
639 } else if (S_ISDIR(stbuf.st_mode)) {
640 ret = unix_scan_directory(tree, full_path, full_path_len,
641 dirfd, relpath, params);
642 } else if (S_ISLNK(stbuf.st_mode)) {
643 ret = unix_scan_symlink(full_path, dirfd, relpath,
/* Exit paths: progress is reported as OK or EXCLUDED, and on failure the
 * partially built tree is freed and the error is passed through
 * report_scan_error().  NOTE(review): the conditions and labels selecting
 * between these statements are not visible in this excerpt. */
651 params->progress.scan.cur_path = full_path;
653 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_OK, inode);
655 ret = do_scan_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
658 free_dentry_tree(tree, params->blob_table);
660 ret = report_scan_error(params, ret, full_path);
667 * unix_build_dentry_tree():
668 * Builds a tree of WIM dentries from an on-disk directory tree (UNIX
669 * version; no NTFS-specific data is captured).
671 * @root_ret: Place to return a pointer to the root of the dentry tree. Set
672 * to NULL if the file or directory was excluded from capture.
674 * @root_disk_path: The path to the root of the directory tree on disk.
676 * @params: See doc for `struct scan_params'.
678 * @return: 0 on success, nonzero on failure. It is a failure if any of
679 * the files cannot be `stat'ed, or if any of the needed
680 * directories cannot be opened or read. Failure to add the files
681 * to the WIM may still occur later when trying to actually read
682 * the on-disk files during a call to wimlib_write() or
683 * wimlib_overwrite().
686 unix_build_dentry_tree(struct wim_dentry **root_ret,
687 const char *root_disk_path, struct scan_params *params)
/* The scan works on one mutable path buffer whose size is the smaller of
 * 32790 and PATH_MAX + 1 bytes. */
694 path_len = strlen(root_disk_path);
695 path_bufsz = min(32790, PATH_MAX + 1);
/* The root path plus its terminator must fit in that buffer. */
697 if (path_len >= path_bufsz)
698 return WIMLIB_ERR_INVALID_PARAM;
700 path_buf = MALLOC(path_bufsz);
702 return WIMLIB_ERR_NOMEM;
/* Copy the root path including its null terminator. */
703 memcpy(path_buf, root_disk_path, path_len + 1);
705 params->capture_root_nchars = path_len;
/* Start the scan at the root: relative to AT_FDCWD, the relative path is the
 * full path itself.  NOTE(review): the release of path_buf and the final
 * return are not visible in this excerpt. */
707 ret = unix_build_dentry_tree_recursive(root_ret, path_buf, path_len,
708 AT_FDCWD, path_buf, params);
713 #endif /* !__WIN32__ */