add System Compression support
[wimlib] / src / unix_capture.c
index 281cbcb..2843c36 100644 (file)
@@ -5,20 +5,18 @@
 /*
  * Copyright (C) 2012, 2013, 2014 Eric Biggers
  *
- * This file is part of wimlib, a library for working with WIM files.
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
  *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
- *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  * details.
  *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
  */
 
 #ifndef __WIN32__
 #include <sys/stat.h>
 #include <unistd.h>
 
+#include "wimlib/blob_table.h"
 #include "wimlib/capture.h"
 #include "wimlib/dentry.h"
 #include "wimlib/error.h"
-#include "wimlib/lookup_table.h"
 #include "wimlib/reparse.h"
 #include "wimlib/timestamp.h"
+#include "wimlib/unix_data.h"
 
 #ifdef HAVE_FDOPENDIR
 #  define my_fdopendir(dirfd_p) fdopendir(*(dirfd_p))
@@ -102,44 +101,48 @@ my_fdopendir(int *dirfd_p)
 
 static int
 unix_scan_regular_file(const char *path, u64 size, struct wim_inode *inode,
-                      struct list_head *unhashed_streams)
+                      struct list_head *unhashed_blobs) /* Scan one regular file: attach an unnamed data stream to @inode */
 {
-       struct wim_lookup_table_entry *lte;
-       char *file_on_disk;
+       struct blob_descriptor *blob = NULL; /* stays NULL when @size is 0 */
+       struct wim_inode_stream *strm;
 
        inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
 
-       /* Empty files do not have to have a lookup table entry. */
-       if (!size)
-               return 0;
-
-       file_on_disk = STRDUP(path);
-       if (!file_on_disk)
-               return WIMLIB_ERR_NOMEM;
-       lte = new_lookup_table_entry();
-       if (!lte) {
-               FREE(file_on_disk);
-               return WIMLIB_ERR_NOMEM;
+       if (size) { /* only a nonempty file gets a blob descriptor */
+               blob = new_blob_descriptor();
+               if (unlikely(!blob))
+                       goto err_nomem;
+               blob->file_on_disk = STRDUP(path); /* private copy of @path, stored in the blob */
+               if (unlikely(!blob->file_on_disk))
+                       goto err_nomem;
+               blob->blob_location = BLOB_IN_FILE_ON_DISK;
+               blob->size = size;
+               blob->file_inode = inode;
        }
-       lte->file_on_disk = file_on_disk;
-       lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
-       lte->size = size;
-       add_unhashed_stream(lte, inode, 0, unhashed_streams);
-       inode->i_lte = lte;
+
+       strm = inode_add_stream(inode, STREAM_TYPE_DATA, NO_STREAM_NAME, blob); /* called for empty files too, with blob == NULL */
+       if (unlikely(!strm))
+               goto err_nomem;
+
+       prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs); /* NOTE(review): blob is NULL for empty files -- confirm the helper accepts that */
        return 0;
+
+err_nomem: /* every failure in this function is reported as out-of-memory */
+       free_blob_descriptor(blob); /* NOTE(review): blob may be NULL here -- confirm the helper accepts that */
+       return WIMLIB_ERR_NOMEM;
 }
 
 static int
 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
                                 char *path, size_t path_len,
                                 int dirfd, const char *relpath,
-                                struct add_image_params *params);
+                                struct capture_params *params);
 
 static int
 unix_scan_directory(struct wim_dentry *dir_dentry,
                    char *full_path, size_t full_path_len,
                    int parent_dirfd, const char *dir_relpath,
-                   struct add_image_params *params)
+                   struct capture_params *params)
 {
 
        int dirfd;
@@ -177,13 +180,12 @@ unix_scan_directory(struct wim_dentry *dir_dentry,
                        break;
                }
 
-               if (entry->d_name[0] == '.' &&
-                   (entry->d_name[1] == '\0' ||
-                    (entry->d_name[1] == '.' && entry->d_name[2] == '\0')))
+               name_len = strlen(entry->d_name);
+
+               if (should_ignore_filename(entry->d_name, name_len))
                        continue;
 
                full_path[full_path_len] = '/';
-               name_len = strlen(entry->d_name);
                memcpy(&full_path[full_path_len + 1], entry->d_name, name_len + 1);
                ret = unix_build_dentry_tree_recursive(&child,
                                                       full_path,
@@ -194,39 +196,66 @@ unix_scan_directory(struct wim_dentry *dir_dentry,
                full_path[full_path_len] = '\0';
                if (ret)
                        break;
-               if (child)
-                       dentry_add_child(dir_dentry, child);
+               attach_scanned_tree(dir_dentry, child, params->blob_table);
        }
        closedir(dir);
        return ret;
 }
 
-/* Given an absolute symbolic link target @dest (UNIX-style, beginning
- * with '/'), determine whether it points into the directory specified by
- * @ino and @dev.  If so, return the target modified to be "absolute"
- * relative to this directory.  Otherwise, return NULL.  */
+/*
+ * Given an absolute symbolic link target (UNIX-style, beginning with '/'),
+ * determine whether it points into the directory identified by @ino and @dev.
+ * If yes, return the suffix of @target which is relative to this directory, but
+ * retaining leading slashes.  If no, return @target.
+ *
+ * Here are some examples, assuming that the @ino/@dev directory is "/home/e":
+ *
+ *     Original target         New target
+ *     ---------------         ----------
+ *     /home/e/test            /test
+ *     /home/e/test/           /test/
+ *     //home//e//test//       //test//
+ *     /home/e                                         (empty string)
+ *     /home/e/                /
+ *     /usr/lib                /usr/lib                (external link)
+ *
+ * Because of the possibility of other links into the @ino/@dev directory and/or
+ * multiple path separators, we can't simply do a string comparison; instead we
+ * need to stat() each ancestor directory.
+ *
+ * If the link points directly to the @ino/@dev directory with no trailing
+ * slashes, then the new target will be an empty string.  This is not a valid
+ * UNIX symlink target, but we store this in the archive anyway since the target
+ * is intended to be de-relativized when the link is extracted.
+ */
 static char *
-unix_fixup_abslink(char *dest, u64 ino, u64 dev)
+unix_relativize_link_target(char *target, u64 ino, u64 dev)
 {
-       char *p = dest;
+       char *p = target;
 
        do {
                char save;
                struct stat stbuf;
                int ret;
 
-               /* Skip non-slashes.  */
-               while (*p && *p != '/')
+               /* Skip slashes (guaranteed to be at least one here)  */
+               do {
                        p++;
+               } while (*p == '/');
+
+               /* End of string?  */
+               if (!*p)
+                       break;
 
-               /* Skip slashes.  */
-               while (*p && *p == '/')
+               /* Skip non-slashes (guaranteed to be at least one here)  */
+               do {
                        p++;
+               } while (*p && *p != '/');
 
-               /* Get inode and device for this prefix.  */
+               /* Get the inode and device numbers for this prefix.  */
                save = *p;
                *p = '\0';
-               ret = stat(dest, &stbuf);
+               ret = stat(target, &stbuf);
                *p = save;
 
                if (ret) {
@@ -238,74 +267,66 @@ unix_fixup_abslink(char *dest, u64 ino, u64 dev)
                if (stbuf.st_ino == ino && stbuf.st_dev == dev) {
                        /* Link points inside directory tree being captured.
                         * Return abbreviated path.  */
-                       *--p = '/';
-                       while (p > dest && *(p - 1) == '/')
-                               p--;
                        return p;
                }
        } while (*p);
 
        /* Link does not point inside directory tree being captured.  */
-       return NULL;
+       return target;
 }
 
-static int
-unix_scan_symlink(struct wim_dentry **root_p, const char *full_path,
-                 int dirfd, const char *relpath,
-                 struct wim_inode *inode, struct add_image_params *params)
+static noinline_for_stack int
+unix_scan_symlink(const char *full_path, int dirfd, const char *relpath,
+                 struct wim_inode *inode, struct capture_params *params)
 {
-       char deref_name_buf[4096];
-       ssize_t deref_name_len;
-       char *dest;
+       char orig_target[REPARSE_POINT_MAX_SIZE];
+       char *target = orig_target;
        int ret;
 
-       inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
-       inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
-
-       /* The idea here is to call readlink() to get the UNIX target of the
-        * symbolic link, then turn the target into a reparse point data buffer
-        * that contains a relative or absolute symbolic link. */
-       deref_name_len = my_readlinkat(full_path, dirfd, relpath,
-                                      deref_name_buf, sizeof(deref_name_buf) - 1);
-       if (deref_name_len < 0) {
+       /* Read the UNIX symbolic link target.  */
+       ret = my_readlinkat(full_path, dirfd, relpath, target,
+                           sizeof(orig_target));
+       if (unlikely(ret < 0)) {
                ERROR_WITH_ERRNO("\"%s\": Can't read target of symbolic link",
                                 full_path);
                return WIMLIB_ERR_READLINK;
        }
+       if (unlikely(ret >= sizeof(orig_target))) {
+               ERROR("\"%s\": target of symbolic link is too long", full_path);
+               return WIMLIB_ERR_READLINK;
+       }
+       target[ret] = '\0';
 
-       dest = deref_name_buf;
-
-       dest[deref_name_len] = '\0';
-
-       if ((params->add_flags & WIMLIB_ADD_FLAG_RPFIX) &&
-            dest[0] == '/')
-       {
-               dest = unix_fixup_abslink(dest,
-                                         params->capture_root_ino,
-                                         params->capture_root_dev);
-               if (!dest) {
-                       /* RPFIX (reparse point fixup) mode:  Ignore
-                        * absolute symbolic link that points out of the
-                        * tree to be captured.  */
-                       free_dentry(*root_p);
-                       *root_p = NULL;
-                       params->progress.scan.cur_path = full_path;
-                       params->progress.scan.symlink_target = deref_name_buf;
-                       do_capture_progress(params,
-                                           WIMLIB_SCAN_DENTRY_EXCLUDED_SYMLINK,
-                                           NULL);
-                       return 0;
+       /* If the link is absolute and reparse point fixups are enabled, then
+        * change it to be "absolute" relative to the tree being captured.  */
+       if (target[0] == '/' && (params->add_flags & WIMLIB_ADD_FLAG_RPFIX)) {
+               int status = WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK;
+
+               params->progress.scan.cur_path = full_path;
+               params->progress.scan.symlink_target = target;
+
+               target = unix_relativize_link_target(target,
+                                                    params->capture_root_ino,
+                                                    params->capture_root_dev);
+               if (target != orig_target) {
+                       /* Link target was fixed.  */
+                       inode->i_rp_flags &= ~WIM_RP_FLAG_NOT_FIXED;
+                       status = WIMLIB_SCAN_DENTRY_FIXED_SYMLINK;
                }
-               inode->i_not_rpfixed = 0;
+               ret = do_capture_progress(params, status, NULL);
+               if (ret)
+                       return ret;
        }
-       ret = wim_inode_set_symlink(inode, dest, params->lookup_table);
+
+       /* Translate the UNIX symlink target into a Windows reparse point.  */
+       ret = wim_inode_set_symlink(inode, target, params->blob_table);
        if (ret)
                return ret;
 
-       /* Unfortunately, Windows seems to have the concept of "file" symbolic
-        * links as being different from "directory" symbolic links...  so
-        * FILE_ATTRIBUTE_DIRECTORY needs to be set on the symbolic link if the
-        * *target* of the symbolic link is a directory.  */
+       /* On Windows, a reparse point can be set on both directory and
+        * non-directory files.  Usually, a link that is intended to point to a
+        * (non-)directory is stored as a reparse point on a (non-)directory
+        * file.  Replicate this behavior by examining the target file.  */
        struct stat stbuf;
        if (my_fstatat(full_path, dirfd, relpath, &stbuf, 0) == 0 &&
            S_ISDIR(stbuf.st_mode))
@@ -317,7 +338,7 @@ static int
 unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
                                 char *full_path, size_t full_path_len,
                                 int dirfd, const char *relpath,
-                                struct add_image_params *params)
+                                struct capture_params *params)
 {
        struct wim_dentry *tree = NULL;
        struct wim_inode *inode = NULL;
@@ -325,13 +346,11 @@ unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
        struct stat stbuf;
        int stat_flags;
 
-       if (should_exclude_path(full_path + params->capture_root_nchars,
-                               full_path_len - params->capture_root_nchars,
-                               params->config))
-       {
-               ret = 0;
+       ret = try_exclude(full_path, params);
+       if (unlikely(ret < 0)) /* Excluded? */
                goto out_progress;
-       }
+       if (unlikely(ret > 0)) /* Error? */
+               goto out;
 
        if (params->add_flags & (WIMLIB_ADD_FLAG_DEREFERENCE |
                                 WIMLIB_ADD_FLAG_ROOT))
@@ -347,50 +366,58 @@ unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
                goto out;
        }
 
-       if (unlikely(!S_ISREG(stbuf.st_mode) &&
-                    !S_ISDIR(stbuf.st_mode) &&
-                    !S_ISLNK(stbuf.st_mode)))
-       {
-               if (params->add_flags & WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE)
+       if (!(params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA)) {
+               if (unlikely(!S_ISREG(stbuf.st_mode) &&
+                            !S_ISDIR(stbuf.st_mode) &&
+                            !S_ISLNK(stbuf.st_mode)))
                {
-                       ERROR("\"%s\": File type is unsupported", full_path);
-                       ret = WIMLIB_ERR_UNSUPPORTED_FILE;
+                       if (params->add_flags &
+                           WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE)
+                       {
+                               ERROR("\"%s\": File type is unsupported",
+                                     full_path);
+                               ret = WIMLIB_ERR_UNSUPPORTED_FILE;
+                               goto out;
+                       }
+                       params->progress.scan.cur_path = full_path;
+                       ret = do_capture_progress(params,
+                                                 WIMLIB_SCAN_DENTRY_UNSUPPORTED,
+                                                 NULL);
                        goto out;
                }
-               params->progress.scan.cur_path = full_path;
-               do_capture_progress(params, WIMLIB_SCAN_DENTRY_UNSUPPORTED, NULL);
-               ret = 0;
-               goto out;
        }
 
        ret = inode_table_new_dentry(params->inode_table, relpath,
-                                    stbuf.st_ino, stbuf.st_dev,
-                                    S_ISDIR(stbuf.st_mode), &tree);
+                                    stbuf.st_ino, stbuf.st_dev, false, &tree);
        if (ret)
                goto out;
 
        inode = tree->d_inode;
 
-       if (inode->i_nlink > 1) {
-               /* Already seen this inode?  */
-               ret = 0;
+       /* Already seen this inode?  */
+       if (inode->i_nlink > 1)
                goto out_progress;
-       }
 
 #ifdef HAVE_STAT_NANOSECOND_PRECISION
-       inode->i_creation_time = timespec_to_wim_timestamp(stbuf.st_mtim);
-       inode->i_last_write_time = timespec_to_wim_timestamp(stbuf.st_mtim);
-       inode->i_last_access_time = timespec_to_wim_timestamp(stbuf.st_atim);
+       inode->i_creation_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
+       inode->i_last_write_time = timespec_to_wim_timestamp(&stbuf.st_mtim);
+       inode->i_last_access_time = timespec_to_wim_timestamp(&stbuf.st_atim);
 #else
-       inode->i_creation_time = unix_timestamp_to_wim(stbuf.st_mtime);
-       inode->i_last_write_time = unix_timestamp_to_wim(stbuf.st_mtime);
-       inode->i_last_access_time = unix_timestamp_to_wim(stbuf.st_atime);
+       inode->i_creation_time = time_t_to_wim_timestamp(stbuf.st_mtime);
+       inode->i_last_write_time = time_t_to_wim_timestamp(stbuf.st_mtime);
+       inode->i_last_access_time = time_t_to_wim_timestamp(stbuf.st_atime);
 #endif
-       inode->i_resolved = 1;
        if (params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA) {
-               inode->i_unix_data.uid = stbuf.st_uid;
-               inode->i_unix_data.gid = stbuf.st_gid;
-               inode->i_unix_data.mode = stbuf.st_mode;
+               struct wimlib_unix_data unix_data;
+
+               unix_data.uid = stbuf.st_uid;
+               unix_data.gid = stbuf.st_gid;
+               unix_data.mode = stbuf.st_mode;
+               unix_data.rdev = stbuf.st_rdev;
+               if (!inode_set_unix_data(inode, &unix_data, UNIX_DATA_ALL)) {
+                       ret = WIMLIB_ERR_NOMEM;
+                       goto out;
+               }
        }
 
        if (params->add_flags & WIMLIB_ADD_FLAG_ROOT) {
@@ -401,15 +428,13 @@ unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
 
        if (S_ISREG(stbuf.st_mode)) {
                ret = unix_scan_regular_file(full_path, stbuf.st_size,
-                                            inode, params->unhashed_streams);
+                                            inode, params->unhashed_blobs);
        } else if (S_ISDIR(stbuf.st_mode)) {
                ret = unix_scan_directory(tree, full_path, full_path_len,
                                          dirfd, relpath, params);
-       } else {
-               ret = unix_scan_symlink(&tree, full_path, dirfd, relpath,
+       } else if (S_ISLNK(stbuf.st_mode)) {
+               ret = unix_scan_symlink(full_path, dirfd, relpath,
                                        inode, params);
-               if (!tree)
-                       goto out;
        }
 
        if (ret)
@@ -418,14 +443,16 @@ unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
 out_progress:
        params->progress.scan.cur_path = full_path;
        if (likely(tree))
-               do_capture_progress(params, WIMLIB_SCAN_DENTRY_OK, inode);
+               ret = do_capture_progress(params, WIMLIB_SCAN_DENTRY_OK, inode);
        else
-               do_capture_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
+               ret = do_capture_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
 out:
-       if (likely(ret == 0))
-               *tree_ret = tree;
-       else
-               free_dentry_tree(tree, params->lookup_table);
+       if (unlikely(ret)) {
+               free_dentry_tree(tree, params->blob_table);
+               tree = NULL;
+               ret = report_capture_error(params, ret, full_path);
+       }
+       *tree_ret = tree;
        return ret;
 }
 
@@ -440,7 +467,7 @@ out:
  *
  * @root_disk_path:  The path to the root of the directory tree on disk.
  *
- * @params:     See doc for `struct add_image_params'.
+ * @params:     See doc for `struct capture_params'.
  *
  * @return:    0 on success, nonzero on failure.  It is a failure if any of
  *             the files cannot be `stat'ed, or if any of the needed
@@ -452,7 +479,7 @@ out:
 int
 unix_build_dentry_tree(struct wim_dentry **root_ret,
                       const char *root_disk_path,
-                      struct add_image_params *params)
+                      struct capture_params *params)
 {
        size_t path_len;
        size_t path_bufsz;