add System Compression support
[wimlib] / src / unix_capture.c
index a61d91f..2843c36 100644 (file)
@@ -103,35 +103,33 @@ static int
 unix_scan_regular_file(const char *path, u64 size, struct wim_inode *inode,
                       struct list_head *unhashed_blobs)
 {
-       struct blob_descriptor *blob;
+       struct blob_descriptor *blob = NULL; /* stays NULL when size == 0 */
        struct wim_inode_stream *strm;
 
        inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
 
        if (size) {
-               char *file_on_disk = STRDUP(path);
-               if (!file_on_disk)
-                       return WIMLIB_ERR_NOMEM;
                blob = new_blob_descriptor();
-               if (!blob) {
-                       FREE(file_on_disk);
-                       return WIMLIB_ERR_NOMEM;
-               }
-               blob->file_on_disk = file_on_disk;
-               blob->file_inode = inode;
+               if (unlikely(!blob))
+                       goto err_nomem;
+               blob->file_on_disk = STRDUP(path);
+               if (unlikely(!blob->file_on_disk))
+                       goto err_nomem; /* releases the new blob too */
                blob->blob_location = BLOB_IN_FILE_ON_DISK;
                blob->size = size;
-       } else {
-               blob = NULL;
+               blob->file_inode = inode;
        }
 
        strm = inode_add_stream(inode, STREAM_TYPE_DATA, NO_STREAM_NAME, blob);
-       if (!strm) {
-               free_blob_descriptor(blob);
-               return WIMLIB_ERR_NOMEM;
-       }
+       if (unlikely(!strm))
+               goto err_nomem; /* blob, if any, is released there */
+
        prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs);
        return 0;
+
+err_nomem: /* common exit for every allocation failure above */
+       free_blob_descriptor(blob); /* blob may be NULL here */
+       return WIMLIB_ERR_NOMEM;
 }
 
 static int
@@ -182,13 +180,12 @@ unix_scan_directory(struct wim_dentry *dir_dentry,
                        break;
                }
 
-               if (entry->d_name[0] == '.' &&
-                   (entry->d_name[1] == '\0' ||
-                    (entry->d_name[1] == '.' && entry->d_name[2] == '\0')))
+               name_len = strlen(entry->d_name);
+
+               if (should_ignore_filename(entry->d_name, name_len))
                        continue;
 
                full_path[full_path_len] = '/';
-               name_len = strlen(entry->d_name);
                memcpy(&full_path[full_path_len + 1], entry->d_name, name_len + 1);
                ret = unix_build_dentry_tree_recursive(&child,
                                                       full_path,
@@ -199,39 +196,66 @@ unix_scan_directory(struct wim_dentry *dir_dentry,
                full_path[full_path_len] = '\0';
                if (ret)
                        break;
-               if (child)
-                       dentry_add_child(dir_dentry, child);
+               attach_scanned_tree(dir_dentry, child, params->blob_table);
        }
        closedir(dir);
        return ret;
 }
 
-/* Given an absolute symbolic link target @dest (UNIX-style, beginning
- * with '/'), determine whether it points into the directory specified by
- * @ino and @dev.  If so, return the target modified to be "absolute"
- * relative to this directory.  Otherwise, return NULL.  */
+/*
+ * Given an absolute symbolic link target (UNIX-style, beginning with '/'),
+ * determine whether it points into the directory identified by @ino and @dev.
+ * If yes, return the suffix of @target which is relative to this directory, but
+ * retaining leading slashes.  If no, return @target.
+ *
+ * Here are some examples, assuming that the @ino/@dev directory is "/home/e":
+ *
+ *     Original target         New target
+ *     ---------------         ----------
+ *     /home/e/test            /test
+ *     /home/e/test/           /test/
+ *     //home//e//test//       //test//
+ *     /home/e                                         (empty string)
+ *     /home/e/                /
+ *     /usr/lib                /usr/lib                (external link)
+ *
+ * Because of the possibility of other links into the @ino/@dev directory and/or
+ * multiple path separators, we can't simply do a string comparison; instead we
+ * need to stat() each ancestor directory.
+ *
+ * If the link points directly to the @ino/@dev directory with no trailing
+ * slashes, then the new target will be an empty string.  This is not a valid
+ * UNIX symlink target, but we store this in the archive anyway since the target
+ * is intended to be de-relativized when the link is extracted.
+ */
 static char *
-unix_fixup_abslink(char *dest, u64 ino, u64 dev)
+unix_relativize_link_target(char *target, u64 ino, u64 dev)
 {
-       char *p = dest;
+       char *p = target;
 
        do {
                char save;
                struct stat stbuf;
                int ret;
 
-               /* Skip non-slashes.  */
-               while (*p && *p != '/')
+               /* Skip slashes (guaranteed to be at least one here)  */
+               do {
                        p++;
+               } while (*p == '/');
+
+               /* End of string?  */
+               if (!*p)
+                       break;
 
-               /* Skip slashes.  */
-               while (*p && *p == '/')
+               /* Skip non-slashes (guaranteed to be at least one here)  */
+               do {
                        p++;
+               } while (*p && *p != '/');
 
-               /* Get inode and device for this prefix.  */
+               /* Get the inode and device numbers for this prefix.  */
                save = *p;
                *p = '\0';
-               ret = stat(dest, &stbuf);
+               ret = stat(target, &stbuf);
                *p = save;
 
                if (ret) {
@@ -243,83 +267,66 @@ unix_fixup_abslink(char *dest, u64 ino, u64 dev)
                if (stbuf.st_ino == ino && stbuf.st_dev == dev) {
                        /* Link points inside directory tree being captured.
                         * Return abbreviated path.  */
-                       *--p = '/';
-                       while (p > dest && *(p - 1) == '/')
-                               p--;
                        return p;
                }
        } while (*p);
 
        /* Link does not point inside directory tree being captured.  */
-       return NULL;
+       return target;
 }
 
-static int
+static noinline_for_stack int
 unix_scan_symlink(const char *full_path, int dirfd, const char *relpath,
                  struct wim_inode *inode, struct capture_params *params)
 {
-       char deref_name_buf[4096];
-       ssize_t deref_name_len;
-       char *dest;
+       char orig_target[REPARSE_POINT_MAX_SIZE];
+       char *target = orig_target;
        int ret;
 
-       inode->i_attributes = FILE_ATTRIBUTE_REPARSE_POINT;
-       inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
-
-       /* The idea here is to call readlink() to get the UNIX target of the
-        * symbolic link, then turn the target into a reparse point data buffer
-        * that contains a relative or absolute symbolic link. */
-       deref_name_len = my_readlinkat(full_path, dirfd, relpath,
-                                      deref_name_buf, sizeof(deref_name_buf) - 1);
-       if (deref_name_len < 0) {
+       /* Read the UNIX symbolic link target.  */
+       ret = my_readlinkat(full_path, dirfd, relpath, target,
+                           sizeof(orig_target));
+       if (unlikely(ret < 0)) {
                ERROR_WITH_ERRNO("\"%s\": Can't read target of symbolic link",
                                 full_path);
                return WIMLIB_ERR_READLINK;
        }
+       if (unlikely(ret >= sizeof(orig_target))) {
+               ERROR("\"%s\": target of symbolic link is too long", full_path);
+               return WIMLIB_ERR_READLINK;
+       }
+       target[ret] = '\0';
 
-       dest = deref_name_buf;
-
-       dest[deref_name_len] = '\0';
-
-       if ((params->add_flags & WIMLIB_ADD_FLAG_RPFIX) &&
-            dest[0] == '/')
-       {
-               char *fixed_dest;
+       /* If the link is absolute and reparse point fixups are enabled, then
+        * change it to be "absolute" relative to the tree being captured.  */
+       if (target[0] == '/' && (params->add_flags & WIMLIB_ADD_FLAG_RPFIX)) {
+               int status = WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK;
 
-               /* RPFIX (reparse point fixup) mode:  Change target of absolute
-                * symbolic link to be "absolute" relative to the tree being
-                * captured.  */
-               fixed_dest = unix_fixup_abslink(dest,
-                                               params->capture_root_ino,
-                                               params->capture_root_dev);
                params->progress.scan.cur_path = full_path;
-               params->progress.scan.symlink_target = deref_name_buf;
-               if (fixed_dest) {
-                       /* Link points inside the tree being captured, so it was
-                        * fixed.  */
-                       inode->i_not_rpfixed = 0;
-                       dest = fixed_dest;
-                       ret = do_capture_progress(params,
-                                                 WIMLIB_SCAN_DENTRY_FIXED_SYMLINK,
-                                                 NULL);
-               } else {
-                       /* Link points outside the tree being captured, so it
-                        * was not fixed.  */
-                       ret = do_capture_progress(params,
-                                                 WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK,
-                                                 NULL);
+               params->progress.scan.symlink_target = target;
+
+               target = unix_relativize_link_target(target,
+                                                    params->capture_root_ino,
+                                                    params->capture_root_dev);
+               if (target != orig_target) {
+                       /* Link target was fixed.  */
+                       inode->i_rp_flags &= ~WIM_RP_FLAG_NOT_FIXED;
+                       status = WIMLIB_SCAN_DENTRY_FIXED_SYMLINK;
                }
+               ret = do_capture_progress(params, status, NULL);
                if (ret)
                        return ret;
        }
-       ret = wim_inode_set_symlink(inode, dest, params->blob_table);
+
+       /* Translate the UNIX symlink target into a Windows reparse point.  */
+       ret = wim_inode_set_symlink(inode, target, params->blob_table);
        if (ret)
                return ret;
 
-       /* Unfortunately, Windows seems to have the concept of "file" symbolic
-        * links as being different from "directory" symbolic links...  so
-        * FILE_ATTRIBUTE_DIRECTORY needs to be set on the symbolic link if the
-        * *target* of the symbolic link is a directory.  */
+       /* On Windows, a reparse point can be set on both directory and
+        * non-directory files.  Usually, a link that is intended to point to a
+        * (non-)directory is stored as a reparse point on a (non-)directory
+        * file.  Replicate this behavior by examining the target file.  */
        struct stat stbuf;
        if (my_fstatat(full_path, dirfd, relpath, &stbuf, 0) == 0 &&
            S_ISDIR(stbuf.st_mode))
@@ -339,10 +346,10 @@ unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
        struct stat stbuf;
        int stat_flags;
 
-       ret = try_exclude(full_path, full_path_len, params);
-       if (ret < 0) /* Excluded? */
+       ret = try_exclude(full_path, params);
+       if (unlikely(ret < 0)) /* Excluded? */
                goto out_progress;
-       if (ret > 0) /* Error? */
+       if (unlikely(ret > 0)) /* Error? */
                goto out;
 
        if (params->add_flags & (WIMLIB_ADD_FLAG_DEREFERENCE |
@@ -381,8 +388,7 @@ unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret,
        }
 
        ret = inode_table_new_dentry(params->inode_table, relpath,
-                                    stbuf.st_ino, stbuf.st_dev,
-                                    S_ISDIR(stbuf.st_mode), &tree);
+                                    stbuf.st_ino, stbuf.st_dev, false, &tree);
        if (ret)
                goto out;