Support extracting "long" paths on Windows
authorEric Biggers <ebiggers3@gmail.com>
Mon, 20 May 2013 20:56:02 +0000 (15:56 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Mon, 20 May 2013 20:56:02 +0000 (15:56 -0500)
doc/imagex-apply.1.in
doc/imagex-extract.1.in
include/wimlib/apply.h
src/extract.c

index c7ba4c3..96f2e07 100644 (file)
@@ -193,13 +193,21 @@ considered an error condition.
 .IP \[bu]
 Hard links, if supported by the filesystem.
 .PP
-Note: encrypted files will be extracted as raw encrypted data if the filesystem
-does not support encryption.  Compressed files and directories (with the
-compression attribute set) will be extracted as uncompressed if the filesystem
-does not support transparent compression.
-
-Additional note: files with names that cannot be represented on Windows will not
-be extracted by default; see \fB--including-invalid-names\fR.
+Additional notes about extracting files on Windows:
+.IP \[bu] 4
+Encrypted files will be extracted as raw encrypted data if the filesystem
+does not support encryption.
+.IP \[bu]
+Compressed files and directories (with the compression attribute set) will be
+extracted as uncompressed if the filesystem does not support transparent
+compression.
+.IP \[bu]
+Files with names that cannot be represented on Windows will not
+be extracted by default; see \fB--include-invalid-names\fR.
+.IP \[bu]
+Files with full paths over 260 characters (MAX_PATH) are extracted by using the
+\\\\?\\-prefixed path hack.  Beware that such files will be inaccessible to
+most Windows software and may be difficult to delete.
 .SH SPLIT WIMS
 You may use \fB@IMAGEX_PROGNAME@ apply\fR to apply images from a split WIM.  The
 \fIWIMFILE\fR argument is used to specify the first part of the split WIM, and
@@ -293,7 +301,7 @@ we do not have permission to set the desired one.  On UNIX: with
 \fB--unix-data\fR, fail immediately if the UNIX owner, group, or file mode on an
 extracted file cannot be set for any reason.
 .TP
-\fB--including-invalid-names\fR
+\fB--include-invalid-names\fR
 Extract files and directories with invalid names by replacing characters and
 appending a suffix rather than ignoring them.  The meaning of this is
 platform-dependent.
@@ -309,7 +317,7 @@ conditions as well. However, it is not guaranteed, and in particular a WIM
 image captured with \fB@IMAGEX_PROGNAME@\fR on UNIX could contain such files.
 By default, invalid names will be ignored, and if there are multiple names
 differing only in case, one will be chosen to extract arbitrarily; however,
-with \fB--including-invalid-names\fR, all names will be sanitized and
+with \fB--include-invalid-names\fR, all names will be sanitized and
 extracted in some form.
 .SH NOTES
 \fB@IMAGEX_PROGNAME@ apply\fR calculates the SHA1 message digest of every file stream it
index df078f0..cf49abd 100644 (file)
@@ -66,7 +66,7 @@ See the documentation for this option in \fB@IMAGEX_PROGNAME@-apply\fR (1).
 \fB--strict-acls\fR
 See the documentation for this option in \fB@IMAGEX_PROGNAME@-apply\fR (1).
 .TP
-\fB--including-invalid-names\fR
+\fB--include-invalid-names\fR
 See the documentation for this option in \fB@IMAGEX_PROGNAME@-apply\fR (1).
 .TP
 \fB--to-stdout\fR
index ed2f07a..becef7a 100644 (file)
@@ -10,12 +10,27 @@ struct _ntfs_volume;
 
 struct apply_args {
        WIMStruct *w;
+
+       /* Directory to which we're extracting the WIM image or directory tree,
+        * in user-specified form (may be slightly altered) */
        const tchar *target;
        unsigned target_nchars;
-       struct wim_dentry *extract_root;
-       unsigned long invalid_sequence;
+
+#ifdef __WIN32__
+       /* \\?\-prefixed full path to the above directory; needed to work around
+        * lack of default support for long paths on Windows. */
+       tchar *target_lowlevel_path;
+       unsigned target_lowlevel_path_nchars;
+#endif
+
+       /* Absolute path to the above directory; on UNIX this is simply a path
+        * beginning with /, while on Windows this will be a path beginning with
+        * a drive letter followed by a backslash, but not with \\?\. */
        tchar *target_realpath;
        unsigned target_realpath_len;
+
+       struct wim_dentry *extract_root;
+       unsigned long invalid_sequence;
        int extract_flags;
        union wimlib_progress_info progress;
        wimlib_progress_func_t progress_func;
@@ -35,6 +50,7 @@ struct apply_args {
                        unsigned vol_flags;
                        unsigned long num_hard_links_failed;
                        unsigned long num_soft_links_failed;
+                       unsigned long num_long_paths;
                        bool have_vol_flags;
                };
        #else
index d4029ef..df14478 100644 (file)
 #  include "config.h"
 #endif
 
+#ifdef __WIN32__
+#  include "wimlib/win32_common.h" /* For GetFullPathName() */
+#endif
+
 #include "wimlib/apply.h"
 #include "wimlib/dentry.h"
 #include "wimlib/encoding.h"
@@ -51,6 +55,8 @@
 #include <sys/stat.h>
 #include <unistd.h>
 
+#define MAX_LONG_PATH_WARNINGS 5
+
 static int
 do_apply_op(struct wim_dentry *dentry, struct apply_args *args,
            int (*apply_dentry_func)(const tchar *, size_t,
@@ -60,8 +66,21 @@ do_apply_op(struct wim_dentry *dentry, struct apply_args *args,
        size_t extraction_path_nchars;
        struct wim_dentry *d;
        LIST_HEAD(ancestor_list);
+       const tchar *target;
+       size_t target_nchars;
 
-       extraction_path_nchars = args->target_nchars;
+#ifdef __WIN32__
+       if (args->target_lowlevel_path) {
+               target = args->target_lowlevel_path;
+               target_nchars = args->target_lowlevel_path_nchars;
+       } else
+#endif
+       {
+               target = args->target;
+               target_nchars = args->target_nchars;
+       }
+
+       extraction_path_nchars = target_nchars;
 
        for (d = dentry; d != args->extract_root; d = d->parent) {
                if (d->not_extracted)
@@ -71,13 +90,30 @@ do_apply_op(struct wim_dentry *dentry, struct apply_args *args,
        }
 
        tchar extraction_path[extraction_path_nchars + 1];
-       p = tmempcpy(extraction_path, args->target, args->target_nchars);
+       p = tmempcpy(extraction_path, target, target_nchars);
+
 
        list_for_each_entry(d, &ancestor_list, tmp_list) {
                *p++ = OS_PREFERRED_PATH_SEPARATOR;
                p = tmempcpy(p, d->extraction_name, d->extraction_name_nchars);
        }
        *p = T('\0');
+
+#ifdef __WIN32__
+       /* Warn the user if the path exceeds MAX_PATH */
+
+       /* + 1 for '\0', -4 for \\?\.  */
+       if (extraction_path_nchars + 1 - 4 > MAX_PATH) {
+               if (dentry->needs_extraction &&
+                   args->num_long_paths < MAX_LONG_PATH_WARNINGS)
+               {
+                       WARNING("Path \"%ls\" exceeds MAX_PATH and will not be accessible "
+                               "to most Windows software", extraction_path);
+                       if (++args->num_long_paths == MAX_LONG_PATH_WARNINGS)
+                               WARNING("Suppressing further warnings about long paths");
+               }
+       }
+#endif
        return (*apply_dentry_func)(extraction_path, extraction_path_nchars,
                                    dentry, args);
 }
@@ -651,11 +687,41 @@ extract_tree(WIMStruct *wim, const tchar *wim_source_path, const tchar *target,
 
        memset(&args, 0, sizeof(args));
 
+
        args.w                      = wim;
        args.target                 = target;
+       args.target_nchars          = tstrlen(target);
        args.extract_flags          = extract_flags;
        args.progress_func          = progress_func;
-       args.target_nchars          = tstrlen(target);
+
+#ifdef __WIN32__
+       /* Work around defective behavior in Windows where paths longer than 260
+        * characters are not supported by default; instead they need to be
+        * turned into absolute paths and prefixed with "\\?\".  */
+       args.target_lowlevel_path = MALLOC(32768 * sizeof(wchar_t));
+       if (!args.target_lowlevel_path)
+       {
+               ret = WIMLIB_ERR_NOMEM;
+               goto out;
+       }
+       args.target_lowlevel_path[0] = L'\\';
+       args.target_lowlevel_path[1] = L'\\';
+       args.target_lowlevel_path[2] = L'?';
+       args.target_lowlevel_path[3] = L'\\';
+       args.target_lowlevel_path_nchars =
+               GetFullPathName(args.target, 32768 - 4,
+                               &args.target_lowlevel_path[4], NULL);
+
+       if (args.target_lowlevel_path_nchars == 0 ||
+           args.target_lowlevel_path_nchars >= 32768 - 4)
+       {
+               WARNING("Can't get full path name for \"%ls\"", args.target);
+               FREE(args.target_lowlevel_path);
+               args.target_lowlevel_path = NULL;
+       } else {
+               args.target_lowlevel_path_nchars += 4;
+       }
+#endif
 
        if (progress_func) {
                args.progress.extract.wimfile_name = wim->filename;
@@ -675,7 +741,7 @@ extract_tree(WIMStruct *wim, const tchar *wim_source_path, const tchar *target,
                        ERROR_WITH_ERRNO("Failed to mount NTFS volume `%"TS"'",
                                         target);
                        ret = WIMLIB_ERR_NTFS_3G;
-                       goto out;
+                       goto out_free_target_lowlevel_path;
                }
                ops = &ntfs_apply_operations;
        } else
@@ -793,6 +859,10 @@ out_ntfs_umount:
                }
        }
 #endif
+out_free_target_lowlevel_path:
+#ifdef __WIN32__
+       FREE(args.target_lowlevel_path);
+#endif
 out:
        return ret;
 }