From: Eric Biggers Date: Mon, 20 May 2013 18:36:07 +0000 (-0500) Subject: Document handling of invalid filenames X-Git-Tag: v1.4.1~83 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=e322cb658a1257d3a5174301b948a4ccad7b3851 Document handling of invalid filenames --- diff --git a/doc/imagex-apply.1.in b/doc/imagex-apply.1.in index 94ca6f5d..c7ba4c31 100644 --- a/doc/imagex-apply.1.in +++ b/doc/imagex-apply.1.in @@ -197,6 +197,9 @@ Note: encrypted files will be extracted as raw encrypted data if the filesystem does not support encryption. Compressed files and directories (with the compression attribute set) will be extracted as uncompressed if the filesystem does not support transparent compression. + +Additional note: files with names that cannot be represented on Windows will not +be extracted by default; see \fB--including-invalid-names\fR. .SH SPLIT WIMS You may use \fB@IMAGEX_PROGNAME@ apply\fR to apply images from a split WIM. The \fIWIMFILE\fR argument is used to specify the first part of the split WIM, and @@ -289,6 +292,25 @@ with the SACL omitted, then only the default inherited security descriptor, if we do not have permission to set the desired one. On UNIX: with \fB--unix-data\fR, fail immediately if the UNIX owner, group, or file mode on an extracted file cannot be set for any reason. +.TP +\fB--including-invalid-names\fR +Extract files and directories with invalid names by replacing characters and +appending a suffix rather than ignoring them. The meaning of this is +platform-dependent. +.IP "" 6 +On UNIX, filenames are case-sensitive and may contain any byte except '\\0' and +\'/', so on UNIX this option will only have an effect in the unlikely case that +the WIM image for some reason has a filename containing one of these characters. +.IP "" 6 +On Windows, filenames are case-insensitive, cannot include the characters '/', +\'\\0', '\\', ':', '*', '?', '"', '<', '>', or '|', and cannot end with a +space or period. 
Ordinarily, files in WIM images should meet these +conditions as well. However, it is not guaranteed, and in particular a WIM +image captured with \fB@IMAGEX_PROGNAME@\fR on UNIX could contain such files. +By default, invalid names will be ignored, and if there are multiple names +differing only in case, one will be chosen to extract arbitrarily; however, +with \fB--including-invalid-names\fR, all names will be sanitized and +extracted in some form. .SH NOTES \fB@IMAGEX_PROGNAME@ apply\fR calculates the SHA1 message digest of every file stream it extracts and verifies that it is the same as the SHA1 message digest provided in diff --git a/doc/imagex-capture.1.in b/doc/imagex-capture.1.in index 943f1f4d..a7bf2fe6 100644 --- a/doc/imagex-capture.1.in +++ b/doc/imagex-capture.1.in @@ -245,10 +245,10 @@ absolute symbolic links that point outside the directory tree being captured will be ignored and not be captured at all. When disabled (\fB--norpfix\fR), absolute symbolic links will be captured exactly as is. .IP "" -The default behavior for \fBimagex capture\fR is equivalent to \fB--rpfix\fR. -The default behavior for \fBimagex append\fR will be \fB--rpfix\fR if reparse -point fixups have previously been done on \fIWIMFILE\fR, otherwise -\fB--norpfix\fR. +The default behavior for \fB@IMAGEX_PROGNAME@ capture\fR is equivalent to +\fB--rpfix\fR. The default behavior for \fB@IMAGEX_PROGNAME@ append\fR will be +\fB--rpfix\fR if reparse point fixups have previously been done on +\fIWIMFILE\fR, otherwise \fB--norpfix\fR. .IP "" In the case of a multi-source capture, (\fB--source-list\fR specified), passing \fB--norpfix\fR is recommended. Otherwise, reparse point fixups will be @@ -323,6 +323,14 @@ itself. However, additional symbolic links in subdirectories, or in additional source directories not destined for the WIM image root (with \fB--source-list\fR), are not dereferenced unless \fB--dereference\fR is specified. 
+.PP +With the UNIX version of \fB@IMAGEX_PROGNAME@\fR, it is possible to create a WIM +image containing files with names differing only in case, or files with names +containing the characters ':', '*', '?', '"', '<', '>', '|', or '\\', which are +valid on POSIX-compliant filesystems but not on Windows. Be warned that such files +will not be extracted by default by the Windows version of +\fB@IMAGEX_PROGNAME@\fR, and Microsoft's ImageX might get confused by such +names. .SH EXAMPLES Create a new WIM 'mywim.wim' from the directory 'somedir', using LZX compression and including an integrity table: diff --git a/doc/imagex-extract.1.in b/doc/imagex-extract.1.in index 98e609c6..df078f01 100644 --- a/doc/imagex-extract.1.in +++ b/doc/imagex-extract.1.in @@ -32,7 +32,9 @@ Each \fIPATH\fR specifies a file or directory tree within the WIM image to extract. Each path must be specified as an absolute path starting from the root of the WIM image, like those output by the \fB@IMAGEX_PROGNAME@ dir\fR (1) command. Path separators may be forward slashes on UNIX, or either forward -slashes or backward slashes on Windows. The leading slash is optional. +slashes or backward slashes on Windows. The leading slash is optional. On +Windows, the paths are treated case-insensitively, while on UNIX, paths are +treated case-sensitively. .PP If no \fIPATH\fRs are provided, the default behavior is to extract the full image, as if the path "/" had been provided. @@ -64,6 +66,9 @@ See the documentation for this option in \fB@IMAGEX_PROGNAME@-apply\fR (1). \fB--strict-acls\fR See the documentation for this option in \fB@IMAGEX_PROGNAME@-apply\fR (1). .TP +\fB--including-invalid-names\fR +See the documentation for this option in \fB@IMAGEX_PROGNAME@-apply\fR (1). +.TP \fB--to-stdout\fR Extract the files to standard output instead of to the filesystem. 
This can only be provided if all the specified \fIPATH\fRs are to regular files (not diff --git a/doc/imagex-update.1.in b/doc/imagex-update.1.in index 9baafe27..35c2e46b 100644 --- a/doc/imagex-update.1.in +++ b/doc/imagex-update.1.in @@ -139,7 +139,9 @@ subdirectory named "Public" in this directory must be specified as .PP All paths to files or directories within the WIM image must be specified relative to the root of the image. However, the leading slash is optional, and -both forward slashes and backslashes are accepted. +both forward slashes and backslashes are accepted. On Windows, the paths are +treated case-insensitively, while on UNIX, the paths are treated +case-sensitively. .PP The command file (\fICMDFILE\fR) is parsed by \fB@IMAGEX_PROGNAME@ update\fR itself and not by the system shell. Therefore, its syntax is limited. However, diff --git a/doc/imagex.1.in b/doc/imagex.1.in index a9ef7b14..02def4b2 100644 --- a/doc/imagex.1.in +++ b/doc/imagex.1.in @@ -156,6 +156,12 @@ character encodings. .PP On UNIX, wimlib works primarily in the locale-dependent multibyte encoding, which you are strongly recommended to set to UTF-8 to avoid any problems. +.SH CASE SENSITIVITY +The case sensitivity of \fB@IMAGEX_PROGNAME@\fR differs somewhat between UNIX +and Windows. \fB@IMAGEX_PROGNAME@\fR internally treats filenames as +case-sensitive, but on Windows it will treat paths actually provided by the user +as case-insensitive in order to get the "expected" behavior. Otherwise, options +and non-path arguments should be specified in lower case. .SH WARNING Note: \fBwimlib\fR and \fB@IMAGEX_PROGNAME@\fR are experimental. Use Microsoft's imagex.exe if you have to make sure your WIM files are made "correctly". 
Feel diff --git a/programs/imagex.c b/programs/imagex.c index 9ee6df34..297c65fb 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -104,7 +104,7 @@ IMAGEX_PROGNAME" apply WIMFILE [IMAGE_NUM | IMAGE_NAME | all]\n" " (DIRECTORY | NTFS_VOLUME) [--check] [--hardlink]\n" " [--symlink] [--verbose] [--ref=\"GLOB\"] [--unix-data]\n" " [--no-acls] [--strict-acls] [--rpfix] [--norpfix]\n" -" [--force-all-files]\n" +" [--including-invalid-names]\n" ), [CAPTURE] = T( @@ -135,7 +135,7 @@ T( IMAGEX_PROGNAME" extract WIMFILE (IMAGE_NUM | IMAGE_NAME) [PATH...]\n" " [--check] [--ref=\"GLOB\"] [--verbose] [--unix-data]\n" " [--no-acls] [--strict-acls] [--to-stdout] [--dest-dir=DIR]\n" -" [--force-all-files]\n" +" [--including-invalid-names]\n" ), [INFO] = T( @@ -208,9 +208,9 @@ enum { IMAGEX_EXTRACT_XML_OPTION, IMAGEX_FLAGS_OPTION, IMAGEX_FORCE_OPTION, - IMAGEX_FORCE_ALL_FILES_OPTION, IMAGEX_HARDLINK_OPTION, IMAGEX_HEADER_OPTION, + IMAGEX_INCLUDING_INVALID_NAMES_OPTION, IMAGEX_LAZY_OPTION, IMAGEX_LOOKUP_TABLE_OPTION, IMAGEX_METADATA_OPTION, @@ -246,7 +246,7 @@ static const struct option apply_options[] = { {T("strict-acls"), no_argument, NULL, IMAGEX_STRICT_ACLS_OPTION}, {T("rpfix"), no_argument, NULL, IMAGEX_RPFIX_OPTION}, {T("norpfix"), no_argument, NULL, IMAGEX_NORPFIX_OPTION}, - {T("force-all-files"), no_argument, NULL, IMAGEX_FORCE_ALL_FILES_OPTION}, + {T("including-invalid-names"), no_argument, NULL, IMAGEX_INCLUDING_INVALID_NAMES_OPTION}, {NULL, 0, NULL, 0}, }; static const struct option capture_or_append_options[] = { @@ -294,7 +294,7 @@ static const struct option extract_options[] = { {T("strict-acls"), no_argument, NULL, IMAGEX_STRICT_ACLS_OPTION}, {T("dest-dir"), required_argument, NULL, IMAGEX_DEST_DIR_OPTION}, {T("to-stdout"), no_argument, NULL, IMAGEX_TO_STDOUT_OPTION}, - {T("force-all-files"), no_argument, NULL, IMAGEX_FORCE_ALL_FILES_OPTION}, + {T("including-invalid-names"), no_argument, NULL, IMAGEX_INCLUDING_INVALID_NAMES_OPTION}, {NULL, 0, NULL, 0}, }; @@ 
-1533,7 +1533,7 @@ imagex_apply(int argc, tchar **argv) case IMAGEX_RPFIX_OPTION: extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX; break; - case IMAGEX_FORCE_ALL_FILES_OPTION: + case IMAGEX_INCLUDING_INVALID_NAMES_OPTION: extract_flags |= WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES; extract_flags |= WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS; break; @@ -2268,7 +2268,7 @@ imagex_extract(int argc, tchar **argv) extract_flags |= WIMLIB_EXTRACT_FLAG_TO_STDOUT; imagex_be_quiet = true; break; - case IMAGEX_FORCE_ALL_FILES_OPTION: + case IMAGEX_INCLUDING_INVALID_NAMES_OPTION: extract_flags |= WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES; extract_flags |= WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS; break;