*/
/*
- * Copyright (C) 2012, 2013, 2014 Eric Biggers
+ * Copyright (C) 2012-2016 Eric Biggers
*
* This file is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* - wimlib does not allow *directory* hard links, so a WIM image really does
* have a *tree* of dentries (and not an arbitrary graph of dentries).
*
- * - wimlib indexes dentries both case-insensitively and case-sensitively,
- * allowing either behavior to be used for path lookup.
+ * - wimlib supports both case-sensitive and case-insensitive path lookups.
+ * The implementation uses a single in-memory index per directory, using a
+ * collation order like that used by NTFS; see collate_dentry_names().
*
* - Multiple dentries in a directory might have the same case-insensitive
* name. But wimlib enforces that at most one dentry in a directory can have
* resource for a WIM image. */
struct wim_dentry_on_disk {
- /* Length of this directory entry in bytes, not including any alternate
- * data stream entries. Should be a multiple of 8 so that the following
- * dentry or alternate data stream entry is aligned on an 8-byte
- * boundary. (If not, wimlib will round it up.) It must be at least as
- * long as the fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE),
- * plus the lengths of the file name and/or short name if present.
+ /* Length of this directory entry in bytes, not including any extra
+ * stream entries. Should be a multiple of 8 so that the following
+ * dentry or extra stream entry is aligned on an 8-byte boundary. (If
+ * not, wimlib will round it up.) It must be at least as long as the
+ * fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE), plus the
+ * lengths of the file name and/or short name if present, plus the size
+ * of any "extra" data.
*
- * It is also possible for this field to be 0. This situation, which is
- * undocumented, indicates the end of a list of sibling nodes in a
- * directory. It also means the real length is 8, because the dentry
- * included only the length field, but that takes up 8 bytes. */
+ * It is also possible for this field to be 0. This case indicates the
+ * end of a list of sibling entries in a directory. It also means the
+ * real length is 8, because the dentry included only the length field,
+ * but that takes up 8 bytes. */
le64 length;
- /* Attributes of the file or directory. This is a bitwise OR of the
- * FILE_ATTRIBUTE_* constants and should correspond to the value
+ /* File attributes for the file or directory. This is a bitwise OR of
+ * the FILE_ATTRIBUTE_* constants and should correspond to the value
* retrieved by GetFileAttributes() on Windows. */
le32 attributes;
/* A value that specifies the security descriptor for this file or
- * directory. If -1, the file or directory has no security descriptor.
- * Otherwise, it is a 0-based index into the WIM image's table of
- * security descriptors (see: `struct wim_security_data') */
- sle32 security_id;
+ * directory. If 0xFFFFFFFF, the file or directory has no security
+ * descriptor. Otherwise, it is a 0-based index into the WIM image's
+ * table of security descriptors (see: `struct wim_security_data') */
+ le32 security_id;
/* Offset, in bytes, from the start of the uncompressed metadata
* resource of this directory's child directory entries, or 0 if this
le64 last_access_time;
le64 last_write_time;
- /* Vaguely, the SHA-1 message digest ("hash") of the file's contents.
- * More specifically, this is for the "unnamed data stream" rather than
- * any "alternate data streams". This hash value is used to look up the
- * corresponding entry in the WIM's stream lookup table to actually find
- * the file contents within the WIM.
+ /*
+ * Usually this is the SHA-1 message digest of the file's "contents"
+ * (the unnamed data stream).
*
- * If the file has no unnamed data stream (e.g. is a directory), then
- * this field will be all zeroes. If the unnamed data stream is empty
- * (i.e. an "empty file"), then this field is also expected to be all
- * zeroes. (It will be if wimlib created the WIM image, at least;
- * otherwise it can't be ruled out that the SHA-1 message digest of 0
- * bytes of data is given explicitly.)
+ * If the file has FILE_ATTRIBUTE_REPARSE_POINT set, then this is
+ * instead usually the SHA-1 message digest of the uncompressed reparse
+ * point data.
*
- * If the file has reparse data, then this field will instead specify
- * the SHA-1 message digest of the reparse data. If it is somehow
- * possible for a file to have both an unnamed data stream and reparse
- * data, then this is not handled by wimlib.
- *
- * As a further special case, if this field is all zeroes but there is
- * an alternate data stream entry with no name and a nonzero SHA-1
- * message digest field, then that hash must be used instead of this
- * one. In fact, when named data streams are present, some versions of
- * Windows PE contain a bug where they only look in the alternate data
- * stream entries for the unnamed data stream, not here.
+ * However, there are some special rules that need to be applied to
+ * interpret this field correctly when extra stream entries are present.
+ * See the code for details.
*/
- u8 unnamed_stream_hash[SHA1_HASH_SIZE];
+ u8 default_hash[SHA1_HASH_SIZE];
+
+ /* Unknown field (maybe accidental padding) */
+ le32 unknown_0x54;
- /* The format of the following data is not yet completely known and they
- * do not correspond to Microsoft's documentation.
+ /*
+ * The following 8-byte union contains either information about the
+ * reparse point (for files with FILE_ATTRIBUTE_REPARSE_POINT set), or
+ * the "hard link group ID" (for other files).
+ *
+ * The reparse point information contains ReparseTag and ReparseReserved
+ * from the header of the reparse point buffer. It also contains a flag
+ * that indicates whether a reparse point fixup (for the target of an
+ * absolute symbolic link or junction) was done or not.
*
- * If this directory entry is for a reparse point (has
- * FILE_ATTRIBUTE_REPARSE_POINT set in the attributes field), then the
- * version of the following fields containing the reparse tag is valid.
- * Furthermore, the field notated as not_rpfixed, as far as I can tell,
- * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the
- * targets of absolute symbolic links) were *not* done, and otherwise 0.
+ * The "hard link group ID" is like an inode number; all dentries for
+ * the same inode share the same value. See inode_fixup.c for more
+ * information.
*
- * If this directory entry is not for a reparse point, then the version
- * of the following fields containing the hard_link_group_id is valid.
- * All MS says about this field is that "If this file is part of a hard
- * link set, all the directory entries in the set will share the same
- * value in this field.". However, more specifically I have observed
- * the following:
- * - If the file is part of a hard link set of size 1, then the
- * hard_link_group_id should be set to either 0, which is treated
- * specially as indicating "not hardlinked", or any unique value.
- * - The specific nonzero values used to identity hard link sets do
- * not matter, as long as they are unique.
- * - However, due to bugs in Microsoft's software, it is actually NOT
- * guaranteed that directory entries that share the same hard link
- * group ID are actually hard linked to each either. See
- * inode_fixup.c for the code that handles this.
+ * Note that this union creates the limitation that reparse point files
+ * cannot have multiple names (hard links).
*/
union {
struct {
- le32 rp_unknown_1;
le32 reparse_tag;
- le16 rp_unknown_2;
- le16 not_rpfixed;
+ le16 rp_reserved;
+ le16 rp_flags;
} _packed_attribute reparse;
struct {
- le32 rp_unknown_1;
le64 hard_link_group_id;
} _packed_attribute nonreparse;
};
- /* Number of alternate data stream entries that directly follow this
- * dentry on-disk. */
- le16 num_alternate_data_streams;
+ /* Number of extra stream entries that directly follow this dentry
+ * on-disk. */
+ le16 num_extra_streams;
/* If nonzero, this is the length, in bytes, of this dentry's UTF-16LE
* encoded short name (8.3 DOS-compatible name), excluding the null
* encoded "long" name, excluding the null terminator. If zero, then
* this file has no long name. The root dentry should not have a long
* name, but all other dentries in the image should have long names. */
- le16 file_name_nbytes;
+ le16 name_nbytes;
/* Beginning of optional, variable-length fields */
- /* If file_name_nbytes != 0, the next field will be the UTF-16LE encoded
- * long file name. This will be null-terminated, so the size of this
- * field will really be file_name_nbytes + 2. */
- /*utf16lechar file_name[];*/
+ /* If name_nbytes != 0, the next field will be the UTF-16LE encoded long
+ * name. This will be null-terminated, so the size of this field will
+ * really be name_nbytes + 2. */
+ /*utf16lechar name[];*/
/* If short_name_nbytes != 0, the next field will be the UTF-16LE
* encoded short name. This will be null-terminated, so the size of
/* u8 tagged_items[] _aligned_attribute(8); */
} _packed_attribute;
- /* If num_alternate_data_streams != 0, then there are that many
- * alternate data stream entries following the dentry, on an 8-byte
- * aligned boundary. They are not counted in the 'length' field of the
- * dentry. */
+ /* If num_extra_streams != 0, then there are that many extra stream
+ * entries following the dentry, starting on the next 8-byte aligned
+ * boundary. They are not counted in the 'length' field of the dentry.
+ */
-/* Calculate the minimum unaligned length, in bytes, of an on-disk WIM dentry
- * that has names of the specified lengths. (Zero length means the
- * corresponding name actually does not exist.) The returned value excludes
- * tagged metadata items as well as any alternate data stream entries that may
- * need to follow the dentry. */
-static u64
-dentry_min_len_with_names(u16 file_name_nbytes, u16 short_name_nbytes)
-{
- u64 length = sizeof(struct wim_dentry_on_disk);
- if (file_name_nbytes)
- length += (u32)file_name_nbytes + 2;
- if (short_name_nbytes)
- length += (u32)short_name_nbytes + 2;
- return length;
-}
+/* On-disk format of an extra stream entry. This represents an extra NTFS-style
+ * "stream" associated with the file, such as a named data stream.
+ *
+ * The fixed-length fields come first and the optional stream name trails them
+ * in the flexible array member @name. NOTE(review): _packed_attribute is
+ * presumably what keeps the compiler from inserting padding between the
+ * fields; confirm against its definition. */
+struct wim_extra_stream_entry_on_disk {
+
+ /* Length of this extra stream entry, in bytes. This includes all
+ * fixed-length fields, plus the name and null terminator if present,
+ * and any needed padding such that the length is a multiple of 8. */
+ le64 length;
+
+ /* Reserved field */
+ le64 reserved;
+
+ /* SHA-1 message digest of this stream's uncompressed data, or all
+ * zeroes if this stream's data is of zero length. */
+ u8 hash[SHA1_HASH_SIZE];
+
+ /* Length of this stream's name, in bytes and excluding the null
+ * terminator; or 0 if this stream is unnamed. */
+ le16 name_nbytes;
+
+ /* Stream name in UTF-16LE. It is @name_nbytes bytes long, excluding
+ * the null terminator. There is a null terminator character if
+ * @name_nbytes != 0; i.e., if this stream is named. */
+ utf16lechar name[];
+} _packed_attribute;
/*
 * Install @name (@name_nbytes bytes long) as the long name of @dentry.
 *
 * The previous long name is released with FREE() and the @name pointer is
 * stored as-is; no copy is made in this function. If dentry_has_short_name()
 * reports a short name, that short name is freed and reset to NULL / 0 bytes.
 */
static void
-do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *file_name,
- size_t file_name_nbytes)
+do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *name,
+ size_t name_nbytes)
{
- FREE(dentry->file_name);
- dentry->file_name = file_name;
- dentry->file_name_nbytes = file_name_nbytes;
+ FREE(dentry->d_name); /* release the old long name first */
+ dentry->d_name = name; /* pointer is stored directly, not duplicated */
+ dentry->d_name_nbytes = name_nbytes;
if (dentry_has_short_name(dentry)) {
- FREE(dentry->short_name);
- dentry->short_name = NULL;
- dentry->short_name_nbytes = 0;
+ FREE(dentry->d_short_name);
+ dentry->d_short_name = NULL; /* leave no dangling pointer behind */
+ dentry->d_short_name_nbytes = 0;
}
}
return 0;
}
-/* Return the length, in bytes, required for the specified alternate data stream
- * (ADS) entry on-disk. This accounts for the fixed-length portion of the ADS
- * entry, the {stream name and its null terminator} if present, and the padding
- * after the entry to align the next ADS entry or dentry on an 8-byte boundary
- * in the uncompressed metadata resource buffer. */
-static u64
-ads_entry_out_total_length(const struct wim_ads_entry *entry)
+/* Calculate the minimum unaligned length, in bytes, of an on-disk WIM dentry
+ * that has names of the specified lengths. (Zero length means the
+ * corresponding name actually does not exist.) The returned value excludes
+ * tagged metadata items as well as any extra stream entries that may need to
+ * follow the dentry.
+ *
+ * The result starts at sizeof(struct wim_dentry_on_disk); each name with a
+ * nonzero length then adds its byte length plus 2 more bytes. No rounding to
+ * an 8-byte boundary is done here. */
+static size_t
+dentry_min_len_with_names(u16 name_nbytes, u16 short_name_nbytes)
{
- u64 len = sizeof(struct wim_ads_entry_on_disk);
- if (entry->stream_name_nbytes)
- len += (u32)entry->stream_name_nbytes + 2;
- return (len + 7) & ~7;
+ size_t length = sizeof(struct wim_dentry_on_disk);
+ if (name_nbytes)
+ length += (u32)name_nbytes + 2; /* + 2: room for the long name's null terminator */
+ if (short_name_nbytes)
+ length += (u32)short_name_nbytes + 2; /* + 2: room for the short name's null terminator */
+ return length;
}
-/*
- * Determine whether to include a "dummy" stream when writing a WIM dentry.
- *
- * Some versions of Microsoft's WIM software (the boot driver(s) in WinPE 3.0,
- * for example) contain a bug where they assume the first alternate data stream
- * (ADS) entry of a dentry with a nonzero ADS count specifies the unnamed
- * stream, even if it has a name and the unnamed stream is already specified in
- * the hash field of the dentry itself.
- *
- * wimlib has to work around this behavior by carefully emulating the behavior
- * of (most versions of) ImageX/WIMGAPI, which move the unnamed stream reference
- * into the alternate stream entries whenever there are named data streams, even
- * though there is already a field in the dentry itself for the unnamed stream
- * reference, which then goes to waste.
- */
-static inline bool
-inode_needs_dummy_stream(const struct wim_inode *inode)
+
+/* Return the length, in bytes, required for the specified stream on-disk, when
+ * represented as an extra stream entry.
+ *
+ * That is the size of struct wim_extra_stream_entry_on_disk, plus (for a named
+ * stream only) the name's byte length and 2 more bytes, rounded up with
+ * ALIGN(len, 8). */
+static size_t
+stream_out_total_length(const struct wim_inode_stream *strm)
{
- return (inode->i_num_ads > 0 &&
- inode->i_num_ads < 0xffff && /* overflow check */
- inode->i_canonical_streams); /* assume the dentry is okay if it
- already had an unnamed ADS entry
- when it was read in */
+ /* Account for the fixed length portion */
+ size_t len = sizeof(struct wim_extra_stream_entry_on_disk);
+
+ /* For named streams, account for the variable-length name. */
+ if (stream_is_named(strm))
+ len += utf16le_len_bytes(strm->stream_name) + 2; /* + 2 for the null terminator */
+
+ /* Account for any necessary padding to the next 8-byte boundary. */
+ return ALIGN(len, 8);
}
-/* Calculate the total number of bytes that will be consumed when a dentry is
+/*
+ * Calculate the total number of bytes that will be consumed when a dentry is
* written. This includes the fixed-length portion of the dentry, the name
- * fields, any tagged metadata items, and any alternate data stream entries.
- * Also includes all alignment bytes. */
-u64
+ * fields, any tagged metadata items, and any extra stream entries. This also
+ * includes all alignment bytes.
+ */
+size_t
dentry_out_total_length(const struct wim_dentry *dentry)
{
const struct wim_inode *inode = dentry->d_inode;
- u64 len;
-
- len = dentry_min_len_with_names(dentry->file_name_nbytes,
- dentry->short_name_nbytes);
- len = (len + 7) & ~7;
-
- if (inode->i_extra_size) {
- len += inode->i_extra_size;
- len = (len + 7) & ~7;
- }
-
- if (unlikely(inode->i_num_ads)) {
- if (inode_needs_dummy_stream(inode))
- len += ads_entry_out_total_length(&(struct wim_ads_entry){});
+ size_t len;
+
+ len = dentry_min_len_with_names(dentry->d_name_nbytes,
+ dentry->d_short_name_nbytes);
+ len = ALIGN(len, 8);
+
+ if (inode->i_extra)
+ len += ALIGN(inode->i_extra->size, 8);
+
+ if (!(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
+ /*
+ * Extra stream entries:
+ *
+ * - Use one extra stream entry for each named data stream
+ * - Use one extra stream entry for the unnamed data stream when there is either:
+ * - a reparse point stream
+ * - at least one named data stream (for Windows PE bug workaround)
+ * - Use one extra stream entry for the reparse point stream if there is one
+ */
+ bool have_named_data_stream = false;
+ bool have_reparse_point_stream = false;
+ for (unsigned i = 0; i < inode->i_num_streams; i++) {
+ const struct wim_inode_stream *strm = &inode->i_streams[i];
+ if (stream_is_named_data_stream(strm)) {
+ len += stream_out_total_length(strm);
+ have_named_data_stream = true;
+ } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) {
+ wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
+ have_reparse_point_stream = true;
+ }
+ }
- for (u16 i = 0; i < inode->i_num_ads; i++)
- len += ads_entry_out_total_length(&inode->i_ads_entries[i]);
+ if (have_named_data_stream || have_reparse_point_stream) {
+ if (have_reparse_point_stream)
+ len += ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8);
+ len += ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8);
+ }
}
return len;
* @arg will be passed as the second argument to each invocation of @visitor.
*
* This function does a pre-order traversal --- that is, a parent will be
- * visited before its children. It also will visit siblings in order of
- * case-sensitive filename. Equivalently, this function visits the entire tree
- * in the case-sensitive lexicographic order of the full paths.
+ * visited before its children. Furthermore, siblings will be visited in their
+ * collation order.
*
* It is safe to pass NULL for @root, which means that the dentry tree is empty.
* In this case, this function does nothing.
/*
* Calculate the full path to @dentry within the WIM image, if not already done.
*
- * The full name will be saved in the cached value 'dentry->_full_path'.
+ * The full name will be saved in the cached value 'dentry->d_full_path'.
*
* Whenever possible, use dentry_full_path() instead of calling this and
- * accessing _full_path directly.
+ * accessing d_full_path directly.
*
* Returns 0 or an error code resulting from a failed string conversion.
*/
calculate_dentry_full_path(struct wim_dentry *dentry)
{
size_t ulen;
- size_t dummy;
const struct wim_dentry *d;
- if (dentry->_full_path)
+ if (dentry->d_full_path)
return 0;
ulen = 0;
d = dentry;
do {
- ulen += d->file_name_nbytes / sizeof(utf16lechar);
+ ulen += d->d_name_nbytes / sizeof(utf16lechar);
ulen++;
d = d->d_parent; /* assumes d == d->d_parent for root */
} while (!dentry_is_root(d));
d = dentry;
do {
- p -= d->file_name_nbytes / sizeof(utf16lechar);
- memcpy(p, d->file_name, d->file_name_nbytes);
+ p -= d->d_name_nbytes / sizeof(utf16lechar);
+ if (d->d_name_nbytes)
+ memcpy(p, d->d_name, d->d_name_nbytes);
*--p = cpu_to_le16(WIM_PATH_SEPARATOR);
d = d->d_parent; /* assumes d == d->d_parent for root */
} while (!dentry_is_root(d));
wimlib_assert(p == ubuf);
return utf16le_to_tstr(ubuf, ulen * sizeof(utf16lechar),
- &dentry->_full_path, &dummy);
+ &dentry->d_full_path, NULL);
}
/*
dentry_full_path(struct wim_dentry *dentry)
{
calculate_dentry_full_path(dentry);
- return dentry->_full_path;
+ return dentry->d_full_path;
}
static int
struct wim_dentry *child;
/* Set offset of directory's child dentries */
- dentry->subdir_offset = *subdir_offset_p;
+ dentry->d_subdir_offset = *subdir_offset_p;
/* Account for child dentries */
for_dentry_child(child, dentry)
/* Account for end-of-directory entry */
*subdir_offset_p += 8;
} else {
- /* Not a directory; set subdir_offset to 0 */
- dentry->subdir_offset = 0;
+ /* Not a directory; set the subdir offset to 0 */
+ dentry->d_subdir_offset = 0;
}
return 0;
}
for_dentry_in_tree(root, dentry_calculate_subdir_offset, subdir_offset_p);
}
-/* Compare the UTF-16LE long filenames of two dentries case insensitively. */
-static int
-dentry_compare_names_case_insensitive(const struct wim_dentry *d1,
- const struct wim_dentry *d2)
-{
- return cmp_utf16le_strings(d1->file_name,
- d1->file_name_nbytes / 2,
- d2->file_name,
- d2->file_name_nbytes / 2,
- true);
-}
-
-/* Compare the UTF-16LE long filenames of two dentries case sensitively. */
/*
 * Compare the long names of @d1 and @d2 with cmp_utf16le_strings().
 *
 * Name lengths are passed as byte counts divided by 2, and @ignore_case is
 * forwarded unchanged. The return value is cmp_utf16le_strings()'s result;
 * callers in this file treat 0 as "equal" and use the sign to order entries.
 */
static int
-dentry_compare_names_case_sensitive(const struct wim_dentry *d1,
- const struct wim_dentry *d2)
+dentry_compare_names(const struct wim_dentry *d1, const struct wim_dentry *d2,
+ bool ignore_case)
{
- return cmp_utf16le_strings(d1->file_name,
- d1->file_name_nbytes / 2,
- d2->file_name,
- d2->file_name_nbytes / 2,
- false);
-}
-
-static int
-_avl_dentry_compare_names_ci(const struct avl_tree_node *n1,
- const struct avl_tree_node *n2)
-{
- const struct wim_dentry *d1, *d2;
-
- d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node_ci);
- d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node_ci);
- return dentry_compare_names_case_insensitive(d1, d2);
+ return cmp_utf16le_strings(d1->d_name, d1->d_name_nbytes / 2,
+ d2->d_name, d2->d_name_nbytes / 2,
+ ignore_case);
}
+/*
+ * Collate (compare) the long filenames of two dentries. This first compares
+ * the names ignoring case, then falls back to a case-sensitive comparison if
+ * the names are the same ignoring case.
+ *
+ * @n1 and @n2 are the d_index_node members embedded in the two dentries; the
+ * owning dentries are recovered with avl_tree_entry(). The result is whatever
+ * dentry_compare_names() returns for the deciding comparison.
+ */
static int
-_avl_dentry_compare_names(const struct avl_tree_node *n1,
- const struct avl_tree_node *n2)
+collate_dentry_names(const struct avl_tree_node *n1,
+ const struct avl_tree_node *n2)
{
const struct wim_dentry *d1, *d2;
+ int res;
d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node);
d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node);
- return dentry_compare_names_case_sensitive(d1, d2);
+
+ res = dentry_compare_names(d1, d2, true); /* primary key: case-insensitive order */
+ if (res)
+ return res;
+ return dentry_compare_names(d1, d2, false); /* tie-break: case-sensitive order */
}
/* Default case sensitivity behavior for searches with
#endif
;
-/* Case-sensitive dentry lookup. Only @file_name and @file_name_nbytes of
- * @dummy must be valid. */
-static struct wim_dentry *
-dir_lookup(const struct wim_inode *dir, const struct wim_dentry *dummy)
+/*
+ * Find the dentry within the given directory that has the given UTF-16LE
+ * filename. Return it if found, otherwise return NULL. This has configurable
+ * case sensitivity, and @name need not be null-terminated.
+ *
+ * The directory index is walked by hand from dir->d_inode->i_children. An
+ * exact (case-sensitive) match is returned immediately. Otherwise, when
+ * will_ignore_case(@case_type) is true, the last case-insensitive match seen
+ * on the way down is returned, and a warning is printed if other entries
+ * share that case-insensitive name. The count in that warning includes the
+ * returned entry itself.
+ */
+struct wim_dentry *
+get_dentry_child_with_utf16le_name(const struct wim_dentry *dir,
+ const utf16lechar *name,
+ size_t name_nbytes,
+ CASE_SENSITIVITY_TYPE case_type)
{
- struct avl_tree_node *node;
+ struct wim_dentry wanted;
+ struct avl_tree_node *cur = dir->d_inode->i_children;
+ struct wim_dentry *ci_match = NULL;
- node = avl_tree_lookup_node(dir->i_children,
- &dummy->d_index_node,
- _avl_dentry_compare_names);
- if (!node)
- return NULL;
- return avl_tree_entry(node, struct wim_dentry, d_index_node);
-}
+ wanted.d_name = (utf16lechar *)name;
+ wanted.d_name_nbytes = name_nbytes;
-/* Case-insensitive dentry lookup. Only @file_name and @file_name_nbytes of
- * @dummy must be valid. */
-static struct wim_dentry *
-dir_lookup_ci(const struct wim_inode *dir, const struct wim_dentry *dummy)
-{
- struct avl_tree_node *node;
+ if (unlikely(wanted.d_name_nbytes != name_nbytes))
+ return NULL; /* overflow */
- node = avl_tree_lookup_node(dir->i_children_ci,
- &dummy->d_index_node_ci,
- _avl_dentry_compare_names_ci);
- if (!node)
- return NULL;
- return avl_tree_entry(node, struct wim_dentry, d_index_node_ci);
-}
+ /* Note: we can't use avl_tree_lookup_node() here because we need to
+ * save case-insensitive matches. */
+ while (cur) {
+ struct wim_dentry *child;
+ int res;
-/* Given a UTF-16LE filename and a directory, look up the dentry for the file.
- * Return it if found, otherwise NULL. This has configurable case sensitivity,
- * and @name need not be null-terminated. */
-struct wim_dentry *
-get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
- const utf16lechar *name,
- size_t name_nbytes,
- CASE_SENSITIVITY_TYPE case_ctype)
-{
- const struct wim_inode *dir = dentry->d_inode;
- bool ignore_case = will_ignore_case(case_ctype);
- struct wim_dentry dummy;
- struct wim_dentry *child;
+ child = avl_tree_entry(cur, struct wim_dentry, d_index_node);
- dummy.file_name = (utf16lechar*)name;
- dummy.file_name_nbytes = name_nbytes;
+ res = dentry_compare_names(&wanted, child, true);
+ if (!res) {
+ /* case-insensitive match found */
+ ci_match = child;
- if (!ignore_case)
- /* Case-sensitive lookup. */
- return dir_lookup(dir, &dummy);
+ res = dentry_compare_names(&wanted, child, false);
+ if (!res)
+ return child; /* case-sensitive match found */
+ }
+
+ if (res < 0)
+ cur = cur->left;
+ else
+ cur = cur->right;
+ }
- /* Case-insensitive lookup. */
+ /* No case-sensitive match; use a case-insensitive match if possible. */
- child = dir_lookup_ci(dir, &dummy);
- if (!child)
+ if (!will_ignore_case(case_type))
return NULL;
- if (likely(list_empty(&child->d_ci_conflict_list)))
- /* Only one dentry has this case-insensitive name; return it */
- return child;
+ if (ci_match) {
+ size_t num_other_ci_matches = 0;
+ struct wim_dentry *other_ci_match = NULL, *d;
- /* Multiple dentries have the same case-insensitive name. Choose the
- * dentry with the same case-sensitive name, if one exists; otherwise
- * print a warning and choose one of the possible dentries arbitrarily.
- */
- struct wim_dentry *alt = child;
- size_t num_alts = 0;
+ dentry_for_each_ci_match(d, ci_match) {
+ num_other_ci_matches++;
+ other_ci_match = d;
+ }
- do {
- num_alts++;
- if (!dentry_compare_names_case_sensitive(&dummy, alt))
- return alt;
- alt = list_entry(alt->d_ci_conflict_list.next,
- struct wim_dentry, d_ci_conflict_list);
- } while (alt != child);
-
- WARNING("Result of case-insensitive lookup is ambiguous\n"
- " (returning \"%"TS"\" of %zu "
- "possible files, including \"%"TS"\")",
- dentry_full_path(child),
- num_alts,
- dentry_full_path(list_entry(child->d_ci_conflict_list.next,
- struct wim_dentry,
- d_ci_conflict_list)));
- return child;
+ if (num_other_ci_matches != 0) {
+ /* The candidates are the other matches plus the one being
+ * returned, hence the + 1. */
+ WARNING("Result of case-insensitive lookup is ambiguous\n"
+ " (returning \"%"TS"\" of %zu "
+ "possible files, including \"%"TS"\")",
+ dentry_full_path(ci_match), num_other_ci_matches + 1,
+ dentry_full_path(other_ci_match));
+ }
+ }
+
+ return ci_match;
}
-/* Given a 'tchar' filename and a directory, look up the dentry for the file.
- * If the filename was successfully converted to UTF-16LE and the dentry was
- * found, return it; otherwise return NULL. This has configurable case
- * sensitivity. */
+/*
+ * Find the dentry within the given directory that has the given 'tstr'
+ * filename. If the filename was successfully converted to UTF-16LE and the
+ * dentry was found, return it; otherwise return NULL. This has configurable
+ * case sensitivity.
+ */
struct wim_dentry *
-get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name,
+get_dentry_child_with_name(const struct wim_dentry *dir, const tchar *name,
CASE_SENSITIVITY_TYPE case_type)
{
int ret;
if (ret)
return NULL;
- child = get_dentry_child_with_utf16le_name(dentry,
+ child = get_dentry_child_with_utf16le_name(dir,
name_utf16le,
name_utf16le_nbytes,
case_type);
* *dentry_ret. On failure, returns WIMLIB_ERR_NOMEM or an error code resulting
* from a failed string conversion.
*/
-int
+static int
new_dentry(const tchar *name, struct wim_dentry **dentry_ret)
{
struct wim_dentry *dentry;
return 0;
}
-static int
-_new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret,
- bool timeless)
+/* Like new_dentry(), but also allocate an inode and associate it with the
+ * dentry. If set_timestamps=true, the timestamps for the inode will be set to
+ * the current time; otherwise, they will be left 0. */
+int
+new_dentry_with_new_inode(const tchar *name, bool set_timestamps,
+ struct wim_dentry **dentry_ret)
{
struct wim_dentry *dentry;
struct wim_inode *inode;
if (ret)
return ret;
- if (timeless)
- inode = new_timeless_inode();
- else
- inode = new_inode();
+ inode = new_inode(dentry, set_timestamps);
if (!inode) {
free_dentry(dentry);
return WIMLIB_ERR_NOMEM;
}
- d_associate(dentry, inode);
-
*dentry_ret = dentry;
return 0;
}
-/* Like new_dentry(), but also allocate an inode and associate it with the
- * dentry. The timestamps for the inode will be set to the current time. */
-int
-new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret)
-{
- return _new_dentry_with_inode(name, dentry_ret, false);
-}
-
-/* Like new_dentry_with_inode(), but don't bother setting the timestamps for the
- * new inode; instead, just leave them as 0, under the presumption that the
- * caller will set them itself. */
+/* Like new_dentry(), but also associate the new dentry with the specified inode
+ * and acquire a reference to each of the inode's blobs.
+ *
+ * If new_dentry() fails, its error code is returned before @inode is touched;
+ * otherwise 0 is returned with the new dentry stored in *dentry_ret. */
int
-new_dentry_with_timeless_inode(const tchar *name, struct wim_dentry **dentry_ret)
+new_dentry_with_existing_inode(const tchar *name, struct wim_inode *inode,
+ struct wim_dentry **dentry_ret)
{
- return _new_dentry_with_inode(name, dentry_ret, true);
+ int ret = new_dentry(name, dentry_ret);
+ if (ret)
+ return ret; /* nothing was associated, so nothing to undo */
+ d_associate(*dentry_ret, inode);
+ inode_ref_blobs(inode);
+ return 0;
}
/* Create an unnamed dentry with a new inode for a directory with the default
int ret;
struct wim_dentry *dentry;
- ret = new_dentry_with_inode(NULL, &dentry);
+ ret = new_dentry_with_new_inode(NULL, true, &dentry);
if (ret)
return ret;
/* Leave the inode number as 0; this is allowed for non
* hard-linked files. */
- dentry->d_inode->i_resolved = 1;
dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
*dentry_ret = dentry;
return 0;
}
-static int
-dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore)
-{
- dentry->d_inode->i_visited = 0;
- return 0;
-}
-
-void
-dentry_tree_clear_inode_visited(struct wim_dentry *root)
-{
- for_dentry_in_tree(root, dentry_clear_inode_visited, NULL);
-}
-
/*
* Free a WIM dentry.
*
{
if (dentry) {
d_disassociate(dentry);
- FREE(dentry->file_name);
- FREE(dentry->short_name);
- FREE(dentry->_full_path);
+ FREE(dentry->d_name);
+ FREE(dentry->d_short_name);
+ FREE(dentry->d_full_path);
FREE(dentry);
}
}
}
/*
 * Per-dentry callback selected by free_dentry_tree() when a blob table is
 * given: calls inode_unref_blobs() on the dentry's inode with @blob_table,
 * then frees the dentry. Always returns 0.
 */
static int
-do_free_dentry_and_unref_streams(struct wim_dentry *dentry, void *lookup_table)
+do_free_dentry_and_unref_blobs(struct wim_dentry *dentry, void *blob_table)
{
- inode_unref_streams(dentry->d_inode, lookup_table);
+ inode_unref_blobs(dentry->d_inode, blob_table); /* must precede free_dentry(): it reads dentry->d_inode */
free_dentry(dentry);
return 0;
}
* The root of the dentry tree to free. If NULL, this function has no
* effect.
*
- * @lookup_table:
- * A pointer to the lookup table for the WIM, or NULL if not specified. If
+ * @blob_table:
+ * A pointer to the blob table for the WIM, or NULL if not specified. If
* specified, this function will decrement the reference counts of the
- * single-instance streams referenced by the dentries.
+ * blobs referenced by the dentries.
*
* This function also releases references to the corresponding inodes.
*
* function.
*/
void
-free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table)
+free_dentry_tree(struct wim_dentry *root, struct blob_table *blob_table)
{
int (*f)(struct wim_dentry *, void *); /* per-dentry callback, chosen once below */
- if (lookup_table)
- f = do_free_dentry_and_unref_streams;
+ if (blob_table)
+ f = do_free_dentry_and_unref_blobs; /* also drops the inode's blob references */
else
f = do_free_dentry; /* no blob table supplied: use the plain variant */
- for_dentry_in_tree_depth(root, f, lookup_table);
+ for_dentry_in_tree_depth(root, f, blob_table); /* blob_table is handed to the traversal as the callback argument */
}
-/* Insert the @child dentry into the case sensitive index of the @dir directory.
- * Return NULL if successfully inserted, otherwise a pointer to the
- * already-inserted duplicate. */
-static struct wim_dentry *
-dir_index_child(struct wim_inode *dir, struct wim_dentry *child)
+/*
+ * Return the first dentry in the list of dentries which have the same
+ * case-insensitive name as the one given.
+ *
+ * The search walks backwards through the directory index from @dentry for as
+ * long as the preceding entry still compares equal to @dentry when
+ * dentry_compare_names() is called with 'true' as its last argument, and
+ * reports the earliest such entry.  @dentry itself is never returned: if
+ * nothing matching precedes it, the result is whatever
+ * dentry_get_next_ci_match() finds after it, which is NULL when @dentry has no
+ * case-insensitive sibling.
+ */
+struct wim_dentry *
+dentry_get_first_ci_match(struct wim_dentry *dentry)
{
- struct avl_tree_node *duplicate;
+ struct wim_dentry *ci_match = dentry;
- duplicate = avl_tree_insert(&dir->i_children,
- &child->d_index_node,
- _avl_dentry_compare_names);
- if (!duplicate)
- return NULL;
- return avl_tree_entry(duplicate, struct wim_dentry, d_index_node);
-}
+ for (;;) {
+ struct avl_tree_node *node;
+ struct wim_dentry *prev;
+
+ node = avl_tree_prev_in_order(&ci_match->d_index_node);
+ if (!node) /* reached the start of the index */
+ break;
+ prev = avl_tree_entry(node, struct wim_dentry, d_index_node);
+ if (dentry_compare_names(prev, dentry, true)) /* nonzero: the run of matching names ends here */
+ break;
+ ci_match = prev;
+ }
-/* Insert the @child dentry into the case insensitive index of the @dir
- * directory. Return NULL if successfully inserted, otherwise a pointer to the
- * already-inserted duplicate. */
-static struct wim_dentry *
-dir_index_child_ci(struct wim_inode *dir, struct wim_dentry *child)
-{
- struct avl_tree_node *duplicate;
+ if (ci_match == dentry) /* nothing earlier matched, so look forward instead */
+ return dentry_get_next_ci_match(dentry, dentry);
- duplicate = avl_tree_insert(&dir->i_children_ci,
- &child->d_index_node_ci,
- _avl_dentry_compare_names_ci);
- if (!duplicate)
- return NULL;
- return avl_tree_entry(duplicate, struct wim_dentry, d_index_node_ci);
+ return ci_match;
}
-/* Remove the specified dentry from its directory's case-sensitive index. */
-static void
-dir_unindex_child(struct wim_inode *dir, struct wim_dentry *child)
+/*
+ * Return the next dentry in the list of dentries which have the same
+ * case-insensitive name as the one given.
+ *
+ * @dentry is the dentry whose name is being matched; @ci_match is the current
+ * position of the iteration.  The result is the in-order successor of
+ * @ci_match in the directory index, stepping over @dentry itself, or NULL if
+ * there is no successor or if the successor's name no longer compares equal
+ * to that of @dentry.
+ */
+struct wim_dentry *
+dentry_get_next_ci_match(struct wim_dentry *dentry, struct wim_dentry *ci_match)
{
- avl_tree_remove(&dir->i_children, &child->d_index_node);
-}
+ do {
+ struct avl_tree_node *node;
-/* Remove the specified dentry from its directory's case-insensitive index. */
-static void
-dir_unindex_child_ci(struct wim_inode *dir, struct wim_dentry *child)
-{
- avl_tree_remove(&dir->i_children_ci, &child->d_index_node_ci);
-}
+ node = avl_tree_next_in_order(&ci_match->d_index_node);
+ if (!node) /* end of the index: no further candidates */
+ return NULL;
+ ci_match = avl_tree_entry(node, struct wim_dentry, d_index_node);
+ } while (ci_match == dentry); /* @dentry is never reported as its own match */
-/* Return true iff the specified dentry is in its parent directory's
- * case-insensitive index. */
-static bool
-dentry_in_ci_index(const struct wim_dentry *dentry)
-{
- return !avl_tree_node_is_unlinked(&dentry->d_index_node_ci);
+ if (dentry_compare_names(ci_match, dentry, true)) /* nonzero: names differ */
+ return NULL;
+
+ return ci_match;
}
/*
- * Link a dentry into the tree.
+ * Link a dentry into a directory.
*
* @parent:
- * The dentry that will be the parent of @child. It must name a directory.
+ * The directory into which to link the dentry. It must be a directory and
+ * must not be @child itself; both conditions are asserted.
*
* @child:
- * The dentry to link. It must be currently unlinked.
+ * The dentry to link into the directory. It must be currently unlinked.
*
- * Returns NULL if successful. If @parent already contains a dentry with the
- * same case-sensitive name as @child, returns a pointer to this duplicate
- * dentry.
+ * Returns NULL if successful; or, if @parent already contains a dentry with the
+ * same case-sensitive name as @child, then a pointer to this duplicate dentry
+ * is returned. In that case the d_parent pointer of @child is not changed.
*/
struct wim_dentry *
dentry_add_child(struct wim_dentry *parent, struct wim_dentry *child)
{
- struct wim_dentry *duplicate;
- struct wim_inode *dir;
+ struct wim_inode *dir = parent->d_inode;
+ struct avl_tree_node *duplicate;
wimlib_assert(parent != child);
-
- dir = parent->d_inode;
-
wimlib_assert(inode_is_directory(dir));
- duplicate = dir_index_child(dir, child);
+ duplicate = avl_tree_insert(&dir->i_children, &child->d_index_node,
+ collate_dentry_names); /* single per-directory index, ordered by collate_dentry_names() */
if (duplicate)
- return duplicate;
+ return avl_tree_entry(duplicate, struct wim_dentry, d_index_node); /* hand back the dentry that owns the conflicting index node */
- duplicate = dir_index_child_ci(dir, child);
- if (duplicate) {
- list_add(&child->d_ci_conflict_list, &duplicate->d_ci_conflict_list);
- avl_tree_node_set_unlinked(&child->d_index_node_ci);
- } else {
- INIT_LIST_HEAD(&child->d_ci_conflict_list);
- }
child->d_parent = parent;
return NULL;
}
-/* Unlink a dentry from the tree. */
+/* Unlink a dentry from its parent directory. */
void
unlink_dentry(struct wim_dentry *dentry)
{
- struct wim_inode *dir;
-
/* Do nothing if the dentry is root or it's already unlinked. Not
* actually necessary based on the current callers, but we do the check
* here to be safe. */
if (unlikely(dentry->d_parent == dentry))
return;
- dir = dentry->d_parent->d_inode;
-
- dir_unindex_child(dir, dentry);
-
- if (dentry_in_ci_index(dentry)) {
-
- dir_unindex_child_ci(dir, dentry);
-
- if (!list_empty(&dentry->d_ci_conflict_list)) {
- /* Make a different case-insensitively-the-same dentry
- * be the "representative" in the search index. */
- struct list_head *next;
- struct wim_dentry *other;
- struct wim_dentry *existing;
-
- next = dentry->d_ci_conflict_list.next;
- other = list_entry(next, struct wim_dentry, d_ci_conflict_list);
- existing = dir_index_child_ci(dir, other);
- wimlib_assert(existing == NULL);
- }
- }
- list_del(&dentry->d_ci_conflict_list);
+ avl_tree_remove(&dentry->d_parent->d_inode->i_children,
+ &dentry->d_index_node);
/* Not actually necessary, but to be safe don't retain the now-obsolete
* parent pointer. */
p++;
if (unlikely(p < end)) {
- inode->i_extra = memdup(p, end - p);
+ inode->i_extra = MALLOC(sizeof(struct wim_inode_extra) +
+ end - p);
if (!inode->i_extra)
return WIMLIB_ERR_NOMEM;
- inode->i_extra_size = end - p;
+ inode->i_extra->size = end - p;
+ memcpy(inode->i_extra->data, p, end - p);
+ }
+ return 0;
+}
+
+/*
+ * Assign stream types for an inode that has FILE_ATTRIBUTE_ENCRYPTED set.
+ *
+ * For an encrypted file, all data streams should have been packed into one
+ * stream laid out the way ReadEncryptedFileRaw() provides it on Windows.  That
+ * stream is tagged STREAM_TYPE_EFSRPC_RAW_DATA; the stream chosen is the first
+ * unnamed one whose hash is not all zeroes.  Every other stream keeps the type
+ * it already had.
+ *
+ * No reparse point stream is looked for.  The on-disk NTFS format permits one
+ * on an encrypted file, but as far as is known it cannot be stored in the WIM
+ * format in a WIMGAPI-compatible way, and Windows provides no way to read or
+ * write it when the process lacks access to the file encryption key.
+ */
+static void
+assign_stream_types_encrypted(struct wim_inode *inode)
+{
+	struct wim_inode_stream *cur = inode->i_streams;
+	struct wim_inode_stream *stop = cur + inode->i_num_streams;
+
+	for (; cur != stop; cur++) {
+		/* Skip named streams and streams without a real hash. */
+		if (stream_is_named(cur) || is_zero_hash(cur->_stream_hash))
+			continue;
+
+		/* Only the first qualifying stream is tagged. */
+		cur->stream_type = STREAM_TYPE_EFSRPC_RAW_DATA;
+		break;
+	}
+}
+
+/*
+ * Set the type of each stream for an unencrypted file.
+ *
+ * There will be an unnamed data stream, a reparse point stream, or both an
+ * unnamed data stream and a reparse point stream. In addition, there may be
+ * named data streams.
+ *
+ * NOTE: if the file has a reparse point stream or at least one named data
+ * stream, then WIMGAPI puts *all* streams in the extra stream entries and
+ * leaves the default stream hash zeroed. wimlib now does the same. However,
+ * for input we still support the default hash field being used, since wimlib
+ * used to use it and MS software is somewhat accepting of it as well.
+ *
+ * Assignment happens in two steps. The loop types every named stream and
+ * every unnamed stream other than a zero-hash default stream (index 0). That
+ * zero-hash default stream is remembered and typed only afterwards, so it
+ * can only take a role that no other stream has claimed.
+ */
+static void
+assign_stream_types_unencrypted(struct wim_inode *inode)
+{
+ bool found_reparse_point_stream = false;
+ bool found_unnamed_data_stream = false;
+ struct wim_inode_stream *unnamed_stream_with_zero_hash = NULL;
+
+ for (unsigned i = 0; i < inode->i_num_streams; i++) {
+ struct wim_inode_stream *strm = &inode->i_streams[i];
+
+ if (stream_is_named(strm)) {
+ /* Named data stream */
+ strm->stream_type = STREAM_TYPE_DATA;
+ } else if (i != 0 || !is_zero_hash(strm->_stream_hash)) {
+ /* Unnamed stream in the extra stream entries, OR the
+ * default stream in the dentry provided that it has a
+ * nonzero hash. The first such stream becomes the
+ * reparse point stream when the inode has
+ * FILE_ATTRIBUTE_REPARSE_POINT set; the next one (or
+ * the first, otherwise) becomes the unnamed data
+ * stream. Any further ones keep their current type. */
+ if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
+ !found_reparse_point_stream) {
+ found_reparse_point_stream = true;
+ strm->stream_type = STREAM_TYPE_REPARSE_POINT;
+ } else if (!found_unnamed_data_stream) {
+ found_unnamed_data_stream = true;
+ strm->stream_type = STREAM_TYPE_DATA;
+ }
+ } else if (!unnamed_stream_with_zero_hash) { /* default stream (i == 0) with a zero hash: decided after the loop */
+ unnamed_stream_with_zero_hash = strm;
+ }
+ }
+
+ if (unnamed_stream_with_zero_hash) { /* fill whichever role is still vacant, else STREAM_TYPE_UNKNOWN */
+ int type = STREAM_TYPE_UNKNOWN;
+ if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
+ !found_reparse_point_stream) {
+ type = STREAM_TYPE_REPARSE_POINT;
+ } else if (!found_unnamed_data_stream) {
+ type = STREAM_TYPE_DATA;
+ }
+ unnamed_stream_with_zero_hash->stream_type = type;
+ }
+}
+
+/*
+ * Read and interpret the collection of streams for the specified inode.
+ *
+ * @p:
+ * Start of the extra stream entries that follow the dentry.
+ * @end:
+ * End of the buffer being parsed; no entry may extend past it.
+ * @inode:
+ * The inode whose stream array is filled in. Stream 0 is built from
+ * @default_hash; streams 1 through @num_extra_streams are read from the
+ * entries at @p.
+ * @num_extra_streams:
+ * Number of extra stream entries to read.
+ * @default_hash:
+ * Hash to use for stream 0 (the default hash field of the dentry).
+ * @offset_p:
+ * Incremented by the number of bytes of extra stream entries consumed.
+ * It is updated only on success.
+ *
+ * Returns 0 on success, WIMLIB_ERR_NOMEM if an allocation fails, or
+ * WIMLIB_ERR_INVALID_METADATA_RESOURCE if an entry is truncated or malformed.
+ * Nothing allocated here is released on the error paths.
+ * NOTE(review): presumably the caller frees the stream array and names when
+ * it frees the inode -- confirm.
+ */
+static int
+setup_inode_streams(const u8 *p, const u8 *end, struct wim_inode *inode,
+ unsigned num_extra_streams, const u8 *default_hash,
+ u64 *offset_p)
+{
+ const u8 *orig_p = p;
+
+ inode->i_num_streams = 1 + num_extra_streams;
+
+ if (unlikely(inode->i_num_streams > ARRAY_LEN(inode->i_embedded_streams))) { /* too
+ * many streams for the embedded array, so use a zeroed heap
+ * array. NOTE(review): otherwise i_streams presumably already
+ * points at i_embedded_streams -- confirm. */
+ inode->i_streams = CALLOC(inode->i_num_streams,
+ sizeof(inode->i_streams[0]));
+ if (!inode->i_streams)
+ return WIMLIB_ERR_NOMEM;
+ }
+
+ /* Use the default hash field for the first stream. Its type is
+ * decided later, once all the extra stream entries have been read. */
+ inode->i_streams[0].stream_name = (utf16lechar *)NO_STREAM_NAME;
+ copy_hash(inode->i_streams[0]._stream_hash, default_hash);
+ inode->i_streams[0].stream_type = STREAM_TYPE_UNKNOWN;
+ inode->i_streams[0].stream_id = 0;
+
+ /* Read the extra stream entries */
+ for (unsigned i = 1; i < inode->i_num_streams; i++) {
+ struct wim_inode_stream *strm;
+ const struct wim_extra_stream_entry_on_disk *disk_strm;
+ u64 length;
+ u16 name_nbytes;
+
+ strm = &inode->i_streams[i];
+
+ strm->stream_id = i;
+
+ /* Do we have at least the size of the fixed-length data we know
+ * we need? */
+ if ((end - p) < sizeof(struct wim_extra_stream_entry_on_disk))
+ return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
+
+ disk_strm = (const struct wim_extra_stream_entry_on_disk *)p;
+
+ /* Read the length field, aligned to 8 bytes via ALIGN() */
+ length = ALIGN(le64_to_cpu(disk_strm->length), 8);
+
+ /* Make sure the length field is neither so small it doesn't
+ * include all the fixed-length data nor so large it overflows
+ * the metadata resource buffer. */
+ if (length < sizeof(struct wim_extra_stream_entry_on_disk) ||
+ length > (end - p))
+ return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
+
+ /* Read the rest of the fixed-length data. */
+
+ copy_hash(strm->_stream_hash, disk_strm->hash);
+ name_nbytes = le16_to_cpu(disk_strm->name_nbytes);
+
+ /* If name_nbytes != 0, the stream is named. */
+ if (name_nbytes != 0) {
+ /* The name is encoded in UTF16-LE, which uses 2-byte
+ * coding units, so the length of the name had better be
+ * an even number of bytes. */
+ if (name_nbytes & 1)
+ return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
+
+ /* Add the length of the stream name to get the length
+ * we actually need to read. Make sure this isn't more
+ * than the specified length of the entry. */
+ if (sizeof(struct wim_extra_stream_entry_on_disk) +
+ name_nbytes > length)
+ return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
+
+ strm->stream_name = utf16le_dupz(disk_strm->name,
+ name_nbytes);
+ if (!strm->stream_name)
+ return WIMLIB_ERR_NOMEM;
+ } else {
+ strm->stream_name = (utf16lechar *)NO_STREAM_NAME;
+ }
+
+ strm->stream_type = STREAM_TYPE_UNKNOWN;
+
+ p += length;
}
+
+ inode->i_next_stream_id = inode->i_num_streams;
+
+ /* Now, assign a type to each stream. Unfortunately this requires
+ * various hacks because stream types aren't explicitly provided in the
+ * WIM on-disk format. */
+
+ if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED))
+ assign_stream_types_encrypted(inode);
+ else
+ assign_stream_types_unencrypted(inode);
+
+ *offset_p += p - orig_p;
return 0;
}
-/* Read a dentry, including all alternate data stream entries that follow it,
- * from an uncompressed metadata resource buffer. */
+/* Read a dentry, including all extra stream entries that follow it, from an
+ * uncompressed metadata resource buffer. */
static int
read_dentry(const u8 * restrict buf, size_t buf_len,
u64 *offset_p, struct wim_dentry **dentry_ret)
struct wim_dentry *dentry;
struct wim_inode *inode;
u16 short_name_nbytes;
- u16 file_name_nbytes;
+ u16 name_nbytes;
u64 calculated_size;
int ret;
- BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE);
+ STATIC_ASSERT(sizeof(struct wim_dentry_on_disk) == WIM_DENTRY_DISK_SIZE);
/* Before reading the whole dentry, we need to read just the length.
* This is because a dentry of length 8 (that is, just the length field)
/* Check for buffer overrun. */
if (unlikely(offset + sizeof(u64) > buf_len ||
offset + sizeof(u64) < offset))
- {
- ERROR("Directory entry starting at %"PRIu64" ends past the "
- "end of the metadata resource (size %zu)",
- offset, buf_len);
return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
- }
/* Get pointer to the dentry data. */
p = &buf[offset];
disk_dentry = (const struct wim_dentry_on_disk*)p;
/* Get dentry length. */
- length = le64_to_cpu(disk_dentry->length);
+ length = ALIGN(le64_to_cpu(disk_dentry->length), 8);
/* Check for end-of-directory. */
if (length <= 8) {
}
/* Validate dentry length. */
- if (unlikely(length < sizeof(struct wim_dentry_on_disk))) {
- ERROR("Directory entry has invalid length of %"PRIu64" bytes",
- length);
+ if (unlikely(length < sizeof(struct wim_dentry_on_disk)))
return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
- }
/* Check for buffer overrun. */
if (unlikely(offset + length > buf_len ||
offset + length < offset))
- {
- ERROR("Directory entry at offset %"PRIu64" and with size "
- "%"PRIu64" ends past the end of the metadata resource "
- "(size %zu)", offset, length, buf_len);
return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
- }
/* Allocate new dentry structure, along with a preliminary inode. */
- ret = new_dentry_with_timeless_inode(NULL, &dentry);
+ ret = new_dentry_with_new_inode(NULL, false, &dentry);
if (ret)
return ret;
/* Read more fields: some into the dentry, and some into the inode. */
inode->i_attributes = le32_to_cpu(disk_dentry->attributes);
inode->i_security_id = le32_to_cpu(disk_dentry->security_id);
- dentry->subdir_offset = le64_to_cpu(disk_dentry->subdir_offset);
+ dentry->d_subdir_offset = le64_to_cpu(disk_dentry->subdir_offset);
inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time);
inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time);
inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time);
- copy_hash(inode->i_hash, disk_dentry->unnamed_stream_hash);
+ inode->i_unknown_0x54 = le32_to_cpu(disk_dentry->unknown_0x54);
- /* I don't know what's going on here. It seems like M$ screwed up the
- * reparse points, then put the fields in the same place and didn't
- * document it. So we have some fields we read for reparse points, and
- * some fields in the same place for non-reparse-points. */
if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
- inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->reparse.rp_unknown_1);
inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag);
- inode->i_rp_unknown_2 = le16_to_cpu(disk_dentry->reparse.rp_unknown_2);
- inode->i_not_rpfixed = le16_to_cpu(disk_dentry->reparse.not_rpfixed);
- /* Leave inode->i_ino at 0. Note that this means the WIM file
- * cannot archive hard-linked reparse points. Such a thing
- * doesn't really make sense anyway, although I believe it's
- * theoretically possible to have them on NTFS. */
+ inode->i_rp_reserved = le16_to_cpu(disk_dentry->reparse.rp_reserved);
+ inode->i_rp_flags = le16_to_cpu(disk_dentry->reparse.rp_flags);
+ /* Leave inode->i_ino at 0. Note: this means that WIM cannot
+ * represent multiple hard links to a reparse point file. */
} else {
- inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->nonreparse.rp_unknown_1);
inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id);
}
- inode->i_num_ads = le16_to_cpu(disk_dentry->num_alternate_data_streams);
/* Now onto reading the names. There are two of them: the (long) file
* name, and the short name. */
short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes);
- file_name_nbytes = le16_to_cpu(disk_dentry->file_name_nbytes);
+ name_nbytes = le16_to_cpu(disk_dentry->name_nbytes);
- if (unlikely((short_name_nbytes & 1) | (file_name_nbytes & 1))) {
- ERROR("Dentry name is not valid UTF-16 (odd number of bytes)!");
+ if (unlikely((short_name_nbytes & 1) | (name_nbytes & 1))) {
ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
goto err_free_dentry;
}
/* We now know the length of the file name and short name. Make sure
- * the length of the dentry is large enough to actually hold them.
- *
- * The calculated length here is unaligned to allow for the possibility
- * that the dentry's length is unaligned, although this would be
- * unexpected. */
- calculated_size = dentry_min_len_with_names(file_name_nbytes,
+ * the length of the dentry is large enough to actually hold them. */
+ calculated_size = dentry_min_len_with_names(name_nbytes,
short_name_nbytes);
if (unlikely(length < calculated_size)) {
- ERROR("Unexpected end of directory entry! (Expected "
- "at least %"PRIu64" bytes, got %"PRIu64" bytes.)",
- calculated_size, length);
ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
goto err_free_dentry;
}
/* Read the filename if present. Note: if the filename is empty, there
* is no null terminator following it. */
- if (file_name_nbytes) {
- dentry->file_name = utf16le_dupz(p, file_name_nbytes);
- if (dentry->file_name == NULL) {
+ if (name_nbytes) {
+ dentry->d_name = utf16le_dupz(p, name_nbytes);
+ if (unlikely(!dentry->d_name)) {
ret = WIMLIB_ERR_NOMEM;
goto err_free_dentry;
}
- dentry->file_name_nbytes = file_name_nbytes;
- p += (u32)file_name_nbytes + 2;
+ dentry->d_name_nbytes = name_nbytes;
+ p += (u32)name_nbytes + 2;
}
/* Read the short filename if present. Note: if there is no short
* filename, there is no null terminator following it. */
if (short_name_nbytes) {
- dentry->short_name = utf16le_dupz(p, short_name_nbytes);
- if (dentry->short_name == NULL) {
+ dentry->d_short_name = utf16le_dupz(p, short_name_nbytes);
+ if (unlikely(!dentry->d_short_name)) {
ret = WIMLIB_ERR_NOMEM;
goto err_free_dentry;
}
- dentry->short_name_nbytes = short_name_nbytes;
+ dentry->d_short_name_nbytes = short_name_nbytes;
p += (u32)short_name_nbytes + 2;
}
- /* Read extra data at end of dentry (but before alternate data stream
- * entries). This may contain tagged items. */
+ /* Read extra data at end of dentry (but before extra stream entries).
+ * This may contain tagged metadata items. */
ret = read_extra_data(p, &buf[offset + length], inode);
if (ret)
goto err_free_dentry;
- /* Align the dentry length. */
- length = (length + 7) & ~7;
-
offset += length;
- /* Read the alternate data streams, if present. inode->i_num_ads tells
- * us how many they are, and they will directly follow the dentry in the
- * metadata resource buffer.
- *
- * Note that each alternate data stream entry begins on an 8-byte
- * aligned boundary, and the alternate data stream entries seem to NOT
- * be included in the dentry->length field for some reason. */
- if (unlikely(inode->i_num_ads != 0)) {
- size_t orig_bytes_remaining;
- size_t bytes_remaining;
-
- if (offset > buf_len) {
- ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
- goto err_free_dentry;
- }
- bytes_remaining = buf_len - offset;
- orig_bytes_remaining = bytes_remaining;
- ret = read_ads_entries(&buf[offset], inode, &bytes_remaining);
- if (ret)
- goto err_free_dentry;
- offset += (orig_bytes_remaining - bytes_remaining);
- }
+ /* Set up the inode's collection of streams. */
+ ret = setup_inode_streams(&buf[offset],
+ &buf[buf_len],
+ inode,
+ le16_to_cpu(disk_dentry->num_extra_streams),
+ disk_dentry->default_hash,
+ &offset);
+ if (ret)
+ goto err_free_dentry;
*offset_p = offset; /* Sets offset of next dentry in directory */
*dentry_ret = dentry;
return ret;
}
-/* Is the dentry named "." or ".." ? */
static bool
dentry_is_dot_or_dotdot(const struct wim_dentry *dentry)
{
- if (dentry->file_name_nbytes <= 4) {
- if (dentry->file_name_nbytes == 4) {
- if (dentry->file_name[0] == cpu_to_le16('.') &&
- dentry->file_name[1] == cpu_to_le16('.'))
+ if (dentry->d_name_nbytes <= 4) { /* Is the dentry named "." or ".."?
+ * The length is in bytes and each unit
+ * compared below is 16 bits wide, so 4
+ * bytes is ".." and 2 bytes is ".". */
+ if (dentry->d_name_nbytes == 4) {
+ if (dentry->d_name[0] == cpu_to_le16('.') &&
+ dentry->d_name[1] == cpu_to_le16('.'))
return true;
- } else if (dentry->file_name_nbytes == 2) {
- if (dentry->file_name[0] == cpu_to_le16('.'))
+ } else if (dentry->d_name_nbytes == 2) {
+ if (dentry->d_name[0] == cpu_to_le16('.'))
return true;
}
}
return false;
}
+/* Report whether any 16-bit unit of the dentry's name is a null character.
+ * The scan covers d_name_nbytes / 2 units of d_name. */
+static bool
+dentry_contains_embedded_null(const struct wim_dentry *dentry)
+{
+	unsigned pos = 0;
+
+	/* Advance until a null unit is found or the name is exhausted. */
+	while (pos < dentry->d_name_nbytes / 2 &&
+	       dentry->d_name[pos] != cpu_to_le16('\0'))
+		pos++;
+
+	/* Stopping early means a null unit was encountered. */
+	return pos < dentry->d_name_nbytes / 2;
+}
+
+/*
+ * Decide whether @dentry, found while reading the directory @dir, must be
+ * dropped because of its name.  A warning naming @dir is issued for every
+ * dentry that is rejected.  Returns true if the dentry should be ignored.
+ */
+static bool
+should_ignore_dentry(struct wim_dentry *dir, const struct wim_dentry *dentry)
+{
+	bool ignore = true;
+
+	if (!dentry_has_long_name(dentry)) {
+		/* Only the root dentry is allowed to be unnamed. */
+		WARNING("Ignoring unnamed file in directory \"%"TS"\"",
+			dentry_full_path(dir));
+	} else if (dentry_is_dot_or_dotdot(dentry)) {
+		/* "." and ".." are refused: such filenames could be used in
+		 * path traversal attacks. */
+		WARNING("Ignoring file named \".\" or \"..\" in directory "
+			"\"%"TS"\"", dentry_full_path(dir));
+	} else if (dentry_contains_embedded_null(dentry)) {
+		/* Names with embedded null characters are forbidden outright,
+		 * even though every extraction target already treats the null
+		 * character as unsupported. */
+		WARNING("Ignoring filename with embedded null character in "
+			"directory \"%"TS"\"", dentry_full_path(dir));
+	} else {
+		ignore = false;
+	}
+
+	return ignore;
+}
+
static int
read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len,
- struct wim_dentry * restrict dir)
+ struct wim_dentry * restrict dir, unsigned depth)
{
- u64 cur_offset = dir->subdir_offset;
-
- /* Check for cyclic directory structure, which would cause infinite
- * recursion if not handled. */
- for (struct wim_dentry *d = dir->d_parent;
- !dentry_is_root(d); d = d->d_parent)
- {
- if (unlikely(d->subdir_offset == cur_offset)) {
- ERROR("Cyclic directory structure detected: children "
- "of \"%"TS"\" coincide with children of \"%"TS"\"",
- dentry_full_path(dir), dentry_full_path(d));
- return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
- }
+ u64 cur_offset = dir->d_subdir_offset;
+
+ /* Disallow extremely deep or cyclic directory structures */
+ if (unlikely(depth >= 16384)) {
+ ERROR("Directory structure too deep!");
+ return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
}
for (;;) {
if (child == NULL)
return 0;
- /* All dentries except the root should be named. */
- if (unlikely(!dentry_has_long_name(child))) {
- WARNING("Ignoring unnamed dentry in "
- "directory \"%"TS"\"", dentry_full_path(dir));
- free_dentry(child);
- continue;
- }
-
- /* Don't allow files named "." or "..". */
- if (unlikely(dentry_is_dot_or_dotdot(child))) {
- WARNING("Ignoring file named \".\" or \"..\"; "
- "potentially malicious archive!!!");
+ /* Ignore dentries with bad names. */
+ if (unlikely(should_ignore_dentry(dir, child))) {
free_dentry(child);
continue;
}
/* If this child is a directory that itself has children, call
* this procedure recursively. */
- if (child->subdir_offset != 0) {
+ if (child->d_subdir_offset != 0) {
if (likely(dentry_is_directory(child))) {
ret = read_dentry_tree_recursive(buf,
buf_len,
- child);
+ child,
+ depth + 1);
if (ret)
return ret;
} else {
int ret;
struct wim_dentry *root;
- DEBUG("Reading dentry tree (root_offset=%"PRIu64")", root_offset);
-
ret = read_dentry(buf, buf_len, &root_offset, &root);
if (ret)
return ret;
goto err_free_dentry_tree;
}
- if (likely(root->subdir_offset != 0)) {
- ret = read_dentry_tree_recursive(buf, buf_len, root);
+ if (likely(root->d_subdir_offset != 0)) {
+ ret = read_dentry_tree_recursive(buf, buf_len, root, 0);
if (ret)
goto err_free_dentry_tree;
}
return ret;
}
-/*
- * Write a WIM alternate data stream (ADS) entry to an output buffer.
- *
- * @ads_entry:
- * The ADS entry to write.
- *
- * @hash:
- * The hash field to use (instead of the one stored directly in the ADS
- * entry, which isn't valid if the inode has been "resolved").
- *
- * @p:
- * The memory location to which to write the data.
- *
- * Returns a pointer to the byte after the last byte written.
- */
static u8 *
-write_ads_entry(const struct wim_ads_entry *ads_entry,
- const u8 *hash, u8 * restrict p)
+write_extra_stream_entry(u8 * restrict p, const utf16lechar * restrict name,
+ const u8 * restrict hash)
{
- struct wim_ads_entry_on_disk *disk_ads_entry =
- (struct wim_ads_entry_on_disk*)p;
+ struct wim_extra_stream_entry_on_disk *disk_strm =
+ (struct wim_extra_stream_entry_on_disk *)p; /* Writes one extra
+ * stream entry, with the given @name and @hash, at @p and
+ * returns the address just past it, after zero padding up to
+ * an 8-byte-aligned address. */
u8 *orig_p = p;
+ size_t name_nbytes;
- disk_ads_entry->reserved = cpu_to_le64(ads_entry->reserved);
- copy_hash(disk_ads_entry->hash, hash);
- disk_ads_entry->stream_name_nbytes = cpu_to_le16(ads_entry->stream_name_nbytes);
- p += sizeof(struct wim_ads_entry_on_disk);
- if (ads_entry->stream_name_nbytes) {
- p = mempcpy(p, ads_entry->stream_name,
- (u32)ads_entry->stream_name_nbytes + 2);
- }
+ if (name == NO_STREAM_NAME) /* unnamed stream: no name bytes are written */
+ name_nbytes = 0;
+ else
+ name_nbytes = utf16le_len_bytes(name);
+
+ disk_strm->reserved = 0;
+ copy_hash(disk_strm->hash, hash);
+ disk_strm->name_nbytes = cpu_to_le16(name_nbytes); /* NOTE(review): name_nbytes is a size_t; a value above 0xFFFF would presumably be truncated here -- confirm callers bound it */
+ p += sizeof(struct wim_extra_stream_entry_on_disk);
+ if (name_nbytes != 0)
+ p = mempcpy(p, name, name_nbytes + 2); /* + 2: presumably also copies the UTF-16 null terminator -- confirm utf16le_len_bytes() excludes it */
/* Align to 8-byte boundary */
while ((uintptr_t)p & 7)
*p++ = 0;
- disk_ads_entry->length = cpu_to_le64(p - orig_p);
+ disk_strm->length = cpu_to_le64(p - orig_p); /* total entry size, padding included */
return p;
}
/*
* Write a WIM dentry to an output buffer.
*
- * This includes any alternate data stream entries that may follow the dentry
- * itself.
+ * This includes any extra stream entries that may follow the dentry itself.
*
* @dentry:
* The dentry to write.
const struct wim_inode *inode;
struct wim_dentry_on_disk *disk_dentry;
const u8 *orig_p;
- const u8 *hash;
- bool use_dummy_stream;
- u16 num_ads;
wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */
orig_p = p;
inode = dentry->d_inode;
- use_dummy_stream = inode_needs_dummy_stream(inode);
disk_dentry = (struct wim_dentry_on_disk*)p;
disk_dentry->attributes = cpu_to_le32(inode->i_attributes);
disk_dentry->security_id = cpu_to_le32(inode->i_security_id);
- disk_dentry->subdir_offset = cpu_to_le64(dentry->subdir_offset);
+ disk_dentry->subdir_offset = cpu_to_le64(dentry->d_subdir_offset);
disk_dentry->unused_1 = cpu_to_le64(0);
disk_dentry->unused_2 = cpu_to_le64(0);
disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time);
disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time);
disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time);
- if (use_dummy_stream)
- hash = zero_hash;
- else
- hash = inode_stream_hash(inode, 0);
- copy_hash(disk_dentry->unnamed_stream_hash, hash);
+ disk_dentry->unknown_0x54 = cpu_to_le32(inode->i_unknown_0x54);
if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
- disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag);
- disk_dentry->reparse.rp_unknown_2 = cpu_to_le16(inode->i_rp_unknown_2);
- disk_dentry->reparse.not_rpfixed = cpu_to_le16(inode->i_not_rpfixed);
+ disk_dentry->reparse.rp_reserved = cpu_to_le16(inode->i_rp_reserved);
+ disk_dentry->reparse.rp_flags = cpu_to_le16(inode->i_rp_flags);
} else {
- disk_dentry->nonreparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
disk_dentry->nonreparse.hard_link_group_id =
cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino);
}
- num_ads = inode->i_num_ads;
- if (use_dummy_stream)
- num_ads++;
- disk_dentry->num_alternate_data_streams = cpu_to_le16(num_ads);
- disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes);
- disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes);
+
+ disk_dentry->short_name_nbytes = cpu_to_le16(dentry->d_short_name_nbytes);
+ disk_dentry->name_nbytes = cpu_to_le16(dentry->d_name_nbytes);
p += sizeof(struct wim_dentry_on_disk);
wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry));
if (dentry_has_long_name(dentry))
- p = mempcpy(p, dentry->file_name, (u32)dentry->file_name_nbytes + 2);
+ p = mempcpy(p, dentry->d_name, (u32)dentry->d_name_nbytes + 2);
if (dentry_has_short_name(dentry))
- p = mempcpy(p, dentry->short_name, (u32)dentry->short_name_nbytes + 2);
+ p = mempcpy(p, dentry->d_short_name, (u32)dentry->d_short_name_nbytes + 2);
/* Align to 8-byte boundary */
while ((uintptr_t)p & 7)
*p++ = 0;
- if (inode->i_extra_size) {
+ if (inode->i_extra) {
/* Extra tagged items --- not usually present. */
- p = mempcpy(p, inode->i_extra, inode->i_extra_size);
+ p = mempcpy(p, inode->i_extra->data, inode->i_extra->size);
+
+ /* Align to 8-byte boundary */
while ((uintptr_t)p & 7)
*p++ = 0;
}
disk_dentry->length = cpu_to_le64(p - orig_p);
- if (use_dummy_stream) {
- hash = inode_unnamed_stream_hash(inode);
- p = write_ads_entry(&(struct wim_ads_entry){}, hash, p);
- }
+ /* Streams */
+
+ if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
+ const struct wim_inode_stream *efs_strm;
+ const u8 *efs_hash;
+
+ efs_strm = inode_get_unnamed_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA);
+ efs_hash = efs_strm ? stream_hash(efs_strm) : zero_hash;
+ copy_hash(disk_dentry->default_hash, efs_hash);
+ disk_dentry->num_extra_streams = cpu_to_le16(0);
+ } else {
+ /*
+ * Extra stream entries:
+ *
+ * - Use one extra stream entry for each named data stream
+ * - Use one extra stream entry for the unnamed data stream when there is either:
+ * - a reparse point stream
+ * - at least one named data stream (for Windows PE bug workaround)
+ * - Use one extra stream entry for the reparse point stream if there is one
+ */
+ bool have_named_data_stream = false;
+ bool have_reparse_point_stream = false;
+ const u8 *unnamed_data_stream_hash = zero_hash;
+ const u8 *reparse_point_hash;
+ for (unsigned i = 0; i < inode->i_num_streams; i++) {
+ const struct wim_inode_stream *strm = &inode->i_streams[i];
+ if (strm->stream_type == STREAM_TYPE_DATA) {
+ if (stream_is_named(strm))
+ have_named_data_stream = true;
+ else
+ unnamed_data_stream_hash = stream_hash(strm);
+ } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) {
+ have_reparse_point_stream = true;
+ reparse_point_hash = stream_hash(strm);
+ }
+ }
+
+ if (unlikely(have_reparse_point_stream || have_named_data_stream)) {
+
+ unsigned num_extra_streams = 0;
- /* Write the alternate data streams entries, if any. */
- for (u16 i = 0; i < inode->i_num_ads; i++) {
- hash = inode_stream_hash(inode, i + 1);
- p = write_ads_entry(&inode->i_ads_entries[i], hash, p);
+ copy_hash(disk_dentry->default_hash, zero_hash);
+
+ if (have_reparse_point_stream) {
+ p = write_extra_stream_entry(p, NO_STREAM_NAME,
+ reparse_point_hash);
+ num_extra_streams++;
+ }
+
+ p = write_extra_stream_entry(p, NO_STREAM_NAME,
+ unnamed_data_stream_hash);
+ num_extra_streams++;
+
+ for (unsigned i = 0; i < inode->i_num_streams; i++) {
+ const struct wim_inode_stream *strm = &inode->i_streams[i];
+ if (stream_is_named_data_stream(strm)) {
+ p = write_extra_stream_entry(p, strm->stream_name,
+ stream_hash(strm));
+ num_extra_streams++;
+ }
+ }
+ wimlib_assert(num_extra_streams <= 0xFFFF);
+
+ disk_dentry->num_extra_streams = cpu_to_le16(num_extra_streams);
+ } else {
+ copy_hash(disk_dentry->default_hash, unnamed_data_stream_hash);
+ disk_dentry->num_extra_streams = cpu_to_le16(0);
+ }
}
return p;
static int
write_dir_dentries(struct wim_dentry *dir, void *_pp)
{
- if (dir->subdir_offset != 0) {
+ if (dir->d_subdir_offset != 0) {
u8 **pp = _pp;
u8 *p = *pp;
struct wim_dentry *child;
u8 *
write_dentry_tree(struct wim_dentry *root, u8 *p)
{
- DEBUG("Writing dentry tree.");
-
- wimlib_assert(root != NULL);
-
/* write root dentry and end-of-directory entry following it */
p = write_dentry(root, p);
*(u64*)p = 0;