*/
/*
- * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
+ * Copyright (C) 2012-2016 Eric Biggers
*
* This file is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
* - wimlib does not allow *directory* hard links, so a WIM image really does
* have a *tree* of dentries (and not an arbitrary graph of dentries).
*
- * - wimlib indexes dentries both case-insensitively and case-sensitively,
- * allowing either behavior to be used for path lookup.
+ * - wimlib supports both case-sensitive and case-insensitive path lookups.
+ * The implementation uses a single in-memory index per directory, using a
+ * collation order like that used by NTFS; see collate_dentry_names().
*
* - Multiple dentries in a directory might have the same case-insensitive
* name. But wimlib enforces that at most one dentry in a directory can have
le32 attributes;
/* A value that specifies the security descriptor for this file or
- * directory. If -1, the file or directory has no security descriptor.
- * Otherwise, it is a 0-based index into the WIM image's table of
- * security descriptors (see: `struct wim_security_data') */
- sle32 security_id;
+ * directory. If 0xFFFFFFFF, the file or directory has no security
+ * descriptor. Otherwise, it is a 0-based index into the WIM image's
+ * table of security descriptors (see: `struct wim_security_data') */
+ le32 security_id;
/* Offset, in bytes, from the start of the uncompressed metadata
* resource of this directory's child directory entries, or 0 if this
*/
u8 default_hash[SHA1_HASH_SIZE];
- /* The format of the following data is not yet completely known and they
- * do not correspond to Microsoft's documentation.
+ /* Unknown field (maybe accidental padding) */
+ le32 unknown_0x54;
+
+ /*
+ * The following 8-byte union contains either information about the
+ * reparse point (for files with FILE_ATTRIBUTE_REPARSE_POINT set), or
+ * the "hard link group ID" (for other files).
+ *
+ * The reparse point information contains ReparseTag and ReparseReserved
+ * from the header of the reparse point buffer. It also contains a flag
+ * that indicates whether a reparse point fixup (for the target of an
+ * absolute symbolic link or junction) was done or not.
*
- * If this directory entry is for a reparse point (has
- * FILE_ATTRIBUTE_REPARSE_POINT set in the 'attributes' field), then the
- * version of the following fields containing the reparse tag is valid.
- * Furthermore, the field notated as not_rpfixed, as far as I can tell,
- * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the
- * targets of absolute symbolic links) were *not* done, and otherwise 0.
+ * The "hard link group ID" is like an inode number; all dentries for
+ * the same inode share the same value. See inode_fixup.c for more
+ * information.
*
- * If this directory entry is not for a reparse point, then the version
- * of the following fields containing the hard_link_group_id is valid.
- * All MS says about this field is that "If this file is part of a hard
- * link set, all the directory entries in the set will share the same
- * value in this field.". However, more specifically I have observed
- * the following:
- * - If the file is part of a hard link set of size 1, then the
- * hard_link_group_id should be set to either 0, which is treated
- * specially as indicating "not hardlinked", or any unique value.
- * - The specific nonzero values used to identity hard link sets do
- * not matter, as long as they are unique.
- * - However, due to bugs in Microsoft's software, it is actually NOT
- * guaranteed that directory entries that share the same hard link
- * group ID are actually hard linked to each either. See
- * inode_fixup.c for the code that handles this.
+ * Note that this union creates the limitation that reparse point files
+ * cannot have multiple names (hard links).
*/
union {
struct {
- le32 rp_unknown_1;
le32 reparse_tag;
- le16 rp_unknown_2;
- le16 not_rpfixed;
- } _packed_attribute reparse;
+ le16 rp_reserved;
+ le16 rp_flags;
+ } __attribute__((packed)) reparse;
struct {
- le32 rp_unknown_1;
le64 hard_link_group_id;
- } _packed_attribute nonreparse;
+ } __attribute__((packed)) nonreparse;
};
/* Number of extra stream entries that directly follow this dentry
* encoded "long" name, excluding the null terminator. If zero, then
* this file has no long name. The root dentry should not have a long
* name, but all other dentries in the image should have long names. */
- le16 file_name_nbytes;
+ le16 name_nbytes;
/* Beginning of optional, variable-length fields */
- /* If file_name_nbytes != 0, the next field will be the UTF-16LE encoded
- * long file name. This will be null-terminated, so the size of this
- * field will really be file_name_nbytes + 2. */
- /*utf16lechar file_name[];*/
+ /* If name_nbytes != 0, the next field will be the UTF-16LE encoded long
+ * name. This will be null-terminated, so the size of this field will
+ * really be name_nbytes + 2. */
+ /*utf16lechar name[];*/
/* If short_name_nbytes != 0, the next field will be the UTF-16LE
* encoded short name. This will be null-terminated, so the size of
* field) after 8-byte alignment, then the remaining space will be a
* variable-length list of tagged metadata items. See tagged_items.c
* for more information. */
- /* u8 tagged_items[] _aligned_attribute(8); */
+ /* u8 tagged_items[] __attribute__((aligned(8))); */
-} _packed_attribute;
+} __attribute__((packed));
/* If num_extra_streams != 0, then there are that many extra stream
* entries following the dentry, starting on the next 8-byte aligned
* boundary. They are not counted in the 'length' field of the dentry.
* the null terminator. There is a null terminator character if
* @name_nbytes != 0; i.e., if this stream is named. */
utf16lechar name[];
-} _packed_attribute;
+} __attribute__((packed));
static void
-do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *file_name,
- size_t file_name_nbytes)
+do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *name,
+ size_t name_nbytes)
{
- FREE(dentry->file_name);
- dentry->file_name = file_name;
- dentry->file_name_nbytes = file_name_nbytes;
+ FREE(dentry->d_name);
+ dentry->d_name = name;
+ dentry->d_name_nbytes = name_nbytes;
if (dentry_has_short_name(dentry)) {
- FREE(dentry->short_name);
- dentry->short_name = NULL;
- dentry->short_name_nbytes = 0;
+ FREE(dentry->d_short_name);
+ dentry->d_short_name = NULL;
+ dentry->d_short_name_nbytes = 0;
}
}
* tagged metadata items as well as any extra stream entries that may need to
* follow the dentry. */
static size_t
-dentry_min_len_with_names(u16 file_name_nbytes, u16 short_name_nbytes)
+dentry_min_len_with_names(u16 name_nbytes, u16 short_name_nbytes)
{
size_t length = sizeof(struct wim_dentry_on_disk);
- if (file_name_nbytes)
- length += (u32)file_name_nbytes + 2;
+ if (name_nbytes)
+ length += (u32)name_nbytes + 2;
if (short_name_nbytes)
length += (u32)short_name_nbytes + 2;
return length;
len += utf16le_len_bytes(strm->stream_name) + 2;
/* Account for any necessary padding to the next 8-byte boundary. */
- return (len + 7) & ~7;
+ return ALIGN(len, 8);
}
/*
const struct wim_inode *inode = dentry->d_inode;
size_t len;
- len = dentry_min_len_with_names(dentry->file_name_nbytes,
- dentry->short_name_nbytes);
- len = (len + 7) & ~7;
+ len = dentry_min_len_with_names(dentry->d_name_nbytes,
+ dentry->d_short_name_nbytes);
+ len = ALIGN(len, 8);
- if (inode->i_extra_size) {
- len += inode->i_extra_size;
- len = (len + 7) & ~7;
- }
+ if (inode->i_extra)
+ len += ALIGN(inode->i_extra->size, 8);
if (!(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
/*
if (have_named_data_stream || have_reparse_point_stream) {
if (have_reparse_point_stream)
- len += (sizeof(struct wim_extra_stream_entry_on_disk) + 7) & ~7;
- len += (sizeof(struct wim_extra_stream_entry_on_disk) + 7) & ~7;
+ len += ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8);
+ len += ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8);
}
}
* @arg will be passed as the second argument to each invocation of @visitor.
*
* This function does a pre-order traversal --- that is, a parent will be
- * visited before its children. It also will visit siblings in order of
- * case-sensitive filename. Equivalently, this function visits the entire tree
- * in the case-sensitive lexicographic order of the full paths.
+ * visited before its children. Furthermore, siblings will be visited in their
+ * collation order.
*
* It is safe to pass NULL for @root, which means that the dentry tree is empty.
* In this case, this function does nothing.
/*
* Calculate the full path to @dentry within the WIM image, if not already done.
*
- * The full name will be saved in the cached value 'dentry->_full_path'.
+ * The full name will be saved in the cached value 'dentry->d_full_path'.
*
* Whenever possible, use dentry_full_path() instead of calling this and
- * accessing _full_path directly.
+ * accessing d_full_path directly.
*
* Returns 0 or an error code resulting from a failed string conversion.
*/
calculate_dentry_full_path(struct wim_dentry *dentry)
{
size_t ulen;
- size_t dummy;
const struct wim_dentry *d;
- if (dentry->_full_path)
+ if (dentry->d_full_path)
return 0;
ulen = 0;
d = dentry;
do {
- ulen += d->file_name_nbytes / sizeof(utf16lechar);
+ ulen += d->d_name_nbytes / sizeof(utf16lechar);
ulen++;
d = d->d_parent; /* assumes d == d->d_parent for root */
} while (!dentry_is_root(d));
d = dentry;
do {
- p -= d->file_name_nbytes / sizeof(utf16lechar);
- memcpy(p, d->file_name, d->file_name_nbytes);
+ p -= d->d_name_nbytes / sizeof(utf16lechar);
+ if (d->d_name_nbytes)
+ memcpy(p, d->d_name, d->d_name_nbytes);
*--p = cpu_to_le16(WIM_PATH_SEPARATOR);
d = d->d_parent; /* assumes d == d->d_parent for root */
} while (!dentry_is_root(d));
wimlib_assert(p == ubuf);
return utf16le_to_tstr(ubuf, ulen * sizeof(utf16lechar),
- &dentry->_full_path, &dummy);
+ &dentry->d_full_path, NULL);
}
/*
dentry_full_path(struct wim_dentry *dentry)
{
calculate_dentry_full_path(dentry);
- return dentry->_full_path;
+ return dentry->d_full_path;
}
static int
struct wim_dentry *child;
/* Set offset of directory's child dentries */
- dentry->subdir_offset = *subdir_offset_p;
+ dentry->d_subdir_offset = *subdir_offset_p;
/* Account for child dentries */
for_dentry_child(child, dentry)
/* Account for end-of-directory entry */
*subdir_offset_p += 8;
} else {
- /* Not a directory; set subdir_offset to 0 */
- dentry->subdir_offset = 0;
+ /* Not a directory; set the subdir offset to 0 */
+ dentry->d_subdir_offset = 0;
}
return 0;
}
for_dentry_in_tree(root, dentry_calculate_subdir_offset, subdir_offset_p);
}
-/* Compare the UTF-16LE long filenames of two dentries case insensitively. */
-static int
-dentry_compare_names_case_insensitive(const struct wim_dentry *d1,
- const struct wim_dentry *d2)
-{
- return cmp_utf16le_strings(d1->file_name,
- d1->file_name_nbytes / 2,
- d2->file_name,
- d2->file_name_nbytes / 2,
- true);
-}
-
-/* Compare the UTF-16LE long filenames of two dentries case sensitively. */
-static int
-dentry_compare_names_case_sensitive(const struct wim_dentry *d1,
- const struct wim_dentry *d2)
-{
- return cmp_utf16le_strings(d1->file_name,
- d1->file_name_nbytes / 2,
- d2->file_name,
- d2->file_name_nbytes / 2,
- false);
-}
-
static int
-_avl_dentry_compare_names_ci(const struct avl_tree_node *n1,
- const struct avl_tree_node *n2)
+dentry_compare_names(const struct wim_dentry *d1, const struct wim_dentry *d2,
+ bool ignore_case)
{
- const struct wim_dentry *d1, *d2;
-
- d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node_ci);
- d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node_ci);
- return dentry_compare_names_case_insensitive(d1, d2);
+ return cmp_utf16le_strings(d1->d_name, d1->d_name_nbytes / 2,
+ d2->d_name, d2->d_name_nbytes / 2,
+ ignore_case);
}
+/*
+ * Collate (compare) the long filenames of two dentries. This first compares
+ * the names ignoring case, then falls back to a case-sensitive comparison if
+ * the names are the same ignoring case.
+ */
static int
-_avl_dentry_compare_names(const struct avl_tree_node *n1,
- const struct avl_tree_node *n2)
+collate_dentry_names(const struct avl_tree_node *n1,
+ const struct avl_tree_node *n2)
{
const struct wim_dentry *d1, *d2;
+ int res;
d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node);
d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node);
- return dentry_compare_names_case_sensitive(d1, d2);
+
+ res = dentry_compare_names(d1, d2, true);
+ if (res)
+ return res;
+ return dentry_compare_names(d1, d2, false);
}
/* Default case sensitivity behavior for searches with
* WIMLIB_INIT_FLAG_DEFAULT_CASE_SENSITIVE or
* WIMLIB_INIT_FLAG_DEFAULT_CASE_INSENSITIVE to wimlib_global_init(). */
bool default_ignore_case =
-#ifdef __WIN32__
+#ifdef _WIN32
true
#else
false
#endif
;
-/* Case-sensitive dentry lookup. Only @file_name and @file_name_nbytes of
- * @dummy must be valid. */
-static struct wim_dentry *
-dir_lookup(const struct wim_inode *dir, const struct wim_dentry *dummy)
+/*
+ * Find the child of directory @dir with the given UTF-16LE name, which need
+ * not be null-terminated. An exact-case match is always preferred; otherwise a
+ * case-insensitive match is returned if @case_type allows it, else NULL.
+ */
+struct wim_dentry *
+get_dentry_child_with_utf16le_name(const struct wim_dentry *dir,
+ const utf16lechar *name,
+ size_t name_nbytes,
+ CASE_SENSITIVITY_TYPE case_type)
{
- struct avl_tree_node *node;
+ struct wim_dentry wanted;
+ struct avl_tree_node *cur = dir->d_inode->i_children;
+ struct wim_dentry *ci_match = NULL;
- node = avl_tree_lookup_node(dir->i_children,
- &dummy->d_index_node,
- _avl_dentry_compare_names);
- if (!node)
- return NULL;
- return avl_tree_entry(node, struct wim_dentry, d_index_node);
-}
+ wanted.d_name = (utf16lechar *)name;
+ wanted.d_name_nbytes = name_nbytes;
-/* Case-insensitive dentry lookup. Only @file_name and @file_name_nbytes of
- * @dummy must be valid. */
-static struct wim_dentry *
-dir_lookup_ci(const struct wim_inode *dir, const struct wim_dentry *dummy)
-{
- struct avl_tree_node *node;
+ if (unlikely(wanted.d_name_nbytes != name_nbytes))
+ return NULL; /* overflow */
- node = avl_tree_lookup_node(dir->i_children_ci,
- &dummy->d_index_node_ci,
- _avl_dentry_compare_names_ci);
- if (!node)
- return NULL;
- return avl_tree_entry(node, struct wim_dentry, d_index_node_ci);
-}
+ /* Note: we can't use avl_tree_lookup_node() here because we need to
+ * save case-insensitive matches. */
+ while (cur) {
+ struct wim_dentry *child;
+ int res;
-/* Given a UTF-16LE filename and a directory, look up the dentry for the file.
- * Return it if found, otherwise NULL. This has configurable case sensitivity,
- * and @name need not be null-terminated. */
-struct wim_dentry *
-get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
- const utf16lechar *name,
- size_t name_nbytes,
- CASE_SENSITIVITY_TYPE case_ctype)
-{
- const struct wim_inode *dir = dentry->d_inode;
- bool ignore_case = will_ignore_case(case_ctype);
- struct wim_dentry dummy;
- struct wim_dentry *child;
+ child = avl_tree_entry(cur, struct wim_dentry, d_index_node);
- dummy.file_name = (utf16lechar*)name;
- dummy.file_name_nbytes = name_nbytes;
+ res = dentry_compare_names(&wanted, child, true);
+ if (!res) {
+ /* case-insensitive match found */
+ ci_match = child;
- if (!ignore_case)
- /* Case-sensitive lookup. */
- return dir_lookup(dir, &dummy);
+ res = dentry_compare_names(&wanted, child, false);
+ if (!res)
+ return child; /* case-sensitive match found */
+ }
- /* Case-insensitive lookup. */
+ if (res < 0)
+ cur = cur->left;
+ else
+ cur = cur->right;
+ }
- child = dir_lookup_ci(dir, &dummy);
- if (!child)
+ /* No case-sensitive match; use a case-insensitive match if possible. */
+
+ if (!will_ignore_case(case_type))
return NULL;
- if (likely(list_empty(&child->d_ci_conflict_list)))
- /* Only one dentry has this case-insensitive name; return it */
- return child;
+ if (ci_match) {
+ size_t num_other_ci_matches = 0;
+ struct wim_dentry *other_ci_match, *d;
- /* Multiple dentries have the same case-insensitive name. Choose the
- * dentry with the same case-sensitive name, if one exists; otherwise
- * print a warning and choose one of the possible dentries arbitrarily.
- */
- struct wim_dentry *alt = child;
- size_t num_alts = 0;
+ dentry_for_each_ci_match(d, ci_match) {
+ num_other_ci_matches++;
+ other_ci_match = d;
+ }
- do {
- num_alts++;
- if (!dentry_compare_names_case_sensitive(&dummy, alt))
- return alt;
- alt = list_entry(alt->d_ci_conflict_list.next,
- struct wim_dentry, d_ci_conflict_list);
- } while (alt != child);
-
- WARNING("Result of case-insensitive lookup is ambiguous\n"
- " (returning \"%"TS"\" of %zu "
- "possible files, including \"%"TS"\")",
- dentry_full_path(child),
- num_alts,
- dentry_full_path(list_entry(child->d_ci_conflict_list.next,
- struct wim_dentry,
- d_ci_conflict_list)));
- return child;
+ if (num_other_ci_matches != 0) {
+ WARNING("Result of case-insensitive lookup is ambiguous\n"
+ " (returning \"%"TS"\" of %zu "
+ "possible files, including \"%"TS"\")",
+ dentry_full_path(ci_match), num_other_ci_matches,
+ dentry_full_path(other_ci_match));
+ }
+ }
+
+ return ci_match;
}
-/* Given a 'tchar' filename and a directory, look up the dentry for the file.
- * If the filename was successfully converted to UTF-16LE and the dentry was
- * found, return it; otherwise return NULL. This has configurable case
- * sensitivity. */
+/*
+ * Find the dentry within the given directory that has the given 'tstr'
+ * filename. If the filename was successfully converted to UTF-16LE and the
+ * dentry was found, return it; otherwise return NULL. This has configurable
+ * case sensitivity.
+ */
struct wim_dentry *
-get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name,
+get_dentry_child_with_name(const struct wim_dentry *dir, const tchar *name,
CASE_SENSITIVITY_TYPE case_type)
{
int ret;
if (ret)
return NULL;
- child = get_dentry_child_with_utf16le_name(dentry,
+ child = get_dentry_child_with_utf16le_name(dir,
name_utf16le,
name_utf16le_nbytes,
case_type);
* *dentry_ret. On failure, returns WIMLIB_ERR_NOMEM or an error code resulting
* from a failed string conversion.
*/
-int
+static int
new_dentry(const tchar *name, struct wim_dentry **dentry_ret)
{
struct wim_dentry *dentry;
return 0;
}
-static int
-_new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret,
- bool timeless)
+/* Like new_dentry(), but also allocate an inode and associate it with the
+ * dentry. If set_timestamps=true, the timestamps for the inode will be set to
+ * the current time; otherwise, they will be left 0. */
+int
+new_dentry_with_new_inode(const tchar *name, bool set_timestamps,
+ struct wim_dentry **dentry_ret)
{
struct wim_dentry *dentry;
struct wim_inode *inode;
if (ret)
return ret;
- if (timeless)
- inode = new_timeless_inode();
- else
- inode = new_inode();
+ inode = new_inode(dentry, set_timestamps);
if (!inode) {
free_dentry(dentry);
return WIMLIB_ERR_NOMEM;
}
- d_associate(dentry, inode);
-
*dentry_ret = dentry;
return 0;
}
-/* Like new_dentry(), but also allocate an inode and associate it with the
- * dentry. The timestamps for the inode will be set to the current time. */
-int
-new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret)
-{
- return _new_dentry_with_inode(name, dentry_ret, false);
-}
-
-/* Like new_dentry_with_inode(), but don't bother setting the timestamps for the
- * new inode; instead, just leave them as 0, under the presumption that the
- * caller will set them itself. */
+/* Like new_dentry(), but also associate the new dentry with the specified inode
+ * and acquire a reference to each of the inode's blobs. */
int
-new_dentry_with_timeless_inode(const tchar *name, struct wim_dentry **dentry_ret)
+new_dentry_with_existing_inode(const tchar *name, struct wim_inode *inode,
+ struct wim_dentry **dentry_ret)
{
- return _new_dentry_with_inode(name, dentry_ret, true);
+ int ret = new_dentry(name, dentry_ret);
+ if (ret)
+ return ret;
+ d_associate(*dentry_ret, inode);
+ inode_ref_blobs(inode);
+ return 0;
}
/* Create an unnamed dentry with a new inode for a directory with the default
int ret;
struct wim_dentry *dentry;
- ret = new_dentry_with_inode(NULL, &dentry);
+ ret = new_dentry_with_new_inode(NULL, true, &dentry);
if (ret)
return ret;
/* Leave the inode number as 0; this is allowed for non
return 0;
}
-static int
-dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore)
-{
- dentry->d_inode->i_visited = 0;
- return 0;
-}
-
-void
-dentry_tree_clear_inode_visited(struct wim_dentry *root)
-{
- for_dentry_in_tree(root, dentry_clear_inode_visited, NULL);
-}
-
/*
* Free a WIM dentry.
*
{
if (dentry) {
d_disassociate(dentry);
- FREE(dentry->file_name);
- FREE(dentry->short_name);
- FREE(dentry->_full_path);
+ FREE(dentry->d_name);
+ FREE(dentry->d_short_name);
+ FREE(dentry->d_full_path);
FREE(dentry);
}
}
for_dentry_in_tree_depth(root, f, blob_table);
}
-/* Insert the @child dentry into the case sensitive index of the @dir directory.
- * Return NULL if successfully inserted, otherwise a pointer to the
- * already-inserted duplicate. */
-static struct wim_dentry *
-dir_index_child(struct wim_inode *dir, struct wim_dentry *child)
+/*
+ * Return the first dentry, other than @dentry itself, that has the same
+ * case-insensitive name as @dentry, or NULL if there is none.
+ */
+struct wim_dentry *
+dentry_get_first_ci_match(struct wim_dentry *dentry)
{
- struct avl_tree_node *duplicate;
+ struct wim_dentry *ci_match = dentry;
- duplicate = avl_tree_insert(&dir->i_children,
- &child->d_index_node,
- _avl_dentry_compare_names);
- if (!duplicate)
- return NULL;
- return avl_tree_entry(duplicate, struct wim_dentry, d_index_node);
-}
+ for (;;) {
+ struct avl_tree_node *node;
+ struct wim_dentry *prev;
+
+ node = avl_tree_prev_in_order(&ci_match->d_index_node);
+ if (!node)
+ break;
+ prev = avl_tree_entry(node, struct wim_dentry, d_index_node);
+ if (dentry_compare_names(prev, dentry, true))
+ break;
+ ci_match = prev;
+ }
-/* Insert the @child dentry into the case insensitive index of the @dir
- * directory. Return NULL if successfully inserted, otherwise a pointer to the
- * already-inserted duplicate. */
-static struct wim_dentry *
-dir_index_child_ci(struct wim_inode *dir, struct wim_dentry *child)
-{
- struct avl_tree_node *duplicate;
+ if (ci_match == dentry)
+ return dentry_get_next_ci_match(dentry, dentry);
- duplicate = avl_tree_insert(&dir->i_children_ci,
- &child->d_index_node_ci,
- _avl_dentry_compare_names_ci);
- if (!duplicate)
- return NULL;
- return avl_tree_entry(duplicate, struct wim_dentry, d_index_node_ci);
+ return ci_match;
}
-/* Remove the specified dentry from its directory's case-sensitive index. */
-static void
-dir_unindex_child(struct wim_inode *dir, struct wim_dentry *child)
+/*
+ * Return the next dentry after @ci_match, skipping @dentry itself, that has
+ * the same case-insensitive name as @dentry, or NULL if there is none.
+ */
+struct wim_dentry *
+dentry_get_next_ci_match(struct wim_dentry *dentry, struct wim_dentry *ci_match)
{
- avl_tree_remove(&dir->i_children, &child->d_index_node);
-}
+ do {
+ struct avl_tree_node *node;
-/* Remove the specified dentry from its directory's case-insensitive index. */
-static void
-dir_unindex_child_ci(struct wim_inode *dir, struct wim_dentry *child)
-{
- avl_tree_remove(&dir->i_children_ci, &child->d_index_node_ci);
-}
+ node = avl_tree_next_in_order(&ci_match->d_index_node);
+ if (!node)
+ return NULL;
+ ci_match = avl_tree_entry(node, struct wim_dentry, d_index_node);
+ } while (ci_match == dentry);
-/* Return true iff the specified dentry is in its parent directory's
- * case-insensitive index. */
-static bool
-dentry_in_ci_index(const struct wim_dentry *dentry)
-{
- return !avl_tree_node_is_unlinked(&dentry->d_index_node_ci);
+ if (dentry_compare_names(ci_match, dentry, true))
+ return NULL;
+
+ return ci_match;
}
/*
- * Link a dentry into the tree.
+ * Link a dentry into a directory.
*
* @parent:
- * The dentry that will be the parent of @child. It must name a directory.
+ * The directory into which to link the dentry.
*
* @child:
- * The dentry to link. It must be currently unlinked.
+ * The dentry to link into the directory. It must be currently unlinked.
*
- * Returns NULL if successful. If @parent already contains a dentry with the
- * same case-sensitive name as @child, returns a pointer to this duplicate
- * dentry.
+ * Returns NULL if successful; or, if @parent already contains a dentry with the
+ * same case-sensitive name as @child, then a pointer to this duplicate dentry
+ * is returned.
*/
struct wim_dentry *
dentry_add_child(struct wim_dentry *parent, struct wim_dentry *child)
{
- struct wim_dentry *duplicate;
- struct wim_inode *dir;
+ struct wim_inode *dir = parent->d_inode;
+ struct avl_tree_node *duplicate;
wimlib_assert(parent != child);
-
- dir = parent->d_inode;
-
wimlib_assert(inode_is_directory(dir));
- duplicate = dir_index_child(dir, child);
+ duplicate = avl_tree_insert(&dir->i_children, &child->d_index_node,
+ collate_dentry_names);
if (duplicate)
- return duplicate;
+ return avl_tree_entry(duplicate, struct wim_dentry, d_index_node);
- duplicate = dir_index_child_ci(dir, child);
- if (duplicate) {
- list_add(&child->d_ci_conflict_list, &duplicate->d_ci_conflict_list);
- avl_tree_node_set_unlinked(&child->d_index_node_ci);
- } else {
- INIT_LIST_HEAD(&child->d_ci_conflict_list);
- }
child->d_parent = parent;
return NULL;
}
-/* Unlink a dentry from the tree. */
+/* Unlink a dentry from its parent directory. */
void
unlink_dentry(struct wim_dentry *dentry)
{
- struct wim_inode *dir;
-
/* Do nothing if the dentry is root or it's already unlinked. Not
* actually necessary based on the current callers, but we do the check
* here to be safe. */
if (unlikely(dentry->d_parent == dentry))
return;
- dir = dentry->d_parent->d_inode;
-
- dir_unindex_child(dir, dentry);
-
- if (dentry_in_ci_index(dentry)) {
-
- dir_unindex_child_ci(dir, dentry);
-
- if (!list_empty(&dentry->d_ci_conflict_list)) {
- /* Make a different case-insensitively-the-same dentry
- * be the "representative" in the search index. */
- struct list_head *next;
- struct wim_dentry *other;
- struct wim_dentry *existing;
-
- next = dentry->d_ci_conflict_list.next;
- other = list_entry(next, struct wim_dentry, d_ci_conflict_list);
- existing = dir_index_child_ci(dir, other);
- wimlib_assert(existing == NULL);
- }
- }
- list_del(&dentry->d_ci_conflict_list);
+ avl_tree_remove(&dentry->d_parent->d_inode->i_children,
+ &dentry->d_index_node);
/* Not actually necessary, but to be safe don't retain the now-obsolete
* parent pointer. */
p++;
if (unlikely(p < end)) {
- inode->i_extra = memdup(p, end - p);
+ inode->i_extra = MALLOC(sizeof(struct wim_inode_extra) +
+ end - p);
if (!inode->i_extra)
return WIMLIB_ERR_NOMEM;
- inode->i_extra_size = end - p;
+ inode->i_extra->size = end - p;
+ memcpy(inode->i_extra->data, p, end - p);
}
return 0;
}
* There will be an unnamed data stream, a reparse point stream, or both an
* unnamed data stream and a reparse point stream. In addition, there may be
* named data streams.
+ *
+ * NOTE: if the file has a reparse point stream or at least one named data
+ * stream, then WIMGAPI puts *all* streams in the extra stream entries and
+ * leaves the default stream hash zeroed. wimlib now does the same. However,
+ * for input we still support the default hash field being used, since wimlib
+ * used to use it and MS software is somewhat accepting of it as well.
*/
static void
assign_stream_types_unencrypted(struct wim_inode *inode)
if (stream_is_named(strm)) {
/* Named data stream */
strm->stream_type = STREAM_TYPE_DATA;
- } else if (!is_zero_hash(strm->_stream_hash)) {
+ } else if (i != 0 || !is_zero_hash(strm->_stream_hash)) {
+ /* Unnamed stream in the extra stream entries, OR the
+ * default stream in the dentry provided that it has a
+ * nonzero hash. */
if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
!found_reparse_point_stream) {
found_reparse_point_stream = true;
found_unnamed_data_stream = true;
strm->stream_type = STREAM_TYPE_DATA;
}
- } else {
- /* If no stream name is specified and the hash is zero,
- * then remember this stream for later so that we can
- * assign it to the unnamed data stream if we don't find
- * a better candidate. */
+ } else if (!unnamed_stream_with_zero_hash) {
unnamed_stream_with_zero_hash = strm;
}
}
- if (!found_unnamed_data_stream && unnamed_stream_with_zero_hash != NULL)
- unnamed_stream_with_zero_hash->stream_type = STREAM_TYPE_DATA;
+ if (unnamed_stream_with_zero_hash) {
+ int type = STREAM_TYPE_UNKNOWN;
+ if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
+ !found_reparse_point_stream) {
+ type = STREAM_TYPE_REPARSE_POINT;
+ } else if (!found_unnamed_data_stream) {
+ type = STREAM_TYPE_DATA;
+ }
+ unnamed_stream_with_zero_hash->stream_type = type;
+ }
}
/*
inode->i_num_streams = 1 + num_extra_streams;
- if (likely(inode->i_num_streams <= ARRAY_LEN(inode->i_embedded_streams))) {
- inode->i_streams = inode->i_embedded_streams;
- } else {
+ if (unlikely(inode->i_num_streams > ARRAY_LEN(inode->i_embedded_streams))) {
inode->i_streams = CALLOC(inode->i_num_streams,
sizeof(inode->i_streams[0]));
if (!inode->i_streams)
disk_strm = (const struct wim_extra_stream_entry_on_disk *)p;
/* Read the length field */
- length = le64_to_cpu(disk_strm->length);
-
- /* 8-byte align the length */
- length = (length + 7) & ~7;
+ length = ALIGN(le64_to_cpu(disk_strm->length), 8);
/* Make sure the length field is neither so small it doesn't
* include all the fixed-length data nor so large it overflows
struct wim_dentry *dentry;
struct wim_inode *inode;
u16 short_name_nbytes;
- u16 file_name_nbytes;
+ u16 name_nbytes;
u64 calculated_size;
int ret;
- BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE);
+ STATIC_ASSERT(sizeof(struct wim_dentry_on_disk) == WIM_DENTRY_DISK_SIZE);
/* Before reading the whole dentry, we need to read just the length.
* This is because a dentry of length 8 (that is, just the length field)
disk_dentry = (const struct wim_dentry_on_disk*)p;
/* Get dentry length. */
- length = (le64_to_cpu(disk_dentry->length) + 7) & ~7;
+ length = ALIGN(le64_to_cpu(disk_dentry->length), 8);
/* Check for end-of-directory. */
if (length <= 8) {
return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
/* Allocate new dentry structure, along with a preliminary inode. */
- ret = new_dentry_with_timeless_inode(NULL, &dentry);
+ ret = new_dentry_with_new_inode(NULL, false, &dentry);
if (ret)
return ret;
/* Read more fields: some into the dentry, and some into the inode. */
inode->i_attributes = le32_to_cpu(disk_dentry->attributes);
inode->i_security_id = le32_to_cpu(disk_dentry->security_id);
- dentry->subdir_offset = le64_to_cpu(disk_dentry->subdir_offset);
+ dentry->d_subdir_offset = le64_to_cpu(disk_dentry->subdir_offset);
inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time);
inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time);
inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time);
+ inode->i_unknown_0x54 = le32_to_cpu(disk_dentry->unknown_0x54);
- /* I don't know what's going on here. It seems like M$ screwed up the
- * reparse points, then put the fields in the same place and didn't
- * document it. So we have some fields we read for reparse points, and
- * some fields in the same place for non-reparse-points. */
if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
- inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->reparse.rp_unknown_1);
inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag);
- inode->i_rp_unknown_2 = le16_to_cpu(disk_dentry->reparse.rp_unknown_2);
- inode->i_not_rpfixed = le16_to_cpu(disk_dentry->reparse.not_rpfixed);
+ inode->i_rp_reserved = le16_to_cpu(disk_dentry->reparse.rp_reserved);
+ inode->i_rp_flags = le16_to_cpu(disk_dentry->reparse.rp_flags);
/* Leave inode->i_ino at 0. Note: this means that WIM cannot
* represent multiple hard links to a reparse point file. */
} else {
- inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->nonreparse.rp_unknown_1);
inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id);
}
* name, and the short name. */
short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes);
- file_name_nbytes = le16_to_cpu(disk_dentry->file_name_nbytes);
+ name_nbytes = le16_to_cpu(disk_dentry->name_nbytes);
- if (unlikely((short_name_nbytes & 1) | (file_name_nbytes & 1))) {
+ if (unlikely((short_name_nbytes & 1) | (name_nbytes & 1))) {
ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
goto err_free_dentry;
}
/* We now know the length of the file name and short name. Make sure
* the length of the dentry is large enough to actually hold them. */
- calculated_size = dentry_min_len_with_names(file_name_nbytes,
+ calculated_size = dentry_min_len_with_names(name_nbytes,
short_name_nbytes);
if (unlikely(length < calculated_size)) {
/* Read the filename if present. Note: if the filename is empty, there
* is no null terminator following it. */
- if (file_name_nbytes) {
- dentry->file_name = utf16le_dupz(p, file_name_nbytes);
- if (dentry->file_name == NULL) {
+ if (name_nbytes) {
+ dentry->d_name = utf16le_dupz(p, name_nbytes);
+ if (unlikely(!dentry->d_name)) {
ret = WIMLIB_ERR_NOMEM;
goto err_free_dentry;
}
- dentry->file_name_nbytes = file_name_nbytes;
- p += (u32)file_name_nbytes + 2;
+ dentry->d_name_nbytes = name_nbytes;
+ p += (u32)name_nbytes + 2;
}
/* Read the short filename if present. Note: if there is no short
* filename, there is no null terminator following it. */
if (short_name_nbytes) {
- dentry->short_name = utf16le_dupz(p, short_name_nbytes);
- if (dentry->short_name == NULL) {
+ dentry->d_short_name = utf16le_dupz(p, short_name_nbytes);
+ if (unlikely(!dentry->d_short_name)) {
ret = WIMLIB_ERR_NOMEM;
goto err_free_dentry;
}
- dentry->short_name_nbytes = short_name_nbytes;
+ dentry->d_short_name_nbytes = short_name_nbytes;
p += (u32)short_name_nbytes + 2;
}
return ret;
}
-/* Is the dentry named "." or ".." ? */
+/* Report whether the dentry's name (d_name) is exactly "." or "..". The name
+ * is compared as 16-bit little-endian code units: a 2-byte name must be "."
+ * and a 4-byte name must be ".." for the result to be true. Any other length
+ * yields false. */
static bool
dentry_is_dot_or_dotdot(const struct wim_dentry *dentry)
{
- if (dentry->file_name_nbytes <= 4) {
- if (dentry->file_name_nbytes == 4) {
- if (dentry->file_name[0] == cpu_to_le16('.') &&
- dentry->file_name[1] == cpu_to_le16('.'))
+ if (dentry->d_name_nbytes <= 4) {
+ if (dentry->d_name_nbytes == 4) {
+ if (dentry->d_name[0] == cpu_to_le16('.') &&
+ dentry->d_name[1] == cpu_to_le16('.'))
return true;
- } else if (dentry->file_name_nbytes == 2) {
- if (dentry->file_name[0] == cpu_to_le16('.'))
+ } else if (dentry->d_name_nbytes == 2) {
+ if (dentry->d_name[0] == cpu_to_le16('.'))
return true;
}
}
return false;
}
+/* Report whether any 16-bit code unit of the dentry's name (d_name) is zero.
+ * Exactly d_name_nbytes / 2 code units are examined, so anything stored after
+ * the stated name length is not looked at. An empty name yields false. */
+static bool
+dentry_contains_embedded_null(const struct wim_dentry *dentry)
+{
+ for (unsigned i = 0; i < dentry->d_name_nbytes / 2; i++)
+ if (dentry->d_name[i] == cpu_to_le16('\0'))
+ return true;
+ return false;
+}
+
+/* Decide whether @dentry, a child being read into directory @dir, must be
+ * dropped because of its name.
+ *
+ * Returns true, after emitting a WARNING that names the full path of @dir, if
+ * @dentry has no long name, is named "." or "..", or has a name containing a
+ * null character. Returns false otherwise. The checks run in that order and
+ * only the first failing one is reported. This function does not free
+ * @dentry; disposing of an ignored dentry is left to the caller. */
+static bool
+should_ignore_dentry(struct wim_dentry *dir, const struct wim_dentry *dentry)
+{
+ /* All dentries except the root must be named. */
+ if (!dentry_has_long_name(dentry)) {
+ WARNING("Ignoring unnamed file in directory \"%"TS"\"",
+ dentry_full_path(dir));
+ return true;
+ }
+
+ /* Don't allow files named "." or "..". Such filenames could be used in
+ * path traversal attacks. */
+ if (dentry_is_dot_or_dotdot(dentry)) {
+ WARNING("Ignoring file named \".\" or \"..\" in directory "
+ "\"%"TS"\"", dentry_full_path(dir));
+ return true;
+ }
+
+ /* Don't allow filenames containing embedded null characters. Although
+ * the null character is already considered an unsupported character for
+ * extraction by all targets, it is probably a good idea to just forbid
+ * such names entirely. */
+ if (dentry_contains_embedded_null(dentry)) {
+ WARNING("Ignoring filename with embedded null character in "
+ "directory \"%"TS"\"", dentry_full_path(dir));
+ return true;
+ }
+
+ /* The name passed every check; keep the dentry. */
+ return false;
+}
+
static int
read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len,
- struct wim_dentry * restrict dir)
+ struct wim_dentry * restrict dir, unsigned depth)
{
- u64 cur_offset = dir->subdir_offset;
-
- /* Check for cyclic directory structure, which would cause infinite
- * recursion if not handled. */
- for (struct wim_dentry *d = dir->d_parent;
- !dentry_is_root(d); d = d->d_parent)
- {
- if (unlikely(d->subdir_offset == cur_offset)) {
- ERROR("Cyclic directory structure detected: children "
- "of \"%"TS"\" coincide with children of \"%"TS"\"",
- dentry_full_path(dir), dentry_full_path(d));
- return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
- }
+ u64 cur_offset = dir->d_subdir_offset;
+
+ /* Disallow extremely deep directory structures. A cyclic structure
+ * would recurse without bound, so this depth limit rejects cycles too. */
+ if (unlikely(depth >= 16384)) {
+ ERROR("Directory structure too deep!");
+ return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
}
for (;;) {
if (child == NULL)
return 0;
- /* All dentries except the root should be named. */
- if (unlikely(!dentry_has_long_name(child))) {
- WARNING("Ignoring unnamed dentry in "
- "directory \"%"TS"\"", dentry_full_path(dir));
- free_dentry(child);
- continue;
- }
-
- /* Don't allow files named "." or "..". */
- if (unlikely(dentry_is_dot_or_dotdot(child))) {
- WARNING("Ignoring file named \".\" or \"..\"; "
- "potentially malicious archive!!!");
+ /* Ignore dentries with bad names. */
+ if (unlikely(should_ignore_dentry(dir, child))) {
free_dentry(child);
continue;
}
/* If this child is a directory that itself has children, call
* this procedure recursively. */
- if (child->subdir_offset != 0) {
+ if (child->d_subdir_offset != 0) {
if (likely(dentry_is_directory(child))) {
ret = read_dentry_tree_recursive(buf,
buf_len,
- child);
+ child,
+ depth + 1);
if (ret)
return ret;
} else {
int ret;
struct wim_dentry *root;
- DEBUG("Reading dentry tree (root_offset=%"PRIu64")", root_offset);
-
ret = read_dentry(buf, buf_len, &root_offset, &root);
if (ret)
return ret;
goto err_free_dentry_tree;
}
- if (likely(root->subdir_offset != 0)) {
- ret = read_dentry_tree_recursive(buf, buf_len, root);
+ if (likely(root->d_subdir_offset != 0)) {
+ ret = read_dentry_tree_recursive(buf, buf_len, root, 0);
if (ret)
goto err_free_dentry_tree;
}
disk_dentry->attributes = cpu_to_le32(inode->i_attributes);
disk_dentry->security_id = cpu_to_le32(inode->i_security_id);
- disk_dentry->subdir_offset = cpu_to_le64(dentry->subdir_offset);
+ disk_dentry->subdir_offset = cpu_to_le64(dentry->d_subdir_offset);
disk_dentry->unused_1 = cpu_to_le64(0);
disk_dentry->unused_2 = cpu_to_le64(0);
disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time);
disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time);
disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time);
+ disk_dentry->unknown_0x54 = cpu_to_le32(inode->i_unknown_0x54);
if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
- disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag);
- disk_dentry->reparse.rp_unknown_2 = cpu_to_le16(inode->i_rp_unknown_2);
- disk_dentry->reparse.not_rpfixed = cpu_to_le16(inode->i_not_rpfixed);
+ disk_dentry->reparse.rp_reserved = cpu_to_le16(inode->i_rp_reserved);
+ disk_dentry->reparse.rp_flags = cpu_to_le16(inode->i_rp_flags);
} else {
- disk_dentry->nonreparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
disk_dentry->nonreparse.hard_link_group_id =
cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino);
}
- disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes);
- disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes);
+ disk_dentry->short_name_nbytes = cpu_to_le16(dentry->d_short_name_nbytes);
+ disk_dentry->name_nbytes = cpu_to_le16(dentry->d_name_nbytes);
p += sizeof(struct wim_dentry_on_disk);
wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry));
if (dentry_has_long_name(dentry))
- p = mempcpy(p, dentry->file_name, (u32)dentry->file_name_nbytes + 2);
+ p = mempcpy(p, dentry->d_name, (u32)dentry->d_name_nbytes + 2);
if (dentry_has_short_name(dentry))
- p = mempcpy(p, dentry->short_name, (u32)dentry->short_name_nbytes + 2);
+ p = mempcpy(p, dentry->d_short_name, (u32)dentry->d_short_name_nbytes + 2);
/* Align to 8-byte boundary */
while ((uintptr_t)p & 7)
*p++ = 0;
- if (inode->i_extra_size) {
+ if (inode->i_extra) {
/* Extra tagged items --- not usually present. */
- p = mempcpy(p, inode->i_extra, inode->i_extra_size);
+ p = mempcpy(p, inode->i_extra->data, inode->i_extra->size);
/* Align to 8-byte boundary */
while ((uintptr_t)p & 7)
const struct wim_inode_stream *efs_strm;
const u8 *efs_hash;
- efs_strm = inode_get_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA,
- NO_STREAM_NAME);
+ efs_strm = inode_get_unnamed_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA);
efs_hash = efs_strm ? stream_hash(efs_strm) : zero_hash;
copy_hash(disk_dentry->default_hash, efs_hash);
disk_dentry->num_extra_streams = cpu_to_le16(0);
*/
bool have_named_data_stream = false;
bool have_reparse_point_stream = false;
- u16 num_extra_streams = 0;
const u8 *unnamed_data_stream_hash = zero_hash;
const u8 *reparse_point_hash;
for (unsigned i = 0; i < inode->i_num_streams; i++) {
}
}
- if (have_reparse_point_stream || have_named_data_stream) {
+ if (unlikely(have_reparse_point_stream || have_named_data_stream)) {
+
+ unsigned num_extra_streams = 0;
copy_hash(disk_dentry->default_hash, zero_hash);
p = write_extra_stream_entry(p, NO_STREAM_NAME,
unnamed_data_stream_hash);
num_extra_streams++;
- } else {
- copy_hash(disk_dentry->default_hash, unnamed_data_stream_hash);
- }
- for (unsigned i = 0; i < inode->i_num_streams; i++) {
- const struct wim_inode_stream *strm = &inode->i_streams[i];
- if (stream_is_named_data_stream(strm)) {
- p = write_extra_stream_entry(p, strm->stream_name,
- stream_hash(strm));
- num_extra_streams++;
+ for (unsigned i = 0; i < inode->i_num_streams; i++) {
+ const struct wim_inode_stream *strm = &inode->i_streams[i];
+ if (stream_is_named_data_stream(strm)) {
+ p = write_extra_stream_entry(p, strm->stream_name,
+ stream_hash(strm));
+ num_extra_streams++;
+ }
}
+ wimlib_assert(num_extra_streams <= 0xFFFF);
+
+ disk_dentry->num_extra_streams = cpu_to_le16(num_extra_streams);
+ } else {
+ copy_hash(disk_dentry->default_hash, unnamed_data_stream_hash);
+ disk_dentry->num_extra_streams = cpu_to_le16(0);
}
- disk_dentry->num_extra_streams = cpu_to_le16(num_extra_streams);
}
return p;
static int
write_dir_dentries(struct wim_dentry *dir, void *_pp)
{
- if (dir->subdir_offset != 0) {
+ if (dir->d_subdir_offset != 0) {
u8 **pp = _pp;
u8 *p = *pp;
struct wim_dentry *child;
u8 *
write_dentry_tree(struct wim_dentry *root, u8 *p)
{
- DEBUG("Writing dentry tree.");
-
- wimlib_assert(root != NULL);
-
/* write root dentry and end-of-directory entry following it */
p = write_dentry(root, p);
*(u64*)p = 0;