#define WIM_ADS_ENTRY_DISK_SIZE 38
-/* WIM directory entry (on-disk format) */
+/* On-disk format of a WIM dentry (directory entry), located in the metadata
+ * resource for a WIM image. */
struct wim_dentry_on_disk {
+
+ /* Length of this directory entry in bytes, not including any alternate
+ * data stream entries. Should be a multiple of 8 so that the following
+ * dentry or alternate data stream entry is aligned on an 8-byte
+ * boundary. (If not, wimlib will round it up.)
+ *
+ * It is also possible for this field to be 0. This situation, which is
+ * undocumented, indicates the end of a list of sibling nodes in a
+ * directory. It also means the real length is 8, because the dentry
+ * included only the length field, but that takes up 8 bytes. */
le64 length;
+
+ /* Attributes of the file or directory. This is a bitwise OR of the
+ * FILE_ATTRIBUTE_* constants and should correspond to the value
+ * retrieved by GetFileAttributes() on Windows. */
le32 attributes;
+
+ /* A value that specifies the security descriptor for this file or
+ * directory. If -1, the file or directory has no security descriptor.
+ * Otherwise, it is a 0-based index into the WIM image's table of
+ * security descriptors (see: `struct wim_security_data') */
sle32 security_id;
+
+ /* Offset from the start of the uncompressed metadata resource of this
+ * directory's child directory entries, or 0 if this directory entry
+ * does not correspond to a directory or otherwise does not have any
+ * children. */
le64 subdir_offset;
+
+ /* Reserved fields */
le64 unused_1;
le64 unused_2;
+
+ /* The following three time fields should correspond to those gotten by
+ * calling GetFileTime() on Windows. */
+
+ /* Creation time, in 100-nanosecond intervals since January 1, 1601. */
le64 creation_time;
+
+ /* Last access time, in 100-nanosecond intervals since January 1, 1601. */
le64 last_access_time;
+
+ /* Last write time, in 100-nanosecond intervals since January 1, 1601. */
le64 last_write_time;
+
+ /* Vaguely, the SHA-1 message digest ("hash") of the file's contents.
+ * More specifically, this is for the "unnamed data stream" rather than
+ * any "alternate data streams". This hash value is used to look up the
+ * corresponding entry in the WIM's stream lookup table to actually find
+ * the file contents within the WIM.
+ *
+ * If the file has no unnamed data stream (e.g. is a directory), then
+ * this field will be all zeroes. If the unnamed data stream is empty
+ * (i.e. an "empty file"), then this field is also expected to be all
+ * zeroes. (It will be if wimlib created the WIM image, at least;
+ * otherwise it can't be ruled out that the SHA-1 message digest of 0
+ * bytes of data is given explicitly.)
+ *
+ * If the file has reparse data, then this field will instead specify
+ * the SHA-1 message digest of the reparse data. If it is somehow
+ * possible for a file to have both an unnamed data stream and reparse
+ * data, then this is not handled by wimlib.
+ *
+ * As a further special case, if this field is all zeroes but there is
+ * an alternate data stream entry with no name and a nonzero SHA-1
+ * message digest field, then that hash must be used instead of this
+ * one. (wimlib does not use this quirk on WIM images it creates.)
+ */
u8 unnamed_stream_hash[SHA1_HASH_SIZE];
+
+ /* The format of the following data is not yet completely known and
+ * does not correspond to Microsoft's documentation.
+ *
+ * If this directory entry is for a reparse point (has
+ * FILE_ATTRIBUTE_REPARSE_POINT set in the attributes field), then the
+ * version of the following fields containing the reparse tag is valid.
+ * Furthermore, the field notated as not_rpfixed, as far as I can tell,
+ * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the
+ * targets of absolute symbolic links) were done, and otherwise 0.
+ *
+ * If this directory entry is not for a reparse point, then the version
+ * of the following fields containing the hard_link_group_id is valid.
+ * All MS says about this field is that "If this file is part of a hard
+ * link set, all the directory entries in the set will share the same
+ * value in this field.". However, more specifically I have observed
+ * the following:
+ * - If the file is part of a hard link set of size 1, then the
+ * hard_link_group_id should be set to either 0, which is treated
+ * specially as indicating "not hardlinked", or any unique value.
+ * - The specific nonzero values used to identify hard link sets do
+ * not matter, as long as they are unique.
+ * - However, due to bugs in Microsoft's software, it is actually NOT
+ * guaranteed that directory entries that share the same hard link
+ * group ID are actually hard linked to each other. We have to
+ * handle this by using special code to use distinguishing features
+ * (possible because some information about the underlying inode is
+ * repeated in each dentry) to split up these fake hard link groups
+ * into what they actually are supposed to be.
+ */
union {
struct {
le32 rp_unknown_1;
le64 hard_link_group_id;
} _packed_attribute nonreparse;
};
+
+ /* Number of alternate data stream entries that directly follow this
+ * dentry on-disk. */
le16 num_alternate_data_streams;
+
+ /* Length of this file's UTF-16LE encoded short name (8.3 DOS-compatible
+ * name), if present, in bytes, excluding the null terminator. If this
+ * file has no short name, then this field should be 0. */
le16 short_name_nbytes;
+
+ /* Length of this file's UTF-16LE encoded "long" name, in bytes,
+ * excluding the null terminator. If this file has no long name, then
+ * this field should be 0. It's expected that only the root dentry has
+ * this field set to 0. */
le16 file_name_nbytes;
- /* Follewed by variable length file name, if file_name_nbytes != 0 */
+ /* Followed by variable length file name, in UTF-16LE, if
+ * file_name_nbytes != 0. Includes null terminator. */
utf16lechar file_name[];
- /* Followed by variable length short name, if short_name_nbytes != 0 */
+ /* Followed by variable length short name, in UTF-16LE, if
+ * short_name_nbytes != 0. Includes null terminator. */
/*utf16lechar short_name[];*/
} _packed_attribute;
return 0;
if (dentry_is_root(dentry)) {
- full_path = TSTRDUP(T("/"));
+ static const tchar _root_path[] = {WIM_PATH_SEPARATOR, T('\0')};
+ full_path = TSTRDUP(_root_path);
if (!full_path)
return WIMLIB_ERR_NOMEM;
full_path_nbytes = 1 * sizeof(tchar);
if (!full_path)
return WIMLIB_ERR_NOMEM;
memcpy(full_path, parent_full_path, parent_full_path_nbytes);
- full_path[parent_full_path_nbytes / sizeof(tchar)] = T('/');
+ full_path[parent_full_path_nbytes / sizeof(tchar)] = WIM_PATH_SEPARATOR;
#if TCHAR_IS_UTF16LE
memcpy(&full_path[parent_full_path_nbytes / sizeof(tchar) + 1],
dentry->file_name,
}
}
-/* UNIX: Case-sensitive UTF-16LE dentry or stream name comparison. We call this
- * on Windows as well to distinguish true duplicates from names differing by
- * case only. */
+/* Case-sensitive UTF-16LE dentry or stream name comparison. Used on both UNIX
+ * (always) and Windows (sometimes) */
static int
compare_utf16le_names_case_sensitive(const utf16lechar *name1, size_t nbytes1,
const utf16lechar *name2, size_t nbytes2)
entry->stream_name_nbytes);
}
+/* Given a UTF-16LE filename and a directory, look up the dentry for the file.
+ * Return it if found, otherwise NULL. This is case-sensitive on UNIX and
+ * case-insensitive on Windows. */
struct wim_dentry *
get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
const utf16lechar *name,
size_t name_nbytes)
{
- struct rb_node *node = dentry->d_inode->i_children.rb_node;
+ struct rb_node *node;
+
+#ifdef __WIN32__
+ node = dentry->d_inode->i_children_case_insensitive.rb_node;
+#else
+ node = dentry->d_inode->i_children.rb_node;
+#endif
+
struct wim_dentry *child;
while (node) {
+ #ifdef __WIN32__
+ child = rb_entry(node, struct wim_dentry, rb_node_case_insensitive);
+ #else
child = rbnode_dentry(node);
+ #endif
int result = compare_utf16le_names(name, name_nbytes,
child->file_name,
child->file_name_nbytes);
}
p = path;
while (1) {
- while (*p == cpu_to_le16('/'))
+ while (*p == cpu_to_le16(WIM_PATH_SEPARATOR))
p++;
if (*p == cpu_to_le16('\0'))
break;
pp = p;
- while (*pp != cpu_to_le16('/') && *pp != cpu_to_le16('\0'))
+ while (*pp != cpu_to_le16(WIM_PATH_SEPARATOR) &&
+ *pp != cpu_to_le16('\0'))
pp++;
cur_dentry = get_dentry_child_with_utf16le_name(parent_dentry, p,
to_parent_name(tchar *buf, size_t len)
{
ssize_t i = (ssize_t)len - 1;
- while (i >= 0 && buf[i] == T('/'))
+ while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR)
i--;
- while (i >= 0 && buf[i] != T('/'))
+ while (i >= 0 && buf[i] != WIM_PATH_SEPARATOR)
i--;
- while (i >= 0 && buf[i] == T('/'))
+ while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR)
i--;
buf[i + 1] = T('\0');
}
#endif
/* HACK: This may instead delete the inode from i_list, but the
* hlist_del() behaves the same as list_del(). */
- hlist_del(&inode->i_hlist);
+ if (!hlist_unhashed(&inode->i_hlist))
+ hlist_del(&inode->i_hlist);
FREE(inode->i_extracted_file);
FREE(inode);
}
for_dentry_in_tree_depth(root, do_free_dentry, lookup_table);
}
+#ifdef __WIN32__
+
+/* Insert a dentry into the case insensitive index for a directory.
+ *
+ * This is a red-black tree, but when multiple dentries share the same
+ * case-insensitive name, only one is inserted into the tree itself; the rest
+ * are connected in a list.
+ *
+ * @parent: directory whose i_children_case_insensitive tree is searched.
+ * @child: dentry to insert.
+ *
+ * Returns NULL if @child was linked into the tree. Otherwise returns the
+ * dentry already in the tree that compares equal to @child under
+ * dentry_compare_names_case_insensitive(); in that case the tree is left
+ * unmodified, and chaining @child onto the conflict list is left to the
+ * caller.
+ */
+static struct wim_dentry *
+dentry_add_child_case_insensitive(struct wim_dentry *parent,
+ struct wim_dentry *child)
+{
+ struct rb_root *root;
+ struct rb_node **new; /* link slot where @child would be attached */
+ struct rb_node *rb_parent; /* tree node that owns the slot *new */
+
+ root = &parent->d_inode->i_children_case_insensitive;
+ new = &root->rb_node;
+ rb_parent = NULL;
+ while (*new) { /* descend until an empty slot or an equal name */
+ struct wim_dentry *this = container_of(*new, struct wim_dentry,
+ rb_node_case_insensitive);
+ int result = dentry_compare_names_case_insensitive(child, this);
+
+ rb_parent = *new;
+
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else
+ return this; /* name collision: nothing was inserted */
+ }
+ rb_link_node(&child->rb_node_case_insensitive, rb_parent, new);
+ rb_insert_color(&child->rb_node_case_insensitive, root);
+ return NULL; /* @child is now the tree node for its name */
+}
+#endif
+
/*
* Links a dentry into the directory tree.
*
rb_insert_color(&child->rb_node, root);
#ifdef __WIN32__
- /* Case insensitive child dentry index */
- root = &parent->d_inode->i_children_case_insensitive;
- new = &root->rb_node;
- rb_parent = NULL;
- while (*new) {
- struct wim_dentry *this = container_of(*new, struct wim_dentry,
- rb_node_case_insensitive);
- int result = dentry_compare_names_case_insensitive(child, this);
-
- rb_parent = *new;
-
- if (result < 0)
- new = &((*new)->rb_left);
- else if (result > 0)
- new = &((*new)->rb_right);
- else {
+ {
+ struct wim_dentry *existing;
+ existing = dentry_add_child_case_insensitive(parent, child);
+ if (existing) {
list_add(&child->case_insensitive_conflict_list,
- &this->case_insensitive_conflict_list);
- return NULL;
-
+ &existing->case_insensitive_conflict_list);
+ child->rb_node_case_insensitive.__rb_parent_color = 0;
+ } else {
+ INIT_LIST_HEAD(&child->case_insensitive_conflict_list);
}
}
- rb_link_node(&child->rb_node_case_insensitive, rb_parent, new);
- rb_insert_color(&child->rb_node_case_insensitive, root);
- INIT_LIST_HEAD(&child->case_insensitive_conflict_list);
#endif
return NULL;
}
/*
 * Remove @dentry from its parent directory's child indices: the i_children
 * red-black tree and, under __WIN32__, the case-insensitive tree and/or
 * conflict list. Does nothing when @dentry is its own parent. The dentry
 * itself is not freed here.
 */
void
unlink_dentry(struct wim_dentry *dentry)
{
- if (!dentry_is_root(dentry)) {
- rb_erase(&dentry->rb_node, &dentry->parent->d_inode->i_children);
- #ifdef __WIN32__
+ struct wim_dentry *parent = dentry->parent;
+
+ if (parent == dentry) /* presumably the root dentry (the replaced code tested dentry_is_root()) */
+ return;
+ rb_erase(&dentry->rb_node, &parent->d_inode->i_children);
+#ifdef __WIN32__
+ if (dentry->rb_node_case_insensitive.__rb_parent_color) {
+ /* This dentry was in the case-insensitive red-black tree.
+ * (The code that links a dentry into the directory tree stores 0
+ * in __rb_parent_color for a dentry that was only chained onto a
+ * conflict list; NOTE(review): this relies on the rbtree code
+ * never leaving 0 there for a linked node -- confirm.) */
rb_erase(&dentry->rb_node_case_insensitive,
- &dentry->parent->d_inode->i_children_case_insensitive);
- list_del(&dentry->case_insensitive_conflict_list);
- #endif
+ &parent->d_inode->i_children_case_insensitive);
+ if (!list_empty(&dentry->case_insensitive_conflict_list)) {
+ /* Make a different case-insensitively-the-same dentry
+ * be the "representative" in the red-black tree.
+ * The insert cannot collide, because the only tree
+ * node with this name was just erased; hence the
+ * assertion below. */
+ struct list_head *next;
+ struct wim_dentry *other;
+ struct wim_dentry *existing;
+
+ next = dentry->case_insensitive_conflict_list.next;
+ other = list_entry(next, struct wim_dentry, case_insensitive_conflict_list);
+ existing = dentry_add_child_case_insensitive(parent, other);
+ wimlib_assert(existing == NULL);
+ }
}
+ list_del(&dentry->case_insensitive_conflict_list); /* done whether or not the dentry was in the tree */
+#endif
}
/*
* fixed-length fields */
if (dentry->length < sizeof(struct wim_dentry_on_disk)) {
ERROR("Directory entry has invalid length of %"PRIu64" bytes",
- dentry->length);
+ entry->length);
return WIMLIB_ERR_INVALID_DENTRY;
}
u64 cur_offset = dentry->subdir_offset;
struct wim_dentry *child;
struct wim_dentry *duplicate;
+ struct wim_dentry *parent;
struct wim_dentry cur_child;
int ret;
if (cur_offset == 0)
return 0;
+ /* Check for cyclic directory structure */
+ for (parent = dentry->parent; !dentry_is_root(parent); parent = parent->parent)
+ {
+ if (unlikely(parent->subdir_offset == cur_offset)) {
+ ERROR("Cyclic directory structure directed: children "
+ "of \"%"TS"\" coincide with children of \"%"TS"\"",
+ dentry_full_path(dentry),
+ dentry_full_path(parent));
+ return WIMLIB_ERR_INVALID_DENTRY;
+ }
+ }
+
/* Find and read all the children of @dentry. */
for (;;) {
* entries. */
cur_offset += dentry_total_length(child);
+ if (unlikely(!dentry_has_long_name(child))) {
+ WARNING("Ignoring unnamed dentry in "
+ "directory \"%"TS"\"",
+ dentry_full_path(dentry));
+ free_dentry(child);
+ continue;
+ }
+
duplicate = dentry_add_child(dentry, child);
- if (duplicate) {
+ if (unlikely(duplicate)) {
const tchar *child_type, *duplicate_type;
child_type = dentry_get_file_type_string(child);
duplicate_type = dentry_get_file_type_string(duplicate);
"at that path with the exact same name)",
child_type, dentry_full_path(duplicate),
duplicate_type);
- } else {
- inode_add_dentry(child, child->d_inode);
- /* If there are children of this child, call this
- * procedure recursively. */
- if (child->subdir_offset != 0) {
- if (dentry_is_directory(child)) {
- ret = read_dentry_tree(metadata_resource,
- metadata_resource_len,
- child);
- if (ret)
- break;
- } else {
- WARNING("Ignoring children of non-directory \"%"TS"\"",
- dentry_full_path(child));
- }
- }
+ free_dentry(child);
+ continue;
+ }
+ inode_add_dentry(child, child->d_inode);
+ /* If there are children of this child, call this
+ * procedure recursively. */
+ if (child->subdir_offset != 0) {
+ if (likely(dentry_is_directory(child))) {
+ ret = read_dentry_tree(metadata_resource,
+ metadata_resource_len,
+ child);
+ if (ret)
+ break;
+ } else {
+ WARNING("Ignoring children of non-directory \"%"TS"\"",
+ dentry_full_path(child));
+ }
}
}
return ret;
disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes);
p += sizeof(struct wim_dentry_on_disk);
+ wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry));
+
if (dentry_has_long_name(dentry))
p = mempcpy(p, dentry->file_name, dentry->file_name_nbytes + 2);