X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fdentry.c;h=47ea13445d76b53dbe4ae5b430a7ec67330a45ea;hp=c62a694f77ae3284e6309ee11d0007df79cb0c20;hb=b40ff7c0765979aedb2766534aa9cee44bfb1b69;hpb=ac0f66feae348981def9e4fcf0af84868ac0a731 diff --git a/src/dentry.c b/src/dentry.c index c62a694f..47ea1344 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -72,18 +72,107 @@ struct wim_ads_entry_on_disk { #define WIM_ADS_ENTRY_DISK_SIZE 38 -/* WIM directory entry (on-disk format) */ +/* On-disk format of a WIM dentry (directory entry), located in the metadata + * resource for a WIM image. */ struct wim_dentry_on_disk { + + /* Length of this directory entry in bytes, not including any alternate + * data stream entries. Should be a multiple of 8 so that the following + * dentry or alternate data stream entry is aligned on an 8-byte + * boundary. (If not, wimlib will round it up.) It must be at least as + * long as the fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE), + * plus the lengths of the file name and/or short name if present. + * + * It is also possible for this field to be 0. This situation, which is + * undocumented, indicates the end of a list of sibling nodes in a + * directory. It also means the real length is 8, because the dentry + * included only the length field, but that takes up 8 bytes. */ le64 length; + + /* Attributes of the file or directory. This is a bitwise OR of the + * FILE_ATTRIBUTE_* constants and should correspond to the value + * retrieved by GetFileAttributes() on Windows. */ le32 attributes; + + /* A value that specifies the security descriptor for this file or + * directory. If -1, the file or directory has no security descriptor. 
+ * Otherwise, it is a 0-based index into the WIM image's table of + * security descriptors (see: `struct wim_security_data') */ sle32 security_id; + + /* Offset, in bytes, from the start of the uncompressed metadata + * resource of this directory's child directory entries, or 0 if this + * directory entry does not correspond to a directory or otherwise does + * not have any children. */ le64 subdir_offset; + + /* Reserved fields */ le64 unused_1; le64 unused_2; + + + /* Creation time, last access time, and last write time, in + * 100-nanosecond intervals since 12:00 a.m UTC January 1, 1601. They + * should correspond to the times gotten by calling GetFileTime() on + * Windows. */ le64 creation_time; le64 last_access_time; le64 last_write_time; + + /* Vaguely, the SHA-1 message digest ("hash") of the file's contents. + * More specifically, this is for the "unnamed data stream" rather than + * any "alternate data streams". This hash value is used to look up the + * corresponding entry in the WIM's stream lookup table to actually find + * the file contents within the WIM. + * + * If the file has no unnamed data stream (e.g. is a directory), then + * this field will be all zeroes. If the unnamed data stream is empty + * (i.e. an "empty file"), then this field is also expected to be all + * zeroes. (It will be if wimlib created the WIM image, at least; + * otherwise it can't be ruled out that the SHA-1 message digest of 0 + * bytes of data is given explicitly.) + * + * If the file has reparse data, then this field will instead specify + * the SHA-1 message digest of the reparse data. If it is somehow + * possible for a file to have both an unnamed data stream and reparse + * data, then this is not handled by wimlib. + * + * As a further special case, if this field is all zeroes but there is + * an alternate data stream entry with no name and a nonzero SHA-1 + * message digest field, then that hash must be used instead of this + * one. 
(wimlib does not use this quirk on WIM images it creates.) + */ u8 unnamed_stream_hash[SHA1_HASH_SIZE]; + + /* The format of the following data is not yet completely known and they + * do not correspond to Microsoft's documentation. + * + * If this directory entry is for a reparse point (has + * FILE_ATTRIBUTE_REPARSE_POINT set in the attributes field), then the + * version of the following fields containing the reparse tag is valid. + * Furthermore, the field notated as not_rpfixed, as far as I can tell, + * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the + * targets of absolute symbolic links) were *not* done, and otherwise 0. + * + * If this directory entry is not for a reparse point, then the version + * of the following fields containing the hard_link_group_id is valid. + * All MS says about this field is that "If this file is part of a hard + * link set, all the directory entries in the set will share the same + * value in this field.". However, more specifically I have observed + * the following: + * - If the file is part of a hard link set of size 1, then the + * hard_link_group_id should be set to either 0, which is treated + * specially as indicating "not hardlinked", or any unique value. + * - The specific nonzero values used to identify hard link sets do + * not matter, as long as they are unique. + * - However, due to bugs in Microsoft's software, it is actually NOT + * guaranteed that directory entries that share the same hard link + * group ID are actually hard linked to each other. We have to + * handle this by using special code to use distinguishing features + * (which is possible because some information about the underlying + * inode is repeated in each dentry) to split up these fake hard link + * groups into what they actually are supposed to be. 
+ */ union { struct { le32 rp_unknown_1; @@ -96,14 +185,28 @@ struct wim_dentry_on_disk { le64 hard_link_group_id; } _packed_attribute nonreparse; }; + + /* Number of alternate data stream entries that directly follow this + * dentry on-disk. */ le16 num_alternate_data_streams; + + /* Length of this file's UTF-16LE encoded short name (8.3 DOS-compatible + * name), if present, in bytes, excluding the null terminator. If this + * file has no short name, then this field should be 0. */ le16 short_name_nbytes; + + /* Length of this file's UTF-16LE encoded "long" name, excluding the + * null terminator. If this file has no long name, then this field + * should be 0. It's expected that only the root dentry has this field + * set to 0. */ le16 file_name_nbytes; - /* Follewed by variable length file name, if file_name_nbytes != 0 */ - utf16lechar file_name[]; + /* Followed by variable length file name, in UTF-16LE, if + * file_name_nbytes != 0. Includes null terminator. */ + /*utf16lechar file_name[];*/ - /* Followed by variable length short name, if short_name_nbytes != 0 */ + /* Followed by variable length short name, in UTF-16LE, if + * short_name_nbytes != 0. Includes null terminator. */ /*utf16lechar short_name[];*/ } _packed_attribute; @@ -880,13 +983,6 @@ new_timeless_inode(void) inode->i_next_stream_id = 1; inode->i_not_rpfixed = 1; INIT_LIST_HEAD(&inode->i_list); - #ifdef WITH_FUSE - if (pthread_mutex_init(&inode->i_mutex, NULL) != 0) { - ERROR_WITH_ERRNO("Error initializing mutex"); - FREE(inode); - return NULL; - } - #endif INIT_LIST_HEAD(&inode->i_dentry); } return inode; @@ -1025,16 +1121,10 @@ free_inode(struct wim_inode *inode) destroy_ads_entry(&inode->i_ads_entries[i]); FREE(inode->i_ads_entries); } - #ifdef WITH_FUSE - wimlib_assert(inode->i_num_opened_fds == 0); - FREE(inode->i_fds); - pthread_mutex_destroy(&inode->i_mutex); - #endif /* HACK: This may instead delete the inode from i_list, but the * hlist_del() behaves the same as list_del(). 
*/ if (!hlist_unhashed(&inode->i_hlist)) hlist_del(&inode->i_hlist); - FREE(inode->i_extracted_file); FREE(inode); } } @@ -1900,12 +1990,14 @@ dentry_get_file_type_string(const struct wim_dentry *dentry) * Returns zero on success; nonzero on failure. */ int -read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, - struct wim_dentry *dentry) +read_dentry_tree(const u8 * restrict metadata_resource, + u64 metadata_resource_len, + struct wim_dentry * restrict dentry) { u64 cur_offset = dentry->subdir_offset; struct wim_dentry *child; struct wim_dentry *duplicate; + struct wim_dentry *parent; struct wim_dentry cur_child; int ret; @@ -1918,6 +2010,18 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, if (cur_offset == 0) return 0; + /* Check for cyclic directory structure */ + for (parent = dentry->parent; !dentry_is_root(parent); parent = parent->parent) + { + if (unlikely(parent->subdir_offset == cur_offset)) { + ERROR("Cyclic directory structure directed: children " + "of \"%"TS"\" coincide with children of \"%"TS"\"", + dentry_full_path(dentry), + dentry_full_path(parent)); + return WIMLIB_ERR_INVALID_DENTRY; + } + } + /* Find and read all the children of @dentry. */ for (;;) { @@ -1947,8 +2051,16 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, * entries. 
*/ cur_offset += dentry_total_length(child); + if (unlikely(!dentry_has_long_name(child))) { + WARNING("Ignoring unnamed dentry in " + "directory \"%"TS"\"", + dentry_full_path(dentry)); + free_dentry(child); + continue; + } + duplicate = dentry_add_child(dentry, child); - if (duplicate) { + if (unlikely(duplicate)) { const tchar *child_type, *duplicate_type; child_type = dentry_get_file_type_string(child); duplicate_type = dentry_get_file_type_string(duplicate); @@ -1957,23 +2069,24 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, "at that path with the exact same name)", child_type, dentry_full_path(duplicate), duplicate_type); - } else { - inode_add_dentry(child, child->d_inode); - /* If there are children of this child, call this - * procedure recursively. */ - if (child->subdir_offset != 0) { - if (dentry_is_directory(child)) { - ret = read_dentry_tree(metadata_resource, - metadata_resource_len, - child); - if (ret) - break; - } else { - WARNING("Ignoring children of non-directory \"%"TS"\"", - dentry_full_path(child)); - } - } + free_dentry(child); + continue; + } + inode_add_dentry(child, child->d_inode); + /* If there are children of this child, call this + * procedure recursively. 
*/ + if (child->subdir_offset != 0) { + if (likely(dentry_is_directory(child))) { + ret = read_dentry_tree(metadata_resource, + metadata_resource_len, + child); + if (ret) + break; + } else { + WARNING("Ignoring children of non-directory \"%"TS"\"", + dentry_full_path(child)); + } } } return ret; @@ -2027,6 +2140,8 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes); p += sizeof(struct wim_dentry_on_disk); + wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry)); + if (dentry_has_long_name(dentry)) p = mempcpy(p, dentry->file_name, dentry->file_name_nbytes + 2); @@ -2123,7 +2238,7 @@ write_dentry_tree_recursive(const struct wim_dentry *parent, u8 *p) * Returns pointer to the byte after the last byte we wrote. */ u8 * -write_dentry_tree(const struct wim_dentry *root, u8 *p) +write_dentry_tree(const struct wim_dentry * restrict root, u8 * restrict p) { DEBUG("Writing dentry tree."); wimlib_assert(dentry_is_root(root));