* a file name and short name that take the specified numbers of bytes. This
* excludes any alternate data stream entries that may follow the dentry. */
static u64
-_dentry_correct_length_unaligned(u16 file_name_nbytes, u16 short_name_nbytes)
+dentry_correct_length_unaligned(u16 file_name_nbytes, u16 short_name_nbytes)
{
u64 length = sizeof(struct wim_dentry_on_disk);
if (file_name_nbytes)
* ignored; also, this excludes any alternate data stream entries that may
* follow the dentry. */
static u64
-dentry_correct_length_unaligned(const struct wim_dentry *dentry)
+dentry_correct_length_aligned(const struct wim_dentry *dentry)
{
- return _dentry_correct_length_unaligned(dentry->file_name_nbytes,
- dentry->short_name_nbytes);
+ u64 len; /* unaligned length computed from the two name sizes */
+
+ len = dentry_correct_length_unaligned(dentry->file_name_nbytes,
+ dentry->short_name_nbytes);
+ return (len + 7) & ~7; /* round up to the next multiple of 8 */
}
/* Duplicates a string of system-dependent encoding into a UTF-16LE string and
return (len + 7) & ~7;
}
+/*
+ * Determine whether to include a "dummy" stream when writing a WIM dentry:
+ *
+ * Some versions of Microsoft's WIM software (the boot driver(s) in WinPE 3.0,
+ * for example) contain a bug where they assume the first alternate data stream
+ * (ADS) entry of a dentry with a nonzero ADS count specifies the unnamed
+ * stream, even if it has a name and the unnamed stream is already specified in
+ * the hash field of the dentry itself.
+ *
+ * wimlib has to work around this behavior by carefully emulating the behavior
+ * of (most versions of) ImageX/WIMGAPI, which move the unnamed stream reference
+ * into the alternate stream entries whenever there are named data streams, even
+ * though there is already a field in the dentry itself for the unnamed stream
+ * reference, which then goes to waste.
+ */
+static inline bool inode_needs_dummy_stream(const struct wim_inode *inode)
+{
+	/* Without any ADS entries there is nothing to work around. */
+	if (inode->i_num_ads < 1)
+		return false;
+
+	/* Overflow check: the extra entry must still fit in the ADS count
+	 * (the count is written as a 16-bit field). */
+	if (inode->i_num_ads >= 0xffff)
+		return false;
+
+	/* Assume the dentry is okay as-is if it already had an unnamed ADS
+	 * entry when it was read in. */
+	return inode->i_canonical_streams != 0;
+}
-static u64
-_dentry_total_length(const struct wim_dentry *dentry, u64 length)
+/* Calculate the total number of bytes that will be consumed when a WIM dentry
+ * is written. This includes base dentry and name fields as well as all
+ * alternate data stream entries and alignment bytes.
+ *
+ * The total is built up as: the aligned base length returned by
+ * dentry_correct_length_aligned(), plus the length of one zero-initialized ADS
+ * entry when inode_needs_dummy_stream() reports that a dummy entry is needed,
+ * plus ads_entry_total_length() of every ADS entry of the inode.
+ *
+ * NOTE(review): unlike dentry_in_total_length(), the sum is not rounded up to
+ * a multiple of 8 at the end; this presumably relies on
+ * ads_entry_total_length() already returning aligned lengths --- confirm. */
+u64
+dentry_out_total_length(const struct wim_dentry *dentry)
{
+ u64 length = dentry_correct_length_aligned(dentry);
const struct wim_inode *inode = dentry->d_inode;
+
+ if (inode_needs_dummy_stream(inode))
+ length += ads_entry_total_length(&(struct wim_ads_entry){});
+
for (u16 i = 0; i < inode->i_num_ads; i++)
length += ads_entry_total_length(&inode->i_ads_entries[i]);
- return (length + 7) & ~7;
-}
-/* Calculate the aligned *total* length of an on-disk WIM dentry. This includes
- * all alternate data streams. */
-u64
-dentry_correct_total_length(const struct wim_dentry *dentry)
-{
- return _dentry_total_length(dentry,
- dentry_correct_length_unaligned(dentry));
+ return length;
}
-/* Like dentry_correct_total_length(), but use the existing dentry->length field
- * instead of calculating its "correct" value. */
+/* Calculate the aligned, total length of a dentry, including all alternate data
+ * stream entries. Uses dentry->length.
+ *
+ * That is, the sum starts from the length already stored in the dentry
+ * rather than a recomputed one, adds ads_entry_total_length() for each ADS
+ * entry of the inode, and is rounded up to a multiple of 8. No dummy stream
+ * entry is accounted for here. */
static u64
-dentry_total_length(const struct wim_dentry *dentry)
+dentry_in_total_length(const struct wim_dentry *dentry)
{
- return _dentry_total_length(dentry, dentry->length);
+ u64 length = dentry->length;
+ const struct wim_inode *inode = dentry->d_inode;
+ for (u16 i = 0; i < inode->i_num_ads; i++)
+ length += ads_entry_total_length(&inode->i_ads_entries[i]);
+ return (length + 7) & ~7;
}
int
static int
increment_subdir_offset(struct wim_dentry *dentry, void *subdir_offset_p)
{
- *(u64*)subdir_offset_p += dentry_correct_total_length(dentry);
+ *(u64*)subdir_offset_p += dentry_out_total_length(dentry); /* subdir_offset_p points to a u64 running total; add this dentry's full output length */
return 0;
}
inode->i_nlink = 1;
inode->i_next_stream_id = 1;
inode->i_not_rpfixed = 1;
+ inode->i_canonical_streams = 1;
INIT_LIST_HEAD(&inode->i_list);
INIT_LIST_HEAD(&inode->i_dentry);
}
/*
* Returns the alternate data stream entry belonging to @inode that has the
- * stream name @stream_name.
+ * stream name @stream_name, or NULL if the inode has no alternate data stream
+ * with that name.
+ *
+ * If @p stream_name is the empty string, NULL is returned --- that is, this
+ * function will not return "unnamed" alternate data stream entries.
*/
struct wim_ads_entry *
inode_get_ads_entry(struct wim_inode *inode, const tchar *stream_name,
u16 i;
struct wim_ads_entry *result;
+ if (stream_name[0] == T('\0'))
+ return NULL;
+
#if TCHAR_IS_UTF16LE
const utf16lechar *stream_name_utf16le;
struct wim_ads_entry *ads_entries;
struct wim_ads_entry *new_entry;
+ wimlib_assert(stream_name_nbytes != 0);
+
if (inode->i_num_ads >= 0xfffe) {
ERROR("Too many alternate data streams in one inode!");
return NULL;
}
/*
- * Add an alternate stream entry to a WIM inode and return a pointer to it, or
- * NULL if memory could not be allocated.
+ * Add an alternate stream entry to a WIM inode. On success, returns a pointer
+ * to the new entry; on failure, returns NULL.
+ *
+ * @stream_name must be a nonempty string.
*/
struct wim_ads_entry *
inode_add_ads(struct wim_inode *inode, const tchar *stream_name)
struct wim_lookup_table_entry *lte, *existing_lte;
sha1_buffer(buffer, size, hash);
- existing_lte = __lookup_resource(lookup_table, hash);
+ existing_lte = lookup_resource(lookup_table, hash);
if (existing_lte) {
wimlib_assert(wim_resource_size(existing_lte) == size);
lte = existing_lte;
disk_entry->stream_name,
cur_entry->stream_name_nbytes);
cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = cpu_to_le16(0);
+ } else {
+ /* Mark inode as having weird stream entries. */
+ inode->i_canonical_streams = 0;
}
/* It's expected that the size of every ADS entry is a multiple
* The calculated length here is unaligned to allow for the possibility
* that the dentry->length names an unaligned length, although this
* would be unexpected. */
- calculated_size = _dentry_correct_length_unaligned(file_name_nbytes,
- short_name_nbytes);
+ calculated_size = dentry_correct_length_unaligned(file_name_nbytes,
+ short_name_nbytes);
if (dentry->length < calculated_size) {
ERROR("Unexpected end of directory entry! (Expected "
* cur_child.length, which although it does take into account
* the padding, it DOES NOT take into account alternate stream
* entries. */
- cur_offset += dentry_total_length(child);
+ cur_offset += dentry_in_total_length(child);
if (unlikely(!dentry_has_long_name(child))) {
WARNING("Ignoring unnamed dentry in "
return ret;
}
+/*
+ * Serializes one WIM alternate data stream (ADS) entry into an output buffer.
+ *
+ * @ads_entry:	In-memory ADS entry supplying the reserved field and the
+ *		stream name.
+ * @hash:	Hash to store in the on-disk entry; this is used in place of
+ *		anything held by @ads_entry.
+ * @p:		Where to begin writing.
+ *
+ * The entry is zero-padded so that it ends on an 8-byte boundary, and its
+ * on-disk length field covers that padding.
+ *
+ * Returns a pointer to the byte after the last byte written.
+ */
+static u8 *
+write_ads_entry(const struct wim_ads_entry *ads_entry,
+		const u8 *hash, u8 * restrict p)
+{
+	u8 * const entry_start = p;
+	struct wim_ads_entry_on_disk *out =
+		(struct wim_ads_entry_on_disk*)entry_start;
+
+	/* Fixed-size fields first; the length is filled in at the very end. */
+	out->reserved = cpu_to_le64(ads_entry->reserved);
+	copy_hash(out->hash, hash);
+	out->stream_name_nbytes = cpu_to_le16(ads_entry->stream_name_nbytes);
+	p = entry_start + sizeof(struct wim_ads_entry_on_disk);
+
+	/* A named stream is followed by its name plus two further bytes
+	 * (presumably the UTF-16 null terminator). */
+	if (ads_entry->stream_name_nbytes != 0)
+		p = mempcpy(p, ads_entry->stream_name,
+			    ads_entry->stream_name_nbytes + 2);
+
+	/* Zero-fill up to the next 8-byte boundary. */
+	for (; ((uintptr_t)p & 7) != 0; p++)
+		*p = 0;
+
+	out->length = cpu_to_le64(p - entry_start);
+	return p;
+}
+
/*
* Writes a WIM dentry to an output buffer.
*
struct wim_dentry_on_disk *disk_dentry;
const u8 *orig_p;
const u8 *hash;
+ bool use_dummy_stream;
+ u16 num_ads;
wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */
orig_p = p;
inode = dentry->d_inode;
+ use_dummy_stream = inode_needs_dummy_stream(inode);
disk_dentry = (struct wim_dentry_on_disk*)p;
disk_dentry->attributes = cpu_to_le32(inode->i_attributes);
disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time);
disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time);
disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time);
- hash = inode_stream_hash(inode, 0);
+ if (use_dummy_stream)
+ hash = zero_hash;
+ else
+ hash = inode_stream_hash(inode, 0);
copy_hash(disk_dentry->unnamed_stream_hash, hash);
if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
disk_dentry->nonreparse.hard_link_group_id =
cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino);
}
- disk_dentry->num_alternate_data_streams = cpu_to_le16(inode->i_num_ads);
+ num_ads = inode->i_num_ads;
+ if (use_dummy_stream)
+ num_ads++;
+ disk_dentry->num_alternate_data_streams = cpu_to_le16(num_ads);
disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes);
disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes);
p += sizeof(struct wim_dentry_on_disk);
* have been renamed, thus changing its needed length. */
disk_dentry->length = cpu_to_le64(p - orig_p);
+ if (use_dummy_stream) {
+ hash = inode_unnamed_stream_hash(inode);
+ p = write_ads_entry(&(struct wim_ads_entry){}, hash, p);
+ }
+
/* Write the alternate data streams entries, if any. */
for (u16 i = 0; i < inode->i_num_ads; i++) {
- const struct wim_ads_entry *ads_entry =
- &inode->i_ads_entries[i];
- struct wim_ads_entry_on_disk *disk_ads_entry =
- (struct wim_ads_entry_on_disk*)p;
- orig_p = p;
-
- disk_ads_entry->reserved = cpu_to_le64(ads_entry->reserved);
-
hash = inode_stream_hash(inode, i + 1);
- copy_hash(disk_ads_entry->hash, hash);
- disk_ads_entry->stream_name_nbytes = cpu_to_le16(ads_entry->stream_name_nbytes);
- p += sizeof(struct wim_ads_entry_on_disk);
- if (ads_entry->stream_name_nbytes) {
- p = mempcpy(p, ads_entry->stream_name,
- ads_entry->stream_name_nbytes + 2);
- }
- /* Align to 8-byte boundary */
- while ((uintptr_t)p & 7)
- *p++ = 0;
- disk_ads_entry->length = cpu_to_le64(p - orig_p);
+ p = write_ads_entry(&inode->i_ads_entries[i], hash, p);
}
+
return p;
}
static int
init_wimlib_dentry(struct wimlib_dir_entry *wdentry,
struct wim_dentry *dentry,
- const WIMStruct *wim)
+ const WIMStruct *wim,
+ int flags)
{
int ret;
size_t dummy;
const struct wim_inode *inode = dentry->d_inode;
struct wim_lookup_table_entry *lte;
+ const u8 *hash;
#if TCHAR_IS_UTF16LE
wdentry->filename = dentry->file_name;
wdentry->last_access_time = wim_timestamp_to_timespec(inode->i_last_access_time);
lte = inode_unnamed_lte(inode, wim->lookup_table);
- if (lte)
+ if (lte) {
lte_to_wimlib_resource_entry(lte, &wdentry->streams[0].resource);
+ } else if (!is_zero_hash(hash = inode_unnamed_stream_hash(inode))) {
+ if (flags & WIMLIB_ITERATE_DIR_TREE_FLAG_RESOURCES_NEEDED)
+ return resource_not_found_error(inode, hash);
+ copy_hash(wdentry->streams[0].resource.sha1_hash, hash);
+ wdentry->streams[0].resource.is_missing = 1;
+ }
for (unsigned i = 0; i < inode->i_num_ads; i++) {
- if (inode->i_ads_entries[i].stream_name == NULL)
+ if (!ads_entry_is_named_stream(&inode->i_ads_entries[i]))
continue;
lte = inode_stream_lte(inode, i + 1, wim->lookup_table);
wdentry->num_named_streams++;
if (lte) {
lte_to_wimlib_resource_entry(lte, &wdentry->streams[
wdentry->num_named_streams].resource);
+ } else if (!is_zero_hash(hash = inode_stream_hash(inode, i + 1))) {
+ if (flags & WIMLIB_ITERATE_DIR_TREE_FLAG_RESOURCES_NEEDED)
+ return resource_not_found_error(inode, hash);
+ copy_hash(wdentry->streams[
+ wdentry->num_named_streams].resource.sha1_hash, hash);
+ wdentry->streams[
+ wdentry->num_named_streams].resource.is_missing = 1;
}
#if TCHAR_IS_UTF16LE
wdentry->streams[wdentry->num_named_streams].stream_name =
if (!wdentry)
goto out;
- ret = init_wimlib_dentry(wdentry, dentry, wim);
+ ret = init_wimlib_dentry(wdentry, dentry, wim, flags);
if (ret)
goto out_free_wimlib_dentry;
return for_image(wim, image, image_do_iterate_dir_tree);
}
+/* Returns %true iff the metadata of @inode and @template_inode are reasonably
+ * consistent with them being the same, unmodified file.
+ *
+ * @inode:			Inode being checked; its streams must already be
+ *				resolved (i_resolved set), otherwise %false is
+ *				returned.
+ * @template_inode:		Inode it is compared against.
+ * @template_lookup_table:	Lookup table used to find the streams of
+ *				@template_inode.
+ *
+ * The checks are: identical creation and last write times; last access time
+ * not earlier than the template's; identical ADS entry count; and, for every
+ * stream index 0..i_num_ads, equal stream sizes (a missing stream only matches
+ * another missing stream or a zero-size one) and equal hashes whenever neither
+ * entry is marked unhashed. */
static bool
-inode_stream_sizes_consistent(const struct wim_inode *inode_1,
- const struct wim_inode *inode_2,
- const struct wim_lookup_table *lookup_table)
+inode_metadata_consistent(const struct wim_inode *inode,
+ const struct wim_inode *template_inode,
+ const struct wim_lookup_table *template_lookup_table)
{
- if (inode_1->i_num_ads != inode_2->i_num_ads)
+ /* Must have exact same creation time and last write time. */
+ if (inode->i_creation_time != template_inode->i_creation_time ||
+ inode->i_last_write_time != template_inode->i_last_write_time)
+ return false;
+
+ /* Last access time may have stayed the same or increased, but certainly
+ * shouldn't have decreased. */
+ if (inode->i_last_access_time < template_inode->i_last_access_time)
+ return false;
+
+ /* Must have same number of alternate data stream entries. */
+ if (inode->i_num_ads != template_inode->i_num_ads)
return false;
- for (unsigned i = 0; i <= inode_1->i_num_ads; i++) {
- const struct wim_lookup_table_entry *lte_1, *lte_2;
- lte_1 = inode_stream_lte(inode_1, i, lookup_table);
- lte_2 = inode_stream_lte(inode_2, i, lookup_table);
- if (lte_1 && lte_2) {
- if (wim_resource_size(lte_1) != wim_resource_size(lte_2))
+ /* If the stream entries for the inode are for some reason not resolved,
+ * then the hashes are already available and the point of this function
+ * is defeated. */
+ if (!inode->i_resolved)
+ return false;
+
+ /* Iterate through each stream and do some more checks. */
+ for (unsigned i = 0; i <= inode->i_num_ads; i++) {
+ const struct wim_lookup_table_entry *lte, *template_lte;
+
+ lte = inode_stream_lte_resolved(inode, i);
+ template_lte = inode_stream_lte(template_inode, i,
+ template_lookup_table);
+
+ /* Compare stream sizes. */
+ if (lte && template_lte) {
+ if (wim_resource_size(lte) != wim_resource_size(template_lte))
return false;
- } else if (lte_1 && wim_resource_size(lte_1)) {
+
+ /* If hash happens to be available, compare with template. */
+ if (!lte->unhashed && !template_lte->unhashed &&
+ !hashes_equal(lte->hash, template_lte->hash))
+ return false;
+
+ } else if (lte && wim_resource_size(lte)) {
return false;
- } else if (lte_2 && wim_resource_size(lte_2)) {
+ } else if (template_lte && wim_resource_size(template_lte)) {
return false;
}
}
+
+ /* All right, barring a full checksum and given that the inodes share a
+ * path and the user isn't trying to trick us, these inodes most likely
+ * refer to the same file. */
return true;
}
-static void
-inode_replace_ltes(struct wim_inode *inode,
- struct wim_inode *template_inode,
- struct wim_lookup_table *lookup_table)
+/**
+ * Given an inode @inode that has been determined to be "the same" as another
+ * inode @template_inode in either the same WIM or another WIM, retrieve some
+ * useful stream information (e.g. checksums) from @template_inode.
+ *
+ * This assumes that the streams for @inode have been resolved (to point
+ * directly to the appropriate `struct wim_lookup_table_entry's) but do not
+ * necessarily have checksum information filled in.
+ *
+ * For every stream index (index 0 is held in i_lte, index i >= 1 in
+ * i_ads_entries[i - 1]) where @inode's entry is unhashed and the template's
+ * entry is hashed, the unhashed entry is removed from the unhashed list and
+ * @inode is re-pointed at an entry carrying the template's hash, whose
+ * reference count is raised by @inode->i_nlink.
+ *
+ * @inode:		Inode whose stream entries may be replaced.
+ * @template_inode:	Inode supplying the hashes.
+ * @wim:		WIM whose lookup table receives or supplies the hashed
+ *			entries for @inode.
+ * @template_wim:	WIM whose lookup table is used to find the streams of
+ *			@template_inode (may be the same as @wim).
+ *
+ * Returns 0; there is currently no failure path.
+ */
+static int
+inode_copy_checksums(struct wim_inode *inode,
+ struct wim_inode *template_inode,
+ WIMStruct *wim,
+ WIMStruct *template_wim)
{
for (unsigned i = 0; i <= inode->i_num_ads; i++) {
- struct wim_lookup_table_entry *lte, *lte_template;
+ struct wim_lookup_table_entry *lte, *template_lte;
+ struct wim_lookup_table_entry *replace_lte;
+
+ lte = inode_stream_lte_resolved(inode, i);
+ template_lte = inode_stream_lte(template_inode, i,
+ template_wim->lookup_table);
+
+ /* Only take action if both entries exist, the entry for @inode
+ * has no checksum calculated, but the entry for @template_inode
+ * does. */
+ if (!lte || !template_lte ||
+ !lte->unhashed || template_lte->unhashed)
+ continue;
- lte = inode_stream_lte(inode, i, lookup_table);
- if (lte) {
- for (unsigned j = 0; j < inode->i_nlink; j++)
- lte_decrement_refcnt(lte, lookup_table);
- lte_template = inode_stream_lte(template_inode, i,
- lookup_table);
- if (i == 0)
- inode->i_lte = lte_template;
- else
- inode->i_ads_entries[i - 1].lte = lte_template;
- if (lte_template)
- lte_template->refcnt += inode->i_nlink;
+ wimlib_assert(lte->refcnt == inode->i_nlink);
+
+ /* If the WIM of the template image is the same as the WIM of
+ * the new image, then @template_lte can be used directly.
+ *
+ * Otherwise, look for a stream with the same hash in the WIM of
+ * the new image. If found, use it; otherwise re-use the entry
+ * being discarded, filling in the hash. */
+
+ if (wim == template_wim)
+ replace_lte = template_lte;
+ else
+ replace_lte = lookup_resource(wim->lookup_table,
+ template_lte->hash);
+
+ list_del(&lte->unhashed_list);
+ if (replace_lte) {
+ free_lookup_table_entry(lte);
+ } else {
+ copy_hash(lte->hash, template_lte->hash);
+ lte->unhashed = 0;
+ lookup_table_insert(wim->lookup_table, lte);
+ lte->refcnt = 0;
+ replace_lte = lte;
}
+
+ if (i == 0)
+ inode->i_lte = replace_lte;
+ else
+ inode->i_ads_entries[i - 1].lte = replace_lte;
+
+ replace_lte->refcnt += inode->i_nlink;
}
- inode->i_resolved = 1;
+ return 0;
}
+struct reference_template_args { /* argument bundle handed to dentry_reference_template() */
+ WIMStruct *wim; /* WIM containing the new image being processed */
+ WIMStruct *template_wim; /* WIM containing the template image (may be the same as wim) */
+};
+
static int
-dentry_reference_template(struct wim_dentry *dentry, void *_wim)
+dentry_reference_template(struct wim_dentry *dentry, void *_args)
{
int ret;
struct wim_dentry *template_dentry;
struct wim_inode *inode, *template_inode;
- WIMStruct *wim = _wim;
+ struct reference_template_args *args = _args;
+ WIMStruct *wim = args->wim;
+ WIMStruct *template_wim = args->template_wim;
if (dentry->d_inode->i_visited)
return 0;
if (ret)
return ret;
- template_dentry = get_dentry(wim, dentry->_full_path);
+ template_dentry = get_dentry(template_wim, dentry->_full_path);
if (!template_dentry) {
DEBUG("\"%"TS"\": newly added file", dentry->_full_path);
return 0;
inode = dentry->d_inode;
template_inode = template_dentry->d_inode;
- if (inode->i_last_write_time == template_inode->i_last_write_time
- && inode->i_creation_time == template_inode->i_creation_time
- && inode->i_last_access_time >= template_inode->i_last_access_time
- && inode_stream_sizes_consistent(inode, template_inode,
- wim->lookup_table))
- {
+ if (inode_metadata_consistent(inode, template_inode,
+ template_wim->lookup_table)) {
/*DEBUG("\"%"TS"\": No change detected", dentry->_full_path);*/
- inode_replace_ltes(inode, template_inode, wim->lookup_table);
+ ret = inode_copy_checksums(inode, template_inode,
+ wim, template_wim);
inode->i_visited = 1;
} else {
DEBUG("\"%"TS"\": change detected!", dentry->_full_path);
+ ret = 0;
}
- return 0;
+ return ret;
}
/* API function documented in wimlib.h */
WIMLIBAPI int
-wimlib_reference_template_image(WIMStruct *wim, int new_image, int template_image,
+wimlib_reference_template_image(WIMStruct *wim, int new_image,
+ WIMStruct *template_wim, int template_image,
int flags, wimlib_progress_func_t progress_func)
{
int ret;
struct wim_image_metadata *new_imd;
- if (new_image < 1 || new_image > wim->hdr.image_count)
- return WIMLIB_ERR_INVALID_IMAGE;
-
- if (template_image < 1 || template_image > wim->hdr.image_count)
- return WIMLIB_ERR_INVALID_IMAGE;
+ if (wim == NULL || template_wim == NULL)
+ return WIMLIB_ERR_INVALID_PARAM;
- if (new_image == template_image)
+ if (wim == template_wim && new_image == template_image)
return WIMLIB_ERR_INVALID_PARAM;
+ if (new_image < 1 || new_image > wim->hdr.image_count)
+ return WIMLIB_ERR_INVALID_IMAGE;
+
if (!wim_has_metadata(wim))
return WIMLIB_ERR_METADATA_NOT_FOUND;
if (!new_imd->modified)
return WIMLIB_ERR_INVALID_PARAM;
- ret = select_wim_image(wim, template_image);
+ ret = select_wim_image(template_wim, template_image);
if (ret)
return ret;
+ struct reference_template_args args = {
+ .wim = wim,
+ .template_wim = template_wim,
+ };
+
ret = for_dentry_in_tree(new_imd->root_dentry,
- dentry_reference_template, wim);
+ dentry_reference_template, &args);
dentry_tree_clear_inode_visited(new_imd->root_dentry);
return ret;
}