Fixes
authorEric Biggers <ebiggers3@gmail.com>
Sun, 31 Mar 2013 04:13:25 +0000 (23:13 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Sun, 31 Mar 2013 04:13:25 +0000 (23:13 -0500)
17 files changed:
src/add_image.c
src/delete_image.c
src/dentry.c
src/dentry.h
src/export_image.c
src/extract_image.c
src/join.c
src/lookup_table.c
src/lookup_table.h
src/mount_image.c
src/ntfs-capture.c
src/resource.c
src/split.c
src/wim.c
src/wimlib_internal.h
src/win32.c
src/write.c

index 904f354..e3f022a 100644 (file)
@@ -113,7 +113,8 @@ unix_capture_regular_file(const char *path,
                lte->file_on_disk = file_on_disk;
                lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
                lte->resource_entry.original_size = size;
-               lookup_table_insert_unhashed(lookup_table, lte, &inode->i_lte);
+               lookup_table_insert_unhashed(lookup_table, lte, inode, 0);
+               inode->i_lte = lte;
        }
        return 0;
 }
index c95819e..14f8e7e 100644 (file)
@@ -69,9 +69,8 @@ wimlib_delete_image(WIMStruct *w, int image)
                put_image_metadata(w->image_metadata[image - 1], w->lookup_table);
 
                /* Get rid of the empty slot in the image metadata array. */
-               memmove(w->image_metadata[image - 1],
-                       &w->image_metadata[image],
-                       (w->hdr.image_count - image) * sizeof(w->image_metadata[0]));
+               for (int i = image - 1; i < w->hdr.image_count - 1; i++)
+                       w->image_metadata[i] = w->image_metadata[i + 1];
 
                /* Decrement the image count. */
                --w->hdr.image_count;
index 16d9b41..7f0c1d0 100644 (file)
@@ -692,8 +692,8 @@ new_timeless_inode()
        if (inode) {
                inode->i_security_id = -1;
                inode->i_nlink = 1;
-       #ifdef WITH_FUSE
                inode->i_next_stream_id = 1;
+       #ifdef WITH_FUSE
                if (pthread_mutex_init(&inode->i_mutex, NULL) != 0) {
                        ERROR_WITH_ERRNO("Error initializing mutex");
                        FREE(inode);
@@ -1025,9 +1025,7 @@ do_inode_add_ads(struct wim_inode *inode, const void *stream_name,
        new_entry = &inode->i_ads_entries[num_ads - 1];
        if (init_ads_entry(new_entry, stream_name, stream_name_nbytes, is_utf16le))
                return NULL;
-#ifdef WITH_FUSE
        new_entry->stream_id = inode->i_next_stream_id++;
-#endif
        inode->i_num_ads = num_ads;
        return new_entry;
 }
index 299abc9..5f7ccd5 100644 (file)
@@ -81,11 +81,9 @@ struct wim_ads_entry {
        /* Stream name (UTF-16LE) */
        utf16lechar *stream_name;
 
-#ifdef WITH_FUSE
        /* Number to identify an alternate data stream even after it's possibly
         * been moved or renamed. */
        u32 stream_id;
-#endif
 };
 
 
@@ -286,15 +284,14 @@ struct wim_inode {
         * noted in the @attributes field.) */
        struct rb_root i_children;
 
+       /* Next alternate data stream ID to be assigned */
+       u32 i_next_stream_id;
+
 #ifdef WITH_FUSE
        /* wimfs file descriptors table for the inode */
        u16 i_num_opened_fds;
        u16 i_num_allocated_fds;
        struct wimfs_fd **i_fds;
-
-       /* Next alternate data stream ID to be assigned */
-       u32 i_next_stream_id;
-
        /* This mutex protects the inode's file descriptors table during
         * read-only mounts.  Read-write mounts are still restricted to 1
         * thread. */
index deeaddb..34564e2 100644 (file)
@@ -47,7 +47,7 @@ inode_allocate_needed_ltes(struct wim_inode *inode,
                                dest_lte = clone_lookup_table_entry(src_lte);
                                if (!dest_lte)
                                        return WIMLIB_ERR_NOMEM;
-                               list_add_tail(&dest_lte->new_stream_list,
+                               list_add_tail(&dest_lte->export_stream_list,
                                              lte_list_head);
                        }
                }
@@ -77,7 +77,7 @@ inode_move_ltes_to_table(struct wim_inode *inode,
                                list_del(next);
                                dest_lte = container_of(next,
                                                        struct wim_lookup_table_entry,
-                                                       new_stream_list);
+                                                       export_stream_list);
                                dest_lte->part_number = 1;
                                dest_lte->refcnt = 0;
                                wimlib_assert(hashes_equal(dest_lte->hash, src_lte->hash));
@@ -186,6 +186,13 @@ wimlib_export_image(WIMStruct *src_wim,
        if (ret)
                return ret;
 
+       ret = wim_checksum_unhashed_streams(src_wim);
+       if (ret)
+               return ret;
+       ret = wim_checksum_unhashed_streams(dest_wim);
+       if (ret)
+               return ret;
+
        if (num_additional_swms) {
                ret = new_joined_lookup_table(src_wim, additional_swms,
                                              num_additional_swms,
@@ -231,6 +238,7 @@ wimlib_export_image(WIMStruct *src_wim,
        /* The `struct image_metadata' is now referenced by both the @src_wim
         * and the @dest_wim. */
        src_imd->refcnt++;
+       src_imd->modified = 1;
 
        /* All memory allocations have been taken care of, so it's no longer
         * possible for this function to fail.  Go ahead and update the lookup
@@ -251,7 +259,7 @@ out_xml_delete_image:
 out_free_ltes:
        {
                struct wim_lookup_table_entry *lte, *tmp;
-               list_for_each_entry_safe(lte, tmp, &lte_list_head, new_stream_list)
+               list_for_each_entry_safe(lte, tmp, &lte_list_head, export_stream_list)
                        free_lookup_table_entry(lte);
        }
 out:
index dff7dd4..fa72edb 100644 (file)
@@ -1017,7 +1017,11 @@ wimlib_extract_image(WIMStruct *w,
        }
 
        ret = verify_swm_set(w, additional_swms, num_additional_swms);
-       if (ret != 0)
+       if (ret)
+               return ret;
+
+       ret = wim_checksum_unhashed_streams(w);
+       if (ret)
                return ret;
 
        if (num_additional_swms) {
index 0d0df25..b5c7c37 100644 (file)
@@ -109,10 +109,10 @@ join_wims(WIMStruct **swms, unsigned num_swms,
        }
 
        if (progress_func) {
-               progress.join.total_bytes        = total_bytes;
-               progress.join.total_parts        = swms[0]->hdr.total_parts;
-               progress.join.completed_bytes    = 0;
-               progress.join.completed_parts    = 0;
+               progress.join.total_bytes     = total_bytes;
+               progress.join.total_parts     = swms[0]->hdr.total_parts;
+               progress.join.completed_bytes = 0;
+               progress.join.completed_parts = 0;
                progress_func(WIMLIB_PROGRESS_MSG_JOIN_STREAMS, &progress);
        }
 
@@ -157,7 +157,6 @@ join_wims(WIMStruct **swms, unsigned num_swms,
        }
 
        /* Write lookup table, XML data, and optional integrity table */
-       joined_wim->hdr.image_count = swms[0]->hdr.image_count;
        for (i = 0; i < num_swms; i++)
                lookup_table_join(joined_wim->lookup_table, swms[i]->lookup_table);
 
@@ -205,8 +204,8 @@ wimlib_join(const tchar * const *swm_names,
        for (i = 0; i < num_swms; i++) {
                ret = wimlib_open_wim(swm_names[i], swm_open_flags, &swms[i],
                                      progress_func);
-               if (ret != 0)
-                       goto out;
+               if (ret)
+                       goto out_free_wims;
 
                /* Don't open all the parts at the same time, in case there are
                 * a lot of them */
@@ -217,20 +216,20 @@ wimlib_join(const tchar * const *swm_names,
        qsort(swms, num_swms, sizeof(swms[0]), cmp_swms_by_part_number);
 
        ret = verify_swm_set(swms[0], &swms[1], num_swms - 1);
-       if (ret != 0)
-               goto out;
+       if (ret)
+               goto out_free_wims;
 
        ret = wimlib_create_new_wim(wimlib_get_compression_type(swms[0]),
                                    &joined_wim);
-       if (ret != 0)
-               goto out;
+       if (ret)
+               goto out_free_wims;
 
        ret = begin_write(joined_wim, output_path, wim_write_flags);
-       if (ret != 0)
-               goto out;
+       if (ret)
+               goto out_free_wims;
        ret = join_wims(swms, num_swms, joined_wim, wim_write_flags,
                        progress_func);
-out:
+out_free_wims:
        for (i = 0; i < num_swms; i++)
                wimlib_free(swms[i]);
        wimlib_free(joined_wim);
index 8ab54b6..0ebaf61 100644 (file)
@@ -88,8 +88,11 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old)
        switch (new->resource_location) {
 #ifdef __WIN32__
        case RESOURCE_WIN32:
+       case RESOURCE_WIN32_ENCRYPTED:
 #endif
+#ifdef WITH_FUSE
        case RESOURCE_IN_STAGING_FILE:
+#endif
        case RESOURCE_IN_FILE_ON_DISK:
                BUILD_BUG_ON((void*)&old->file_on_disk !=
                             (void*)&old->staging_file_name);
@@ -323,8 +326,11 @@ for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table,
        qsort(lte_array, num_streams, sizeof(lte_array[0]),
              cmp_streams_by_wim_position);
        ret = 0;
-       for (size_t i = 0; i < num_streams && ret == 0; i++)
+       for (size_t i = 0; i < num_streams; i++) {
                ret = visitor(lte_array[i], arg);
+               if (ret)
+                       break;
+       }
        FREE(lte_array);
        return ret;
 }
@@ -634,7 +640,8 @@ print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out)
        tfprintf(out, T("Reference Count   = %u\n"), lte->refcnt);
 
        if (lte->unhashed) {
-               tfprintf(out, T("(Unhashed, back ptr at %p)\n"), lte->back_ptr);
+               tfprintf(out, T("(Unhashed: inode %p, stream_id = %u)\n"),
+                        lte->back_inode, lte->back_stream_id);
        } else {
                tfprintf(out, T("Hash              = 0x"));
                print_hash(lte->hash, out);
@@ -891,3 +898,68 @@ lookup_table_total_stream_size(struct wim_lookup_table *table)
        for_lookup_table_entry(table, lte_add_stream_size, &total_size);
        return total_size;
 }
+
+/* Returns the address of the pointer, stored in the owning inode or in one of
+ * its alternate data stream entries, that refers to the unhashed stream @lte.
+ * Stream ID 0 selects the inode's i_lte field; any other ID is compared with
+ * the stream_id of each ADS entry.  If no entry matches, an assertion fires
+ * and NULL is returned. */
+struct wim_lookup_table_entry **
+retrieve_lte_pointer(struct wim_lookup_table_entry *lte)
+{
+       struct wim_inode *owner;
+       u32 wanted_id;
+
+       wimlib_assert(lte->unhashed);
+       owner = lte->back_inode;
+       wanted_id = lte->back_stream_id;
+
+       if (wanted_id == 0)
+               return &owner->i_lte;
+
+       for (u16 idx = 0; idx < owner->i_num_ads; idx++) {
+               if (owner->i_ads_entries[idx].stream_id == wanted_id)
+                       return &owner->i_ads_entries[idx].lte;
+       }
+
+       wimlib_assert(0);
+       return NULL;
+}
+
+/*
+ * Checksums an unhashed stream and resolves it against the lookup table.
+ *
+ * @lte:           Unhashed stream entry (lte->unhashed must be set).  On
+ *                 success it is removed from the unhashed-stream list.
+ * @lookup_table:  Table searched for a stream with the same SHA1 digest.
+ * @lte_ret:       If non-NULL, receives the entry that now represents the
+ *                 stream: the pre-existing duplicate, or @lte itself.
+ *
+ * If a duplicate exists, @lte's reference count is transferred to it, the
+ * inode or ADS-entry pointer to @lte is redirected to the duplicate, and @lte
+ * is freed.  Otherwise @lte is inserted into @lookup_table as a hashed stream.
+ *
+ * Returns 0 on success, or the nonzero error code from sha1_resource(), in
+ * which case @lte has not been unlinked from the unhashed list.
+ */
+int
+hash_unhashed_stream(struct wim_lookup_table_entry *lte,
+                    struct wim_lookup_table *lookup_table,
+                    struct wim_lookup_table_entry **lte_ret)
+{
+       int ret;
+       struct wim_lookup_table_entry *duplicate_lte;
+       struct wim_lookup_table_entry **back_ptr;
+
+       wimlib_assert(lte->unhashed);
+
+       /* back_ptr must be saved because @back_inode and @back_stream_id are in
+        * union with the SHA1 message digest and will no longer be valid once
+        * the SHA1 has been calculated. */
+       back_ptr = retrieve_lte_pointer(lte);
+
+       ret = sha1_resource(lte);
+       if (ret)
+               return ret;
+
+       /* Look for a duplicate stream */
+       duplicate_lte = __lookup_resource(lookup_table, lte->hash);
+
+       /* Whichever branch is taken below, the stream stops being unhashed, so
+        * unlink it from the unhashed list exactly once, here.  Unlinking it a
+        * second time would operate on an already-removed list node. */
+       list_del(&lte->unhashed_list);
+       if (duplicate_lte) {
+               /* We have a duplicate stream.  Transfer the reference counts
+                * from this stream to the duplicate, update the reference to
+                * this stream (in an inode or ads_entry) to point to the
+                * duplicate, then free this stream. */
+               wimlib_assert(!(duplicate_lte->unhashed));
+               duplicate_lte->refcnt += lte->refcnt;
+               duplicate_lte->out_refcnt += lte->refcnt;
+               *back_ptr = duplicate_lte;
+               free_lookup_table_entry(lte);
+               lte = duplicate_lte;
+       } else {
+               /* No duplicate stream, so we need to insert
+                * this stream into the lookup table and treat
+                * it as a hashed stream. */
+               lookup_table_insert(lookup_table, lte);
+               lte->out_refcnt = lte->refcnt;
+               lte->unhashed = 0;
+       }
+       if (lte_ret)
+               *lte_ret = lte;
+       return 0;
+}
+
index 81cb50a..48ec965 100644 (file)
@@ -159,11 +159,14 @@ struct wim_lookup_table_entry {
                 * table. */
                size_t hash_short;
 
-               /* Unhashed entries only (unhashed == 1): this points directly
-                * to the pointer to this 'struct wim_lookup_table_entry'
-                * contained in a 'struct wim_ads_entry' or 'struct wim_inode'.
-                * */
-               struct wim_lookup_table_entry **back_ptr;
+               /* Unhashed entries only (unhashed == 1): these variables make
+                * it possible to find the pointer to this 'struct
+                * wim_lookup_table_entry' contained in a 'struct wim_ads_entry'
+                * or 'struct wim_inode'.  */
+               struct {
+                       struct wim_inode *back_inode;
+                       u32 back_stream_id;
+               };
        };
 
        /* When a WIM file is written, out_refcnt starts at 0 and is incremented
@@ -188,17 +191,12 @@ struct wim_lookup_table_entry {
        #endif
        };
 
-       /* Pointer to inode that contains the opened file descriptors to
-        * this stream (valid iff resource_location ==
-        * RESOURCE_IN_STAGING_FILE) */
-       struct wim_inode *lte_inode;
-
        u32 real_refcnt;
 
        union {
-               #ifdef WITH_FUSE
+       #ifdef WITH_FUSE
                u16 num_opened_fds;
-               #endif
+       #endif
 
                /* This field is used for the special hardlink or symlink image
                 * extraction mode.   In these mode, all identical files are linked
@@ -225,6 +223,13 @@ struct wim_lookup_table_entry {
 
                        struct list_head write_streams_list;
                };
+
+       #ifdef WITH_FUSE
+               /* Pointer to inode that contains the opened file descriptors to
+                * this stream (valid when resource_location ==
+                * RESOURCE_IN_STAGING_FILE) */
+               struct wim_inode *lte_inode;
+       #endif
        };
 
        /* Temporary list fields */
@@ -232,7 +237,7 @@ struct wim_lookup_table_entry {
                struct list_head unhashed_list;
                struct list_head swm_stream_list;
                struct list_head extraction_list;
-               struct list_head new_stream_list;
+               struct list_head export_stream_list;
        };
 };
 
@@ -300,6 +305,7 @@ lookup_table_insert(struct wim_lookup_table *table, struct wim_lookup_table_entr
 static inline void
 lookup_table_unlink(struct wim_lookup_table *table, struct wim_lookup_table_entry *lte)
 {
+       wimlib_assert(!lte->unhashed);
        hlist_del(&lte->hash_list);
        wimlib_assert(table->num_entries != 0);
        table->num_entries--;
@@ -485,12 +491,21 @@ lookup_table_total_stream_size(struct wim_lookup_table *table);
 static inline void
 lookup_table_insert_unhashed(struct wim_lookup_table *table,
                             struct wim_lookup_table_entry *lte,
-                            struct wim_lookup_table_entry **back_ptr)
+                            struct wim_inode *back_inode,
+                            u32 back_stream_id)
 {
        lte->unhashed = 1;
+       lte->back_inode = back_inode;
+       lte->back_stream_id = back_stream_id;
        list_add_tail(&lte->unhashed_list, table->unhashed_streams);
-       lte->back_ptr = back_ptr;
-       *back_ptr = lte;
 }
 
+extern int
+hash_unhashed_stream(struct wim_lookup_table_entry *lte,
+                    struct wim_lookup_table *lookup_table,
+                    struct wim_lookup_table_entry **lte_ret);
+
+extern struct wim_lookup_table_entry **
+retrieve_lte_pointer(struct wim_lookup_table_entry *lte);
+
 #endif
index 82b80b9..d0b5d2c 100644 (file)
@@ -642,26 +642,14 @@ extract_resource_to_staging_dir(struct wim_inode *inode,
                }
        }
 
-       new_lte->refcnt            = inode->i_nlink;
-       new_lte->resource_location = RESOURCE_IN_STAGING_FILE;
-       new_lte->staging_file_name = staging_file_name;
-       new_lte->lte_inode         = inode;
-
-       struct wim_lookup_table_entry **back_ptr;
-
-       if (stream_id == 0) {
-               back_ptr = &inode->i_lte;
-       } else {
-               for (u16 i = 0; ; i++) {
-                       wimlib_assert(i < inode->i_num_ads);
-                       if (inode->i_ads_entries[i].stream_id == stream_id) {
-                               back_ptr = &inode->i_ads_entries[i].lte;
-                               break;
-                       }
-               }
-       }
-
-       lookup_table_insert_unhashed(ctx->wim->lookup_table, new_lte, back_ptr);
+       new_lte->refcnt                       = inode->i_nlink;
+       new_lte->resource_location            = RESOURCE_IN_STAGING_FILE;
+       new_lte->staging_file_name            = staging_file_name;
+       new_lte->lte_inode                    = inode;
+       new_lte->resource_entry.original_size = size;
+
+       lookup_table_insert_unhashed(ctx->wim->lookup_table, new_lte,
+                                    inode, stream_id);
        *lte = new_lte;
        return 0;
 out_revert_fd_changes:
@@ -802,21 +790,26 @@ rebuild_wim(struct wimfs_context *ctx, int write_flags,
            wimlib_progress_func_t progress_func)
 {
        int ret;
-       struct wim_lookup_table_entry *lte;
+       struct wim_lookup_table_entry *lte, *tmp;
        WIMStruct *w = ctx->wim;
        struct wim_image_metadata *imd = wim_get_current_image_metadata(ctx->wim);
 
        DEBUG("Closing all staging file descriptors.");
-       image_for_each_unhashed_stream(lte, imd) {
+       image_for_each_unhashed_stream_safe(lte, tmp, imd) {
                ret = inode_close_fds(lte->lte_inode);
                if (ret)
                        return ret;
        }
 
        DEBUG("Freeing entries for zero-length streams");
-       image_for_each_unhashed_stream(lte, imd) {
+       image_for_each_unhashed_stream_safe(lte, tmp, imd) {
+               wimlib_assert(lte->unhashed);
                if (wim_resource_size(lte) == 0) {
-                       *lte->back_ptr = NULL;
+                       print_lookup_table_entry(lte, stderr);
+                       struct wim_lookup_table_entry **back_ptr;
+                       back_ptr = retrieve_lte_pointer(lte);
+                       *back_ptr = NULL;
+                       list_del(&lte->unhashed_list);
                        free_lookup_table_entry(lte);
                }
        }
@@ -1813,6 +1806,7 @@ wimfs_open(const char *path, struct fuse_file_info *fi)
        u16 stream_idx;
        u32 stream_id;
        struct wimfs_context *ctx = wimfs_get_context();
+       struct wim_lookup_table_entry **back_ptr;
 
        ret = lookup_resource(ctx->wim, path, get_lookup_flags(ctx),
                              &dentry, &lte, &stream_idx);
@@ -1821,10 +1815,13 @@ wimfs_open(const char *path, struct fuse_file_info *fi)
 
        inode = dentry->d_inode;
 
-       if (stream_idx == 0)
+       if (stream_idx == 0) {
                stream_id = 0;
-       else
+               back_ptr = &inode->i_lte;
+       } else {
                stream_id = inode->i_ads_entries[stream_idx - 1].stream_id;
+               back_ptr = &inode->i_ads_entries[stream_idx - 1].lte;
+       }
 
        /* The file resource may be in the staging directory (read-write mounts
         * only) or in the WIM.  If it's in the staging directory, we need to
@@ -1840,7 +1837,9 @@ wimfs_open(const char *path, struct fuse_file_info *fi)
                                                      &lte, size, ctx);
                if (ret != 0)
                        return ret;
+               *back_ptr = lte;
        }
+       print_lookup_table_entry(lte, stderr);
 
        ret = alloc_wimfs_fd(inode, stream_id, lte, &fd,
                             wimfs_ctx_readonly(ctx));
@@ -2209,24 +2208,23 @@ wimfs_truncate(const char *path, off_t size)
        if (lte == NULL && size == 0)
                return 0;
 
-       inode = dentry->d_inode;
-       if (stream_idx == 0)
-               stream_id = 0;
-       else
-               stream_id = inode->i_ads_entries[stream_idx - 1].stream_id;
-
        if (lte->resource_location == RESOURCE_IN_STAGING_FILE) {
                ret = truncate(lte->staging_file_name, size);
-               if (ret != 0)
+               if (ret)
                        ret = -errno;
+               else
+                       lte->resource_entry.original_size = size;
        } else {
                /* File in WIM.  Extract it to the staging directory, but only
                 * the first @size bytes of it. */
+               inode = dentry->d_inode;
+               if (stream_idx == 0)
+                       stream_id = 0;
+               else
+                       stream_id = inode->i_ads_entries[stream_idx - 1].stream_id;
                ret = extract_resource_to_staging_dir(inode, stream_id,
                                                      &lte, size, ctx);
        }
-       if (ret == 0)
-               lte->resource_entry.original_size = size;
        return ret;
 }
 
@@ -2329,8 +2327,12 @@ wimfs_write(const char *path, const char *buf, size_t size,
                return -errno;
 
        /* Update file size */
-       if (offset + size > fd->f_lte->resource_entry.original_size)
+       if (offset + size > fd->f_lte->resource_entry.original_size) {
+               DEBUG("Update file size %"PRIu64 " => %"PRIu64"",
+                     fd->f_lte->resource_entry.original_size,
+                     offset + size);
                fd->f_lte->resource_entry.original_size = offset + size;
+       }
 
        /* Update timestamps */
        touch_inode(fd->f_inode);
@@ -2444,6 +2446,10 @@ wimlib_mount_image(WIMStruct *wim, int image, const char *dir,
                        goto out;
        }
 
+       ret = wim_checksum_unhashed_streams(wim);
+       if (ret)
+               goto out;
+
        ret = select_wim_image(wim, image);
        if (ret)
                goto out;
@@ -2486,6 +2492,7 @@ wimlib_mount_image(WIMStruct *wim, int image, const char *dir,
        ctx.image_inode_list = &imd->inode_list;
        ctx.default_uid = getuid();
        ctx.default_gid = getgid();
+       wimlib_assert(list_empty(&imd->unhashed_streams));
        ctx.wim->lookup_table->unhashed_streams = &imd->unhashed_streams;
        if (mount_flags & WIMLIB_MOUNT_FLAG_STREAM_INTERFACE_WINDOWS)
                ctx.default_lookup_flags = LOOKUP_FLAG_ADS_OK;
index 7f4f2bb..327040b 100644 (file)
@@ -165,7 +165,8 @@ capture_ntfs_streams(struct wim_inode *inode,
        {
                u64 data_size = ntfs_get_attribute_value_length(actx->attr);
                u64 name_length = actx->attr->name_length;
-               struct wim_lookup_table_entry **back_ptr;
+               u32 stream_id;
+
                if (data_size == 0) {
                        if (errno != 0) {
                                ERROR_WITH_ERRNO("Failed to get size of attribute of "
@@ -224,7 +225,8 @@ capture_ntfs_streams(struct wim_inode *inode,
                                ret = WIMLIB_ERR_NTFS_3G;
                                goto out_free_lte;
                        }
-                       back_ptr = &inode->i_lte;
+                       stream_id = 0;
+                       inode->i_lte = lte;
                } else {
                        /* Named data stream.  Put the reference to it in the
                         * alternate data stream entries */
@@ -236,9 +238,13 @@ capture_ntfs_streams(struct wim_inode *inode,
                        if (!new_ads_entry)
                                goto out_free_lte;
                        wimlib_assert(new_ads_entry->stream_name_nbytes == name_length * 2);
-                       back_ptr = &new_ads_entry->lte;
+                       stream_id = new_ads_entry->stream_id;
+                       new_ads_entry->lte = lte;
+               }
+               if (lte) {
+                       lookup_table_insert_unhashed(lookup_table, lte,
+                                                    inode, stream_id);
                }
-               lookup_table_insert_unhashed(lookup_table, lte, back_ptr);
        }
        ret = 0;
        goto out_put_actx;
index 5237b88..dc5b7a8 100644 (file)
@@ -593,7 +593,11 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
        }
        goto out_release_fp;
 read_error:
-       ERROR_WITH_ERRNO("Error reading data from WIM");
+       if (ferror(wim_fp)) {
+               ERROR_WITH_ERRNO("Error reading data from WIM");
+       } else {
+               ERROR("Unexpected EOF in WIM!");
+       }
        ret = WIMLIB_ERR_READ;
 out_release_fp:
        if (flags & WIMLIB_RESOURCE_FLAG_THREADSAFE_READ)
@@ -838,6 +842,28 @@ extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte,
        return extract_wim_resource(lte, size, extract_wim_chunk_to_fd, &fd);
 }
 
+
+/* Chunk callback handed to read_resource_prefix(): folds @len bytes at @buf
+ * into the SHA1 context passed through @ctx.  Always returns 0. */
+static int
+sha1_chunk(const void *buf, size_t len, void *ctx)
+{
+       void *digest_state = ctx;
+
+       sha1_update(digest_state, buf, len);
+       return 0;
+}
+
+/* Computes the SHA1 message digest over wim_resource_size(lte) bytes of the
+ * stream @lte and stores it in lte->hash.  If read_resource_prefix() returns
+ * nonzero, that code is returned and lte->hash is not written. */
+int
+sha1_resource(struct wim_lookup_table_entry *lte)
+{
+       SHA_CTX digest_ctx;
+       int status;
+
+       sha1_init(&digest_ctx);
+       status = read_resource_prefix(lte, wim_resource_size(lte),
+                                     sha1_chunk, &digest_ctx, 0);
+       if (status != 0)
+               return status;
+
+       sha1_final(lte->hash, &digest_ctx);
+       return 0;
+}
+
 /*
  * Copies the file resource specified by the lookup table entry @lte from the
  * input WIM to the output WIM that has its FILE * given by
index a24ac4b..6a2a0b4 100644 (file)
@@ -145,6 +145,10 @@ wimlib_split(WIMStruct *w, const tchar *swm_name,
 
        write_flags &= WIMLIB_WRITE_MASK_PUBLIC;
 
+       ret = wim_checksum_unhashed_streams(w);
+       if (ret)
+               return ret;
+
        swm_name_len = tstrlen(swm_name);
        tchar swm_base_name[swm_name_len + 20];
 
@@ -190,6 +194,7 @@ wimlib_split(WIMStruct *w, const tchar *swm_name,
        for (int i = 0; i < w->hdr.image_count; i++) {
                struct wim_lookup_table_entry *metadata_lte;
                metadata_lte = w->image_metadata[i]->metadata_lte;
+               print_lookup_table_entry(metadata_lte, stderr);
                ret = copy_resource(metadata_lte, w);
                if (ret)
                        goto out;
index 4060fa5..8f579f2 100644 (file)
--- a/src/wim.c
+++ b/src/wim.c
@@ -616,6 +616,24 @@ new_image_metadata_array(unsigned num_images)
        return imd_array;
 }
 
+/* Runs hash_unhashed_stream() against w->lookup_table for every entry on the
+ * unhashed_streams list of every image in @w.  Stops at, and returns, the
+ * first nonzero error code; returns 0 if every stream was processed. */
+int
+wim_checksum_unhashed_streams(WIMStruct *w)
+{
+       for (int image_idx = 0; image_idx < w->hdr.image_count; image_idx++) {
+               struct wim_lookup_table_entry *cur, *next;
+
+               /* The _safe iterator is needed because hash_unhashed_stream()
+                * unlinks the current entry from this list. */
+               list_for_each_entry_safe(cur, next,
+                                        &w->image_metadata[image_idx]->unhashed_streams,
+                                        unhashed_list)
+               {
+                       int status = hash_unhashed_stream(cur, w->lookup_table,
+                                                         NULL);
+                       if (status != 0)
+                               return status;
+               }
+       }
+       return 0;
+}
+
 /* Frees the memory for the WIMStruct, including all internal memory; also
  * closes all files associated with the WIMStruct.  */
 WIMLIBAPI void
index a7ac5dc..f07935c 100644 (file)
@@ -369,6 +369,9 @@ resource_is_compressed(const struct resource_entry *entry)
 #define image_for_each_unhashed_stream(lte, imd) \
        list_for_each_entry(lte, &imd->unhashed_streams, unhashed_list)
 
+/* Variant of image_for_each_unhashed_stream() built on
+ * list_for_each_entry_safe(); @tmp is the extra cursor that macro requires.
+ * Used where the loop body removes @lte from imd->unhashed_streams. */
+#define image_for_each_unhashed_stream_safe(lte, tmp, imd) \
+       list_for_each_entry_safe(lte, tmp, &imd->unhashed_streams, unhashed_list)
+
 #if 1
 #  define copy_resource_entry(dst, src) memcpy(dst, src, sizeof(struct resource_entry))
 #else
@@ -594,6 +597,9 @@ extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte,
                           int fd, u64 size);
 
 extern int
+sha1_resource(struct wim_lookup_table_entry *lte);
+
+extern int
 copy_resource(struct wim_lookup_table_entry *lte, void *w);
 
 /* security.c */
@@ -660,6 +666,9 @@ new_image_metadata();
 extern struct wim_image_metadata **
 new_image_metadata_array(unsigned num_images);
 
+extern int
+wim_checksum_unhashed_streams(WIMStruct *w);
+
 /* write.c */
 
 /* Internal use only */
index e8ef17a..f2e7eeb 100644 (file)
@@ -586,13 +586,16 @@ win32_capture_stream(const wchar_t *path,
        lte->resource_location = RESOURCE_WIN32;
        lte->resource_entry.original_size = (u64)dat->StreamSize.QuadPart;
 
-       struct wim_lookup_table_entry **back_ptr;
-       if (is_named_stream)
-               back_ptr = &ads_entry->lte;
-       else
-               back_ptr = &inode->i_lte;
+       u32 stream_id;
+       if (is_named_stream) {
+               stream_id = ads_entry->stream_id;
+               ads_entry->lte = lte;
+       } else {
+               stream_id = 0;
+               inode->i_lte = lte;
+       }
 
-       lookup_table_insert_unhashed(lookup_table, lte, back_ptr);
+       lookup_table_insert_unhashed(lookup_table, lte, inode, stream_id);
 out_free_spath:
        FREE(spath);
 out:
index 159fd57..ba3b315 100644 (file)
@@ -312,6 +312,7 @@ write_wim_resource(struct wim_lookup_table_entry *lte,
                   int flags)
 {
        struct write_resource_ctx write_ctx;
+       u64 read_size;
        u64 new_size;
        off_t offset;
        int ret;
@@ -343,9 +344,11 @@ write_wim_resource(struct wim_lookup_table_entry *lte,
        {
                flags |= WIMLIB_RESOURCE_FLAG_RAW;
                write_ctx.doing_sha = false;
+               read_size = lte->resource_entry.size;
        } else {
                write_ctx.doing_sha = true;
                sha1_init(&write_ctx.sha_ctx);
+               read_size = lte->resource_entry.original_size;
        }
 
        /* Initialize the chunk table and set the compression function if
@@ -367,8 +370,10 @@ write_wim_resource(struct wim_lookup_table_entry *lte,
         * the data through the write_resource_cb function. */
        write_ctx.out_fp = out_fp;
 try_write_again:
-       ret = read_resource_prefix(lte, wim_resource_size(lte),
+       ret = read_resource_prefix(lte, read_size,
                                   write_resource_cb, &write_ctx, flags);
+       if (ret)
+               goto out_free_chunk_tab;
 
        /* Verify SHA1 message digest of the resource, or set the hash for the
         * first time. */
@@ -602,27 +607,6 @@ do_write_streams_progress(union wimlib_progress_info *progress,
        }
 }
 
-static int
-sha1_chunk(const void *buf, size_t len, void *ctx)
-{
-       sha1_update(ctx, buf, len);
-       return 0;
-}
-
-static int
-sha1_resource(struct wim_lookup_table_entry *lte)
-{
-       int ret;
-       SHA_CTX sha_ctx;
-
-       sha1_init(&sha_ctx);
-       ret = read_resource_prefix(lte, wim_resource_size(lte),
-                                  sha1_chunk, &sha_ctx, 0);
-       if (ret == 0)
-               sha1_final(lte->hash, &sha_ctx);
-       return ret;
-}
-
 enum {
        STREAMS_MERGED = 0,
        STREAMS_NOT_MERGED = 1,
@@ -647,52 +631,21 @@ do_write_stream_list(struct list_head *stream_list,
                                   write_streams_list);
                list_del(&lte->write_streams_list);
                if (lte->unhashed && !lte->unique_size) {
-
                        /* Unhashed stream that shares a size with some other
                         * stream in the WIM we are writing.  The stream must be
                         * checksummed to know if we need to write it or not. */
-                       struct wim_lookup_table_entry *duplicate_lte;
-                       struct wim_lookup_table_entry **back_ptr;
+                       struct wim_lookup_table_entry *tmp;
+                       u32 orig_refcnt = lte->out_refcnt;
 
-                       /* back_ptr must be saved because it's in union with the
-                        * SHA1 message digest and will no longer be valid once
-                        * the SHA1 has been calculated. */
-                       back_ptr = lte->back_ptr;
-
-                       /* Checksum the stream */
-                       ret = sha1_resource(lte);
+                       ret = hash_unhashed_stream(lte,
+                                                  lookup_table,
+                                                  &tmp);
                        if (ret)
                                return ret;
-
-                       /* Look for a duplicate stream */
-                       duplicate_lte = __lookup_resource(lookup_table, lte->hash);
-                       if (duplicate_lte) {
-                               /* We have a duplicate stream.  Transfer the
-                                * reference counts from this stream to the
-                                * duplicate, update the reference to this
-                                * stream (in an inode or ads_entry) to point to
-                                * the duplicate, then free this stream. */
-                               wimlib_assert(!(duplicate_lte->unhashed));
-                               bool is_new_stream = (duplicate_lte->out_refcnt == 0);
-                               duplicate_lte->refcnt += lte->refcnt;
-                               duplicate_lte->out_refcnt += lte->refcnt;
-                               *back_ptr = duplicate_lte;
-                               list_del(&lte->unhashed_list);
-                               free_lookup_table_entry(lte);
-                               lte = duplicate_lte;
-
-                               if (is_new_stream) {
-                                       /* The duplicate stream is one we
-                                        * weren't already planning to write.
-                                        * But, now we must write it.
-                                        *
-                                        * XXX:  Currently, the copy of the
-                                        * stream in the WIM is always chosen
-                                        * for writing, rather than the extra
-                                        * copy we just read (which may be in an
-                                        * external file).  This may not always
-                                        * be fastest. */
-                               } else {
+                       if (tmp != lte) {
+                               lte = tmp;
+                               /* We found a duplicate stream. */
+                               if (orig_refcnt != tmp->out_refcnt) {
                                        /* We have already written, or are going
                                         * to write, the duplicate stream.  So
                                         * just skip to the next stream. */
@@ -700,28 +653,17 @@ do_write_stream_list(struct list_head *stream_list,
                                              wim_resource_size(lte));
                                        goto skip_to_progress;
                                }
-
-                       } else {
-                               /* No duplicate stream, so we need to insert
-                                * this stream into the lookup table and treat
-                                * it as a hashed stream. */
-                               list_del(&lte->unhashed_list);
-                               lookup_table_insert(lookup_table, lte);
-                               lte->out_refcnt = lte->refcnt;
-                               lte->unhashed = 0;
                        }
                }
 
-               /* Here, @lte either a hashed stream or an unhashed stream with
-                * a unique size.  In either case we know that the stream has to
-                * be written.  In either case the SHA1 message digest will be
-                * calculated over the stream while writing it; however, in the
-                * former case this is done merely to check the data, while in
-                * the latter case this is done because we do not have the SHA1
-                * message digest yet.  */
-
+               /* Here, @lte is either a hashed stream or an unhashed stream
+                * with a unique size.  In either case we know that the stream
+                * has to be written.  In either case the SHA1 message digest
+                * will be calculated over the stream while writing it; however,
+                * in the former case this is done merely to check the data,
+                * while in the latter case this is done because we do not have
+                * the SHA1 message digest yet.  */
                wimlib_assert(lte->out_refcnt != 0);
-
                ret = write_wim_resource(lte,
                                         out_fp,
                                         out_ctype,
@@ -1577,11 +1519,8 @@ image_find_streams_to_write(WIMStruct *w)
        ctx = w->private;
        imd = wim_get_current_image_metadata(w);
 
-       image_for_each_unhashed_stream(lte, imd) {
+       image_for_each_unhashed_stream(lte, imd)
                lte->out_refcnt = 0;
-               wimlib_assert(lte->unhashed);
-               wimlib_assert(lte->back_ptr != NULL);
-       }
 
        /* Go through this image's inodes to find any streams that have not been
         * found yet. */
@@ -1697,43 +1636,43 @@ finish_write(WIMStruct *w, int image, int write_flags,
 
        if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
                ret = write_lookup_table(w, image, &hdr.lookup_table_res_entry);
-               if (ret != 0)
-                       goto out;
+               if (ret)
+                       goto out_close_wim;
        }
 
        ret = write_xml_data(w->wim_info, image, out,
                             (write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE) ?
                              wim_info_get_total_bytes(w->wim_info) : 0,
                             &hdr.xml_res_entry);
-       if (ret != 0)
-               goto out;
+       if (ret)
+               goto out_close_wim;
 
        if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
                if (write_flags & WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) {
                        struct wim_header checkpoint_hdr;
                        memcpy(&checkpoint_hdr, &hdr, sizeof(struct wim_header));
                        memset(&checkpoint_hdr.integrity, 0, sizeof(struct resource_entry));
-                       if (fseeko(out, 0, SEEK_SET) != 0) {
+                       if (fseeko(out, 0, SEEK_SET)) {
                                ERROR_WITH_ERRNO("Failed to seek to beginning "
                                                 "of WIM being written");
                                ret = WIMLIB_ERR_WRITE;
-                               goto out;
+                               goto out_close_wim;
                        }
                        ret = write_header(&checkpoint_hdr, out);
-                       if (ret != 0)
-                               goto out;
+                       if (ret)
+                               goto out_close_wim;
 
                        if (fflush(out) != 0) {
                                ERROR_WITH_ERRNO("Can't write data to WIM");
                                ret = WIMLIB_ERR_WRITE;
-                               goto out;
+                               goto out_close_wim;
                        }
 
                        if (fseeko(out, 0, SEEK_END) != 0) {
                                ERROR_WITH_ERRNO("Failed to seek to end "
                                                 "of WIM being written");
                                ret = WIMLIB_ERR_WRITE;
-                               goto out;
+                               goto out_close_wim;
                        }
                }
 
@@ -1753,8 +1692,8 @@ finish_write(WIMStruct *w, int image, int write_flags,
                                            new_lookup_table_end,
                                            old_lookup_table_end,
                                            progress_func);
-               if (ret != 0)
-                       goto out;
+               if (ret)
+                       goto out_close_wim;
        } else {
                memset(&hdr.integrity, 0, sizeof(struct resource_entry));
        }
@@ -1790,12 +1729,12 @@ finish_write(WIMStruct *w, int image, int write_flags,
                ERROR_WITH_ERRNO("Failed to seek to beginning of WIM "
                                 "being written");
                ret = WIMLIB_ERR_WRITE;
-               goto out;
+               goto out_close_wim;
        }
 
        ret = write_header(&hdr, out);
        if (ret)
-               goto out;
+               goto out_close_wim;
 
        if (write_flags & WIMLIB_WRITE_FLAG_FSYNC) {
                if (fflush(out) != 0
@@ -1805,7 +1744,7 @@ finish_write(WIMStruct *w, int image, int write_flags,
                        ret = WIMLIB_ERR_WRITE;
                }
        }
-out:
+out_close_wim:
        if (fclose(out) != 0) {
                ERROR_WITH_ERRNO("Failed to close the WIM file");
                if (ret == 0)
@@ -2038,6 +1977,8 @@ overwrite_wim_inplace(WIMStruct *w, int write_flags,
        if (!w->deletion_occurred && !any_images_modified(w)) {
                /* If no images have been modified and no images have been
                 * deleted, a new lookup table does not need to be written. */
+               DEBUG("Skipping writing lookup table "
+                     "(no images modified or deleted)");
                old_wim_end = w->hdr.lookup_table_res_entry.offset +
                              w->hdr.lookup_table_res_entry.size;
                write_flags |= WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE |