]> wimlib.net Git - wimlib/blobdiff - src/write.c
Fix sequential extraction, and include progress info
[wimlib] / src / write.c
index e48d3c91a8dc95692abc106661c1e56d89eddee9..61a29962f6916521bc1ca255ccb7b39bc2162cb0 100644 (file)
@@ -382,7 +382,8 @@ int write_wim_resource(struct lookup_table_entry *lte,
        /* Are the compression types the same?  If so, do a raw copy (copy
         * without decompressing and recompressing the data). */
        raw = (wim_resource_compression_type(lte) == out_ctype
-              && out_ctype != WIM_COMPRESSION_TYPE_NONE);
+              && out_ctype != WIM_COMPRESSION_TYPE_NONE
+              && !(flags & WIMLIB_RESOURCE_FLAG_RECOMPRESS));
 
        if (raw) {
                flags |= WIMLIB_RESOURCE_FLAG_RAW;
@@ -649,16 +650,17 @@ static void *compressor_thread_proc(void *arg)
 }
 #endif
 
-static void show_stream_write_progress(u64 *cur_size, u64 *next_size,
-                                      u64 total_size, u64 one_percent,
-                                      unsigned *cur_percent,
-                                      const struct lookup_table_entry *cur_lte)
+void show_stream_op_progress(u64 *cur_size, u64 *next_size,
+                            u64 total_size, u64 one_percent,
+                            unsigned *cur_percent,
+                            const struct lookup_table_entry *cur_lte,
+                            const char *op)
 {
        if (*cur_size >= *next_size) {
                printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
-                      "(uncompressed) written (%u%% done)",
+                      "(uncompressed) %s (%u%% done)",
                       *cur_size >> 20,
-                      total_size >> 20, *cur_percent);
+                      total_size >> 20, op, *cur_percent);
                fflush(stdout);
                *next_size += one_percent;
                (*cur_percent)++;
@@ -666,11 +668,11 @@ static void show_stream_write_progress(u64 *cur_size, u64 *next_size,
        *cur_size += wim_resource_size(cur_lte);
 }
 
-static void finish_stream_write_progress(u64 total_size)
+void finish_stream_op_progress(u64 total_size, const char *op)
 {
        printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
-              "(uncompressed) written (100%% done)\n",
-              total_size >> 20, total_size >> 20);
+              "(uncompressed) %s (100%% done)\n",
+              total_size >> 20, total_size >> 20, op);
        fflush(stdout);
 }
 
@@ -685,20 +687,25 @@ static int write_stream_list_serial(struct list_head *stream_list,
        u64 cur_size = 0;
        u64 next_size = 0;
        unsigned cur_percent = 0;
+       int write_resource_flags = 0;
+
+       if (write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS)
+               write_resource_flags |= WIMLIB_RESOURCE_FLAG_RECOMPRESS;
 
        list_for_each_entry(lte, stream_list, staging_list) {
                if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
-                       show_stream_write_progress(&cur_size, &next_size,
-                                                  total_size, one_percent,
-                                                  &cur_percent, lte);
+                       show_stream_op_progress(&cur_size, &next_size,
+                                               total_size, one_percent,
+                                               &cur_percent, lte, "written");
                }
                ret = write_wim_resource(lte, out_fp, out_ctype,
-                                        &lte->output_resource_entry, 0);
+                                        &lte->output_resource_entry,
+                                        write_resource_flags);
                if (ret != 0)
                        return ret;
        }
        if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
-               finish_stream_write_progress(total_size);
+               finish_stream_op_progress(total_size, "written");
        return 0;
 }
 
@@ -748,7 +755,6 @@ static int main_writer_thread_proc(struct list_head *stream_list,
 {
        int ret;
 
-
        struct message msgs[queue_size];
        ZERO_ARRAY(msgs);
 
@@ -942,8 +948,9 @@ static int main_writer_thread_proc(struct list_head *stream_list,
                                                        struct lookup_table_entry,
                                                        staging_list);
                                next_resource = next_resource->next;
-                               if ((next_lte->resource_location == RESOURCE_IN_WIM
-                                   && wimlib_get_compression_type(next_lte->wim) == out_ctype)
+                               if ((!(write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS)
+                                     && next_lte->resource_location == RESOURCE_IN_WIM
+                                     && wimlib_get_compression_type(next_lte->wim) == out_ctype)
                                    || wim_resource_size(next_lte) == 0)
                                {
                                        list_add_tail(&next_lte->staging_list,
@@ -1004,12 +1011,13 @@ static int main_writer_thread_proc(struct list_head *stream_list,
                        if (msg->begin_chunk == 0) {
                                DEBUG2("Begin chunk tab");
                                if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
-                                       show_stream_write_progress(&cur_size,
-                                                                  &next_size,
-                                                                  total_size,
-                                                                  one_percent,
-                                                                  &cur_percent,
-                                                                  cur_lte);
+                                       show_stream_op_progress(&cur_size,
+                                                               &next_size,
+                                                               total_size,
+                                                               one_percent,
+                                                               &cur_percent,
+                                                               cur_lte,
+                                                               "written");
                                }
 
                                // This is the first set of chunks.  Leave space
@@ -1095,12 +1103,13 @@ static int main_writer_thread_proc(struct list_head *stream_list,
                                                         staging_list)
                                {
                                        if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
-                                               show_stream_write_progress(&cur_size,
-                                                                          &next_size,
-                                                                          total_size,
-                                                                          one_percent,
-                                                                          &cur_percent,
-                                                                          lte);
+                                               show_stream_op_progress(&cur_size,
+                                                                       &next_size,
+                                                                       total_size,
+                                                                       one_percent,
+                                                                       &cur_percent,
+                                                                       lte,
+                                                                       "written");
                                        }
 
                                        ret = write_wim_resource(lte,
@@ -1125,12 +1134,13 @@ out:
        if (ret == 0) {
                list_for_each_entry(lte, &my_resources, staging_list) {
                        if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
-                               show_stream_write_progress(&cur_size,
-                                                          &next_size,
-                                                          total_size,
-                                                          one_percent,
-                                                          &cur_percent,
-                                                          lte);
+                               show_stream_op_progress(&cur_size,
+                                                       &next_size,
+                                                       total_size,
+                                                       one_percent,
+                                                       &cur_percent,
+                                                       lte,
+                                                       "written");
                        }
                        ret = write_wim_resource(lte, out_fp,
                                                 out_ctype,
@@ -1140,7 +1150,7 @@ out:
                                break;
                }
                if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
-                       finish_stream_write_progress(total_size);
+                       finish_stream_op_progress(total_size, "written");
        } else {
                size_t num_available_msgs = 0;
                struct list_head *cur;
@@ -1238,7 +1248,7 @@ static int write_stream_list_parallel(struct list_head *stream_list,
        }
 
        if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
-               printf("Writing %s compressed data using %u threads...\n",
+               printf("Writing %s data using %u threads...\n",
                       get_data_type(out_ctype), num_threads);
        }
 
@@ -1291,13 +1301,21 @@ static int write_stream_list(struct list_head *stream_list, FILE *out_fp,
                num_streams++;
                total_size += wim_resource_size(lte);
                if (!compression_needed
-                   && out_ctype != WIM_COMPRESSION_TYPE_NONE
-                   && (lte->resource_location != RESOURCE_IN_WIM
-                       || wimlib_get_compression_type(lte->wim) != out_ctype)
+                   &&
+                   (out_ctype != WIM_COMPRESSION_TYPE_NONE
+                      && (lte->resource_location != RESOURCE_IN_WIM
+                          || wimlib_get_compression_type(lte->wim) != out_ctype
+                          || (write_flags & WIMLIB_WRITE_FLAG_REBUILD)))
                    && wim_resource_size(lte) != 0)
                        compression_needed = true;
        }
 
+       if (num_streams == 0) {
+               if (write_flags & WIMLIB_WRITE_FLAG_VERBOSE)
+                       printf("No streams to write\n");
+               return 0;
+       }
+
        if (write_flags & WIMLIB_WRITE_FLAG_VERBOSE) {
                printf("Preparing to write %zu streams "
                       "(%"PRIu64" total bytes uncompressed)\n",
@@ -1364,18 +1382,36 @@ static int write_wim_streams(WIMStruct *w, int image, int write_flags,
 }
 
 /*
- * Write the lookup table, xml data, and integrity table, then overwrite the WIM
- * header.
+ * Finish writing a WIM file: write the lookup table, xml data, and integrity
+ * table (optional), then overwrite the WIM header.
+ *
+ * write_flags is a bitwise OR of the following:
+ *
+ *     (public)  WIMLIB_WRITE_FLAG_CHECK_INTEGRITY:
+ *             Include an integrity table.
+ *
+ *     (public)  WIMLIB_WRITE_FLAG_SHOW_PROGRESS:
+ *             Show progress information when (if) writing the integrity table.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE:
+ *             Don't write the lookup table.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE:
+ *             When (if) writing the integrity table, re-use entries from the
+ *             existing integrity table, if possible.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML:
+ *             After writing the XML data but before writing the integrity
+ *             table, write a temporary WIM header and flush the stream so that
+ *             the WIM is less likely to become corrupted upon abrupt program
+ *             termination.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_FSYNC:
+ *             fsync() the output file before closing it.
+ *
  */
 int finish_write(WIMStruct *w, int image, int write_flags)
 {
-       off_t lookup_table_offset;
-       off_t xml_data_offset;
-       off_t lookup_table_size;
-       off_t integrity_offset;
-       off_t xml_data_size;
-       off_t end_offset;
-       off_t integrity_size;
        int ret;
        struct wim_header hdr;
        FILE *out = w->out_fp;
@@ -1400,11 +1436,48 @@ int finish_write(WIMStruct *w, int image, int write_flags)
                goto out;
 
        if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
+               if (write_flags & WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) {
+                       struct wim_header checkpoint_hdr;
+                       memcpy(&checkpoint_hdr, &hdr, sizeof(struct wim_header));
+                       memset(&checkpoint_hdr.integrity, 0, sizeof(struct resource_entry));
+                       if (fseeko(out, 0, SEEK_SET) != 0) {
+                               ret = WIMLIB_ERR_WRITE;
+                               goto out;
+                       }
+                       ret = write_header(&checkpoint_hdr, out);
+                       if (ret != 0)
+                               goto out;
+
+                       if (fflush(out) != 0) {
+                               ERROR_WITH_ERRNO("Can't write data to WIM");
+                               ret = WIMLIB_ERR_WRITE;
+                               goto out;
+                       }
+
+                       if (fseeko(out, 0, SEEK_END) != 0) {
+                               ret = WIMLIB_ERR_WRITE;
+                               goto out;
+                       }
+               }
+
+               off_t old_lookup_table_end;
+               off_t new_lookup_table_end;
+               bool show_progress;
+               if (write_flags & WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE) {
+                       old_lookup_table_end = w->hdr.lookup_table_res_entry.offset +
+                                              w->hdr.lookup_table_res_entry.size;
+               } else {
+                       old_lookup_table_end = 0;
+               }
+               new_lookup_table_end = hdr.lookup_table_res_entry.offset +
+                                      hdr.lookup_table_res_entry.size;
+               show_progress = ((write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) != 0);
+
                ret = write_integrity_table(out,
-                                           WIM_HEADER_DISK_SIZE,
-                                           hdr.xml_res_entry.offset,
-                                           write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS,
-                                           &hdr.integrity);
+                                           &hdr.integrity,
+                                           new_lookup_table_end,
+                                           old_lookup_table_end,
+                                           show_progress);
                if (ret != 0)
                        goto out;
        } else {
@@ -1440,8 +1513,6 @@ int finish_write(WIMStruct *w, int image, int write_flags)
 
        if (fseeko(out, 0, SEEK_SET) != 0) {
                ret = WIMLIB_ERR_WRITE;
-               ERROR_WITH_ERRNO("Failed to seek to beginning of WIM "
-                                "to overwrite header");
                goto out;
        }
 
@@ -1455,10 +1526,8 @@ int finish_write(WIMStruct *w, int image, int write_flags)
                {
                        ERROR_WITH_ERRNO("Error flushing data to WIM file");
                        ret = WIMLIB_ERR_WRITE;
-                       goto out;
                }
        }
-
 out:
        if (fclose(out) != 0) {
                ERROR_WITH_ERRNO("Failed to close the WIM file");
@@ -1484,7 +1553,12 @@ static void close_wim_writable(WIMStruct *w)
 int begin_write(WIMStruct *w, const char *path, int write_flags)
 {
        int ret;
-       ret = open_wim_writable(w, path);
+       bool need_readable = false;
+       bool trunc = true;
+       if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
+               need_readable = true;
+
+       ret = open_wim_writable(w, path, trunc, need_readable);
        if (ret != 0)
                return ret;
        /* Write dummy header. It will be overwritten later. */
@@ -1500,7 +1574,7 @@ WIMLIBAPI int wimlib_write(WIMStruct *w, const char *path,
        if (!w || !path)
                return WIMLIB_ERR_INVALID_PARAM;
 
-       write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
+       write_flags &= WIMLIB_WRITE_MASK_PUBLIC;
 
        if (image != WIM_ALL_IMAGES &&
             (image < 1 || image > w->hdr.image_count))
@@ -1550,11 +1624,11 @@ static int lte_overwrite_prepare(struct lookup_table_entry *lte,
 
 static int check_resource_offset(struct lookup_table_entry *lte, void *arg)
 {
-       u64 xml_data_offset = *(u64*)arg;
+       off_t end_offset = *(u64*)arg;
 
        wimlib_assert(lte->out_refcnt <= lte->refcnt);
        if (lte->out_refcnt < lte->refcnt) {
-               if (lte->resource_entry.offset > xml_data_offset) {
+               if (lte->resource_entry.offset + lte->resource_entry.size > end_offset) {
                        ERROR("The following resource is after the XML data:");
                        print_lookup_table_entry(lte);
                        return WIMLIB_ERR_RESOURCE_ORDER;
@@ -1565,7 +1639,6 @@ static int check_resource_offset(struct lookup_table_entry *lte, void *arg)
 
 static int find_new_streams(struct lookup_table_entry *lte, void *arg)
 {
-       wimlib_assert(lte->out_refcnt <= lte->refcnt);
        if (lte->out_refcnt == lte->refcnt)
                list_add(&lte->staging_list, (struct list_head*)arg);
        else
@@ -1573,6 +1646,63 @@ static int find_new_streams(struct lookup_table_entry *lte, void *arg)
        return 0;
 }
 
+/*
+ * Overwrite a WIM, possibly appending streams to it.
+ *
+ * A WIM looks like (or is supposed to look like) the following:
+ *
+ *                   Header (212 bytes)
+ *                   Streams and metadata resources (variable size)
+ *                   Lookup table (variable size)
+ *                   XML data (variable size)
+ *                   Integrity table (optional) (variable size)
+ *
+ * If we are not adding any streams or metadata resources, the lookup table is
+ * unchanged, so we only need to overwrite the XML data, integrity table, and
+ * header.  This operation is potentially unsafe if the program is abruptly
+ * terminated while the XML data or integrity table are being overwritten, but
+ * before the new header has been written.  To partially alleviate this problem,
+ * a special flag (WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) is passed to
+ * finish_write() to cause a temporary WIM header to be written after the XML
+ * data has been written.  This may prevent the WIM from becoming corrupted if
+ * the program is terminated while the integrity table is being calculated (but
+ * no guarantees, due to write re-ordering...).
+ *
+ * If we are adding new streams or images (metadata resources), the lookup table
+ * needs to be changed, and those streams need to be written.  In this case, we
+ * try to perform a safe update of the WIM file by writing the streams *after*
+ * the end of the previous WIM, then writing the new lookup table, XML data, and
+ * (optionally) integrity table following the new streams.  This will produce a
+ * layout like the following:
+ *
+ *                   Header (212 bytes)
+ *                   (OLD) Streams and metadata resources (variable size)
+ *                   (OLD) Lookup table (variable size)
+ *                   (OLD) XML data (variable size)
+ *                   (OLD) Integrity table (optional) (variable size)
+ *                   (NEW) Streams and metadata resources (variable size)
+ *                   (NEW) Lookup table (variable size)
+ *                   (NEW) XML data (variable size)
+ *                   (NEW) Integrity table (optional) (variable size)
+ *
+ * At all points, the WIM is valid as nothing points to the new data yet.  Then,
+ * the header is overwritten to point to the new lookup table, XML data, and
+ * integrity table, to produce the following layout:
+ *
+ *                   Header (212 bytes)
+ *                   Streams and metadata resources (variable size)
+ *                   Nothing (variable size)
+ *                   More Streams and metadata resources (variable size)
+ *                   Lookup table (variable size)
+ *                   XML data (variable size)
+ *                   Integrity table (optional) (variable size)
+ *
+ * This method allows an image to be appended to a large WIM very quickly, and
+ * is crash-safe except in the case of write re-ordering, but the
+ * disadvantage is that a small hole is left in the WIM where the old lookup
+ * table, xml data, and integrity table were.  (These usually only take up a
+ * small amount of space compared to the streams, however.)
+ */
 static int overwrite_wim_inplace(WIMStruct *w, int write_flags,
                                 unsigned num_threads,
                                 int modified_image_idx)
@@ -1606,26 +1736,38 @@ static int overwrite_wim_inplace(WIMStruct *w, int write_flags,
                wimlib_assert(w->image_metadata[i].modified);
                wimlib_assert(!w->image_metadata[i].has_been_mounted_rw);
                wimlib_assert(w->image_metadata[i].root_dentry != NULL);
+               wimlib_assert(w->image_metadata[i].metadata_lte != NULL);
                w->private = &stream_list;
                for_dentry_in_tree(w->image_metadata[i].root_dentry,
                                   dentry_find_streams_to_write, w);
        }
 
+       if (w->hdr.integrity.offset)
+               old_wim_end = w->hdr.integrity.offset + w->hdr.integrity.size;
+       else
+               old_wim_end = w->hdr.xml_res_entry.offset + w->hdr.xml_res_entry.size;
+
        ret = for_lookup_table_entry(w->lookup_table, check_resource_offset,
-                                    &w->hdr.xml_res_entry.offset);
+                                    &old_wim_end);
        if (ret != 0)
                return ret;
 
+       if (modified_image_idx == w->hdr.image_count && !w->deletion_occurred) {
+               /* If no images have been modified and no images have been
+                * deleted, a new lookup table does not need to be written. */
+               wimlib_assert(list_empty(&stream_list));
+               old_wim_end = w->hdr.lookup_table_res_entry.offset +
+                             w->hdr.lookup_table_res_entry.size;
+               write_flags |= WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE |
+                              WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML;
+       }
+
        INIT_LIST_HEAD(&stream_list);
        for_lookup_table_entry(w->lookup_table, find_new_streams,
                               &stream_list);
 
-       if (w->hdr.integrity.offset)
-               old_wim_end = w->hdr.integrity.offset + w->hdr.integrity.size;
-       else
-               old_wim_end = w->hdr.xml_res_entry.offset + w->hdr.xml_res_entry.size;
-
-       ret = open_wim_writable(w, w->filename);
+       ret = open_wim_writable(w, w->filename, false,
+                               (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) != 0);
        if (ret != 0)
                return ret;
 
@@ -1647,16 +1789,12 @@ static int overwrite_wim_inplace(WIMStruct *w, int write_flags,
        }
 
        for (int i = modified_image_idx; i < w->hdr.image_count; i++) {
-               wimlib_assert(w->image_metadata[i].modified);
-               wimlib_assert(!w->image_metadata[i].has_been_mounted_rw);
-               wimlib_assert(w->image_metadata[i].root_dentry != NULL);
-               wimlib_assert(w->image_metadata[i].metadata_lte != NULL);
-               ret = select_wim_image(w, i + 1);
-               wimlib_assert(ret == 0);
+               select_wim_image(w, i + 1);
                ret = write_metadata_resource(w);
                if (ret != 0)
                        goto out_ftruncate;
        }
+       write_flags |= WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE;
        ret = finish_write(w, WIM_ALL_IMAGES, write_flags);
 out_ftruncate:
        close_wim_writable(w);
@@ -1732,12 +1870,11 @@ err:
 WIMLIBAPI int wimlib_overwrite(WIMStruct *w, int write_flags,
                               unsigned num_threads)
 {
-       int ret;
-
        if (!w)
                return WIMLIB_ERR_INVALID_PARAM;
 
-       write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
+       write_flags &= WIMLIB_WRITE_MASK_PUBLIC;
+
        if (!w->filename)
                return WIMLIB_ERR_NO_FILENAME;
 
@@ -1746,7 +1883,9 @@ WIMLIBAPI int wimlib_overwrite(WIMStruct *w, int write_flags,
                return WIMLIB_ERR_SPLIT_UNSUPPORTED;
        }
 
-       if (!w->deletion_occurred && !(write_flags & WIMLIB_WRITE_FLAG_REBUILD)) {
+       if ((!w->deletion_occurred || (write_flags & WIMLIB_WRITE_FLAG_SOFT_DELETE))
+           && !(write_flags & WIMLIB_WRITE_FLAG_REBUILD))
+       {
                int i, modified_image_idx;
                for (i = 0; i < w->hdr.image_count && !w->image_metadata[i].modified; i++)
                        ;
@@ -1754,11 +1893,10 @@ WIMLIBAPI int wimlib_overwrite(WIMStruct *w, int write_flags,
                for (; i < w->hdr.image_count && w->image_metadata[i].modified &&
                        !w->image_metadata[i].has_been_mounted_rw; i++)
                        ;
-               // XXX
-               /*if (i == w->hdr.image_count) {*/
-                       /*return overwrite_wim_inplace(w, write_flags, num_threads,*/
-                                                    /*modified_image_idx);*/
-               /*}*/
+               if (i == w->hdr.image_count) {
+                       return overwrite_wim_inplace(w, write_flags, num_threads,
+                                                    modified_image_idx);
+               }
        }
        return overwrite_wim_via_tmpfile(w, write_flags, num_threads);
 }