Add support for a data recovery mode

[wimlib] / src / write.c
diff --git a/src/write.c b/src/write.c

index cc569395d19c12f3f499b5a68f8523f33e5055bd..e28069ebba707acd49ad53e5aae3c9ade3a01b8a 100644 (file)
--- a/src/write.c
+++ b/src/write.c
@@ -6,7 +6,7 @@
   */
  
  /*
- * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
+ * Copyright (C) 2012-2016 Eric Biggers
   *
   * This file is free software; you can redistribute it and/or modify it under
   * the terms of the GNU Lesser General Public License as published by the Free
@@ -288,7 +288,7 @@ write_pwm_blob_header(const struct blob_descriptor *blob,
         blob_hdr.flags = cpu_to_le32(reshdr_flags);
         ret = full_write(out_fd, &blob_hdr, sizeof(blob_hdr));
         if (ret)
-               ERROR_WITH_ERRNO("Write error");
+               ERROR_WITH_ERRNO("Error writing blob header to WIM file");
         return ret;
  }
  
@@ -301,7 +301,8 @@ struct write_blobs_progress_data {
  
  static int
  do_write_blobs_progress(struct write_blobs_progress_data *progress_data,
-                       u64 complete_size, u32 complete_count, bool discarded)
+                       u64 complete_size, u64 complete_compressed_size,
+                       u32 complete_count, bool discarded)
  {
         union wimlib_progress_info *progress = &progress_data->progress;
         int ret;
@@ -316,6 +317,8 @@ do_write_blobs_progress(struct write_blobs_progress_data *progress_data,
                 }
         } else {
                 progress->write_streams.completed_bytes += complete_size;
+               progress->write_streams.completed_compressed_bytes +=
+                       complete_compressed_size;
                 progress->write_streams.completed_streams += complete_count;
         }
  
@@ -377,12 +380,6 @@ struct write_blobs_ctx {
          * @blobs_being_compressed only when writing a solid resource.  */
         struct list_head blobs_in_solid_resource;
  
-       /* Current uncompressed offset in the blob being read.  */
-       u64 cur_read_blob_offset;
-
-       /* Uncompressed size of the blob currently being read.  */
-       u64 cur_read_blob_size;
-
         /* Current uncompressed offset in the blob being written.  */
         u64 cur_write_blob_offset;
  
@@ -461,8 +458,11 @@ begin_chunk_table(struct write_blobs_ctx *ctx, u64 res_expected_size)
                         reserve_size += sizeof(struct alt_chunk_table_header_disk);
                 memset(ctx->chunk_csizes, 0, reserve_size);
                 ret = full_write(ctx->out_fd, ctx->chunk_csizes, reserve_size);
-               if (ret)
+               if (ret) {
+                       ERROR_WITH_ERRNO("Error reserving space for chunk "
+                                        "table in WIM file");
                         return ret;
+               }
         }
         return 0;
  }
@@ -588,7 +588,7 @@ end_chunk_table(struct write_blobs_ctx *ctx, u64 res_actual_size,
         return 0;
  
  write_error:
-       ERROR_WITH_ERRNO("Write error");
+       ERROR_WITH_ERRNO("Error writing chunk table to WIM file");
         return ret;
  }
  
@@ -686,9 +686,6 @@ write_blob_begin_read(struct blob_descriptor *blob, void *_ctx)
  
         wimlib_assert(blob->size > 0);
  
-       ctx->cur_read_blob_offset = 0;
-       ctx->cur_read_blob_size = blob->size;
-
         /* As an optimization, we allow some blobs to be "unhashed", meaning
          * their SHA-1 message digests are unknown.  This is the case with blobs
          * that are added by scanning a directory tree with wimlib_add_image(),
@@ -719,7 +716,9 @@ write_blob_begin_read(struct blob_descriptor *blob, void *_ctx)
                                  * output reference count to the duplicate blob
                                  * in the former case.  */
                                 ret = do_write_blobs_progress(&ctx->progress_data,
-                                                             blob->size, 1, true);
+                                                             blob->size,
+                                                             blob->size,
+                                                             1, true);
                                 list_del(&blob->write_blobs_list);
                                 list_del(&blob->blob_table_list);
                                 if (new_blob->will_be_in_output_wim)
@@ -768,7 +767,7 @@ write_blob_uncompressed(struct blob_descriptor *blob, struct filedes *out_fd)
         if (filedes_seek(out_fd, begin_offset) == -1)
                 return 0;
  
-       ret = extract_blob_to_fd(blob, out_fd);
+       ret = extract_blob_to_fd(blob, out_fd, false);
         if (ret) {
                 /* Error reading the uncompressed data.  */
                 if (out_fd->offset == begin_offset &&
@@ -826,7 +825,7 @@ should_rewrite_blob_uncompressed(const struct write_blobs_ctx *ctx,
          * Exception: if the compressed size happens to be *exactly* the same as
          * the uncompressed size, then the blob *must* be written uncompressed
          * in order to remain compatible with the Windows Overlay Filesystem
-        * Filter Driver (WOF).
+        * filter driver (WOF).
          *
          * TODO: we are currently assuming that the optimization for
          * single-chunk resources in maybe_rewrite_blob_uncompressed() prevents
@@ -873,8 +872,7 @@ write_chunk(struct write_blobs_ctx *ctx, const void *cchunk,
  {
         int ret;
         struct blob_descriptor *blob;
-       u32 completed_blob_count;
-       u32 completed_size;
+       u32 completed_blob_count = 0;
  
         blob = list_entry(ctx->blobs_being_compressed.next,
                           struct blob_descriptor, write_blobs_list);
@@ -921,8 +919,6 @@ write_chunk(struct write_blobs_ctx *ctx, const void *cchunk,
  
         ctx->cur_write_blob_offset += usize;
  
-       completed_size = usize;
-       completed_blob_count = 0;
         if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
                 /* Wrote chunk in solid mode.  It may have finished multiple
                  * blobs.  */
@@ -979,11 +975,11 @@ write_chunk(struct write_blobs_ctx *ctx, const void *cchunk,
                 }
         }
  
-       return do_write_blobs_progress(&ctx->progress_data, completed_size,
+       return do_write_blobs_progress(&ctx->progress_data, usize, csize,
                                        completed_blob_count, false);
  
  write_error:
-       ERROR_WITH_ERRNO("Write error");
+       ERROR_WITH_ERRNO("Error writing chunk data to WIM file");
         return ret;
  }
  
@@ -1016,7 +1012,8 @@ prepare_chunk_buffer(struct write_blobs_ctx *ctx)
  
  /* Process the next chunk of data to be written to a WIM resource.  */
  static int
-write_blob_process_chunk(const void *chunk, size_t size, void *_ctx)
+write_blob_process_chunk(const struct blob_descriptor *blob, u64 offset,
+                        const void *chunk, size_t size, void *_ctx)
  {
         struct write_blobs_ctx *ctx = _ctx;
         int ret;
@@ -1029,7 +1026,6 @@ write_blob_process_chunk(const void *chunk, size_t size, void *_ctx)
                  ret = write_chunk(ctx, chunk, size, size);
                  if (ret)
                          return ret;
-                ctx->cur_read_blob_offset += size;
                  return 0;
         }
  
@@ -1053,8 +1049,7 @@ write_blob_process_chunk(const void *chunk, size_t size, void *_ctx)
                 } else {
                         needed_chunk_size = min(ctx->out_chunk_size,
                                                 ctx->cur_chunk_buf_filled +
-                                                       (ctx->cur_read_blob_size -
-                                                        ctx->cur_read_blob_offset));
+                                                       (blob->size - offset));
                 }
  
                 bytes_consumed = min(chunkend - chunkptr,
@@ -1064,7 +1059,7 @@ write_blob_process_chunk(const void *chunk, size_t size, void *_ctx)
                        chunkptr, bytes_consumed);
  
                 chunkptr += bytes_consumed;
-               ctx->cur_read_blob_offset += bytes_consumed;
+               offset += bytes_consumed;
                 ctx->cur_chunk_buf_filled += bytes_consumed;
  
                 if (ctx->cur_chunk_buf_filled == needed_chunk_size) {
@@ -1085,8 +1080,6 @@ write_blob_end_read(struct blob_descriptor *blob, int status, void *_ctx)
  {
         struct write_blobs_ctx *ctx = _ctx;
  
-       wimlib_assert(ctx->cur_read_blob_offset == ctx->cur_read_blob_size || status);
-
         if (!blob->will_be_in_output_wim) {
                 /* The blob was a duplicate.  Now that its data has finished
                  * being read, it is being discarded in favor of the duplicate
@@ -1240,12 +1233,18 @@ write_raw_copy_resource(struct wim_resource_descriptor *in_rdesc,
  
                         ret = full_pread(in_fd, buf, bytes_to_read,
                                          cur_read_offset);
-                       if (ret)
+                       if (ret) {
+                               ERROR_WITH_ERRNO("Error reading raw data "
+                                                "from WIM file");
                                 return ret;
+                       }
  
                         ret = full_write(out_fd, buf, bytes_to_read);
-                       if (ret)
+                       if (ret) {
+                               ERROR_WITH_ERRNO("Error writing raw data "
+                                                "to WIM file");
                                 return ret;
+                       }
  
                         cur_read_offset += bytes_to_read;
  
@@ -1290,15 +1289,18 @@ write_raw_copy_resources(struct list_head *raw_copy_blobs,
                 blob->rdesc->raw_copy_ok = 1;
  
         list_for_each_entry(blob, raw_copy_blobs, write_blobs_list) {
+               u64 compressed_size = 0;
+
                 if (blob->rdesc->raw_copy_ok) {
                         /* Write each solid resource only one time.  */
                         ret = write_raw_copy_resource(blob->rdesc, out_fd);
                         if (ret)
                                 return ret;
                         blob->rdesc->raw_copy_ok = 0;
+                       compressed_size = blob->rdesc->size_in_wim;
                 }
                 ret = do_write_blobs_progress(progress_data, blob->size,
-                                             1, false);
+                                             compressed_size, 1, false);
                 if (ret)
                         return ret;
         }
@@ -1607,7 +1609,7 @@ write_blob_list(struct list_head *blob_list,
  
         struct read_blob_callbacks cbs = {
                 .begin_blob     = write_blob_begin_read,
-               .consume_chunk  = write_blob_process_chunk,
+               .continue_blob  = write_blob_process_chunk,
                 .end_blob       = write_blob_end_read,
                 .ctx            = &ctx,
         };
@@ -2593,14 +2595,15 @@ write_pipable_wim(WIMStruct *wim, int image, int write_flags,
         /* At this point, the header at the beginning of the file has already
          * been written.  */
  
-       /* For efficiency, when wimlib adds an image to the WIM with
-        * wimlib_add_image(), the SHA-1 message digests of files are not
-        * calculated; instead, they are calculated while the files are being
-        * written.  However, this does not work when writing a pipable WIM,
-        * since when writing a blob to a pipable WIM, its SHA-1 message digest
-        * needs to be known before the blob data is written.  Therefore, before
-        * getting much farther, we need to pre-calculate the SHA-1 message
-        * digests of all blobs that will be written.  */
+       /*
+        * For efficiency, wimlib normally delays calculating each newly added
+        * stream's hash until that stream is being written, or just before
+        * it is written.  However, when writing a pipable WIM (potentially to a
+        * pipe), we first have to write the metadata resources, which contain
+        * all the hashes.  Moreover each blob is prefixed with its hash (struct
+        * pwm_blob_hdr).  Thus, we have to calculate all the hashes before
+        * writing anything.
+        */
         ret = wim_checksum_unhashed_blobs(wim);
         if (ret)
                 return ret;
@@ -3248,7 +3251,7 @@ overwrite_wim_via_tmpfile(WIMStruct *wim, int write_flags, unsigned num_threads)
         wim_name_len = tstrlen(wim->filename);
         tchar tmpfile[wim_name_len + 10];
         tmemcpy(tmpfile, wim->filename, wim_name_len);
-       randomize_char_array_with_alnum(tmpfile + wim_name_len, 9);
+       get_random_alnum_chars(tmpfile + wim_name_len, 9);
         tmpfile[wim_name_len + 9] = T('\0');
  
         ret = wimlib_write(wim, tmpfile, WIMLIB_ALL_IMAGES,