X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fextract.c;h=c61dac764dfc612e36872c099100ca0c643af58f;hp=7c401551cc802235e3ea381ffa7dc8ac3e508552;hb=465a76356bbac48a658da7cdc14b1bb764d91a29;hpb=66592dd7dcf325b39ca110ba53fa6443d0b9825f diff --git a/src/extract.c b/src/extract.c index 7c401551..c61dac76 100644 --- a/src/extract.c +++ b/src/extract.c @@ -8,20 +8,18 @@ /* * Copyright (C) 2012, 2013, 2014 Eric Biggers * - * This file is part of wimlib, a library for working with WIM files. + * This file is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) any + * later version. * - * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free - * Software Foundation; either version 3 of the License, or (at your option) - * any later version. - * - * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU General Public License for more + * This file is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * - * You should have received a copy of the GNU General Public License - * along with wimlib; if not, see http://www.gnu.org/licenses/. + * You should have received a copy of the GNU Lesser General Public License + * along with this file; if not, see http://www.gnu.org/licenses/. */ /* @@ -41,7 +39,13 @@ # include "config.h" #endif +#include +#include +#include +#include + #include "wimlib/apply.h" +#include "wimlib/assert.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" #include "wimlib/endianness.h" @@ -61,15 +65,8 @@ #include "wimlib/wildcard.h" #include "wimlib/wim.h" -#include -#include -#include -#include -#include - -#define WIMLIB_EXTRACT_FLAG_MULTI_IMAGE 0x80000000 -#define WIMLIB_EXTRACT_FLAG_FROM_PIPE 0x40000000 -#define WIMLIB_EXTRACT_FLAG_IMAGEMODE 0x20000000 +#define WIMLIB_EXTRACT_FLAG_FROM_PIPE 0x80000000 +#define WIMLIB_EXTRACT_FLAG_IMAGEMODE 0x40000000 /* Keep in sync with wimlib.h */ #define WIMLIB_EXTRACT_MASK_PUBLIC \ @@ -91,6 +88,54 @@ WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE | \ WIMLIB_EXTRACT_FLAG_WIMBOOT) +/* Send WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE or + * WIMLIB_PROGRESS_MSG_EXTRACT_METADATA. */ +int +do_file_extract_progress(struct apply_ctx *ctx, enum wimlib_progress_msg msg) +{ + ctx->count_until_file_progress = 500; /* Arbitrary value to limit calls */ + return extract_progress(ctx, msg); +} + +static int +start_file_phase(struct apply_ctx *ctx, uint64_t end_file_count, enum wimlib_progress_msg msg) +{ + ctx->progress.extract.current_file_count = 0; + ctx->progress.extract.end_file_count = end_file_count; + return do_file_extract_progress(ctx, msg); +} + +int +start_file_structure_phase(struct apply_ctx *ctx, uint64_t end_file_count) +{ + return start_file_phase(ctx, end_file_count, WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE); +} + +int +start_file_metadata_phase(struct apply_ctx *ctx, uint64_t end_file_count) +{ + return start_file_phase(ctx, end_file_count, WIMLIB_PROGRESS_MSG_EXTRACT_METADATA); +} + +static int +end_file_phase(struct apply_ctx *ctx, enum wimlib_progress_msg msg) +{ + ctx->progress.extract.current_file_count = ctx->progress.extract.end_file_count; + return do_file_extract_progress(ctx, msg); +} + +int +end_file_structure_phase(struct apply_ctx *ctx) +{ + return end_file_phase(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE); +} + +int +end_file_metadata_phase(struct apply_ctx *ctx) +{ + return end_file_phase(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_METADATA); +} + /* Check whether the extraction of a dentry should be skipped completely. */ static bool dentry_is_supported(struct wim_dentry *dentry, @@ -132,7 +177,9 @@ read_pwm_stream_header(WIMStruct *pwm, struct wim_lookup_table_entry *lte, if (ret) goto read_error; - if ((flags & PWM_ALLOW_WIM_HDR) && buf.stream_hdr.magic == PWM_MAGIC) { + if ((flags & PWM_ALLOW_WIM_HDR) && + le64_to_cpu(buf.stream_hdr.magic) == PWM_MAGIC) + { BUILD_BUG_ON(sizeof(buf.pwm_hdr) < sizeof(buf.stream_hdr)); ret = full_read(&pwm->in_fd, &buf.stream_hdr + 1, sizeof(buf.pwm_hdr) - sizeof(buf.stream_hdr)); @@ -216,7 +263,7 @@ load_streams_from_pipe(struct apply_ctx *ctx, lte_unbind_wim_resource_spec(found_lte); lte_bind_wim_resource_spec(needed_lte, rspec); - ret = (*cbs->begin_stream)(needed_lte, 0, + ret = (*cbs->begin_stream)(needed_lte, cbs->begin_stream_ctx); if (ret) { lte_unbind_wim_resource_spec(needed_lte); @@ -259,34 +306,79 @@ load_streams_from_pipe(struct apply_ctx *ctx, } ret = 0; out: - if (found_lte->resource_location != RESOURCE_IN_WIM) + if (found_lte && found_lte->resource_location != RESOURCE_IN_WIM) FREE(rspec); free_lookup_table_entry(found_lte); return ret; } +/* Creates a temporary file opened for writing. The open file descriptor is + * returned in @fd_ret and its name is returned in @name_ret (dynamically + * allocated). */ +static int +create_temporary_file(struct filedes *fd_ret, tchar **name_ret) +{ + tchar *name; + int open_flags; + int raw_fd; + +retry: + name = ttempnam(NULL, T("wimlib")); + if (!name) { + ERROR_WITH_ERRNO("Failed to create temporary filename"); + return WIMLIB_ERR_NOMEM; + } + + open_flags = O_WRONLY | O_CREAT | O_EXCL | O_BINARY; +#ifdef __WIN32__ + open_flags |= _O_SHORT_LIVED; +#endif + raw_fd = topen(name, open_flags, 0600); + + if (raw_fd < 0) { + if (errno == EEXIST) { + FREE(name); + goto retry; + } + ERROR_WITH_ERRNO("Failed to create temporary file " + "\"%"TS"\"", name); + FREE(name); + return WIMLIB_ERR_OPEN; + } + + filedes_init(fd_ret, raw_fd); + *name_ret = name; + return 0; +} + static int -begin_extract_stream_with_progress(struct wim_lookup_table_entry *lte, - u32 flags, void *_ctx) +begin_extract_stream_wrapper(struct wim_lookup_table_entry *lte, void *_ctx) { struct apply_ctx *ctx = _ctx; ctx->cur_stream = lte; + ctx->cur_stream_offset = 0; - return (*ctx->saved_cbs->begin_stream)(lte, flags, - ctx->saved_cbs->begin_stream_ctx); + if (unlikely(lte->out_refcnt > MAX_OPEN_STREAMS)) + return create_temporary_file(&ctx->tmpfile_fd, &ctx->tmpfile_name); + else + return (*ctx->saved_cbs->begin_stream)(lte, ctx->saved_cbs->begin_stream_ctx); } static int -consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx) +extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx) { struct apply_ctx *ctx = _ctx; union wimlib_progress_info *progress = &ctx->progress; int ret; + ctx->cur_stream_offset += size; + if (likely(ctx->supported_features.hard_links)) { progress->extract.completed_bytes += (u64)size * ctx->cur_stream->out_refcnt; + if (ctx->cur_stream_offset == ctx->cur_stream->size) + progress->extract.completed_streams += ctx->cur_stream->out_refcnt; } else { const struct stream_owner *owners = stream_owners(ctx->cur_stream); for (u32 i = 0; i < ctx->cur_stream->out_refcnt; i++) { @@ -298,6 +390,8 @@ consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx) d_extraction_alias_node) { progress->extract.completed_bytes += size; + if (ctx->cur_stream_offset == ctx->cur_stream->size) + progress->extract.completed_streams++; } } } @@ -312,13 +406,117 @@ consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx) { ctx->next_progress = UINT64_MAX; } else { - ctx->next_progress += progress->extract.total_bytes / 128; - if (ctx->next_progress > progress->extract.total_bytes) + /* Send new message as soon as another 1/128 of the + * total has been extracted. (Arbitrary number.) */ + ctx->next_progress = + progress->extract.completed_bytes + + progress->extract.total_bytes / 128; + + /* ... Unless that would be more than 5000000 bytes, in + * which case send the next after the next 5000000 + * bytes. (Another arbitrary number.) */ + if (progress->extract.completed_bytes + 5000000 < + ctx->next_progress) + ctx->next_progress = + progress->extract.completed_bytes + 5000000; + + /* ... But always send a message as soon as we're + * completely done. */ + if (progress->extract.total_bytes < ctx->next_progress) ctx->next_progress = progress->extract.total_bytes; } } - return (*ctx->saved_cbs->consume_chunk)(chunk, size, - ctx->saved_cbs->consume_chunk_ctx); + + if (unlikely(filedes_valid(&ctx->tmpfile_fd))) { + /* Just extracting to temporary file for now. */ + ret = full_write(&ctx->tmpfile_fd, chunk, size); + if (ret) { + ERROR_WITH_ERRNO("Error writing data to " + "temporary file \"%"TS"\"", + ctx->tmpfile_name); + } + return ret; + } else { + return (*ctx->saved_cbs->consume_chunk)(chunk, size, + ctx->saved_cbs->consume_chunk_ctx); + } +} + +static int +extract_from_tmpfile(const tchar *tmpfile_name, struct apply_ctx *ctx) +{ + struct wim_lookup_table_entry tmpfile_lte; + struct wim_lookup_table_entry *orig_lte = ctx->cur_stream; + const struct read_stream_list_callbacks *cbs = ctx->saved_cbs; + int ret; + const u32 orig_refcnt = orig_lte->out_refcnt; + + BUILD_BUG_ON(MAX_OPEN_STREAMS < ARRAY_LEN(orig_lte->inline_stream_owners)); + + struct stream_owner *owners = orig_lte->stream_owners; + + /* Copy the stream's data from the temporary file to each of its + * destinations. + * + * This is executed only in the very uncommon case that a + * single-instance stream is being extracted to more than + * MAX_OPEN_STREAMS locations! */ + + memcpy(&tmpfile_lte, orig_lte, sizeof(struct wim_lookup_table_entry)); + tmpfile_lte.resource_location = RESOURCE_IN_FILE_ON_DISK; + tmpfile_lte.file_on_disk = ctx->tmpfile_name; + ret = 0; + for (u32 i = 0; i < orig_refcnt; i++) { + + /* Note: it usually doesn't matter whether we pass the original + * stream entry to callbacks provided by the extraction backend + * as opposed to the tmpfile stream entry, since they shouldn't + * actually read data from the stream other than through the + * read_stream_prefix() call below. But for + * WIMLIB_EXTRACT_FLAG_WIMBOOT mode on Windows it does matter + * because it needs the original stream location in order to + * create the external backing reference. */ + + orig_lte->out_refcnt = 1; + orig_lte->inline_stream_owners[0] = owners[i]; + + ret = (*cbs->begin_stream)(orig_lte, cbs->begin_stream_ctx); + if (ret) + break; + + /* Extra SHA-1 isn't necessary here, but it shouldn't hurt as + * this case is very rare anyway. */ + ret = extract_stream(&tmpfile_lte, tmpfile_lte.size, + cbs->consume_chunk, + cbs->consume_chunk_ctx); + + ret = (*cbs->end_stream)(orig_lte, ret, cbs->end_stream_ctx); + if (ret) + break; + } + FREE(owners); + orig_lte->out_refcnt = 0; + return ret; +} + +static int +end_extract_stream_wrapper(struct wim_lookup_table_entry *stream, + int status, void *_ctx) +{ + struct apply_ctx *ctx = _ctx; + + if (unlikely(filedes_valid(&ctx->tmpfile_fd))) { + filedes_close(&ctx->tmpfile_fd); + if (!status) + status = extract_from_tmpfile(ctx->tmpfile_name, ctx); + filedes_invalidate(&ctx->tmpfile_fd); + tunlink(ctx->tmpfile_name); + FREE(ctx->tmpfile_name); + return status; + } else { + return (*ctx->saved_cbs->end_stream)(stream, status, + ctx->saved_cbs->end_stream_ctx); + } } /* @@ -331,30 +529,34 @@ consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx) * * This also works if the WIM is being read from a pipe, whereas attempting to * read streams directly (e.g. with read_full_stream_into_buf()) will not. + * + * This also will split up streams that will need to be extracted to more than + * MAX_OPEN_STREAMS locations, as measured by the 'out_refcnt' of each stream. + * Therefore, the apply_operations implementation need not worry about running + * out of file descriptors, unless it might open more than one file descriptor + * per nominal destination (e.g. Win32 currently might because the destination + * file system might not support hard links). */ int extract_stream_list(struct apply_ctx *ctx, const struct read_stream_list_callbacks *cbs) { struct read_stream_list_callbacks wrapper_cbs = { - .begin_stream = begin_extract_stream_with_progress, + .begin_stream = begin_extract_stream_wrapper, .begin_stream_ctx = ctx, - .consume_chunk = consume_chunk_with_progress, + .consume_chunk = extract_chunk_wrapper, .consume_chunk_ctx = ctx, - .end_stream = cbs->end_stream, - .end_stream_ctx = cbs->end_stream_ctx, + .end_stream = end_extract_stream_wrapper, + .end_stream_ctx = ctx, }; - if (ctx->progfunc) { - ctx->saved_cbs = cbs; - cbs = &wrapper_cbs; - } + ctx->saved_cbs = cbs; if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) { - return load_streams_from_pipe(ctx, cbs); + return load_streams_from_pipe(ctx, &wrapper_cbs); } else { return read_stream_list(&ctx->stream_list, offsetof(struct wim_lookup_table_entry, extraction_list), - cbs, VERIFY_STREAM_HASHES); + &wrapper_cbs, VERIFY_STREAM_HASHES); } } @@ -535,6 +737,7 @@ destroy_dentry_list(struct list_head *dentry_list) inode = dentry->d_inode; dentry_reset_extraction_list_node(dentry); inode->i_visited = 0; + inode->i_can_externally_back = 0; if ((void *)dentry->d_extraction_name != (void *)dentry->file_name) FREE(dentry->d_extraction_name); dentry->d_extraction_name = NULL; @@ -611,12 +814,14 @@ dentry_calculate_extraction_name(struct wim_dentry *dentry, if (dentry_is_root(dentry)) return 0; +#ifdef WITH_NTFS_3G if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) { dentry->d_extraction_name = dentry->file_name; dentry->d_extraction_name_nchars = dentry->file_name_nbytes / sizeof(utf16lechar); return 0; } +#endif if (!ctx->supported_features.case_sensitive_filenames) { struct wim_dentry *other; @@ -803,7 +1008,7 @@ dentry_list_resolve_streams(struct list_head *dentry_list, } static int -ref_stream(struct wim_lookup_table_entry *lte, u32 stream_idx, +ref_stream(struct wim_lookup_table_entry *lte, unsigned stream_idx, struct wim_dentry *dentry, struct apply_ctx *ctx) { struct wim_inode *inode = dentry->d_inode; @@ -818,7 +1023,7 @@ ref_stream(struct wim_lookup_table_entry *lte, u32 stream_idx, return 0; ctx->progress.extract.total_bytes += lte->size; - ctx->progress.extract.num_streams++; + ctx->progress.extract.total_streams++; if (inode->i_visited) return 0; @@ -874,29 +1079,49 @@ ref_stream(struct wim_lookup_table_entry *lte, u32 stream_idx, } static int -dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx) +ref_unnamed_stream(struct wim_dentry *dentry, struct apply_ctx *ctx) { struct wim_inode *inode = dentry->d_inode; int ret; + unsigned stream_idx; + struct wim_lookup_table_entry *stream; - /* The unnamed data stream will always be extracted, except in an - * unlikely case. */ - if (!inode_is_encrypted_directory(inode)) { - u16 stream_idx; - struct wim_lookup_table_entry *stream; + if (unlikely(inode_is_encrypted_directory(inode))) + return 0; - stream = inode_unnamed_stream_resolved(inode, &stream_idx); - ret = ref_stream(stream, stream_idx, dentry, ctx); - if (ret) - return ret; + if (unlikely(ctx->apply_ops->will_externally_back)) { + ret = (*ctx->apply_ops->will_externally_back)(dentry, ctx); + if (ret >= 0) { + if (ret) /* Error */ + return ret; + /* Will externally back */ + return 0; + } + /* Won't externally back */ } + stream = inode_unnamed_stream_resolved(inode, &stream_idx); + return ref_stream(stream, stream_idx, dentry, ctx); +} + +static int +dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx) +{ + struct wim_inode *inode = dentry->d_inode; + int ret; + + /* The unnamed data stream will almost always be extracted, but there + * exist cases in which it won't be. */ + ret = ref_unnamed_stream(dentry, ctx); + if (ret) + return ret; + /* Named data streams will be extracted only if supported in the current * extraction mode and volume, and to avoid complications, if not doing * a linked extraction. */ if (ctx->supported_features.named_data_streams) { - for (u16 i = 0; i < inode->i_num_ads; i++) { - if (!ads_entry_is_named_stream(&inode->i_ads_entries[i])) + for (unsigned i = 0; i < inode->i_num_ads; i++) { + if (!inode->i_ads_entries[i].stream_name_nbytes) continue; ret = ref_stream(inode->i_ads_entries[i].lte, i + 1, dentry, ctx); @@ -1226,6 +1451,8 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees, ctx->progress.extract.target = target; } INIT_LIST_HEAD(&ctx->stream_list); + filedes_invalidate(&ctx->tmpfile_fd); + ctx->apply_ops = ops; ret = (*ops->get_supported_features)(target, &ctx->supported_features); if (ret) @@ -1246,16 +1473,21 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees, if (ret) goto out_cleanup; + if (unlikely(list_empty(&dentry_list))) { + WARNING("There is nothing to extract!"); + goto out_cleanup; + } + ret = dentry_list_resolve_streams(&dentry_list, ctx); if (ret) goto out_cleanup; + dentry_list_build_inode_alias_lists(&dentry_list); + ret = dentry_list_ref_streams(&dentry_list, ctx); if (ret) goto out_cleanup; - dentry_list_build_inode_alias_lists(&dentry_list); - if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) { /* When extracting from a pipe, the number of bytes of data to * extract can't be determined in the normal way (examining the @@ -1313,23 +1545,20 @@ out: static int mkdir_if_needed(const tchar *target) { - struct stat stbuf; - if (tstat(target, &stbuf)) { - if (errno == ENOENT) { - if (tmkdir(target, 0755)) { - ERROR_WITH_ERRNO("Failed to create directory " - "\"%"TS"\"", target); - return WIMLIB_ERR_MKDIR; - } - } else { - ERROR_WITH_ERRNO("Failed to stat \"%"TS"\"", target); - return WIMLIB_ERR_STAT; - } - } else if (!S_ISDIR(stbuf.st_mode)) { - ERROR("\"%"TS"\" is not a directory", target); - return WIMLIB_ERR_NOTDIR; - } - return 0; + if (!tmkdir(target, 0755)) + return 0; + + if (errno == EEXIST) + return 0; + +#ifdef __WIN32__ + /* _wmkdir() fails with EACCES if called on a drive root directory. */ + if (errno == EACCES) + return 0; +#endif + + ERROR_WITH_ERRNO("Failed to create directory \"%"TS"\"", target); + return WIMLIB_ERR_MKDIR; } /* Make sure the extraction flags make sense, and update them if needed. */ @@ -1360,12 +1589,16 @@ check_extract_flags(const WIMStruct *wim, int *extract_flags_p) } #endif -#ifndef __WIN32__ if (extract_flags & WIMLIB_EXTRACT_FLAG_WIMBOOT) { +#ifdef __WIN32__ + if (!wim->filename) + return WIMLIB_ERR_NO_FILENAME; +#else ERROR("WIMBoot extraction is only supported on Windows!"); return WIMLIB_ERR_UNSUPPORTED; - } #endif + } + if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX | WIMLIB_EXTRACT_FLAG_NORPFIX | @@ -1568,8 +1801,6 @@ extract_all_images(WIMStruct *wim, const tchar *target, int extract_flags) int image; const tchar *image_name; - extract_flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE; - if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) { ERROR("Cannot extract multiple images in NTFS extraction mode."); return WIMLIB_ERR_INVALID_PARAM; @@ -1791,7 +2022,7 @@ wimlib_extract_image_from_pipe_with_progress(int pipe_fd, if (i == image) { /* Metadata resource is for the image being extracted. * Parse it and save the metadata in memory. */ - ret = read_metadata_resource(pwm, imd); + ret = read_metadata_resource(imd); if (ret) goto out_wimlib_free; imd->modified = 1;