+/*----------------------------------------------------------------------------*
+ * Fast MFT scan implementation *
+ *----------------------------------------------------------------------------*/
+
+#define ENABLE_FAST_MFT_SCAN 1
+
+#ifdef ENABLE_FAST_MFT_SCAN
+
+typedef struct {
+ u64 StartingCluster;
+ u64 ClusterCount;
+} CLUSTER_RANGE;
+
+typedef struct {
+ u64 StartingFileReferenceNumber;
+ u64 EndingFileReferenceNumber;
+} FILE_REFERENCE_RANGE;
+
+/* The FSCTL_QUERY_FILE_LAYOUT ioctl. This ioctl can be used on Windows 8 and
+ * later to scan the MFT of an NTFS volume. */
+#define FSCTL_QUERY_FILE_LAYOUT CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 157, METHOD_NEITHER, FILE_ANY_ACCESS)
+
+/* The input to FSCTL_QUERY_FILE_LAYOUT */
+typedef struct {
+ u32 NumberOfPairs;
+#define QUERY_FILE_LAYOUT_RESTART 0x00000001
+#define QUERY_FILE_LAYOUT_INCLUDE_NAMES 0x00000002
+#define QUERY_FILE_LAYOUT_INCLUDE_STREAMS 0x00000004
+#define QUERY_FILE_LAYOUT_INCLUDE_EXTENTS 0x00000008
+#define QUERY_FILE_LAYOUT_INCLUDE_EXTRA_INFO 0x00000010
+#define QUERY_FILE_LAYOUT_INCLUDE_STREAMS_WITH_NO_CLUSTERS_ALLOCATED 0x00000020
+ u32 Flags;
+#define QUERY_FILE_LAYOUT_FILTER_TYPE_NONE 0
+#define QUERY_FILE_LAYOUT_FILTER_TYPE_CLUSTERS 1
+#define QUERY_FILE_LAYOUT_FILTER_TYPE_FILEID 2
+#define QUERY_FILE_LAYOUT_NUM_FILTER_TYPES 3
+ u32 FilterType;
+ u32 Reserved;
+ union {
+ CLUSTER_RANGE ClusterRanges[1];
+ FILE_REFERENCE_RANGE FileReferenceRanges[1];
+ } Filter;
+} QUERY_FILE_LAYOUT_INPUT;
+
+/* The header of the buffer returned by FSCTL_QUERY_FILE_LAYOUT */
+typedef struct {
+ u32 FileEntryCount;
+ u32 FirstFileOffset;
+#define QUERY_FILE_LAYOUT_SINGLE_INSTANCED 0x00000001
+ u32 Flags;
+ u32 Reserved;
+} QUERY_FILE_LAYOUT_OUTPUT;
+
+/* Inode information returned by FSCTL_QUERY_FILE_LAYOUT */
+typedef struct {
+ u32 Version;
+ u32 NextFileOffset;
+ u32 Flags;
+ u32 FileAttributes;
+ u64 FileReferenceNumber;
+ u32 FirstNameOffset;
+ u32 FirstStreamOffset;
+ u32 ExtraInfoOffset;
+ u32 Reserved;
+} FILE_LAYOUT_ENTRY;
+
+/* Extra inode information returned by FSCTL_QUERY_FILE_LAYOUT */
+typedef struct {
+ struct {
+ u64 CreationTime;
+ u64 LastAccessTime;
+ u64 LastWriteTime;
+ u64 ChangeTime;
+ u32 FileAttributes;
+ } BasicInformation;
+ u32 OwnerId;
+ u32 SecurityId;
+ s64 Usn;
+} FILE_LAYOUT_INFO_ENTRY;
+
+/* Filename (or dentry) information returned by FSCTL_QUERY_FILE_LAYOUT */
+typedef struct {
+ u32 NextNameOffset;
+#define FILE_LAYOUT_NAME_ENTRY_PRIMARY 0x00000001
+#define FILE_LAYOUT_NAME_ENTRY_DOS 0x00000002
+ u32 Flags;
+ u64 ParentFileReferenceNumber;
+ u32 FileNameLength;
+ u32 Reserved;
+ wchar_t FileName[1];
+} FILE_LAYOUT_NAME_ENTRY;
+
+/* Stream information returned by FSCTL_QUERY_FILE_LAYOUT */
+typedef struct {
+ u32 Version;
+ u32 NextStreamOffset;
+#define STREAM_LAYOUT_ENTRY_IMMOVABLE 0x00000001
+#define STREAM_LAYOUT_ENTRY_PINNED 0x00000002
+#define STREAM_LAYOUT_ENTRY_RESIDENT 0x00000004
+#define STREAM_LAYOUT_ENTRY_NO_CLUSTERS_ALLOCATED 0x00000008
+ u32 Flags;
+ u32 ExtentInformationOffset;
+ u64 AllocationSize;
+ u64 EndOfFile;
+ u64 Reserved;
+ u32 AttributeFlags;
+ u32 StreamIdentifierLength;
+ wchar_t StreamIdentifier[1];
+} STREAM_LAYOUT_ENTRY;
+
+
+typedef struct {
+#define STREAM_EXTENT_ENTRY_AS_RETRIEVAL_POINTERS 0x00000001
+#define STREAM_EXTENT_ENTRY_ALL_EXTENTS 0x00000002
+ u32 Flags;
+ union {
+ RETRIEVAL_POINTERS_BUFFER RetrievalPointers;
+ } ExtentInformation;
+} STREAM_EXTENT_ENTRY;
+
+/* Extract the MFT number part of the full inode number */
+#define NTFS_MFT_NO(ref) ((ref) & (((u64)1 << 48) - 1))
+
+/* Is the file the root directory of the NTFS volume? The root directory always
+ * occupies MFT record 5. */
+#define NTFS_IS_ROOT_FILE(ino) (NTFS_MFT_NO(ino) == 5)
+
+/* Is the file a special NTFS file, other than the root directory? The special
+ * files are the first 16 records in the MFT. */
+#define NTFS_IS_SPECIAL_FILE(ino) \
+ (NTFS_MFT_NO(ino) <= 15 && !NTFS_IS_ROOT_FILE(ino))
+
+/* Intermediate inode structure. This is used to temporarily save information
+ * from FSCTL_QUERY_FILE_LAYOUT before creating the full 'struct wim_inode'. */
+struct ntfs_inode {
+ struct avl_tree_node index_node;
+ u64 ino;
+ u64 creation_time;
+ u64 last_access_time;
+ u64 last_write_time;
+ u64 starting_lcn;
+ u32 attributes;
+ u32 security_id;
+ u32 num_aliases;
+ u32 num_streams;
+ u32 first_stream_offset;
+ struct ntfs_dentry *first_child;
+ wchar_t short_name[13];
+};
+
+/* Intermediate dentry structure. This is used to temporarily save information
+ * from FSCTL_QUERY_FILE_LAYOUT before creating the full 'struct wim_dentry'. */
+struct ntfs_dentry {
+ u32 offset_from_inode : 31;
+ u32 is_primary : 1;
+ union {
+ /* Note: build_children_lists() replaces 'parent_ino' with
+ * 'next_child'. */
+ u64 parent_ino;
+ struct ntfs_dentry *next_child;
+ };
+ wchar_t name[0];
+};
+
+/* Intermediate stream structure. This is used to temporarily save information
+ * from FSCTL_QUERY_FILE_LAYOUT before creating the full 'struct
+ * wim_inode_stream'. */
+struct ntfs_stream {
+ u64 size;
+ wchar_t name[0];
+};
+
+/* Map of all known NTFS inodes, keyed by inode number */
+struct ntfs_inode_map {
+ struct avl_tree_node *root;
+};
+
+#define NTFS_INODE(node) \
+ avl_tree_entry((node), struct ntfs_inode, index_node)
+
+#define SKIP_ALIGNED(p, size) ((void *)(p) + ALIGN((size), 8))
+
+/* Get a pointer to the first dentry of the inode. */
+#define FIRST_DENTRY(ni) SKIP_ALIGNED((ni), sizeof(struct ntfs_inode))
+
+/* Get a pointer to the first stream of the inode. */
+#define FIRST_STREAM(ni) ((const void *)ni + ni->first_stream_offset)
+
+/* Advance to the next dentry of the inode. */
+#define NEXT_DENTRY(nd) SKIP_ALIGNED((nd), sizeof(struct ntfs_dentry) + \
+ (wcslen((nd)->name) + 1) * sizeof(wchar_t))
+
+/* Advance to the next stream of the inode. */
+#define NEXT_STREAM(ns) SKIP_ALIGNED((ns), sizeof(struct ntfs_stream) + \
+ (wcslen((ns)->name) + 1) * sizeof(wchar_t))
+
+static int
+_avl_cmp_ntfs_inodes(const struct avl_tree_node *node1,
+ const struct avl_tree_node *node2)
+{
+ return cmp_u64(NTFS_INODE(node1)->ino, NTFS_INODE(node2)->ino);
+}
+
+/* Adds an NTFS inode to the map. */
+static void
+ntfs_inode_map_add_inode(struct ntfs_inode_map *map, struct ntfs_inode *ni)
+{
+ if (avl_tree_insert(&map->root, &ni->index_node, _avl_cmp_ntfs_inodes)) {
+ WARNING("Inode 0x%016"PRIx64" is a duplicate!", ni->ino);
+ FREE(ni);
+ }
+}
+
+/* Find an ntfs_inode in the map by inode number. Returns NULL if not found. */
+static struct ntfs_inode *
+ntfs_inode_map_lookup(struct ntfs_inode_map *map, u64 ino)
+{
+ struct ntfs_inode tmp;
+ struct avl_tree_node *res;
+
+ tmp.ino = ino;
+ res = avl_tree_lookup_node(map->root, &tmp.index_node, _avl_cmp_ntfs_inodes);
+ if (!res)
+ return NULL;
+ return NTFS_INODE(res);
+}
+
+/* Remove an ntfs_inode from the map and free it. */
+static void
+ntfs_inode_map_remove(struct ntfs_inode_map *map, struct ntfs_inode *ni)
+{
+ avl_tree_remove(&map->root, &ni->index_node);
+ FREE(ni);
+}
+
+/* Free all ntfs_inodes in the map. */
+static void
+ntfs_inode_map_destroy(struct ntfs_inode_map *map)
+{
+ struct ntfs_inode *ni;
+
+ avl_tree_for_each_in_postorder(ni, map->root, struct ntfs_inode, index_node)
+ FREE(ni);
+}
+
+static bool
+file_has_streams(const FILE_LAYOUT_ENTRY *file)
+{
+ return (file->FirstStreamOffset != 0) &&
+ !(file->FileAttributes & FILE_ATTRIBUTE_ENCRYPTED);
+}
+
+static bool
+is_valid_name_entry(const FILE_LAYOUT_NAME_ENTRY *name)
+{
+ return name->FileNameLength > 0 &&
+ name->FileNameLength % 2 == 0 &&
+ !wmemchr(name->FileName, L'\0', name->FileNameLength / 2) &&
+ (!(name->Flags & FILE_LAYOUT_NAME_ENTRY_DOS) ||
+ name->FileNameLength <= 24);
+}
+
+/* Validate the FILE_LAYOUT_NAME_ENTRYs of the specified file and compute the
+ * total length in bytes of the ntfs_dentry structures needed to hold the name
+ * information. */
+static int
+validate_names_and_compute_total_length(const FILE_LAYOUT_ENTRY *file,
+ size_t *total_length_ret)
+{
+ const FILE_LAYOUT_NAME_ENTRY *name =
+ (const void *)file + file->FirstNameOffset;
+ size_t total = 0;
+ size_t num_long_names = 0;
+
+ for (;;) {
+ if (unlikely(!is_valid_name_entry(name))) {
+ ERROR("Invalid FILE_LAYOUT_NAME_ENTRY! "
+ "FileReferenceNumber=0x%016"PRIx64", "
+ "FileNameLength=%"PRIu32", "
+ "FileName=%.*ls, Flags=0x%08"PRIx32,
+ file->FileReferenceNumber,
+ name->FileNameLength,
+ (int)(name->FileNameLength / 2),
+ name->FileName, name->Flags);
+ return WIMLIB_ERR_UNSUPPORTED;
+ }
+ if (name->Flags != FILE_LAYOUT_NAME_ENTRY_DOS) {
+ num_long_names++;
+ total += ALIGN(sizeof(struct ntfs_dentry) +
+ name->FileNameLength + sizeof(wchar_t),
+ 8);
+ }
+ if (name->NextNameOffset == 0)
+ break;
+ name = (const void *)name + name->NextNameOffset;
+ }
+
+ if (unlikely(num_long_names == 0)) {
+ ERROR("Inode 0x%016"PRIx64" has no long names!",
+ file->FileReferenceNumber);
+ return WIMLIB_ERR_UNSUPPORTED;
+ }
+
+ *total_length_ret = total;
+ return 0;
+}
+
+static bool
+is_valid_stream_entry(const STREAM_LAYOUT_ENTRY *stream)
+{
+ return stream->StreamIdentifierLength % 2 == 0 &&
+ !wmemchr(stream->StreamIdentifier , L'\0',
+ stream->StreamIdentifierLength / 2);
+}
+
+/*
+ * If the specified STREAM_LAYOUT_ENTRY represents a DATA stream as opposed to
+ * some other type of NTFS stream such as a STANDARD_INFORMATION stream, return
+ * true and set *stream_name_ret and *stream_name_nchars_ret to specify just the
+ * stream name. For example, ":foo:$DATA" would become "foo" with length 3
+ * characters. Otherwise return false.
+ */
+static bool
+use_stream(const FILE_LAYOUT_ENTRY *file, const STREAM_LAYOUT_ENTRY *stream,
+ const wchar_t **stream_name_ret, size_t *stream_name_nchars_ret)
+{
+ const wchar_t *stream_name;
+ size_t stream_name_nchars;
+
+ if (stream->StreamIdentifierLength == 0) {
+ /* The unnamed data stream may be given as an empty string
+ * rather than as "::$DATA". Handle it both ways. */
+ stream_name = L"";
+ stream_name_nchars = 0;
+ } else if (!get_data_stream_name(stream->StreamIdentifier,
+ stream->StreamIdentifierLength / 2,
+ &stream_name, &stream_name_nchars))
+ return false;
+
+ /* Skip the unnamed data stream for directories. */
+ if (stream_name_nchars == 0 &&
+ (file->FileAttributes & FILE_ATTRIBUTE_DIRECTORY))
+ return false;
+
+ *stream_name_ret = stream_name;
+ *stream_name_nchars_ret = stream_name_nchars;
+ return true;
+}
+
+/* Validate the STREAM_LAYOUT_ENTRYs of the specified file and compute the total
+ * length in bytes of the ntfs_stream structures needed to hold the stream
+ * information. */
+static int
+validate_streams_and_compute_total_length(const FILE_LAYOUT_ENTRY *file,
+ size_t *total_length_ret)
+{
+ const STREAM_LAYOUT_ENTRY *stream =
+ (const void *)file + file->FirstStreamOffset;
+ size_t total = 0;
+ for (;;) {
+ const wchar_t *name;
+ size_t name_nchars;
+
+ if (unlikely(!is_valid_stream_entry(stream))) {
+ WARNING("Invalid STREAM_LAYOUT_ENTRY! "
+ "FileReferenceNumber=0x%016"PRIx64", "
+ "StreamIdentifierLength=%"PRIu32", "
+ "StreamIdentifier=%.*ls",
+ file->FileReferenceNumber,
+ stream->StreamIdentifierLength,
+ (int)(stream->StreamIdentifierLength / 2),
+ stream->StreamIdentifier);
+ return WIMLIB_ERR_UNSUPPORTED;
+ }
+
+ if (use_stream(file, stream, &name, &name_nchars)) {
+ total += ALIGN(sizeof(struct ntfs_stream) +
+ (name_nchars + 1) * sizeof(wchar_t), 8);
+ }
+ if (stream->NextStreamOffset == 0)
+ break;
+ stream = (const void *)stream + stream->NextStreamOffset;
+ }
+
+ *total_length_ret = total;
+ return 0;
+}
+
+static void *
+load_name_information(const FILE_LAYOUT_ENTRY *file, struct ntfs_inode *ni,
+ void *p)
+{
+ const FILE_LAYOUT_NAME_ENTRY *name =
+ (const void *)file + file->FirstNameOffset;
+ for (;;) {
+ struct ntfs_dentry *nd = p;
+ /* Note that a name may be just a short (DOS) name, just a long
+ * name, or both a short name and a long name. If there is a
+ * short name, one name should also be marked as "primary" to
+ * indicate which long name the short name is associated with.
+ * Also, there should be at most one short name per inode. */
+ if (name->Flags & FILE_LAYOUT_NAME_ENTRY_DOS) {
+ memcpy(ni->short_name,
+ name->FileName, name->FileNameLength);
+ ni->short_name[name->FileNameLength / 2] = L'\0';
+ }
+ if (name->Flags != FILE_LAYOUT_NAME_ENTRY_DOS) {
+ ni->num_aliases++;
+ nd->offset_from_inode = (u8 *)nd - (u8 *)ni;
+ nd->is_primary = ((name->Flags &
+ FILE_LAYOUT_NAME_ENTRY_PRIMARY) != 0);
+ nd->parent_ino = name->ParentFileReferenceNumber;
+ memcpy(nd->name, name->FileName, name->FileNameLength);
+ nd->name[name->FileNameLength / 2] = L'\0';
+ p += ALIGN(sizeof(struct ntfs_dentry) +
+ name->FileNameLength + sizeof(wchar_t), 8);
+ }
+ if (name->NextNameOffset == 0)
+ break;
+ name = (const void *)name + name->NextNameOffset;
+ }
+ return p;
+}
+
+static u64
+load_starting_lcn(const STREAM_LAYOUT_ENTRY *stream)
+{
+ const STREAM_EXTENT_ENTRY *entry;
+
+ if (stream->ExtentInformationOffset == 0)
+ return 0;
+
+ entry = (const void *)stream + stream->ExtentInformationOffset;
+
+ if (!(entry->Flags & STREAM_EXTENT_ENTRY_AS_RETRIEVAL_POINTERS))
+ return 0;
+
+ return extract_starting_lcn(&entry->ExtentInformation.RetrievalPointers);
+}
+
+static void *
+load_stream_information(const FILE_LAYOUT_ENTRY *file, struct ntfs_inode *ni,
+ void *p)
+{
+ const STREAM_LAYOUT_ENTRY *stream =
+ (const void *)file + file->FirstStreamOffset;
+ const u32 first_stream_offset = (const u8 *)p - (const u8 *)ni;
+ for (;;) {
+ struct ntfs_stream *ns = p;
+ const wchar_t *name;
+ size_t name_nchars;
+
+ if (use_stream(file, stream, &name, &name_nchars)) {
+ ni->first_stream_offset = first_stream_offset;
+ ni->num_streams++;
+ if (name_nchars == 0)
+ ni->starting_lcn = load_starting_lcn(stream);
+ ns->size = stream->EndOfFile;
+ wmemcpy(ns->name, name, name_nchars);
+ ns->name[name_nchars] = L'\0';
+ p += ALIGN(sizeof(struct ntfs_stream) +
+ (name_nchars + 1) * sizeof(wchar_t), 8);
+ }
+ if (stream->NextStreamOffset == 0)
+ break;
+ stream = (const void *)stream + stream->NextStreamOffset;
+ }
+ return p;
+}
+
+/* Process the information for a file given by FSCTL_QUERY_FILE_LAYOUT. */
+static int
+load_one_file(const FILE_LAYOUT_ENTRY *file, struct ntfs_inode_map *inode_map)
+{
+ const FILE_LAYOUT_INFO_ENTRY *info =
+ (const void *)file + file->ExtraInfoOffset;
+ size_t inode_size;
+ struct ntfs_inode *ni;
+ size_t n;
+ int ret;
+ void *p;
+
+ inode_size = ALIGN(sizeof(struct ntfs_inode), 8);
+
+ /* The root file should have no names, and all other files should have
+ * at least one name. But just in case, we ignore the names of the root
+ * file, and we ignore any non-root file with no names. */
+ if (!NTFS_IS_ROOT_FILE(file->FileReferenceNumber)) {
+ if (file->FirstNameOffset == 0)
+ return 0;
+ ret = validate_names_and_compute_total_length(file, &n);
+ if (ret)
+ return ret;
+ inode_size += n;
+ }
+
+ if (file_has_streams(file)) {
+ ret = validate_streams_and_compute_total_length(file, &n);
+ if (ret)
+ return ret;
+ inode_size += n;
+ }
+
+ /* To save memory, we allocate the ntfs_dentry's and ntfs_stream's in
+ * the same memory block as their ntfs_inode. */
+ ni = CALLOC(1, inode_size);
+ if (!ni)
+ return WIMLIB_ERR_NOMEM;
+
+ ni->ino = file->FileReferenceNumber;
+ ni->attributes = info->BasicInformation.FileAttributes;
+ ni->creation_time = info->BasicInformation.CreationTime;
+ ni->last_write_time = info->BasicInformation.LastWriteTime;
+ ni->last_access_time = info->BasicInformation.LastAccessTime;
+ ni->security_id = info->SecurityId;
+
+ p = FIRST_DENTRY(ni);
+
+ if (!NTFS_IS_ROOT_FILE(file->FileReferenceNumber))
+ p = load_name_information(file, ni, p);
+
+ if (file_has_streams(file))
+ p = load_stream_information(file, ni, p);
+
+ wimlib_assert((u8 *)p - (u8 *)ni == inode_size);
+
+ ntfs_inode_map_add_inode(inode_map, ni);
+ return 0;
+}
+
+/*
+ * Quickly find all files on an NTFS volume by using FSCTL_QUERY_FILE_LAYOUT to
+ * scan the MFT. The NTFS volume is specified by the NT namespace path @path.
+ * For each file, allocate an 'ntfs_inode' structure for each file and add it to
+ * 'inode_map' keyed by inode number. Include NTFS special files such as
+ * $Bitmap (they will be removed later).
+ */
+static int
+load_files_from_mft(const wchar_t *path, struct ntfs_inode_map *inode_map)
+{
+ HANDLE h = NULL;
+ QUERY_FILE_LAYOUT_INPUT in = (QUERY_FILE_LAYOUT_INPUT) {
+ .NumberOfPairs = 0,
+ .Flags = QUERY_FILE_LAYOUT_RESTART |
+ QUERY_FILE_LAYOUT_INCLUDE_NAMES |
+ QUERY_FILE_LAYOUT_INCLUDE_STREAMS |
+ QUERY_FILE_LAYOUT_INCLUDE_EXTENTS |
+ QUERY_FILE_LAYOUT_INCLUDE_EXTRA_INFO |
+ QUERY_FILE_LAYOUT_INCLUDE_STREAMS_WITH_NO_CLUSTERS_ALLOCATED,
+ .FilterType = QUERY_FILE_LAYOUT_FILTER_TYPE_NONE,
+ };
+ const size_t outsize = 32768;
+ QUERY_FILE_LAYOUT_OUTPUT *out = NULL;
+ int ret;
+ NTSTATUS status;
+
+ status = winnt_open(path, wcslen(path),
+ FILE_READ_DATA | FILE_READ_ATTRIBUTES, &h);
+ if (!NT_SUCCESS(status)) {
+ ret = -1; /* Silently try standard recursive scan instead */
+ goto out;
+ }
+
+ out = MALLOC(outsize);
+ if (!out) {
+ ret = WIMLIB_ERR_NOMEM;
+ goto out;
+ }
+
+ while (NT_SUCCESS(status = winnt_fsctl(h, FSCTL_QUERY_FILE_LAYOUT,
+ &in, sizeof(in),
+ out, outsize, NULL)))
+ {
+ const FILE_LAYOUT_ENTRY *file =
+ (const void *)out + out->FirstFileOffset;
+ for (;;) {
+ ret = load_one_file(file, inode_map);
+ if (ret)
+ goto out;
+ if (file->NextFileOffset == 0)
+ break;
+ file = (const void *)file + file->NextFileOffset;
+ }
+ in.Flags &= ~QUERY_FILE_LAYOUT_RESTART;
+ }
+
+ /* Normally, FSCTL_QUERY_FILE_LAYOUT fails with STATUS_END_OF_FILE after
+ * all files have been enumerated. */
+ if (status != STATUS_END_OF_FILE) {
+ if (status == STATUS_INVALID_DEVICE_REQUEST /* old OS */ ||
+ status == STATUS_INVALID_PARAMETER /* not root directory */ ) {
+ /* Silently try standard recursive scan instead */
+ ret = -1;
+ } else {
+ winnt_error(status,
+ L"Error enumerating files on volume \"%ls\"",
+ path);
+ /* Try standard recursive scan instead */
+ ret = WIMLIB_ERR_UNSUPPORTED;
+ }
+ goto out;
+ }
+ ret = 0;
+out:
+ FREE(out);
+ NtClose(h);
+ return ret;
+}
+
+/* Build the list of child dentries for each inode in @map. This is done by
+ * iterating through each name of each inode and adding it to its parent's
+ * children list. Note that every name should have a parent, i.e. should belong
+ * to some directory. The root directory does not have any names. */
+static int
+build_children_lists(struct ntfs_inode_map *map, struct ntfs_inode **root_ret)
+{
+ struct ntfs_inode *ni;
+
+ avl_tree_for_each_in_order(ni, map->root, struct ntfs_inode, index_node)
+ {
+ struct ntfs_dentry *nd;
+ u32 n;
+
+ if (NTFS_IS_ROOT_FILE(ni->ino)) {
+ *root_ret = ni;
+ continue;
+ }
+
+ n = ni->num_aliases;
+ nd = FIRST_DENTRY(ni);
+ for (;;) {
+ struct ntfs_inode *parent;
+
+ parent = ntfs_inode_map_lookup(map, nd->parent_ino);
+ if (unlikely(!parent)) {
+ ERROR("Parent inode 0x%016"PRIx64" of"
+ "directory entry \"%ls\" (inode "
+ "0x%016"PRIx64") was missing from the "
+ "MFT listing!",
+ nd->parent_ino, nd->name, ni->ino);
+ return WIMLIB_ERR_UNSUPPORTED;
+ }
+ nd->next_child = parent->first_child;
+ parent->first_child = nd;
+ if (!--n)
+ break;
+ nd = NEXT_DENTRY(nd);
+ }
+ }
+ return 0;
+}
+
+struct security_map_node {
+ struct avl_tree_node index_node;
+ u32 disk_security_id;
+ u32 wim_security_id;
+};
+
+/* Map from disk security IDs to WIM security IDs */
+struct security_map {
+ struct avl_tree_node *root;
+};
+
+#define SECURITY_MAP_NODE(node) \
+ avl_tree_entry((node), struct security_map_node, index_node)
+
+static int
+_avl_cmp_security_map_nodes(const struct avl_tree_node *node1,
+ const struct avl_tree_node *node2)
+{
+ return cmp_u32(SECURITY_MAP_NODE(node1)->disk_security_id,
+ SECURITY_MAP_NODE(node2)->disk_security_id);
+}
+
+static s32
+security_map_lookup(struct security_map *map, u32 disk_security_id)
+{
+ struct security_map_node tmp;
+ const struct avl_tree_node *res;
+
+ if (disk_security_id == 0) /* No on-disk security ID; uncacheable */
+ return -1;
+
+ tmp.disk_security_id = disk_security_id;
+ res = avl_tree_lookup_node(map->root, &tmp.index_node,
+ _avl_cmp_security_map_nodes);
+ if (!res)
+ return -1;
+ return SECURITY_MAP_NODE(res)->wim_security_id;
+}
+
+static int
+security_map_insert(struct security_map *map, u32 disk_security_id,
+ u32 wim_security_id)
+{
+ struct security_map_node *node;
+
+ if (disk_security_id == 0) /* No on-disk security ID; uncacheable */
+ return 0;
+
+ node = MALLOC(sizeof(*node));
+ if (!node)
+ return WIMLIB_ERR_NOMEM;
+
+ node->disk_security_id = disk_security_id;
+ node->wim_security_id = wim_security_id;
+ avl_tree_insert(&map->root, &node->index_node,
+ _avl_cmp_security_map_nodes);
+ return 0;
+}
+
+static void
+security_map_destroy(struct security_map *map)
+{
+ struct security_map_node *node;
+
+ avl_tree_for_each_in_postorder(node, map->root,
+ struct security_map_node, index_node)
+ FREE(node);
+}
+
+/*
+ * Turn our temporary NTFS structures into the final WIM structures:
+ *
+ * ntfs_inode => wim_inode
+ * ntfs_dentry => wim_dentry
+ * ntfs_stream => wim_inode_stream
+ *
+ * This also handles things such as exclusions and issuing progress messages.
+ * It's similar to winnt_build_dentry_tree_recursive(), but this is much faster
+ * because almost all information we need is already loaded in memory in the
+ * ntfs_* structures. However, in some cases we still fall back to
+ * winnt_build_dentry_tree_recursive() and/or opening the file.
+ */
+static int
+generate_wim_structures_recursive(struct wim_dentry **root_ret,
+ wchar_t *path, size_t path_nchars,
+ const wchar_t *filename, bool is_primary_name,
+ struct ntfs_inode *ni,
+ struct winnt_scan_ctx *ctx,
+ struct ntfs_inode_map *inode_map,
+ struct security_map *security_map)
+{
+ int ret = 0;
+ struct wim_dentry *root = NULL;
+ struct wim_inode *inode = NULL;
+ const struct ntfs_stream *ns;
+
+ /* Completely ignore NTFS special files. */
+ if (NTFS_IS_SPECIAL_FILE(ni->ino))
+ goto out;
+
+ /* Fall back to a recursive scan for unhandled cases. Reparse points,
+ * in particular, can't be properly handled here because a commonly used
+ * filter driver (WOF) hides reparse points from regular filesystem APIs
+ * but not from FSCTL_QUERY_FILE_LAYOUT. */
+ if (ni->attributes & (FILE_ATTRIBUTE_REPARSE_POINT |
+ FILE_ATTRIBUTE_ENCRYPTED))
+ {
+ ret = winnt_build_dentry_tree_recursive(&root,
+ NULL,
+ path,
+ path_nchars,
+ path,
+ path_nchars,
+ filename,
+ ctx);
+ goto out;
+ }
+
+ /* Test for exclusion based on path. */
+ ret = try_exclude(path, ctx->params);
+ if (unlikely(ret < 0)) /* Excluded? */
+ goto out_progress;
+ if (unlikely(ret > 0)) /* Error? */
+ goto out;
+
+ /* Create the WIM dentry and possibly a new WIM inode */
+ ret = inode_table_new_dentry(ctx->params->inode_table, filename,
+ ni->ino, ctx->params->capture_root_dev,
+ false, &root);
+ if (ret)
+ goto out;
+
+ inode = root->d_inode;
+
+ /* Set the short name if needed. */
+ if (is_primary_name && *ni->short_name) {
+ size_t nbytes = wcslen(ni->short_name) * sizeof(wchar_t);
+ root->d_short_name = memdup(ni->short_name,
+ nbytes + sizeof(wchar_t));
+ if (!root->d_short_name) {
+ ret = WIMLIB_ERR_NOMEM;
+ goto out;
+ }
+ root->d_short_name_nbytes = nbytes;
+ }
+
+ if (inode->i_nlink > 1) { /* Already seen this inode? */
+ ret = 0;
+ goto out_progress;
+ }
+
+ /* The file attributes and timestamps were cached from the MFT. */
+ inode->i_attributes = ni->attributes;
+ inode->i_creation_time = ni->creation_time;
+ inode->i_last_write_time = ni->last_write_time;
+ inode->i_last_access_time = ni->last_access_time;
+
+ /* Set the security descriptor if needed. */
+ if (!(ctx->params->add_flags & WIMLIB_ADD_FLAG_NO_ACLS)) {
+ /* Look up the WIM security ID that corresponds to the on-disk
+ * security ID. */
+ s32 wim_security_id =
+ security_map_lookup(security_map, ni->security_id);
+ if (likely(wim_security_id >= 0)) {
+ /* The mapping for this security ID is already cached.*/
+ inode->i_security_id = wim_security_id;
+ } else {
+ HANDLE h;
+ NTSTATUS status;
+
+ /* Create a mapping for this security ID and insert it
+ * into the security map. */
+
+ status = winnt_open(path, path_nchars,
+ READ_CONTROL |
+ ACCESS_SYSTEM_SECURITY, &h);
+ if (!NT_SUCCESS(status)) {
+ winnt_error(status, L"Can't open \"%ls\" to "
+ "read security descriptor",
+ printable_path(path));
+ ret = WIMLIB_ERR_OPEN;
+ goto out;
+ }
+ ret = winnt_load_security_descriptor(h, inode, path, ctx);
+ NtClose(h);
+ if (ret)
+ goto out;
+
+ ret = security_map_insert(security_map, ni->security_id,
+ inode->i_security_id);
+ if (ret)
+ goto out;
+ }
+ }
+
+ /* Add data streams based on the cached information from the MFT. */
+ ns = FIRST_STREAM(ni);
+ for (u32 i = 0; i < ni->num_streams; i++) {
+ struct windows_file *windows_file;
+
+ /* Reference the stream by path if it's a named data stream, or
+ * if the volume doesn't support "open by file ID", or if the
+ * application hasn't explicitly opted in to "open by file ID".
+ * Otherwise, only save the inode number (file ID). */
+ if (*ns->name ||
+ !(ctx->vol_flags & FILE_SUPPORTS_OPEN_BY_FILE_ID) ||
+ !(ctx->params->add_flags & WIMLIB_ADD_FLAG_FILE_PATHS_UNNEEDED))
+ {
+ windows_file = alloc_windows_file(path,
+ path_nchars,
+ ns->name,
+ wcslen(ns->name),
+ ctx->snapshot,
+ false);
+ } else {
+ windows_file = alloc_windows_file_for_file_id(ni->ino,
+ path,
+ ctx->params->capture_root_nchars + 1,
+ ctx->snapshot);
+ }
+
+ ret = add_stream(inode, windows_file, ns->size,
+ STREAM_TYPE_DATA, ns->name,
+ ctx->params->unhashed_blobs);
+ if (ret)
+ goto out;
+ ns = NEXT_STREAM(ns);
+ }
+
+ set_sort_key(inode, ni->starting_lcn);
+
+ /* If processing a directory, then recurse to its children. In this
+ * version there is no need to go to disk, as we already have the list
+ * of children cached from the MFT. */
+ if (inode_is_directory(inode)) {
+ const struct ntfs_dentry *nd = ni->first_child;
+
+ while (nd != NULL) {
+ const size_t name_len = wcslen(nd->name);
+ wchar_t *p = path + path_nchars;
+ struct wim_dentry *child;
+ const struct ntfs_dentry *next = nd->next_child;
+
+ if (*(p - 1) != L'\\')
+ *p++ = L'\\';
+ p = wmempcpy(p, nd->name, name_len);
+ *p = '\0';
+
+ ret = generate_wim_structures_recursive(
+ &child,
+ path,
+ p - path,
+ nd->name,
+ nd->is_primary,
+ (void *)nd - nd->offset_from_inode,
+ ctx,
+ inode_map,
+ security_map);
+
+ path[path_nchars] = L'\0';
+
+ if (ret)
+ goto out;
+
+ attach_scanned_tree(root, child, ctx->params->blob_table);
+ nd = next;
+ }
+ }
+
+out_progress:
+ ctx->params->progress.scan.cur_path = path;
+ if (likely(root))
+ ret = do_capture_progress(ctx->params, WIMLIB_SCAN_DENTRY_OK, inode);
+ else
+ ret = do_capture_progress(ctx->params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
+out:
+ if (--ni->num_aliases == 0) {
+ /* Memory usage optimization: when we don't need the ntfs_inode
+ * (and its names and streams) anymore, free it. */
+ ntfs_inode_map_remove(inode_map, ni);
+ }
+ if (unlikely(ret)) {
+ free_dentry_tree(root, ctx->params->blob_table);
+ root = NULL;
+ }
+ *root_ret = root;
+ return ret;
+}
+
+static int
+winnt_build_dentry_tree_fast(struct wim_dentry **root_ret, wchar_t *path,
+ size_t path_nchars, struct winnt_scan_ctx *ctx)
+{
+ struct ntfs_inode_map inode_map = { .root = NULL };
+ struct security_map security_map = { .root = NULL };
+ struct ntfs_inode *root = NULL;
+ bool adjust_path;
+ int ret;
+
+ adjust_path = (path[path_nchars - 1] == L'\\');
+ if (adjust_path)
+ path[path_nchars - 1] = L'\0';
+
+ ret = load_files_from_mft(path, &inode_map);
+
+ if (adjust_path)
+ path[path_nchars - 1] = L'\\';
+
+ if (ret)
+ goto out;
+
+ ret = build_children_lists(&inode_map, &root);
+ if (ret)
+ goto out;
+
+ if (!root) {
+ ERROR("The MFT listing for volume \"%ls\" did not include a "
+ "root directory!", path);
+ ret = WIMLIB_ERR_UNSUPPORTED;
+ goto out;
+ }
+
+ root->num_aliases = 1;
+
+ ret = generate_wim_structures_recursive(root_ret, path, path_nchars,
+ L"", false, root, ctx,
+ &inode_map, &security_map);
+out:
+ ntfs_inode_map_destroy(&inode_map);
+ security_map_destroy(&security_map);
+ return ret;
+}
+
+#endif /* ENABLE_FAST_MFT_SCAN */
+
+/*----------------------------------------------------------------------------*
+ * Entry point for directory tree scans on Windows *
+ *----------------------------------------------------------------------------*/
+