a611bba82884d2fbd39ce40e47ce0ac624a45c10
[wimlib] / src / pathlist.c
1 /*
2  * pathlist.c
3  *
4  * Utility function for reading path list files.
5  */
6
7 /*
8  * Copyright (C) 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option)
15  * any later version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with wimlib; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #  include "config.h"
28 #endif
29
30 #include "wimlib/encoding.h"
31 #include "wimlib/error.h"
32 #include "wimlib/file_io.h"
33 #include "wimlib/pathlist.h"
34 #include "wimlib/util.h"
35
36 #include <ctype.h>
37 #include <errno.h>
38 #include <fcntl.h>
39 #include <string.h>
40 #include <sys/stat.h>
41 #include <unistd.h>
42
43 static int
44 read_file_contents(const tchar *path, char **buf_ret, size_t *bufsize_ret)
45 {
46         int raw_fd;
47         struct filedes fd;
48         struct stat st;
49         void *buf;
50         int ret;
51         int errno_save;
52
53         raw_fd = topen(path, O_RDONLY | O_BINARY);
54         if (raw_fd < 0) {
55                 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", path);
56                 return WIMLIB_ERR_OPEN;
57         }
58         if (fstat(raw_fd, &st)) {
59                 ERROR_WITH_ERRNO("Can't stat \"%"TS"\"", path);
60                 close(raw_fd);
61                 return WIMLIB_ERR_STAT;
62         }
63         if ((size_t)st.st_size != st.st_size ||
64             (buf = MALLOC(st.st_size)) == NULL)
65         {
66                 close(raw_fd);
67                 ERROR("Not enough memory to read \"%"TS"\"", path);
68                 return WIMLIB_ERR_NOMEM;
69         }
70
71         filedes_init(&fd, raw_fd);
72         ret = full_read(&fd, buf, st.st_size);
73         errno_save = errno;
74         filedes_close(&fd);
75         errno = errno_save;
76         if (ret) {
77                 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", path);
78                 FREE(buf);
79                 return ret;
80         }
81
82         *buf_ret = buf;
83         *bufsize_ret = st.st_size;
84         return 0;
85 }
86
87 static int
88 read_utf8_file_contents(const tchar *path, tchar **buf_ret, size_t *buflen_ret)
89 {
90         int ret;
91         char *buf_utf8;
92         size_t bufsize_utf8;
93         tchar *buf_tstr;
94         size_t bufsize_tstr;
95
96         ret = read_file_contents(path, &buf_utf8, &bufsize_utf8);
97         if (ret)
98                 return ret;
99
100         ret = utf8_to_tstr(buf_utf8, bufsize_utf8, &buf_tstr, &bufsize_tstr);
101         FREE(buf_utf8);
102         if (ret)
103                 return ret;
104
105         *buf_ret = buf_tstr;
106         *buflen_ret = bufsize_tstr / sizeof(tchar);
107         return 0;
108 }
109
110 static int
111 parse_path_list_file(tchar *buf, size_t buflen,
112                      tchar ***paths_ret, size_t *num_paths_ret)
113 {
114         tchar **paths = NULL;
115         size_t num_paths = 0;
116         size_t num_alloc_paths = 0;
117         tchar *nl;
118         tchar *p;
119
120         for (p = buf; p != buf + buflen; p = nl + 1) {
121                 tchar *line_begin, *line_end;
122                 size_t line_len;
123
124                 nl = tmemchr(p, T('\n'), buf + buflen - p);
125                 if (nl == NULL)
126                         break;
127
128                 line_begin = p;
129                 line_end = nl;
130
131                 /* Ignore UTF-8 BOM.  */
132                 if (nl - line_begin >= 3 && (u8)line_begin[0] == 0xef &&
133                     (u8)line_begin[1] == 0xbb && (u8)line_begin[2] == 0xbf)
134                         line_begin += 3;
135
136                 /* Ignore leading whitespace.  */
137                 while (line_begin < nl && istspace(*line_begin))
138                         line_begin++;
139
140                 /* Ignore trailing whitespace.  */
141                 while (line_end > line_begin && istspace(*(line_end - 1)))
142                         line_end--;
143
144                 line_len = line_end - line_begin;
145
146                 /* Ignore comments and empty lines.  */
147                 if (line_len == 0 || *line_begin == T(';'))
148                         continue;
149
150                 if (num_paths == num_alloc_paths) {
151                         tchar **new_paths;
152                         size_t new_num_alloc_paths = max(num_alloc_paths + 8,
153                                                          num_alloc_paths * 3 / 2);
154
155                         new_paths = REALLOC(paths, new_num_alloc_paths *
156                                                    sizeof(paths[0]));
157                         if (new_paths == NULL)
158                                 goto oom;
159                         paths = new_paths;
160                         num_alloc_paths = new_num_alloc_paths;
161                 }
162
163                 *line_end = T('\0');
164                 paths[num_paths++] = line_begin;
165         }
166
167         *paths_ret = paths;
168         *num_paths_ret = num_paths;
169         return 0;
170
171 oom:
172         FREE(paths);
173         return WIMLIB_ERR_NOMEM;
174 }
175
176 int
177 read_path_list_file(const tchar *listfile,
178                     tchar ***paths_ret, size_t *num_paths_ret,
179                     void **mem_ret)
180 {
181         int ret;
182         tchar *buf;
183         size_t buflen;
184
185         ret = read_utf8_file_contents(listfile, &buf, &buflen);
186         if (ret)
187                 return ret;
188
189         buf[buflen++] = T('\n');
190
191         ret = parse_path_list_file(buf, buflen, paths_ret, num_paths_ret);
192         if (ret) {
193                 FREE(buf);
194                 return ret;
195         }
196         *mem_ret = buf;
197         return 0;
198 }