Pay attention to PrepopulateList during WIMBoot application
[wimlib] / src / textfile.c
1 /*
2  * textfile.c
3  */
4
5 /*
6  * Copyright (C) 2014 Eric Biggers
7  *
8  * This file is part of wimlib, a library for working with WIM files.
9  *
10  * wimlib is free software; you can redistribute it and/or modify it under the
11  * terms of the GNU General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option)
13  * any later version.
14  *
15  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17  * A PARTICULAR PURPOSE. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with wimlib; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #ifdef HAVE_CONFIG_H
25 #  include "config.h"
26 #endif
27
28 #include "wimlib/assert.h"
29 #include "wimlib/encoding.h"
30 #include "wimlib/error.h"
31 #include "wimlib/file_io.h"
32 #include "wimlib/textfile.h"
33 #include "wimlib/util.h"
34
35 #include <ctype.h>
36 #include <errno.h>
37 #include <fcntl.h>
38 #include <string.h>
39 #include <sys/stat.h>
40 #include <unistd.h>
41
42 static int
43 read_file_contents(const tchar *path, void **buf_ret, size_t *bufsize_ret)
44 {
45         int raw_fd;
46         struct filedes fd;
47         struct stat st;
48         void *buf;
49         int ret;
50         int errno_save;
51
52         if (!path || !*path)
53                 return WIMLIB_ERR_INVALID_PARAM;
54
55         raw_fd = topen(path, O_RDONLY | O_BINARY);
56         if (raw_fd < 0) {
57                 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", path);
58                 return WIMLIB_ERR_OPEN;
59         }
60         if (fstat(raw_fd, &st)) {
61                 ERROR_WITH_ERRNO("Can't stat \"%"TS"\"", path);
62                 close(raw_fd);
63                 return WIMLIB_ERR_STAT;
64         }
65         if ((size_t)st.st_size != st.st_size ||
66             (buf = MALLOC(st.st_size)) == NULL)
67         {
68                 close(raw_fd);
69                 ERROR("Not enough memory to read \"%"TS"\"", path);
70                 return WIMLIB_ERR_NOMEM;
71         }
72
73         filedes_init(&fd, raw_fd);
74         ret = full_read(&fd, buf, st.st_size);
75         errno_save = errno;
76         filedes_close(&fd);
77         errno = errno_save;
78         if (ret) {
79                 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", path);
80                 FREE(buf);
81                 return ret;
82         }
83
84         *buf_ret = buf;
85         *bufsize_ret = st.st_size;
86         return 0;
87 }
88
89 static int
90 translate_text_buffer(const u8 *buf_raw, size_t bufsize_raw,
91                       tchar **tstr_ret, size_t *tstr_nchars_ret)
92 {
93         size_t offset_raw;
94         bool utf8;
95         tchar *buf_tstr;
96         size_t bufsize_tstr;
97         int ret;
98
99         /* Guess the encoding: UTF-8 or UTF-16LE.  (Something weirder and you're
100          * out of luck, sorry...)  */
101         if (bufsize_raw >= 2 &&
102             buf_raw[0] == 0xFF &&
103             buf_raw[1] == 0xFE)
104         {
105                 utf8 = false;
106                 offset_raw = 2;
107         }
108         else if (bufsize_raw >= 2 &&
109                  buf_raw[0] <= 0x7F &&
110                  buf_raw[1] == 0x00)
111         {
112                 utf8 = false;
113                 offset_raw = 0;
114         }
115         else if (bufsize_raw >= 3 &&
116                  buf_raw[0] == 0xEF &&
117                  buf_raw[1] == 0xBB &&
118                  buf_raw[2] == 0xBF)
119         {
120                 utf8 = true;
121                 offset_raw = 3;
122         }
123         else
124         {
125                 utf8 = true;
126                 offset_raw = 0;
127         }
128
129         if (utf8) {
130                 ret = utf8_to_tstr((const char *)(buf_raw + offset_raw),
131                                    bufsize_raw - offset_raw,
132                                    &buf_tstr, &bufsize_tstr);
133         } else {
134         #if TCHAR_IS_UTF16LE
135                 bufsize_tstr = bufsize_raw - offset_raw;
136                 buf_tstr = MALLOC(bufsize_tstr + 2);
137                 if (buf_tstr) {
138                         memcpy(buf_tstr, buf_raw + offset_raw, bufsize_tstr);
139                         ((u8*)buf_tstr)[bufsize_tstr + 0] = 0;
140                         ((u8*)buf_tstr)[bufsize_tstr + 1] = 0;
141                         ret = 0;
142                 } else {
143                         ret = WIMLIB_ERR_NOMEM;
144                 }
145         #else
146                 ret = utf16le_to_tstr((const utf16lechar *)(buf_raw + offset_raw),
147                                       bufsize_raw - offset_raw,
148                                       &buf_tstr, &bufsize_tstr);
149         #endif
150         }
151         if (ret)
152                 return ret;
153
154         *tstr_ret = buf_tstr;
155         *tstr_nchars_ret = bufsize_tstr / sizeof(tchar);
156         return 0;
157 }
158
159 static int
160 string_set_append(struct string_set *set, tchar *str)
161 {
162         size_t num_alloc_strings = set->num_alloc_strings;
163
164         if (set->num_strings == num_alloc_strings) {
165                 tchar **new_strings;
166
167                 num_alloc_strings = max(num_alloc_strings * 3 / 2,
168                                         num_alloc_strings + 4);
169                 new_strings = REALLOC(set->strings,
170                                       sizeof(set->strings[0]) * num_alloc_strings);
171                 if (!new_strings)
172                         return WIMLIB_ERR_NOMEM;
173                 set->strings = new_strings;
174                 set->num_alloc_strings = num_alloc_strings;
175         }
176         set->strings[set->num_strings++] = str;
177         return 0;
178 }
179
180 #define NOT_IN_SECTION          -1
181 #define IN_UNKNOWN_SECTION      -2
182
183 static int
184 parse_text_file(const tchar *path, tchar *buf, size_t buflen,
185                 const struct text_file_section *pos_sections,
186                 int num_pos_sections, int flags, line_mangle_t mangle_line)
187 {
188         int current_section = NOT_IN_SECTION;
189         bool have_named_sections = false;
190         tchar *p;
191         tchar *nl;
192         unsigned long line_no = 1;
193
194         for (int i = 0; i < num_pos_sections; i++) {
195                 if (*pos_sections[i].name)
196                         have_named_sections = true;
197                 else
198                         current_section = i;
199         }
200
201         for (p = buf; p != buf + buflen; p = nl + 1, line_no++) {
202                 tchar *line_begin, *line_end;
203                 size_t line_len;
204                 int ret;
205
206                 nl = tmemchr(p, T('\n'), buf + buflen - p);
207                 if (!nl)
208                         break;
209
210                 line_begin = p;
211                 line_end = nl;
212
213                 /* Ignore leading whitespace.  */
214                 while (line_begin < nl && istspace(*line_begin))
215                         line_begin++;
216
217                 /* Ignore trailing whitespace.  */
218                 while (line_end > line_begin && istspace(*(line_end - 1)))
219                         line_end--;
220
221                 line_len = line_end - line_begin;
222
223                 /* Ignore comments and empty lines.  */
224                 if (line_len == 0 || *line_begin == T(';') || *line_begin == T('#'))
225                         continue;
226
227                 line_begin[line_len] = T('\0');
228
229                 /* Check for beginning of new section.  */
230                 if (line_begin[0] == T('[') &&
231                     line_begin[line_len - 1] == T(']') &&
232                     have_named_sections)
233                 {
234                         line_begin[line_len - 1] = T('\0');
235                         current_section = IN_UNKNOWN_SECTION;
236                         for (int i = 0; i < num_pos_sections; i++) {
237                                 if (!tstrcmp(line_begin + 1,
238                                              pos_sections[i].name))
239                                 {
240                                         current_section = i;
241                                         break;
242                                 }
243                         }
244                         line_begin[line_len - 1] = T(']');
245                         if (current_section < 0) {
246                                 if (!(flags & LOAD_TEXT_FILE_NO_WARNINGS)) {
247                                         WARNING("%"TS":%lu: Unrecognized section \"%"TS"\"",
248                                                 path, line_no, line_begin);
249                                 }
250                         }
251                         continue;
252                 }
253
254                 if (current_section < 0) {
255                         if (current_section == NOT_IN_SECTION) {
256                                 if (!(flags & LOAD_TEXT_FILE_NO_WARNINGS)) {
257                                         WARNING("%"TS":%lu: Not in a bracketed section!",
258                                                 path, line_no);
259                                 }
260                         }
261                         continue;
262                 }
263
264                 if (flags & LOAD_TEXT_FILE_REMOVE_QUOTES) {
265                         if (line_begin[0] == T('"') || line_begin[0] == T('\'')) {
266                                 tchar quote = line_begin[0];
267                                 if (line_len >= 2 &&
268                                     line_begin[line_len - 1] == quote)
269                                 {
270                                         line_begin++;
271                                         line_len -= 2;
272                                         line_begin[line_len] = T('\0');
273                                 }
274                         }
275                 }
276
277                 if (mangle_line) {
278                         ret = (*mangle_line)(line_begin, path, line_no);
279                         if (ret)
280                                 return ret;
281                 }
282
283                 ret = string_set_append(pos_sections[current_section].strings,
284                                         line_begin);
285                 if (ret)
286                         return ret;
287         }
288         return 0;
289 }
290
291 /**
292  * do_load_text_file -
293  *
294  * Read and parse lines from a text file from an on-disk file or a buffer.
295  * The file may contain sections, like in an INI file.
296  *
297  * @path
298  *      Path to the file on disk to read, or a dummy name for the buffer.
299  * @buf
300  *      If NULL, the data will be read from the @path file.  Otherwise the data
301  *      will be read from this buffer, which must be newline-terminated.
302  * @buflen
303  *      Length of buffer in bytes; ignored if @buf is NULL.
304  * @buf_ret
305  *      On success, a pointer to a buffer backing the parsed lines is stored
306  *      here.  If @buf is not NULL, this will be @buf.  Otherwise, this will be
307  *      an allocated buffer that must be freed when finished with the lines.
308  * @pos_sections
309  *      Specifications of allowed sections in the file.  Each such specification
310  *      consists of the name of the section (e.g. [ExclusionList], like in the
311  *      INI file format), along with a pointer to the list of lines parsed for
312  *      that section.  Use an empty name to indicate the destination of lines
313  *      not in any section.
314  * @num_pos_sections
315  *      Length of @pos_sections array.
316  * @flags
317  *      LOAD_TEXT_FILE_REMOVE_QUOTES or 0.
318  * @mangle_line
319  *      Optional callback to modify each line being read.
320  *
321  * Returns 0 on success or a positive error code on failure.
322  *
323  * Unknown sections are ignored (warning printed).
324  */
325 int
326 do_load_text_file(const tchar *path,
327                   const void *buf, size_t bufsize,
328                   void **mem_ret,
329                   const struct text_file_section *pos_sections,
330                   int num_pos_sections,
331                   int flags,
332                   line_mangle_t mangle_line)
333 {
334         int ret;
335         bool pathmode = (buf == NULL);
336         tchar *tstr;
337         size_t tstr_nchars;
338
339         if (pathmode) {
340                 ret = read_file_contents(path, (void **)&buf, &bufsize);
341                 if (ret)
342                         return ret;
343         }
344
345         ret = translate_text_buffer(buf, bufsize, &tstr, &tstr_nchars);
346         if (pathmode)
347                 FREE((void *)buf);
348         if (ret)
349                 return ret;
350
351         tstr[tstr_nchars++] = T('\n');
352
353         ret = parse_text_file(path, tstr, tstr_nchars, pos_sections,
354                               num_pos_sections, flags, mangle_line);
355         if (ret) {
356                 for (int i = 0; i < num_pos_sections; i++)
357                         FREE(pos_sections[i].strings->strings);
358                 FREE(tstr);
359                 return ret;
360         }
361
362         *mem_ret = tstr;
363         return 0;
364 }