read_utf8_file_contents(): Move check for BOM here
[wimlib] / src / pathlist.c
1 /*
2  * pathlist.c
3  *
4  * Utility function for reading path list files.
5  */
6
7 /*
8  * Copyright (C) 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option)
15  * any later version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with wimlib; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #  include "config.h"
28 #endif
29
30 #include "wimlib/encoding.h"
31 #include "wimlib/error.h"
32 #include "wimlib/file_io.h"
33 #include "wimlib/pathlist.h"
34 #include "wimlib/util.h"
35
36 #include <ctype.h>
37 #include <errno.h>
38 #include <fcntl.h>
39 #include <string.h>
40 #include <sys/stat.h>
41 #include <unistd.h>
42
43 static int
44 read_file_contents(const tchar *path, char **buf_ret, size_t *bufsize_ret)
45 {
46         int raw_fd;
47         struct filedes fd;
48         struct stat st;
49         void *buf;
50         int ret;
51         int errno_save;
52
53         raw_fd = topen(path, O_RDONLY | O_BINARY);
54         if (raw_fd < 0) {
55                 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", path);
56                 return WIMLIB_ERR_OPEN;
57         }
58         if (fstat(raw_fd, &st)) {
59                 ERROR_WITH_ERRNO("Can't stat \"%"TS"\"", path);
60                 close(raw_fd);
61                 return WIMLIB_ERR_STAT;
62         }
63         if ((size_t)st.st_size != st.st_size ||
64             (buf = MALLOC(st.st_size)) == NULL)
65         {
66                 close(raw_fd);
67                 ERROR("Not enough memory to read \"%"TS"\"", path);
68                 return WIMLIB_ERR_NOMEM;
69         }
70
71         filedes_init(&fd, raw_fd);
72         ret = full_read(&fd, buf, st.st_size);
73         errno_save = errno;
74         filedes_close(&fd);
75         errno = errno_save;
76         if (ret) {
77                 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", path);
78                 FREE(buf);
79                 return ret;
80         }
81
82         *buf_ret = buf;
83         *bufsize_ret = st.st_size;
84         return 0;
85 }
86
87 static int
88 read_utf8_file_contents(const tchar *path, tchar **buf_ret, size_t *buflen_ret)
89 {
90         int ret;
91         char *buf_utf8;
92         size_t bufsize_utf8;
93         size_t offset_utf8;
94         tchar *buf_tstr;
95         size_t bufsize_tstr;
96
97         ret = read_file_contents(path, &buf_utf8, &bufsize_utf8);
98         if (ret)
99                 return ret;
100
101         /* Ignore UTF-8 BOM.  */
102         if (bufsize_utf8 >= 3 && (u8)buf_utf8[0] == 0xef &&
103             (u8)buf_utf8[1] == 0xbb && (u8)buf_utf8[2] == 0xbf)
104                 offset_utf8 = 3;
105         else
106                 offset_utf8 = 0;
107
108         ret = utf8_to_tstr(buf_utf8 + offset_utf8, bufsize_utf8 - offset_utf8,
109                            &buf_tstr, &bufsize_tstr);
110         FREE(buf_utf8);
111         if (ret)
112                 return ret;
113
114         *buf_ret = buf_tstr;
115         *buflen_ret = bufsize_tstr / sizeof(tchar);
116         return 0;
117 }
118
119 static int
120 parse_path_list_file(tchar *buf, size_t buflen,
121                      tchar ***paths_ret, size_t *num_paths_ret)
122 {
123         tchar **paths = NULL;
124         size_t num_paths = 0;
125         size_t num_alloc_paths = 0;
126         tchar *nl;
127         tchar *p;
128
129         for (p = buf; p != buf + buflen; p = nl + 1) {
130                 tchar *line_begin, *line_end;
131                 size_t line_len;
132
133                 nl = tmemchr(p, T('\n'), buf + buflen - p);
134                 if (nl == NULL)
135                         break;
136
137                 line_begin = p;
138                 line_end = nl;
139
140                 /* Ignore leading whitespace.  */
141                 while (line_begin < nl && istspace(*line_begin))
142                         line_begin++;
143
144                 /* Ignore trailing whitespace.  */
145                 while (line_end > line_begin && istspace(*(line_end - 1)))
146                         line_end--;
147
148                 line_len = line_end - line_begin;
149
150                 /* Ignore comments and empty lines.  */
151                 if (line_len == 0 || *line_begin == T(';'))
152                         continue;
153
154                 if (num_paths == num_alloc_paths) {
155                         tchar **new_paths;
156                         size_t new_num_alloc_paths = max(num_alloc_paths + 8,
157                                                          num_alloc_paths * 3 / 2);
158
159                         new_paths = REALLOC(paths, new_num_alloc_paths *
160                                                    sizeof(paths[0]));
161                         if (new_paths == NULL)
162                                 goto oom;
163                         paths = new_paths;
164                         num_alloc_paths = new_num_alloc_paths;
165                 }
166
167                 *line_end = T('\0');
168                 paths[num_paths++] = line_begin;
169         }
170
171         *paths_ret = paths;
172         *num_paths_ret = num_paths;
173         return 0;
174
175 oom:
176         FREE(paths);
177         return WIMLIB_ERR_NOMEM;
178 }
179
180 int
181 read_path_list_file(const tchar *listfile,
182                     tchar ***paths_ret, size_t *num_paths_ret,
183                     void **mem_ret)
184 {
185         int ret;
186         tchar *buf;
187         size_t buflen;
188
189         ret = read_utf8_file_contents(listfile, &buf, &buflen);
190         if (ret)
191                 return ret;
192
193         buf[buflen++] = T('\n');
194
195         ret = parse_path_list_file(buf, buflen, paths_ret, num_paths_ret);
196         if (ret) {
197                 FREE(buf);
198                 return ret;
199         }
200         *mem_ret = buf;
201         return 0;
202 }