OILS / pyext / libc.c View on Github | oils.pub

441 lines, 257 significant
1/*
2 * Python interface to libc functions.
3 */
4
5// - Enable GNU extensions in fnmatch.h for extended glob.
6// - It's also apparently needed for wchar.h in combination with Python.
7// https://github.com/python-pillow/Pillow/issues/1850
8// - It's currently hard-coded in pyconfig.h.
9#define _GNU_SOURCE 1
10
11#include <stdarg.h> // va_list, etc.
12#include <stdio.h> // printf
13#include <limits.h>
14#include <wchar.h>
15#include <stdlib.h>
16#include <sys/ioctl.h>
17#include <locale.h>
18#include <fnmatch.h>
19#include <glob.h>
20#include <regex.h>
21
22#include <Python.h>
23
24#include "_build/detected-config.h"
25
// Write a printf-style message plus a trailing newline to stderr.
// Compiles to an empty function unless LIBC_VERBOSE is defined.
static void debug(const char* fmt, ...) {
#ifdef LIBC_VERBOSE
  va_list ap;

  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);

  fputs("\n", stderr);
#endif
}
36
37static PyObject *
38func_realpath(PyObject *self, PyObject *args) {
39 const char *symlink;
40
41 if (!PyArg_ParseTuple(args, "s", &symlink)) {
42 return NULL;
43 }
44 char target[PATH_MAX + 1];
45 char *status = realpath(symlink, target);
46
47 // TODO: Throw exception like IOError here
48 if (status == NULL) {
49 debug("error from realpath()");
50 Py_RETURN_NONE;
51 }
52
53 return PyString_FromString(target);
54}
55
56static PyObject *
57func_fnmatch(PyObject *self, PyObject *args) {
58 const char *pattern;
59 int pattern_len;
60 const char *str;
61 int str_len;
62 int flags = 0;
63
64 if (!PyArg_ParseTuple(args, "s#s#|i", &pattern, &pattern_len, &str, &str_len, &flags)) {
65 return NULL;
66 }
67
68#ifdef FNM_EXTMATCH
69 flags |= FNM_EXTMATCH;
70#endif
71
72 int ret = fnmatch(pattern, str, flags);
73
74 switch (ret) {
75 case 0:
76 debug("matched: %s", str);
77 return PyLong_FromLong(1);
78 break;
79 case FNM_NOMATCH:
80 debug("no match: %s", str);
81 return PyLong_FromLong(0);
82 break;
83 default:
84 debug("other error: %s", str);
85 return PyLong_FromLong(-1);
86 break;
87 }
88}
89
90// error callback to glob()
91//
92// Disabled because of spurious errors. For example, sed -i s/.*// (without
93// quotes) is OK, but it would be treated as a glob, and prints an error if the
94// directory 's' doesn't exist.
95//
96// Bash does its own globbing -- it doesn't use libc. Likewise, I think dash
97// and mksh do their own globbing.
98
// Reports a glob() traversal error on stderr.  Always returns 0, which tells
// glob() to continue rather than abort.
int globerr(const char *path, int errno_) {
  const char *reason = strerror(errno_);

  fprintf(stderr, "globerr: %s: %s\n", path, reason);
  return 0;  // let glob() keep going
}
103
104static PyObject *
105func_glob(PyObject *self, PyObject *args) {
106 const char* pattern;
107 int flags = 0;
108 if (!PyArg_ParseTuple(args, "s|i", &pattern, &flags)) {
109 return NULL;
110 }
111
112 glob_t results;
113 // Hm, it's weird that the first one can't be called with GLOB_APPEND. You
114 // get a segfault.
115 // int flags = GLOB_APPEND;
116 //flags |= GLOB_NOMAGIC;
117 int ret = glob(pattern, flags, NULL, &results);
118
119 const char *err_str = NULL;
120 switch (ret) {
121 case 0: // no error
122 break;
123 case GLOB_ABORTED:
124 err_str = "read error";
125 break;
126 case GLOB_NOMATCH:
127 // No error, because not matching isn't necessarily a problem.
128 // NOTE: This can be turned on to log overaggressive calls to glob().
129 //err_str = "nothing matched";
130 break;
131 case GLOB_NOSPACE:
132 err_str = "no dynamic memory";
133 break;
134 default:
135 err_str = "unknown problem";
136 break;
137 }
138 if (err_str) {
139 //fprintf(stderr, "func_glob: %s: %s\n", pattern, err_str);
140 PyErr_SetString(PyExc_RuntimeError, err_str);
141 return NULL;
142 }
143
144 // http://stackoverflow.com/questions/3512414/does-this-pylist-appendlist-py-buildvalue-leak
145 size_t n = results.gl_pathc;
146 PyObject* matches = PyList_New(n);
147
148 // Print array of results
149 size_t i;
150 for (i = 0; i < n; i++) {
151 //printf("%s\n", results.gl_pathv[i]);
152 PyObject* m = Py_BuildValue("s", results.gl_pathv[i]);
153 PyList_SetItem(matches, i, m);
154 }
155 globfree(&results);
156
157 return matches;
158}
159
160static PyObject *
161func_regex_search(PyObject *self, PyObject *args) {
162 const char* pattern;
163 const char* str;
164 int cflags = 0;
165 int eflags = 0;
166 int pos = 0;
167
168 if (!PyArg_ParseTuple(args, "sisi|i", &pattern, &cflags, &str, &eflags, &pos)) {
169 return NULL;
170 }
171
172 cflags |= REG_EXTENDED;
173 regex_t pat;
174 int status = regcomp(&pat, pattern, cflags);
175 if (status != 0) {
176 char error_desc[50];
177 regerror(status, &pat, error_desc, 50);
178
179 char error_message[80];
180 snprintf(error_message, 80, "Invalid regex %s (%s)", pattern, error_desc);
181
182 PyErr_SetString(PyExc_ValueError, error_message);
183 return NULL;
184 }
185
186 int num_groups = pat.re_nsub + 1;
187 PyObject *ret = PyList_New(num_groups * 2);
188
189 if (ret == NULL) {
190 regfree(&pat);
191 return NULL;
192 }
193
194 regmatch_t *pmatch = (regmatch_t*) malloc(sizeof(regmatch_t) * num_groups);
195 int match = regexec(&pat, str + pos, num_groups, pmatch, eflags);
196 if (match == 0) {
197 int i;
198 for (i = 0; i < num_groups; i++) {
199 int start = pmatch[i].rm_so;
200 if (start != -1) {
201 start += pos;
202 }
203 PyList_SetItem(ret, 2*i, PyInt_FromLong(start));
204
205 int end = pmatch[i].rm_eo;
206 if (end != -1) {
207 end += pos;
208 }
209 PyList_SetItem(ret, 2*i + 1, PyInt_FromLong(end));
210 }
211 }
212
213 free(pmatch);
214 regfree(&pat);
215
216 if (match != 0) {
217 Py_RETURN_NONE;
218 }
219
220 return ret;
221}
222
223// For ${//}, the number of groups is always 1, so we want 2 match position
224// results -- the whole regex (which we ignore), and then first group.
225//
226// For [[ =~ ]], do we need to count how many matches the user gave?
227
228#define NMATCH 2
229
230static PyObject *
231func_regex_first_group_match(PyObject *self, PyObject *args) {
232 const char* pattern;
233 const char* str;
234 int pos;
235 if (!PyArg_ParseTuple(args, "ssi", &pattern, &str, &pos)) {
236 return NULL;
237 }
238
239 regex_t pat;
240 regmatch_t m[NMATCH];
241
242 // Could have been checked by regex_parse for [[ =~ ]], but not for glob
243 // patterns like ${foo/x*/y}.
244
245 int status = regcomp(&pat, pattern, REG_EXTENDED);
246 if (status != 0) {
247 char error_string[80];
248 regerror(status, &pat, error_string, 80);
249 PyErr_SetString(PyExc_RuntimeError, error_string);
250 return NULL;
251 }
252
253 debug("first_group_match pat %s str %s pos %d", pattern, str, pos);
254
255 // Match at offset 'pos'
256 int result = regexec(&pat, str + pos, NMATCH, m, 0 /*flags*/);
257 regfree(&pat);
258
259 if (result != 0) {
260 Py_RETURN_NONE; // no match
261 }
262
263 // Assume there is a match
264 regoff_t start = m[1].rm_so;
265 regoff_t end = m[1].rm_eo;
266 return Py_BuildValue("(i,i)", pos + start, pos + end);
267}
268
269// We do this in C so we can remove '%f' % 0.1 from the CPython build. That
270// involves dtoa.c and pystrod.c, which are thousands of lines of code.
271static PyObject *
272func_print_time(PyObject *self, PyObject *args) {
273 double real, user, sys;
274 if (!PyArg_ParseTuple(args, "ddd", &real, &user, &sys)) {
275 return NULL;
276 }
277 fprintf(stderr, "real\t%.3f\n", real);
278 fprintf(stderr, "user\t%.3f\n", user);
279 fprintf(stderr, "sys\t%.3f\n", sys);
280 Py_RETURN_NONE;
281}
282
283// A copy of socket.gethostname() from socketmodule.c. That module brings in
284// too many dependencies.
285
286static PyObject *errno_error;
287
288static PyObject *
289socket_gethostname(PyObject *self, PyObject *unused)
290{
291 char buf[1024];
292 int res;
293 Py_BEGIN_ALLOW_THREADS
294 res = gethostname(buf, (int) sizeof buf - 1);
295 //res = gethostname(buf, 0); // For testing errors
296 Py_END_ALLOW_THREADS
297 if (res < 0)
298 return PyErr_SetFromErrno(errno_error);
299 buf[sizeof buf - 1] = '\0';
300 return PyString_FromString(buf);
301}
302
303static PyObject *
304func_get_terminal_width(PyObject *self, PyObject *unused) {
305 struct winsize w;
306 int res;
307 res = ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
308 if (res < 0)
309 return PyErr_SetFromErrno(errno_error);
310 return PyLong_FromLong(w.ws_col);
311}
312
313static PyObject *
314func_wcswidth(PyObject *self, PyObject *args){
315 char *string;
316 if (!PyArg_ParseTuple(args, "s", &string)) {
317 return NULL;
318 }
319
320 int num_wide_chars = mbstowcs(NULL, string, 0);
321 if (num_wide_chars == -1) {
322 PyErr_SetString(PyExc_UnicodeError, "mbstowcs() 1");
323 return NULL;
324 }
325 int buf_size = (num_wide_chars + 1) * sizeof(wchar_t);
326 wchar_t* wide_chars = (wchar_t*)malloc(buf_size);
327 assert(wide_chars != NULL);
328
329 num_wide_chars = mbstowcs(wide_chars, string, num_wide_chars);
330 if (num_wide_chars == -1) {
331 PyErr_SetString(PyExc_UnicodeError, "mbstowcs() 2");
332 return NULL;
333 }
334
335 int width = wcswidth(wide_chars, num_wide_chars);
336 if (width == -1) {
337 PyErr_SetString(PyExc_UnicodeError, "wcswidth()");
338 return NULL;
339 }
340
341 return PyInt_FromLong(width);
342}
343
344static PyObject *
345func_cpython_reset_locale(PyObject *self, PyObject *unused)
346{
347 // From man setlocale:
348 // The locale "C" or "POSIX" is a portable locale; it exists on all conforming systems.
349 // On startup of the main program, the portable "C" locale is selected as default.
350
351 // Python overrides this, so we set it back.
352 if (setlocale(LC_CTYPE, "C.UTF-8") == NULL) {
353 // Our CI machines don't work with C.UTF-8, even though it's supposed
354 // to exist?
355 if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) {
356 PyErr_SetString(PyExc_SystemError, "Couldn't set locale to C.UTF-8 or en_US.UTF-8");
357 return NULL;
358 }
359 }
360 Py_RETURN_NONE;
361}
362
363static PyObject *
364func_sleep_until_error(PyObject *self, PyObject *args) {
365 double seconds;
366 if (!PyArg_ParseTuple(args, "d", &seconds)) {
367 return NULL;
368 }
369
370 struct timespec req;
371 req.tv_sec = (time_t)seconds;
372 req.tv_nsec = (long)((seconds - req.tv_sec) * 1000000000);
373
374 // Return 0 or errno
375 int result = 0;
376 if (nanosleep(&req, NULL) < 0) {
377 result = errno;
378 }
379 return PyInt_FromLong(result);
380}
381
382static PyMethodDef methods[] = {
383 // Return the canonical version of a path with symlinks, or None if there is
384 // an error.
385 {"realpath", func_realpath, METH_VARARGS, ""},
386
387 // Return whether a string matches a pattern."
388 {"fnmatch", func_fnmatch, METH_VARARGS, ""},
389
390 // Return a list of files that match a pattern.
391 // We need this since Python's glob doesn't have char classes.
392 {"glob", func_glob, METH_VARARGS, ""},
393
394 // Search a string for regex. Returns a list of matches, None if no
395 // match. Raises RuntimeError if the regex is invalid.
396 {"regex_search", func_regex_search, METH_VARARGS, ""},
397
398 // If the regex matches the string, return the start and end position of the
399 // first group. Returns None if there is no match. Raises RuntimeError if
400 // the regex is invalid.
401 {"regex_first_group_match", func_regex_first_group_match, METH_VARARGS, ""},
402
403 // "Print three floating point values for the 'time' builtin.
404 {"print_time", func_print_time, METH_VARARGS, ""},
405
406 {"gethostname", socket_gethostname, METH_NOARGS, ""},
407
408 // ioctl() to get the terminal width.
409 {"get_terminal_width", func_get_terminal_width, METH_NOARGS, ""},
410
411 // Get the display width of a string. Throw an exception if the string is invalid UTF8.
412 {"wcswidth", func_wcswidth, METH_VARARGS, ""},
413
414 // Workaround for CPython's calling setlocale() in pythonrun.c. ONLY used
415 // by tests and bin/oil.py.
416 {"cpython_reset_locale", func_cpython_reset_locale, METH_NOARGS, ""},
417
418 {"sleep_until_error", func_sleep_until_error, METH_VARARGS, ""},
419 {NULL, NULL},
420};
421
422void initlibc(void) {
423 PyObject *module;
424
425 module = Py_InitModule("libc", methods);
426 if (module != NULL) {
427 // ./configure values
428 PyModule_AddIntConstant(module, "HAVE_GLOB_PERIOD", HAVE_GLOB_PERIOD);
429 PyModule_AddIntConstant(module, "HAVE_FNM_EXTMATCH", HAVE_FNM_EXTMATCH);
430
431 // Actual libc values
432 PyModule_AddIntConstant(module, "GLOB_PERIOD", GLOB_PERIOD);
433 PyModule_AddIntConstant(module, "FNM_CASEFOLD", FNM_CASEFOLD);
434 PyModule_AddIntConstant(module, "REG_ICASE", REG_ICASE);
435 PyModule_AddIntConstant(module, "REG_NEWLINE", REG_NEWLINE);
436 PyModule_AddIntConstant(module, "REG_NOTBOL", REG_NOTBOL);
437 }
438
439 errno_error = PyErr_NewException("libc.error",
440 PyExc_IOError, NULL);
441}