diff options
author | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@localhost> | 2011-05-03 11:25:13 +0000 |
commit | 458120dd40db6b4df55a4e96b650e16798ef06a0 (patch) | |
tree | 8f82685be24fef97e715c6f5ca4c68d34d5074ee /sys/src/cmd/python/Extra | |
parent | 3a742c699f6806c1145aea5149bf15de15a0afd7 (diff) |
add hg and python
Diffstat (limited to 'sys/src/cmd/python/Extra')
-rw-r--r-- | sys/src/cmd/python/Extra/dummy.c | 6 | ||||
-rw-r--r-- | sys/src/cmd/python/Extra/mercurial/base85.c | 155 | ||||
-rw-r--r-- | sys/src/cmd/python/Extra/mercurial/bdiff.c | 401 | ||||
-rw-r--r-- | sys/src/cmd/python/Extra/mercurial/diffhelpers.c | 156 | ||||
-rw-r--r-- | sys/src/cmd/python/Extra/mercurial/mpatch.c | 444 | ||||
-rw-r--r-- | sys/src/cmd/python/Extra/mercurial/osutil.c | 534 | ||||
-rw-r--r-- | sys/src/cmd/python/Extra/mercurial/parsers.c | 435 | ||||
-rw-r--r-- | sys/src/cmd/python/Extra/mkfile | 16 |
8 files changed, 2147 insertions, 0 deletions
diff --git a/sys/src/cmd/python/Extra/dummy.c b/sys/src/cmd/python/Extra/dummy.c new file mode 100644 index 000000000..602aeaad9 --- /dev/null +++ b/sys/src/cmd/python/Extra/dummy.c @@ -0,0 +1,6 @@ +#include "Python.h" + +PyMODINIT_FUNC +initdummy(void) +{ +} diff --git a/sys/src/cmd/python/Extra/mercurial/base85.c b/sys/src/cmd/python/Extra/mercurial/base85.c new file mode 100644 index 000000000..3e6c0614c --- /dev/null +++ b/sys/src/cmd/python/Extra/mercurial/base85.c @@ -0,0 +1,155 @@ +/* + base85 codec + + Copyright 2006 Brendan Cully <brendan@kublai.com> + + This software may be used and distributed according to the terms of + the GNU General Public License, incorporated herein by reference. + + Largely based on git's implementation +*/ + +#include <Python.h> + +static const char b85chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"; +static char b85dec[256]; + +static void +b85prep(void) +{ + int i; + + memset(b85dec, 0, sizeof(b85dec)); + for (i = 0; i < sizeof(b85chars); i++) + b85dec[(int)(b85chars[i])] = i + 1; +} + +static PyObject * +b85encode(PyObject *self, PyObject *args) +{ + const unsigned char *text; + PyObject *out; + char *dst; + int len, olen, i; + unsigned int acc, val, ch; + int pad = 0; + + if (!PyArg_ParseTuple(args, "s#|i", &text, &len, &pad)) + return NULL; + + if (pad) + olen = ((len + 3) / 4 * 5) - 3; + else { + olen = len % 4; + if (olen) + olen++; + olen += len / 4 * 5; + } + if (!(out = PyString_FromStringAndSize(NULL, olen + 3))) + return NULL; + + dst = PyString_AS_STRING(out); + + while (len) { + acc = 0; + for (i = 24; i >= 0; i -= 8) { + ch = *text++; + acc |= ch << i; + if (--len == 0) + break; + } + for (i = 4; i >= 0; i--) { + val = acc % 85; + acc /= 85; + dst[i] = b85chars[val]; + } + dst += 5; + } + + if (!pad) + _PyString_Resize(&out, olen); + + return out; +} + +static PyObject * +b85decode(PyObject *self, PyObject *args) +{ + PyObject *out; + const char *text; + char *dst; + int len, i, j, olen, c, cap; + unsigned int acc; + + if (!PyArg_ParseTuple(args, "s#", &text, &len)) + return NULL; + + olen = len / 5 * 4; + i = len % 5; + if (i) + olen += i - 1; + if (!(out = PyString_FromStringAndSize(NULL, olen))) + return NULL; + + dst = PyString_AS_STRING(out); + + i = 0; + while (i < len) + { + acc = 0; + cap = len - i - 1; + if (cap > 4) + cap = 4; + for (j = 0; j < cap; i++, j++) + { + c = b85dec[(int)*text++] - 1; + if (c < 0) + return PyErr_Format(PyExc_ValueError, "Bad base85 character at position %d", i); + acc = acc * 85 + c; + } + if (i++ < len) + { + c = b85dec[(int)*text++] - 1; + if (c < 0) + return PyErr_Format(PyExc_ValueError, "Bad base85 character at position %d", i); + /* overflow detection: 0xffffffff == "|NsC0", + * "|NsC" == 0x03030303 */ + if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c) + return PyErr_Format(PyExc_ValueError, "Bad base85 sequence at position %d", i); + acc += c; + } + + cap = olen < 4 ? olen : 4; + olen -= cap; + for (j = 0; j < 4 - cap; j++) + acc *= 85; + if (cap && cap < 4) + acc += 0xffffff >> (cap - 1) * 8; + for (j = 0; j < cap; j++) + { + acc = (acc << 8) | (acc >> 24); + *dst++ = acc; + } + } + + return out; +} + +static char base85_doc[] = "Base85 Data Encoding"; + +static PyMethodDef methods[] = { + {"b85encode", b85encode, METH_VARARGS, + "Encode text in base85.\n\n" + "If the second parameter is true, pad the result to a multiple of " + "five characters.\n"}, + {"b85decode", b85decode, METH_VARARGS, "Decode base85 text.\n"}, + {NULL, NULL} +}; + +PyMODINIT_FUNC initbase85(void) +{ + Py_InitModule3("base85", methods, base85_doc); + + b85prep(); +} diff --git a/sys/src/cmd/python/Extra/mercurial/bdiff.c b/sys/src/cmd/python/Extra/mercurial/bdiff.c new file mode 100644 index 000000000..60d3c633b --- /dev/null +++ b/sys/src/cmd/python/Extra/mercurial/bdiff.c @@ -0,0 +1,401 @@ +/* + bdiff.c - efficient binary diff extension for Mercurial + + Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> + + This software may be used and distributed according to the terms of + the GNU General Public License, incorporated herein by reference. + + Based roughly on Python difflib +*/ + +#include <Python.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +#if defined __hpux || defined __SUNPRO_C || defined _AIX +# define inline +#endif + +#ifdef __linux +# define inline __inline +#endif + +#ifdef _WIN32 +#ifdef _MSC_VER +#define inline __inline +typedef unsigned long uint32_t; +#else +#include <stdint.h> +#endif +static uint32_t htonl(uint32_t x) +{ + return ((x & 0x000000ffUL) << 24) | + ((x & 0x0000ff00UL) << 8) | + ((x & 0x00ff0000UL) >> 8) | + ((x & 0xff000000UL) >> 24); +} +#else +#include <sys/types.h> +#if defined __BEOS__ && !defined __HAIKU__ +#include <ByteOrder.h> +#else +#include <arpa/inet.h> +#endif +#include <inttypes.h> +#endif + +struct line { + int h, len, n, e; + const char *l; +}; + +struct pos { + int pos, len; +}; + +struct hunk { + int a1, a2, b1, b2; +}; + +struct hunklist { + struct hunk *base, *head; +}; + +int splitlines(const char *a, int len, struct line **lr) +{ + int h, i; + const char *p, *b = a; + const char * const plast = a + len - 1; + struct line *l; + + /* count the lines */ + i = 1; /* extra line for sentinel */ + for (p = a; p < a + len; p++) + if (*p == '\n' || p == plast) + i++; + + *lr = l = (struct line *)malloc(sizeof(struct line) * i); + if (!l) + return -1; + + /* build the line array and calculate hashes */ + h = 0; + for (p = a; p < a + len; p++) { + /* Leonid Yuriev's hash */ + h = (h * 1664525) + *p + 1013904223; + + if (*p == '\n' || p == plast) { + l->h = h; + h = 0; + l->len = p - b + 1; + l->l = b; + l->n = INT_MAX; + l++; + b = p + 1; + } + } + + /* set up a sentinel */ + l->h = l->len = 0; + l->l = a + len; + return i - 1; +} + +int inline cmp(struct line *a, struct line *b) +{ + return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len); +} + +static int equatelines(struct line *a, int an, struct line *b, int bn) +{ + int i, j, buckets = 1, t, scale; + struct pos *h = NULL; + + /* build a hash table of the next highest power of 2 */ + while (buckets < bn + 1) + buckets *= 2; + + /* try to allocate a large hash table to avoid collisions */ + for (scale = 4; scale; scale /= 2) { + h = (struct pos *)malloc(scale * buckets * sizeof(struct pos)); + if (h) + break; + } + + if (!h) + return 0; + + buckets = buckets * scale - 1; + + /* clear the hash table */ + for (i = 0; i <= buckets; i++) { + h[i].pos = INT_MAX; + h[i].len = 0; + } + + /* add lines to the hash table chains */ + for (i = bn - 1; i >= 0; i--) { + /* find the equivalence class */ + for (j = b[i].h & buckets; h[j].pos != INT_MAX; + j = (j + 1) & buckets) + if (!cmp(b + i, b + h[j].pos)) + break; + + /* add to the head of the equivalence class */ + b[i].n = h[j].pos; + b[i].e = j; + h[j].pos = i; + h[j].len++; /* keep track of popularity */ + } + + /* compute popularity threshold */ + t = (bn >= 4000) ? bn / 1000 : bn + 1; + + /* match items in a to their equivalence class in b */ + for (i = 0; i < an; i++) { + /* find the equivalence class */ + for (j = a[i].h & buckets; h[j].pos != INT_MAX; + j = (j + 1) & buckets) + if (!cmp(a + i, b + h[j].pos)) + break; + + a[i].e = j; /* use equivalence class for quick compare */ + if (h[j].len <= t) + a[i].n = h[j].pos; /* point to head of match list */ + else + a[i].n = INT_MAX; /* too popular */ + } + + /* discard hash tables */ + free(h); + return 1; +} + +static int longest_match(struct line *a, struct line *b, struct pos *pos, + int a1, int a2, int b1, int b2, int *omi, int *omj) +{ + int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k; + + for (i = a1; i < a2; i++) { + /* skip things before the current block */ + for (j = a[i].n; j < b1; j = b[j].n) + ; + + /* loop through all lines match a[i] in b */ + for (; j < b2; j = b[j].n) { + /* does this extend an earlier match? */ + if (i > a1 && j > b1 && pos[j - 1].pos == i - 1) + k = pos[j - 1].len + 1; + else + k = 1; + pos[j].pos = i; + pos[j].len = k; + + /* best match so far? */ + if (k > mk) { + mi = i; + mj = j; + mk = k; + } + } + } + + if (mk) { + mi = mi - mk + 1; + mj = mj - mk + 1; + } + + /* expand match to include neighboring popular lines */ + while (mi - mb > a1 && mj - mb > b1 && + a[mi - mb - 1].e == b[mj - mb - 1].e) + mb++; + while (mi + mk < a2 && mj + mk < b2 && + a[mi + mk].e == b[mj + mk].e) + mk++; + + *omi = mi - mb; + *omj = mj - mb; + + return mk + mb; +} + +static void recurse(struct line *a, struct line *b, struct pos *pos, + int a1, int a2, int b1, int b2, struct hunklist *l) +{ + int i, j, k; + + /* find the longest match in this chunk */ + k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j); + if (!k) + return; + + /* and recurse on the remaining chunks on either side */ + recurse(a, b, pos, a1, i, b1, j, l); + l->head->a1 = i; + l->head->a2 = i + k; + l->head->b1 = j; + l->head->b2 = j + k; + l->head++; + recurse(a, b, pos, i + k, a2, j + k, b2, l); +} + +static struct hunklist diff(struct line *a, int an, struct line *b, int bn) +{ + struct hunklist l; + struct hunk *curr; + struct pos *pos; + int t; + + /* allocate and fill arrays */ + t = equatelines(a, an, b, bn); + pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos)); + /* we can't have more matches than lines in the shorter file */ + l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) * + ((an<bn ? an:bn) + 1)); + + if (pos && l.base && t) { + /* generate the matching block list */ + recurse(a, b, pos, 0, an, 0, bn, &l); + l.head->a1 = l.head->a2 = an; + l.head->b1 = l.head->b2 = bn; + l.head++; + } + + free(pos); + + /* normalize the hunk list, try to push each hunk towards the end */ + for (curr = l.base; curr != l.head; curr++) { + struct hunk *next = curr+1; + int shift = 0; + + if (next == l.head) + break; + + if (curr->a2 == next->a1) + while (curr->a2+shift < an && curr->b2+shift < bn + && !cmp(a+curr->a2+shift, b+curr->b2+shift)) + shift++; + else if (curr->b2 == next->b1) + while (curr->b2+shift < bn && curr->a2+shift < an + && !cmp(b+curr->b2+shift, a+curr->a2+shift)) + shift++; + if (!shift) + continue; + curr->b2 += shift; + next->b1 += shift; + curr->a2 += shift; + next->a1 += shift; + } + + return l; +} + +static PyObject *blocks(PyObject *self, PyObject *args) +{ + PyObject *sa, *sb, *rl = NULL, *m; + struct line *a, *b; + struct hunklist l = {NULL, NULL}; + struct hunk *h; + int an, bn, pos = 0; + + if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) + return NULL; + + an = splitlines(PyString_AsString(sa), PyString_Size(sa), &a); + bn = splitlines(PyString_AsString(sb), PyString_Size(sb), &b); + if (!a || !b) + goto nomem; + + l = diff(a, an, b, bn); + rl = PyList_New(l.head - l.base); + if (!l.head || !rl) + goto nomem; + + for (h = l.base; h != l.head; h++) { + m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2); + PyList_SetItem(rl, pos, m); + pos++; + } + +nomem: + free(a); + free(b); + free(l.base); + return rl ? rl : PyErr_NoMemory(); +} + +static PyObject *bdiff(PyObject *self, PyObject *args) +{ + char *sa, *sb; + PyObject *result = NULL; + struct line *al, *bl; + struct hunklist l = {NULL, NULL}; + struct hunk *h; + char encode[12], *rb; + int an, bn, len = 0, la, lb; + + if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb)) + return NULL; + + an = splitlines(sa, la, &al); + bn = splitlines(sb, lb, &bl); + if (!al || !bl) + goto nomem; + + l = diff(al, an, bl, bn); + if (!l.head) + goto nomem; + + /* calculate length of output */ + la = lb = 0; + for (h = l.base; h != l.head; h++) { + if (h->a1 != la || h->b1 != lb) + len += 12 + bl[h->b1].l - bl[lb].l; + la = h->a2; + lb = h->b2; + } + + result = PyString_FromStringAndSize(NULL, len); + if (!result) + goto nomem; + + /* build binary patch */ + rb = PyString_AsString(result); + la = lb = 0; + + for (h = l.base; h != l.head; h++) { + if (h->a1 != la || h->b1 != lb) { + len = bl[h->b1].l - bl[lb].l; + *(uint32_t *)(encode) = htonl(al[la].l - al->l); + *(uint32_t *)(encode + 4) = htonl(al[h->a1].l - al->l); + *(uint32_t *)(encode + 8) = htonl(len); + memcpy(rb, encode, 12); + memcpy(rb + 12, bl[lb].l, len); + rb += 12 + len; + } + la = h->a2; + lb = h->b2; + } + +nomem: + free(al); + free(bl); + free(l.base); + return result ? result : PyErr_NoMemory(); +} + +static char mdiff_doc[] = "Efficient binary diff."; + +static PyMethodDef methods[] = { + {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, + {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, + {NULL, NULL} +}; + +PyMODINIT_FUNC initbdiff(void) +{ + Py_InitModule3("bdiff", methods, mdiff_doc); +} + diff --git a/sys/src/cmd/python/Extra/mercurial/diffhelpers.c b/sys/src/cmd/python/Extra/mercurial/diffhelpers.c new file mode 100644 index 000000000..d9316ea4b --- /dev/null +++ b/sys/src/cmd/python/Extra/mercurial/diffhelpers.c @@ -0,0 +1,156 @@ +/* + * diffhelpers.c - helper routines for mpatch + * + * Copyright 2007 Chris Mason <chris.mason@oracle.com> + * + * This software may be used and distributed according to the terms + * of the GNU General Public License v2, incorporated herein by reference. + */ + +#include <Python.h> +#include <stdlib.h> +#include <string.h> + +static char diffhelpers_doc[] = "Efficient diff parsing"; +static PyObject *diffhelpers_Error; + + +/* fixup the last lines of a and b when the patch has no newline at eof */ +static void _fix_newline(PyObject *hunk, PyObject *a, PyObject *b) +{ + int hunksz = PyList_Size(hunk); + PyObject *s = PyList_GET_ITEM(hunk, hunksz-1); + char *l = PyString_AS_STRING(s); + int sz = PyString_GET_SIZE(s); + int alen = PyList_Size(a); + int blen = PyList_Size(b); + char c = l[0]; + + PyObject *hline = PyString_FromStringAndSize(l, sz-1); + if (c == ' ' || c == '+') { + PyObject *rline = PyString_FromStringAndSize(l+1, sz-2); + PyList_SetItem(b, blen-1, rline); + } + if (c == ' ' || c == '-') { + Py_INCREF(hline); + PyList_SetItem(a, alen-1, hline); + } + PyList_SetItem(hunk, hunksz-1, hline); +} + +/* python callable form of _fix_newline */ +static PyObject * +fix_newline(PyObject *self, PyObject *args) +{ + PyObject *hunk, *a, *b; + if (!PyArg_ParseTuple(args, "OOO", &hunk, &a, &b)) + return NULL; + _fix_newline(hunk, a, b); + return Py_BuildValue("l", 0); +} + +/* + * read lines from fp into the hunk. The hunk is parsed into two arrays + * a and b. a gets the old state of the text, b gets the new state + * The control char from the hunk is saved when inserting into a, but not b + * (for performance while deleting files) + */ +static PyObject * +addlines(PyObject *self, PyObject *args) +{ + + PyObject *fp, *hunk, *a, *b, *x; + int i; + int lena, lenb; + int num; + int todoa, todob; + char *s, c; + PyObject *l; + if (!PyArg_ParseTuple(args, "OOiiOO", &fp, &hunk, &lena, &lenb, &a, &b)) + return NULL; + + while(1) { + todoa = lena - PyList_Size(a); + todob = lenb - PyList_Size(b); + num = todoa > todob ? todoa : todob; + if (num == 0) + break; + for (i = 0 ; i < num ; i++) { + x = PyFile_GetLine(fp, 0); + s = PyString_AS_STRING(x); + c = *s; + if (strcmp(s, "\\ No newline at end of file\n") == 0) { + _fix_newline(hunk, a, b); + continue; + } + if (c == '\n') { + /* Some patches may be missing the control char + * on empty lines. Supply a leading space. */ + Py_DECREF(x); + x = PyString_FromString(" \n"); + } + PyList_Append(hunk, x); + if (c == '+') { + l = PyString_FromString(s + 1); + PyList_Append(b, l); + Py_DECREF(l); + } else if (c == '-') { + PyList_Append(a, x); + } else { + l = PyString_FromString(s + 1); + PyList_Append(b, l); + Py_DECREF(l); + PyList_Append(a, x); + } + Py_DECREF(x); + } + } + return Py_BuildValue("l", 0); +} + +/* + * compare the lines in a with the lines in b. a is assumed to have + * a control char at the start of each line, this char is ignored in the + * compare + */ +static PyObject * +testhunk(PyObject *self, PyObject *args) +{ + + PyObject *a, *b; + long bstart; + int alen, blen; + int i; + char *sa, *sb; + + if (!PyArg_ParseTuple(args, "OOl", &a, &b, &bstart)) + return NULL; + alen = PyList_Size(a); + blen = PyList_Size(b); + if (alen > blen - bstart) { + return Py_BuildValue("l", -1); + } + for (i = 0 ; i < alen ; i++) { + sa = PyString_AS_STRING(PyList_GET_ITEM(a, i)); + sb = PyString_AS_STRING(PyList_GET_ITEM(b, i + bstart)); + if (strcmp(sa+1, sb) != 0) + return Py_BuildValue("l", -1); + } + return Py_BuildValue("l", 0); +} + +static PyMethodDef methods[] = { + {"addlines", addlines, METH_VARARGS, "add lines to a hunk\n"}, + {"fix_newline", fix_newline, METH_VARARGS, "fixup newline counters\n"}, + {"testhunk", testhunk, METH_VARARGS, "test lines in a hunk\n"}, + {NULL, NULL} +}; + +PyMODINIT_FUNC +initdiffhelpers(void) +{ + Py_InitModule3("diffhelpers", methods, diffhelpers_doc); + diffhelpers_Error = PyErr_NewException("diffhelpers.diffhelpersError", + NULL, NULL); +} + diff --git a/sys/src/cmd/python/Extra/mercurial/mpatch.c b/sys/src/cmd/python/Extra/mercurial/mpatch.c new file mode 100644 index 000000000..d9ceefcae --- /dev/null +++ b/sys/src/cmd/python/Extra/mercurial/mpatch.c @@ -0,0 +1,444 @@ +/* + mpatch.c - efficient binary patching for Mercurial + + This implements a patch algorithm that's O(m + nlog n) where m is the + size of the output and n is the number of patches. + + Given a list of binary patches, it unpacks each into a hunk list, + then combines the hunk lists with a treewise recursion to form a + single hunk list. This hunk list is then applied to the original + text. + + The text (or binary) fragments are copied directly from their source + Python objects into a preallocated output string to avoid the + allocation of intermediate Python objects. Working memory is about 2x + the total number of hunks. + + Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. +*/ + +#include <Python.h> +#include <stdlib.h> +#include <string.h> + +/* Definitions to get compatibility with python 2.4 and earlier which + does not have Py_ssize_t. See also PEP 353. + Note: msvc (8 or earlier) does not have ssize_t, so we use Py_ssize_t. +*/ +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#endif + +#ifdef _WIN32 +# ifdef _MSC_VER +/* msvc 6.0 has problems */ +# define inline __inline +typedef unsigned long uint32_t; +# else +# include <stdint.h> +# endif +static uint32_t ntohl(uint32_t x) +{ + return ((x & 0x000000ffUL) << 24) | + ((x & 0x0000ff00UL) << 8) | + ((x & 0x00ff0000UL) >> 8) | + ((x & 0xff000000UL) >> 24); +} +#else +/* not windows */ +# include <sys/types.h> +# if defined __BEOS__ && !defined __HAIKU__ +# include <ByteOrder.h> +# else +# include <arpa/inet.h> +# endif +# include <inttypes.h> +#endif + +static char mpatch_doc[] = "Efficient binary patching."; +static PyObject *mpatch_Error; + +struct frag { + int start, end, len; + const char *data; +}; + +struct flist { + struct frag *base, *head, *tail; +}; + +static struct flist *lalloc(int size) +{ + struct flist *a = NULL; + + if (size < 1) + size = 1; + + a = (struct flist *)malloc(sizeof(struct flist)); + if (a) { + a->base = (struct frag *)malloc(sizeof(struct frag) * size); + if (a->base) { + a->head = a->tail = a->base; + return a; + } + free(a); + a = NULL; + } + if (!PyErr_Occurred()) + PyErr_NoMemory(); + return NULL; +} + +static void lfree(struct flist *a) +{ + if (a) { + free(a->base); + free(a); + } +} + +static int lsize(struct flist *a) +{ + return a->tail - a->head; +} + +/* move hunks in source that are less cut to dest, compensating + for changes in offset. the last hunk may be split if necessary. +*/ +static int gather(struct flist *dest, struct flist *src, int cut, int offset) +{ + struct frag *d = dest->tail, *s = src->head; + int postend, c, l; + + while (s != src->tail) { + if (s->start + offset >= cut) + break; /* we've gone far enough */ + + postend = offset + s->start + s->len; + if (postend <= cut) { + /* save this hunk */ + offset += s->start + s->len - s->end; + *d++ = *s++; + } + else { + /* break up this hunk */ + c = cut - offset; + if (s->end < c) + c = s->end; + l = cut - offset - s->start; + if (s->len < l) + l = s->len; + + offset += s->start + l - c; + + d->start = s->start; + d->end = c; + d->len = l; + d->data = s->data; + d++; + s->start = c; + s->len = s->len - l; + s->data = s->data + l; + + break; + } + } + + dest->tail = d; + src->head = s; + return offset; +} + +/* like gather, but with no output list */ +static int discard(struct flist *src, int cut, int offset) +{ + struct frag *s = src->head; + int postend, c, l; + + while (s != src->tail) { + if (s->start + offset >= cut) + break; + + postend = offset + s->start + s->len; + if (postend <= cut) { + offset += s->start + s->len - s->end; + s++; + } + else { + c = cut - offset; + if (s->end < c) + c = s->end; + l = cut - offset - s->start; + if (s->len < l) + l = s->len; + + offset += s->start + l - c; + s->start = c; + s->len = s->len - l; + s->data = s->data + l; + + break; + } + } + + src->head = s; + return offset; +} + +/* combine hunk lists a and b, while adjusting b for offset changes in a/ + this deletes a and b and returns the resultant list. */ +static struct flist *combine(struct flist *a, struct flist *b) +{ + struct flist *c = NULL; + struct frag *bh, *ct; + int offset = 0, post; + + if (a && b) + c = lalloc((lsize(a) + lsize(b)) * 2); + + if (c) { + + for (bh = b->head; bh != b->tail; bh++) { + /* save old hunks */ + offset = gather(c, a, bh->start, offset); + + /* discard replaced hunks */ + post = discard(a, bh->end, offset); + + /* insert new hunk */ + ct = c->tail; + ct->start = bh->start - offset; + ct->end = bh->end - post; + ct->len = bh->len; + ct->data = bh->data; + c->tail++; + offset = post; + } + + /* hold on to tail from a */ + memcpy(c->tail, a->head, sizeof(struct frag) * lsize(a)); + c->tail += lsize(a); + } + + lfree(a); + lfree(b); + return c; +} + +/* decode a binary patch into a hunk list */ +static struct flist *decode(const char *bin, int len) +{ + struct flist *l; + struct frag *lt; + const char *data = bin + 12, *end = bin + len; + char decode[12]; /* for dealing with alignment issues */ + + /* assume worst case size, we won't have many of these lists */ + l = lalloc(len / 12); + if (!l) + return NULL; + + lt = l->tail; + + while (data <= end) { + memcpy(decode, bin, 12); + lt->start = ntohl(*(uint32_t *)decode); + lt->end = ntohl(*(uint32_t *)(decode + 4)); + lt->len = ntohl(*(uint32_t *)(decode + 8)); + if (lt->start > lt->end) + break; /* sanity check */ + bin = data + lt->len; + if (bin < data) + break; /* big data + big (bogus) len can wrap around */ + lt->data = data; + data = bin + 12; + lt++; + } + + if (bin != end) { + if (!PyErr_Occurred()) + PyErr_SetString(mpatch_Error, "patch cannot be decoded"); + lfree(l); + return NULL; + } + + l->tail = lt; + return l; +} + +/* calculate the size of resultant text */ +static int calcsize(int len, struct flist *l) +{ + int outlen = 0, last = 0; + struct frag *f = l->head; + + while (f != l->tail) { + if (f->start < last || f->end > len) { + if (!PyErr_Occurred()) + PyErr_SetString(mpatch_Error, + "invalid patch"); + return -1; + } + outlen += f->start - last; + last = f->end; + outlen += f->len; + f++; + } + + outlen += len - last; + return outlen; +} + +static int apply(char *buf, const char *orig, int len, struct flist *l) +{ + struct frag *f = l->head; + int last = 0; + char *p = buf; + + while (f != l->tail) { + if (f->start < last || f->end > len) { + if (!PyErr_Occurred()) + PyErr_SetString(mpatch_Error, + "invalid patch"); + return 0; + } + memcpy(p, orig + last, f->start - last); + p += f->start - last; + memcpy(p, f->data, f->len); + last = f->end; + p += f->len; + f++; + } + memcpy(p, orig + last, len - last); + return 1; +} + +/* recursively generate a patch of all bins between start and end */ +static struct flist *fold(PyObject *bins, int start, int end) +{ + int len; + Py_ssize_t blen; + const char *buffer; + + if (start + 1 == end) { + /* trivial case, output a decoded list */ + PyObject *tmp = PyList_GetItem(bins, start); + if (!tmp) + return NULL; + if (PyObject_AsCharBuffer(tmp, &buffer, &blen)) + return NULL; + return decode(buffer, blen); + } + + /* divide and conquer, memory management is elsewhere */ + len = (end - start) / 2; + return combine(fold(bins, start, start + len), + fold(bins, start + len, end)); +} + +static PyObject * +patches(PyObject *self, PyObject *args) +{ + PyObject *text, *bins, *result; + struct flist *patch; + const char *in; + char *out; + int len, outlen; + Py_ssize_t inlen; + + if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins)) + return NULL; + + len = PyList_Size(bins); + if (!len) { + /* nothing to do */ + Py_INCREF(text); + return text; + } + + if (PyObject_AsCharBuffer(text, &in, &inlen)) + return NULL; + + patch = fold(bins, 0, len); + if (!patch) + return NULL; + + outlen = calcsize(inlen, patch); + if (outlen < 0) { + result = NULL; + goto cleanup; + } + result = PyString_FromStringAndSize(NULL, outlen); + if (!result) { + result = NULL; + goto cleanup; + } + out = PyString_AsString(result); + if (!apply(out, in, inlen, patch)) { + Py_DECREF(result); + result = NULL; + } +cleanup: + lfree(patch); + return result; +} + +/* calculate size of a patched file directly */ +static PyObject * +patchedsize(PyObject *self, PyObject *args) +{ + long orig, start, end, len, outlen = 0, last = 0; + int patchlen; + char *bin, *binend, *data; + char decode[12]; /* for dealing with alignment issues */ + + if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen)) + return NULL; + + binend = bin + patchlen; + data = bin + 12; + + while (data <= binend) { + memcpy(decode, bin, 12); + start = ntohl(*(uint32_t *)decode); + end = ntohl(*(uint32_t *)(decode + 4)); + len = ntohl(*(uint32_t *)(decode + 8)); + if (start > end) + break; /* sanity check */ + bin = data + len; + if (bin < data) + break; /* big data + big (bogus) len can wrap around */ + data = bin + 12; + outlen += start - last; + last = end; + outlen += len; + } + + if (bin != binend) { + if (!PyErr_Occurred()) + PyErr_SetString(mpatch_Error, "patch cannot be decoded"); + return NULL; + } + + outlen += orig - last; + return Py_BuildValue("l", outlen); +} + +static PyMethodDef methods[] = { + {"patches", patches, METH_VARARGS, "apply a series of patches\n"}, + {"patchedsize", patchedsize, METH_VARARGS, "calculed patched size\n"}, + {NULL, NULL} +}; + +PyMODINIT_FUNC +initmpatch(void) +{ + Py_InitModule3("mpatch", methods, mpatch_doc); + mpatch_Error = PyErr_NewException("mpatch.mpatchError", NULL, NULL); +} + diff --git a/sys/src/cmd/python/Extra/mercurial/osutil.c b/sys/src/cmd/python/Extra/mercurial/osutil.c new file mode 100644 index 000000000..a9874d0c9 --- /dev/null +++ b/sys/src/cmd/python/Extra/mercurial/osutil.c @@ -0,0 +1,534 @@ +/* + osutil.c - native operating system services + + Copyright 2007 Matt Mackall and others + + This software may be used and distributed according to the terms of + the GNU General Public License, incorporated herein by reference. +*/ + +#define _ATFILE_SOURCE +#include <Python.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> + +#ifdef _WIN32 +# include <windows.h> +# include <io.h> +#else +# include <dirent.h> +# include <sys/stat.h> +# include <sys/types.h> +# include <unistd.h> +#endif + +// some platforms lack the PATH_MAX definition (eg. GNU/Hurd) +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#ifdef _WIN32 +/* +stat struct compatible with hg expectations +Mercurial only uses st_mode, st_size and st_mtime +the rest is kept to minimize changes between implementations +*/ +struct hg_stat { + int st_dev; + int st_mode; + int st_nlink; + __int64 st_size; + int st_mtime; + int st_ctime; +}; +struct listdir_stat { + PyObject_HEAD + struct hg_stat st; +}; +#else +struct listdir_stat { + PyObject_HEAD + struct stat st; +}; +#endif + +#define listdir_slot(name) \ + static PyObject *listdir_stat_##name(PyObject *self, void *x) \ + { \ + return PyInt_FromLong(((struct listdir_stat *)self)->st.name); \ + } + +listdir_slot(st_dev) +listdir_slot(st_mode) +listdir_slot(st_nlink) +#ifdef _WIN32 +static PyObject *listdir_stat_st_size(PyObject *self, void *x) +{ + return PyLong_FromLongLong( + (PY_LONG_LONG)((struct listdir_stat *)self)->st.st_size); +} +#else +listdir_slot(st_size) +#endif +listdir_slot(st_mtime) +listdir_slot(st_ctime) + +static struct PyGetSetDef listdir_stat_getsets[] = { + {"st_dev", listdir_stat_st_dev, 0, 0, 0}, + {"st_mode", listdir_stat_st_mode, 0, 0, 0}, + {"st_nlink", listdir_stat_st_nlink, 0, 0, 0}, + {"st_size", listdir_stat_st_size, 0, 0, 0}, + {"st_mtime", listdir_stat_st_mtime, 0, 0, 0}, + {"st_ctime", listdir_stat_st_ctime, 0, 0, 0}, + {0, 0, 0, 0, 0} +}; + +static PyObject *listdir_stat_new(PyTypeObject *t, PyObject *a, PyObject *k) +{ + return t->tp_alloc(t, 0); +} + +static void listdir_stat_dealloc(PyObject *o) +{ + o->ob_type->tp_free(o); +} + +static PyTypeObject listdir_stat_type = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "osutil.stat", /*tp_name*/ + sizeof(struct listdir_stat), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)listdir_stat_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "stat objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + listdir_stat_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + listdir_stat_new, /* tp_new */ +}; + +#ifdef _WIN32 + +static int to_python_time(const FILETIME *tm) +{ + /* number of seconds between epoch and January 1 1601 */ + const __int64 a0 = (__int64)134774L * (__int64)24L * (__int64)3600L; + /* conversion factor from 100ns to 1s */ + const __int64 a1 = 10000000; + /* explicit (int) cast to suspend compiler warnings */ + return (int)((((__int64)tm->dwHighDateTime << 32) + + tm->dwLowDateTime) / a1 - a0); +} + +static PyObject *make_item(const WIN32_FIND_DATAA *fd, int wantstat) +{ + PyObject *py_st; + struct hg_stat *stp; + + int kind = (fd->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + ? _S_IFDIR : _S_IFREG; + + if (!wantstat) + return Py_BuildValue("si", fd->cFileName, kind); + + py_st = PyObject_CallObject((PyObject *)&listdir_stat_type, NULL); + if (!py_st) + return NULL; + + stp = &((struct listdir_stat *)py_st)->st; + /* + use kind as st_mode + rwx bits on Win32 are meaningless + and Hg does not use them anyway + */ + stp->st_mode = kind; + stp->st_mtime = to_python_time(&fd->ftLastWriteTime); + stp->st_ctime = to_python_time(&fd->ftCreationTime); + if (kind == _S_IFREG) + stp->st_size = ((__int64)fd->nFileSizeHigh << 32) + + fd->nFileSizeLow; + return Py_BuildValue("siN", fd->cFileName, + kind, py_st); +} + +static PyObject *_listdir(char *path, int plen, int wantstat, char *skip) +{ + PyObject *rval = NULL; /* initialize - return value */ + PyObject *list; + HANDLE fh; + WIN32_FIND_DATAA fd; + char *pattern; + + /* build the path + \* pattern string */ + pattern = malloc(plen+3); /* path + \* + \0 */ + if (!pattern) { + PyErr_NoMemory(); + goto error_nomem; + } + strcpy(pattern, path); + + if (plen > 0) { + char c = path[plen-1]; + if (c != ':' && c != '/' && c != '\\') + pattern[plen++] = '\\'; + } + strcpy(pattern + plen, "*"); + + fh = FindFirstFileA(pattern, &fd); + if (fh == INVALID_HANDLE_VALUE) { + PyErr_SetFromWindowsErrWithFilename(GetLastError(), path); + goto error_file; + } + + list = PyList_New(0); + if (!list) + goto error_list; + + do { + PyObject *item; + + if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + if (!strcmp(fd.cFileName, ".") + || !strcmp(fd.cFileName, "..")) + continue; + + if (skip && !strcmp(fd.cFileName, skip)) { + rval = PyList_New(0); + goto error; + } + } + + item = make_item(&fd, wantstat); + if (!item) + goto error; + + if (PyList_Append(list, item)) { + Py_XDECREF(item); + goto error; + } + + Py_XDECREF(item); + } while (FindNextFileA(fh, &fd)); + + if (GetLastError() != ERROR_NO_MORE_FILES) { + PyErr_SetFromWindowsErrWithFilename(GetLastError(), path); + goto error; + } + + rval = list; + Py_XINCREF(rval); +error: + Py_XDECREF(list); +error_list: + FindClose(fh); +error_file: + free(pattern); +error_nomem: + return rval; +} + +#else + +int entkind(struct dirent *ent) +{ +#ifdef DT_REG + switch (ent->d_type) { + case DT_REG: return S_IFREG; + case DT_DIR: return S_IFDIR; + case DT_LNK: return S_IFLNK; + case DT_BLK: return S_IFBLK; + case DT_CHR: return S_IFCHR; + case DT_FIFO: return S_IFIFO; + case DT_SOCK: return S_IFSOCK; + } +#endif + return -1; +} + +static PyObject *_listdir(char *path, int pathlen, int keepstat, char *skip) +{ + PyObject *list, *elem, *stat, *ret = NULL; + char fullpath[PATH_MAX + 10]; + int kind, err; + struct stat st; + struct dirent *ent; + DIR *dir; +#ifdef AT_SYMLINK_NOFOLLOW + int dfd = -1; +#endif + + if (pathlen >= PATH_MAX) { + PyErr_SetString(PyExc_ValueError, "path too long"); + goto error_value; + } + strncpy(fullpath, path, PATH_MAX); + fullpath[pathlen] = '/'; + +#ifdef AT_SYMLINK_NOFOLLOW + dfd = open(path, O_RDONLY); + if (dfd == -1) { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, path); + goto error_value; + } + dir = fdopendir(dfd); +#else + dir = opendir(path); +#endif + if (!dir) { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, path); + goto error_dir; + } + + list = PyList_New(0); + if (!list) + goto error_list; + + while ((ent = readdir(dir))) { + if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) + continue; + + kind = entkind(ent); + if (kind == -1 || keepstat) { +#ifdef AT_SYMLINK_NOFOLLOW + err = fstatat(dfd, ent->d_name, &st, + AT_SYMLINK_NOFOLLOW); +#else + strncpy(fullpath + pathlen + 1, ent->d_name, + PATH_MAX - pathlen); + fullpath[PATH_MAX] = 0; + err = lstat(fullpath, &st); +#endif + if (err == -1) { + strncpy(fullpath + pathlen + 1, ent->d_name, + PATH_MAX - pathlen); + fullpath[PATH_MAX] = 0; + PyErr_SetFromErrnoWithFilename(PyExc_OSError, + fullpath); + goto error; + } + kind = st.st_mode & S_IFMT; + } + + /* quit early? */ + if (skip && kind == S_IFDIR && !strcmp(ent->d_name, skip)) { + ret = PyList_New(0); + goto error; + } + + if (keepstat) { + stat = PyObject_CallObject((PyObject *)&listdir_stat_type, NULL); + if (!stat) + goto error; + memcpy(&((struct listdir_stat *)stat)->st, &st, sizeof(st)); + elem = Py_BuildValue("siN", ent->d_name, kind, stat); + } else + elem = Py_BuildValue("si", ent->d_name, kind); + if (!elem) + goto error; + + PyList_Append(list, elem); + Py_DECREF(elem); + } + + ret = list; + Py_INCREF(ret); + +error: + Py_DECREF(list); +error_list: + closedir(dir); +error_dir: +#ifdef AT_SYMLINK_NOFOLLOW + close(dfd); +#endif +error_value: + return ret; +} + +#endif /* ndef _WIN32 */ + +static PyObject *listdir(PyObject *self, PyObject *args, PyObject *kwargs) +{ + PyObject *statobj = NULL; /* initialize - optional arg */ + PyObject *skipobj = NULL; /* initialize - optional arg */ + char *path, *skip = NULL; + int wantstat, plen; + + static char *kwlist[] = {"path", "stat", "skip", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|OO:listdir", + kwlist, &path, &plen, &statobj, &skipobj)) + return NULL; + + wantstat = statobj && PyObject_IsTrue(statobj); + + if (skipobj && skipobj != Py_None) { + skip = PyString_AsString(skipobj); + if (!skip) + return NULL; + } + + return _listdir(path, plen, wantstat, skip); +} + +#ifdef _WIN32 +static PyObject *posixfile(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"name", "mode", "buffering", NULL}; + PyObject *file_obj = NULL; + char *name = NULL; + char *mode = "rb"; + DWORD access = 0; + DWORD creation; + HANDLE handle; + int fd, flags = 0; + int bufsize = -1; + char m0, m1, m2; + char fpmode[4]; + int fppos = 0; + int plus; + FILE *fp; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:posixfile", kwlist, + Py_FileSystemDefaultEncoding, + &name, &mode, &bufsize)) + return NULL; + + m0 = mode[0]; + m1 = m0 ? mode[1] : '\0'; + m2 = m1 ? mode[2] : '\0'; + plus = m1 == '+' || m2 == '+'; + + fpmode[fppos++] = m0; + if (m1 == 'b' || m2 == 'b') { + flags = _O_BINARY; + fpmode[fppos++] = 'b'; + } + else + flags = _O_TEXT; + if (plus) { + flags |= _O_RDWR; + access = GENERIC_READ | GENERIC_WRITE; + fpmode[fppos++] = '+'; + } + fpmode[fppos++] = '\0'; + + switch (m0) { + case 'r': + creation = OPEN_EXISTING; + if (!plus) { + flags |= _O_RDONLY; + access = GENERIC_READ; + } + break; + case 'w': + creation = CREATE_ALWAYS; + if (!plus) { + access = GENERIC_WRITE; + flags |= _O_WRONLY; + } + break; + case 'a': + creation = OPEN_ALWAYS; + flags |= _O_APPEND; + if (!plus) { + flags |= _O_WRONLY; + access = GENERIC_WRITE; + } + break; + default: + PyErr_Format(PyExc_ValueError, + "mode string must begin with one of 'r', 'w', " + "or 'a', not '%c'", m0); + goto bail; + } + + handle = CreateFile(name, access, + FILE_SHARE_READ | FILE_SHARE_WRITE | + FILE_SHARE_DELETE, + NULL, + creation, + FILE_ATTRIBUTE_NORMAL, + 0); + + if (handle == INVALID_HANDLE_VALUE) { + PyErr_SetFromWindowsErrWithFilename(GetLastError(), name); + goto bail; + } + + fd = _open_osfhandle((intptr_t) handle, flags); + if (fd == -1) { + CloseHandle(handle); + PyErr_SetFromErrnoWithFilename(PyExc_IOError, name); + goto bail; + } + + fp = _fdopen(fd, fpmode); + if (fp == NULL) { + _close(fd); + PyErr_SetFromErrnoWithFilename(PyExc_IOError, name); + goto bail; + } + + file_obj = PyFile_FromFile(fp, name, mode, fclose); + if (file_obj == NULL) { + fclose(fp); + goto bail; + } + + PyFile_SetBufSize(file_obj, bufsize); +bail: + PyMem_Free(name); + return file_obj; +} +#endif + +static char osutil_doc[] = "Native operating system services."; + +static PyMethodDef methods[] = { + {"listdir", (PyCFunction)listdir, METH_VARARGS | METH_KEYWORDS, + "list a directory\n"}, +#ifdef _WIN32 + {"posixfile", (PyCFunction)posixfile, METH_VARARGS | METH_KEYWORDS, + "Open a file with POSIX-like semantics.\n" +"On error, this function may raise either a WindowsError or an IOError."}, +#endif + {NULL, NULL} +}; + +PyMODINIT_FUNC initosutil(void) +{ + if (PyType_Ready(&listdir_stat_type) == -1) + return; + + Py_InitModule3("osutil", methods, osutil_doc); +} diff --git a/sys/src/cmd/python/Extra/mercurial/parsers.c b/sys/src/cmd/python/Extra/mercurial/parsers.c new file mode 100644 index 000000000..93c10c05d --- /dev/null +++ b/sys/src/cmd/python/Extra/mercurial/parsers.c @@ -0,0 +1,435 @@ +/* + parsers.c - efficient content parsing + + Copyright 2008 Matt Mackall <mpm@selenic.com> and others + + This software may be used and distributed according to the terms of + the GNU General Public License, incorporated herein by reference. +*/ + +#include <Python.h> +#include <ctype.h> +#include <string.h> + +static int hexdigit(char c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + + PyErr_SetString(PyExc_ValueError, "input contains non-hex character"); + return 0; +} + +/* + * Turn a hex-encoded string into binary. + */ +static PyObject *unhexlify(const char *str, int len) +{ + PyObject *ret; + const char *c; + char *d; + + ret = PyString_FromStringAndSize(NULL, len / 2); + if (!ret) + return NULL; + + d = PyString_AS_STRING(ret); + for (c = str; c < str + len;) { + int hi = hexdigit(*c++); + int lo = hexdigit(*c++); + *d++ = (hi << 4) | lo; + } + + return ret; +} + +/* + * This code assumes that a manifest is stitched together with newline + * ('\n') characters. + */ +static PyObject *parse_manifest(PyObject *self, PyObject *args) +{ + PyObject *mfdict, *fdict; + char *str, *cur, *start, *zero; + int len; + + if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest", + &PyDict_Type, &mfdict, + &PyDict_Type, &fdict, + &str, &len)) + goto quit; + + for (start = cur = str, zero = NULL; cur < str + len; cur++) { + PyObject *file = NULL, *node = NULL; + PyObject *flags = NULL; + int nlen; + + if (!*cur) { + zero = cur; + continue; + } + else if (*cur != '\n') + continue; + + if (!zero) { + PyErr_SetString(PyExc_ValueError, + "manifest entry has no separator"); + goto quit; + } + + file = PyString_FromStringAndSize(start, zero - start); + if (!file) + goto bail; + + nlen = cur - zero - 1; + + node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen); + if (!node) + goto bail; + + if (nlen > 40) { + PyObject *flags; + + flags = PyString_FromStringAndSize(zero + 41, + nlen - 40); + if (!flags) + goto bail; + + if (PyDict_SetItem(fdict, file, flags) == -1) + goto bail; + } + + if (PyDict_SetItem(mfdict, file, node) == -1) + goto bail; + + start = cur + 1; + zero = NULL; + + Py_XDECREF(flags); + Py_XDECREF(node); + Py_XDECREF(file); + continue; + bail: + Py_XDECREF(flags); + Py_XDECREF(node); + Py_XDECREF(file); + goto quit; + } + + if (len > 0 && *(cur - 1) != '\n') { + PyErr_SetString(PyExc_ValueError, + "manifest contains trailing garbage"); + goto quit; + } + + Py_INCREF(Py_None); + return Py_None; +quit: + return NULL; +} + +#ifdef _WIN32 +# ifdef _MSC_VER +/* msvc 6.0 has problems */ +# define inline __inline +typedef unsigned long uint32_t; +typedef unsigned __int64 uint64_t; +# else +# include <stdint.h> +# endif +static uint32_t ntohl(uint32_t x) +{ + return ((x & 0x000000ffUL) << 24) | + ((x & 0x0000ff00UL) << 8) | + ((x & 0x00ff0000UL) >> 8) | + ((x & 0xff000000UL) >> 24); +} +#else +/* not windows */ +# include <sys/types.h> +# if defined __BEOS__ && !defined __HAIKU__ +# include <ByteOrder.h> +# else +# include <arpa/inet.h> +# endif +# include <inttypes.h> +#endif + +static PyObject *parse_dirstate(PyObject *self, PyObject *args) +{ + PyObject *dmap, *cmap, *parents = NULL, *ret = NULL; + PyObject *fname = NULL, *cname = NULL, *entry = NULL; + char *str, *cur, *end, *cpos; + int state, mode, size, mtime; + unsigned int flen; + int len; + char decode[16]; /* for alignment */ + + if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate", + &PyDict_Type, &dmap, + &PyDict_Type, &cmap, + &str, &len)) + goto quit; + + /* read parents */ + if (len < 40) + goto quit; + + parents = Py_BuildValue("s#s#", str, 20, str + 20, 20); + if (!parents) + goto quit; + + /* read filenames */ + cur = str + 40; + end = str + len; + + while (cur < end - 17) { + /* unpack header */ + state = *cur; + memcpy(decode, cur + 1, 16); + mode = ntohl(*(uint32_t *)(decode)); + size = ntohl(*(uint32_t *)(decode + 4)); + mtime = ntohl(*(uint32_t *)(decode + 8)); + flen = ntohl(*(uint32_t *)(decode + 12)); + cur += 17; + if (flen > end - cur) { + PyErr_SetString(PyExc_ValueError, "overflow in dirstate"); + goto quit; + } + + entry = Py_BuildValue("ciii", state, mode, size, mtime); + if (!entry) + goto quit; + PyObject_GC_UnTrack(entry); /* don't waste time with this */ + + cpos = memchr(cur, 0, flen); + if (cpos) { + fname = PyString_FromStringAndSize(cur, cpos - cur); + cname = PyString_FromStringAndSize(cpos + 1, + flen - (cpos - cur) - 1); + if (!fname || !cname || + PyDict_SetItem(cmap, fname, cname) == -1 || + PyDict_SetItem(dmap, fname, entry) == -1) + goto quit; + Py_DECREF(cname); + } else { + fname = PyString_FromStringAndSize(cur, flen); + if (!fname || + PyDict_SetItem(dmap, fname, entry) == -1) + goto quit; + } + cur += flen; + Py_DECREF(fname); + Py_DECREF(entry); + fname = cname = entry = NULL; + } + + ret = parents; + Py_INCREF(ret); +quit: + Py_XDECREF(fname); + Py_XDECREF(cname); + Py_XDECREF(entry); + Py_XDECREF(parents); + return ret; +} + +const char nullid[20]; +const int nullrev = -1; + +/* create an index tuple, insert into the nodemap */ +static PyObject * _build_idx_entry(PyObject *nodemap, int n, uint64_t offset_flags, + int comp_len, int uncomp_len, int base_rev, + int link_rev, int parent_1, int parent_2, + const char *c_node_id) +{ + int err; + PyObject *entry, *node_id, *n_obj; + + node_id = PyString_FromStringAndSize(c_node_id, 20); + n_obj = PyInt_FromLong(n); + if (!node_id || !n_obj) + err = -1; + else + err = PyDict_SetItem(nodemap, node_id, n_obj); + + Py_XDECREF(n_obj); + if (err) + goto error_dealloc; + + entry = Py_BuildValue("LiiiiiiN", offset_flags, comp_len, + uncomp_len, base_rev, link_rev, + parent_1, parent_2, node_id); + if (!entry) + goto error_dealloc; + PyObject_GC_UnTrack(entry); /* don't waste time with this */ + + return entry; + +error_dealloc: + Py_XDECREF(node_id); + return NULL; +} + +/* RevlogNG format (all in big endian, data may be inlined): + * 6 bytes: offset + * 2 bytes: flags + * 4 bytes: compressed length + * 4 bytes: uncompressed length + * 4 bytes: base revision + * 4 bytes: link revision + * 4 bytes: parent 1 revision + * 4 bytes: parent 2 revision + * 32 bytes: nodeid (only 20 bytes used) + */ +static int _parse_index_ng (const char *data, int size, int inlined, + PyObject *index, PyObject *nodemap) +{ + PyObject *entry; + int n = 0, err; + uint64_t offset_flags; + int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; + const char *c_node_id; + const char *end = data + size; + char decode[64]; /* to enforce alignment with inline data */ + + while (data < end) { + unsigned int step; + + memcpy(decode, data, 64); + offset_flags = ntohl(*((uint32_t *) (decode + 4))); + if (n == 0) /* mask out version number for the first entry */ + offset_flags &= 0xFFFF; + else { + uint32_t offset_high = ntohl(*((uint32_t *) decode)); + offset_flags |= ((uint64_t) offset_high) << 32; + } + + comp_len = ntohl(*((uint32_t *) (decode + 8))); + uncomp_len = ntohl(*((uint32_t *) (decode + 12))); + base_rev = ntohl(*((uint32_t *) (decode + 16))); + link_rev = ntohl(*((uint32_t *) (decode + 20))); + parent_1 = ntohl(*((uint32_t *) (decode + 24))); + parent_2 = ntohl(*((uint32_t *) (decode + 28))); + c_node_id = decode + 32; + + entry = _build_idx_entry(nodemap, n, offset_flags, + comp_len, uncomp_len, base_rev, + link_rev, parent_1, parent_2, + c_node_id); + if (!entry) + return 0; + + if (inlined) { + err = PyList_Append(index, entry); + Py_DECREF(entry); + if (err) + return 0; + } else + PyList_SET_ITEM(index, n, entry); /* steals reference */ + + n++; + step = 64 + (inlined ? comp_len : 0); + if (end - data < step) + break; + data += step; + } + if (data != end) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_ValueError, "corrupt index file"); + return 0; + } + + /* create the nullid/nullrev entry in the nodemap and the + * magic nullid entry in the index at [-1] */ + entry = _build_idx_entry(nodemap, + nullrev, 0, 0, 0, -1, -1, -1, -1, nullid); + if (!entry) + return 0; + if (inlined) { + err = PyList_Append(index, entry); + Py_DECREF(entry); + if (err) + return 0; + } else + PyList_SET_ITEM(index, n, entry); /* steals reference */ + + return 1; +} + +/* This function parses a index file and returns a Python tuple of the + * following format: (index, nodemap, cache) + * + * index: a list of tuples containing the RevlogNG records + * nodemap: a dict mapping node ids to indices in the index list + * cache: if data is inlined, a tuple (index_file_content, 0) else None + */ +static PyObject *parse_index(PyObject *self, PyObject *args) +{ + const char *data; + int size, inlined; + PyObject *rval = NULL, *index = NULL, *nodemap = NULL, *cache = NULL; + PyObject *data_obj = NULL, *inlined_obj; + + if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj)) + return NULL; + inlined = inlined_obj && PyObject_IsTrue(inlined_obj); + + /* If no data is inlined, we know the size of the index list in + * advance: size divided by size of one one revlog record (64 bytes) + * plus one for the nullid */ + index = inlined ? PyList_New(0) : PyList_New(size / 64 + 1); + if (!index) + goto quit; + + nodemap = PyDict_New(); + if (!nodemap) + goto quit; + + /* set up the cache return value */ + if (inlined) { + /* Note that the reference to data_obj is only borrowed */ + data_obj = PyTuple_GET_ITEM(args, 0); + cache = Py_BuildValue("iO", 0, data_obj); + if (!cache) + goto quit; + } else { + cache = Py_None; + Py_INCREF(Py_None); + } + + /* actually populate the index and the nodemap with data */ + if (!_parse_index_ng (data, size, inlined, index, nodemap)) + goto quit; + + rval = Py_BuildValue("NNN", index, nodemap, cache); + if (!rval) + goto quit; + return rval; + +quit: + Py_XDECREF(index); + Py_XDECREF(nodemap); + Py_XDECREF(cache); + Py_XDECREF(rval); + return NULL; +} + + +static char parsers_doc[] = "Efficient content parsing."; + +static PyMethodDef methods[] = { + {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, + {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, + {"parse_index", parse_index, METH_VARARGS, "parse a revlog index\n"}, + {NULL, NULL} +}; + +PyMODINIT_FUNC initparsers(void) +{ + Py_InitModule3("parsers", methods, parsers_doc); +} diff --git a/sys/src/cmd/python/Extra/mkfile b/sys/src/cmd/python/Extra/mkfile new file mode 100644 index 000000000..cf30e453d --- /dev/null +++ b/sys/src/cmd/python/Extra/mkfile @@ -0,0 +1,16 @@ +APE=/sys/src/ape +<$APE/config + +LIB=../libextra.a$O + +OFILES=`{du -a . | grep '.c$' | awk '{print $2}' | sed 's/.$/\$O/'} + +</sys/src/cmd/mklib + +CFLAGS=-c -I.. -I../Include -DT$objtype -D_SUSV2_SOURCE -DNDEBUG + +%.$O: %.c + $CC $CFLAGS -o $stem.$O $stem.c + +clean:V: + rm -fr $OFILES |