changeset 36655:68026dd7c4f9

cext: accept arguments as Py_buffer

The s*/y* value formatters receive a Py_buffer instead of a char *. This value format is more flexible in the types that it allows.

We change bdiff() to accept any object that conforms to the buffer protocol. We validate that the buffers are contiguous and have a single dimension.

This allows memoryview instances to be handled by the function, so we revert a recent change to cast arguments to bytes before calling this function.

Differential Revision: https://phab.mercurial-scm.org/D2587
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 03 Mar 2018 11:26:30 -0500
parents b864f4536ca8
children 5c4c9eb1feb6
files mercurial/cext/bdiff.c mercurial/mdiff.py
diffstat 2 files changed, 25 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/cext/bdiff.c	Sat Mar 03 11:19:43 2018 -0500
+++ b/mercurial/cext/bdiff.c	Sat Mar 03 11:26:30 2018 -0500
@@ -60,7 +60,8 @@
 
 static PyObject *bdiff(PyObject *self, PyObject *args)
 {
-	char *sa, *sb, *rb, *ia, *ib;
+	Py_buffer ba, bb;
+	char *rb, *ia, *ib;
 	PyObject *result = NULL;
 	struct bdiff_line *al = NULL, *bl = NULL;
 	struct bdiff_hunk l, *h;
@@ -70,25 +71,39 @@
 
 	l.next = NULL;
 
-	if (!PyArg_ParseTuple(args, PY23("s#s#:bdiff", "y#y#:bdiff"), &sa, &la,
-	                      &sb, &lb))
+	if (!PyArg_ParseTuple(args, PY23("s*s*:bdiff", "y*y*:bdiff"), &ba, &bb))
 		return NULL;
 
+	if (!PyBuffer_IsContiguous(&ba, 'C') || ba.ndim > 1) {
+		PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
+		goto cleanup;
+	}
+
+	if (!PyBuffer_IsContiguous(&bb, 'C') || bb.ndim > 1) {
+		PyErr_SetString(PyExc_ValueError, "bdiff input not contiguous");
+		goto cleanup;
+	}
+
+	la = ba.len;
+	lb = bb.len;
+
 	if (la > UINT_MAX || lb > UINT_MAX) {
 		PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
-		return NULL;
+		goto cleanup;
 	}
 
 	_save = PyEval_SaveThread();
 
 	lmax = la > lb ? lb : la;
-	for (ia = sa, ib = sb; li < lmax && *ia == *ib; ++li, ++ia, ++ib)
+	for (ia = ba.buf, ib = bb.buf; li < lmax && *ia == *ib;
+	     ++li, ++ia, ++ib) {
 		if (*ia == '\n')
 			lcommon = li + 1;
+	}
 	/* we can almost add: if (li == lmax) lcommon = li; */
 
-	an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
-	bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
+	an = bdiff_splitlines(ba.buf + lcommon, la - lcommon, &al);
+	bn = bdiff_splitlines(bb.buf + lcommon, lb - lcommon, &bl);
 	if (!al || !bl) {
 		PyErr_NoMemory();
 		goto cleanup;
@@ -137,6 +152,8 @@
 cleanup:
 	if (_save)
 		PyEval_RestoreThread(_save);
+	PyBuffer_Release(&ba);
+	PyBuffer_Release(&bb);
 	if (al) {
 		free(al);
 	}
--- a/mercurial/mdiff.py	Sat Mar 03 11:19:43 2018 -0500
+++ b/mercurial/mdiff.py	Sat Mar 03 11:26:30 2018 -0500
@@ -30,17 +30,9 @@
 fixws = bdiff.fixws
 patches = mpatch.patches
 patchedsize = mpatch.patchedsize
-_textdiff = bdiff.bdiff
+textdiff = bdiff.bdiff
 splitnewlines = bdiff.splitnewlines
 
-# On Python 3, util.buffer() creates a memoryview, which appears not
-# supporting the buffer protocol
-if pycompat.ispy3:
-    def textdiff(a, b):
-        return _textdiff(bytes(a), bytes(b))
-else:
-    textdiff = _textdiff
-
 class diffopts(object):
     '''context is the number of context lines
     text treats all files as text