diff --git a/docs/build.rst b/docs/build.rst index dc209c21f..8d190ab71 100644 --- a/docs/build.rst +++ b/docs/build.rst @@ -85,7 +85,7 @@ Run-time dependencies: * ``zlib`` * ``libpng`` * ``liblcms2`` -* ``librsync`` +* ``libxxhash`` * ``openssl`` * ``freetype`` (not needed on macOS) * ``fontconfig`` (not needed on macOS) @@ -114,7 +114,7 @@ Build-time dependencies: - ``liblcms2-dev`` - ``libssl-dev`` - ``libpython3-dev`` - - ``librsync-dev`` + - ``libxxhash-dev`` Build and run from source with Nix diff --git a/kittens/transfer/algorithm.c b/kittens/transfer/algorithm.c new file mode 100644 index 000000000..d5a95654e --- /dev/null +++ b/kittens/transfer/algorithm.c @@ -0,0 +1,276 @@ +/* + * algorithm.c + * Copyright (C) 2023 Kovid Goyal + * + * Distributed under terms of the GPL3 license. + */ + +#include "data-types.h" +#define XXH_INLINE_ALL +#include + +static PyObject *RsyncError = NULL; +typedef void*(*new_hash_t)(void); +typedef void(*delete_hash_t)(void*); +typedef bool(*reset_hash_t)(void*); +typedef bool(*update_hash_t)(void*, const void *input, size_t length); +typedef void(*digest_hash_t)(const void*, void *output); +typedef uint64_t(*digest_hash64_t)(const void*); + +typedef struct hasher_t { + size_t hash_size, block_size; + void *state; + new_hash_t new; + delete_hash_t delete; + reset_hash_t reset; + update_hash_t update; + digest_hash_t digest; + digest_hash64_t digest64; +} hasher_t; + +static void xxh64_delete(void* s) { XXH3_freeState(s); } +static bool xxh64_reset(void* s) { return XXH3_64bits_reset(s) == XXH_OK; } +static void* xxh64_create(void) { void *ans = XXH3_createState(); if (ans != NULL) xxh64_reset(ans); return ans; } +static bool xxh64_update(void* s, const void *input, size_t length) { return XXH3_64bits_update(s, input, length) == XXH_OK; } +static uint64_t xxh64_digest64(const void* s) { return XXH3_64bits_digest(s); } +static void xxh64_digest(const void* s, void *output) { + XXH64_hash_t ans = XXH3_64bits_digest(s); + XXH64_canonical_t c; + XXH64_canonicalFromHash(&c, ans); + memcpy(output, c.digest, sizeof(c.digest)); +} + +static hasher_t +xxh64_hasher(void) { + hasher_t ans = { + .hash_size=sizeof(XXH64_hash_t), .block_size = 64, + .new=xxh64_create, .delete=xxh64_delete, .reset=xxh64_reset, .update=xxh64_update, .digest=xxh64_digest, .digest64=xxh64_digest64 + }; + return ans; +} + +static bool xxh128_reset(void* s) { return XXH3_128bits_reset(s) == XXH_OK; } +static void* xxh128_create(void) { void *ans = XXH3_createState(); if (ans != NULL) xxh128_reset(ans); return ans; } +static bool xxh128_update(void* s, const void *input, size_t length) { return XXH3_128bits_update(s, input, length) == XXH_OK; } +static void xxh128_digest(const void* s, void *output) { + XXH128_hash_t ans = XXH3_128bits_digest(s); + XXH128_canonical_t c; + XXH128_canonicalFromHash(&c, ans); + memcpy(output, c.digest, sizeof(c.digest)); +} + +static hasher_t +xxh128_hasher(void) { + hasher_t ans = { + .hash_size=sizeof(XXH128_hash_t), .block_size = 64, + .new=xxh128_create, .delete=xxh64_delete, .reset=xxh128_reset, .update=xxh128_update, .digest=xxh128_digest, + }; + return ans; +} + + +typedef hasher_t(*hasher_constructor_t)(void); + +typedef struct Rsync { + size_t block_size; + + hasher_constructor_t hasher_constructor, checksummer_constructor; + hasher_t hasher, checksummer; + + void *buffer; size_t buffer_cap, buffer_sz; +} Rsync; + +static void +free_rsync(Rsync* r) { + if (r->hasher.state) { r->hasher.delete(r->hasher.state); r->hasher.state = NULL; } + if (r->checksummer.state) { r->checksummer.delete(r->checksummer.state); r->checksummer.state = NULL; } + if (r->buffer) { free(r->buffer); r->buffer = NULL; } + free(r); +} + +static Rsync* +new_rsync(size_t block_size, int strong_hash_type, int checksum_type) { + Rsync *ans = calloc(1, sizeof(Rsync)); + if (ans != NULL) { + ans->block_size = block_size; + if (strong_hash_type == 0) ans->hasher_constructor = xxh64_hasher; + if (checksum_type == 0) ans->checksummer_constructor = xxh128_hasher; + if (ans->hasher_constructor == NULL) { free_rsync(ans); return NULL; } + if (ans->checksummer_constructor == NULL) { free_rsync(ans); return NULL; } + ans->hasher = ans->hasher_constructor(); + ans->checksummer = ans->checksummer_constructor(); + ans->buffer = malloc(block_size); + if (ans->buffer == NULL) { free(ans); return NULL; } + ans->buffer_cap = block_size; + } + return ans; +} + +typedef struct rolling_checksum { + uint32_t alpha, beta, val, l, first_byte_of_previous_window; +} rolling_checksum; + +static const uint32_t _M = (1 << 16); + +static uint32_t +rolling_checksum_full(rolling_checksum *self, uint8_t *data, uint32_t len) { + uint32_t alpha = 0, beta = 0; + self->l = len; + for (uint32_t i = 0; i < len; i++) { + alpha += data[i]; + beta += (self->l - i) * data[i]; + } + self->first_byte_of_previous_window = data[0]; + self->alpha = alpha % _M; + self->beta = beta % _M; + self->val = self->alpha + _M*self->beta; + return self->val; +} + +static void +rolling_checksum_add_one_byte(rolling_checksum *self, uint8_t first_byte, uint8_t last_byte) { + self->alpha = (self->alpha - self->first_byte_of_previous_window + last_byte) % _M; + self->beta = (self->beta - (self->l)*self->first_byte_of_previous_window + self->alpha) % _M; + self->val = self->alpha + _M*self->beta; + self->first_byte_of_previous_window = first_byte; +} + +// Python interface {{{ + +typedef struct { + PyObject_HEAD + hasher_t h; + const char *name; +} Hasher; + +static int +Hasher_init(PyObject *s, PyObject *args, PyObject *kwds) { + Hasher *self = (Hasher*)s; + static char *kwlist[] = {"which", "data", NULL}; + const char *which = "xxh3-64"; + FREE_BUFFER_AFTER_FUNCTION Py_buffer data = {0}; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|sy*", kwlist, &which, &data)) return -1; + if (strcmp(which, "xxh3-64") == 0) { + self->h = xxh64_hasher(); + self->name = "xxh3-64"; + } else if (strcmp(which, "xxh3-128") == 0) { + self->h = xxh128_hasher(); + self->name = "xxh3-128"; + } else { + PyErr_Format(PyExc_KeyError, "Unknown hash type: %s", which); + return -1; + } + self->h.state = self->h.new(); + if (self->h.state == NULL) { PyErr_NoMemory(); return -1; } + if (data.buf && data.len > 0) { + self->h.update(self->h.state, data.buf, data.len); + } + return 0; +} + +static void +Hasher_dealloc(PyObject *self) { + Hasher *h = (Hasher*)self; + if (h->h.state) { h->h.delete(h->h.state); h->h.state = NULL; } + Py_TYPE(self)->tp_free(self); +} + +static PyObject* +update(Hasher *self, PyObject *o) { + FREE_BUFFER_AFTER_FUNCTION Py_buffer data = {0}; + if (PyObject_GetBuffer(o, &data, PyBUF_SIMPLE) == -1) return NULL; + if (data.buf && data.len > 0) { + self->h.update(self->h.state, data.buf, data.len); + } + Py_RETURN_NONE; +} + +static PyObject* +digest(Hasher *self) { + PyObject *ans = PyBytes_FromStringAndSize(NULL, self->h.hash_size); + if (ans) self->h.digest(self->h.state, PyBytes_AS_STRING(ans)); + return ans; +} + +static PyObject* +hexdigest(Hasher *self) { + uint8_t digest[64]; char hexdigest[128]; + self->h.digest(self->h.state, digest); + static const char * hex = "0123456789abcdef"; + char *pout = hexdigest; const uint8_t *pin = digest; + for (; pin < digest + self->h.hash_size; pin++) { + *pout++ = hex[(*pin>>4) & 0xF]; + *pout++ = hex[ *pin & 0xF]; + } + return PyUnicode_FromStringAndSize(hexdigest, self->h.hash_size * 2); +} + + +static PyObject* +Hasher_digest_size(Hasher* self, void* closure UNUSED) { return PyLong_FromSize_t(self->h.hash_size); } +static PyObject* +Hasher_block_size(Hasher* self, void* closure UNUSED) { return PyLong_FromSize_t(self->h.block_size); } +static PyObject* +Hasher_name(Hasher* self, void* closure UNUSED) { return PyUnicode_FromString(self->name); } + +static PyMethodDef Hasher_methods[] = { + METHODB(update, METH_O), + METHODB(digest, METH_NOARGS), + METHODB(hexdigest, METH_NOARGS), + {NULL} /* Sentinel */ +}; + +PyGetSetDef Hasher_getsets[] = { + {"digest_size", (getter)Hasher_digest_size, NULL, NULL, NULL}, + {"block_size", (getter)Hasher_block_size, NULL, NULL, NULL}, + {"name", (getter)Hasher_name, NULL, NULL, NULL}, + {NULL} +}; + + +PyTypeObject Hasher_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "rsync.Hasher", + .tp_basicsize = sizeof(Hasher), + .tp_dealloc = Hasher_dealloc, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = "Hasher", + .tp_methods = Hasher_methods, + .tp_new = PyType_GenericNew, + .tp_init = Hasher_init, + .tp_getset = Hasher_getsets, +}; + +static PyMethodDef module_methods[] = { + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static int +exec_module(PyObject *m) { + RsyncError = PyErr_NewException("rsync.RsyncError", NULL, NULL); + if (RsyncError == NULL) return -1; + PyModule_AddObject(m, "RsyncError", RsyncError); + if (PyType_Ready(&Hasher_Type) < 0) return -1; + Py_INCREF(&Hasher_Type); + if (PyModule_AddObject(m, "Hasher", (PyObject *) &Hasher_Type) < 0) return -1; + + return 0; +} + +IGNORE_PEDANTIC_WARNINGS +static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} }; +END_IGNORE_PEDANTIC_WARNINGS + +static struct PyModuleDef module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "rsync", /* name of module */ + .m_doc = NULL, + .m_slots = slots, + .m_methods = module_methods +}; + +EXPORTED PyMODINIT_FUNC +PyInit_rsync(void) { + return PyModuleDef_Init(&module); +} +// }}} diff --git a/kittens/transfer/librsync.py b/kittens/transfer/librsync.py deleted file mode 100644 index 0f537970d..000000000 --- a/kittens/transfer/librsync.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env python -# License: GPLv3 Copyright: 2021, Kovid Goyal - -import os -import tempfile -from typing import IO, TYPE_CHECKING, Iterator, Union - -from .rsync import IO_BUFFER_SIZE, RsyncError, begin_create_delta, begin_create_signature, begin_load_signature, begin_patch, build_hash_table, iter_job - -if TYPE_CHECKING: - from .rsync import JobCapsule, SignatureCapsule - - -class StreamingJob: - - expected_input_size = IO_BUFFER_SIZE - - def __init__(self, job: 'JobCapsule', output_buf_size: int = IO_BUFFER_SIZE): - self.job = job - self.finished = False - self.calls_with_no_data = 0 - self.output_buf = bytearray(output_buf_size) - self.uncomsumed_data = b'' - - def __call__(self, input_data: Union[memoryview, bytes] = b'') -> Iterator[memoryview]: - if self.finished: - if input_data: - raise RsyncError('There was too much input data') - return memoryview(self.output_buf)[:0] - if self.uncomsumed_data: - input_data = self.uncomsumed_data + bytes(input_data) - self.uncomsumed_data = b'' - while True: - self.finished, sz_of_unused_input, output_size = iter_job(self.job, input_data, self.output_buf) - if output_size: - yield memoryview(self.output_buf)[:output_size] - consumed_some_input = sz_of_unused_input < len(input_data) - produced_some_output = output_size > 0 - if self.finished or (not sz_of_unused_input and len(input_data)) or (not consumed_some_input and not produced_some_output): - break - input_data = memoryview(input_data)[-sz_of_unused_input:] - if sz_of_unused_input: - self.uncomsumed_data = bytes(input_data[-sz_of_unused_input:]) - - def get_remaining_output(self) -> Iterator[memoryview]: - if not self.finished: - yield from self() - if not self.finished: - raise RsyncError('Insufficient input data') - if self.uncomsumed_data: - raise RsyncError(f'{len(self.uncomsumed_data)} bytes of unconsumed input data') - - -def drive_job_on_file(f: IO[bytes], job: 'JobCapsule', input_buf_size: int = IO_BUFFER_SIZE, output_buf_size: int = IO_BUFFER_SIZE) -> Iterator[memoryview]: - sj = StreamingJob(job, output_buf_size=output_buf_size) - input_buf = bytearray(input_buf_size) - while not sj.finished: - sz = f.readinto(input_buf) # type: ignore - if not sz: - del input_buf - yield from sj.get_remaining_output() - break - yield from sj(memoryview(input_buf)[:sz]) - - -def signature_of_file(path: str) -> Iterator[memoryview]: - with open(path, 'rb') as f: - f.seek(0, os.SEEK_END) - fsz = f.tell() - job, block_len, strong_len = begin_create_signature(fsz) - strong_len = max(0, strong_len) - f.seek(0) - # see whole.c in librsync source for size calculations - yield from drive_job_on_file(f, job, input_buf_size=4 * block_len, output_buf_size=12 + 4 * (4 + (strong_len or IO_BUFFER_SIZE))) - - -class LoadSignature(StreamingJob): - - # see whole.c in librsync source for size calculations - expected_input_size = 16 * 1024 - autocommit = True - - def __init__(self) -> None: - job, self.signature = begin_load_signature() - super().__init__(job, output_buf_size=0) - - def add_chunk(self, chunk: bytes) -> None: - for ignored in self(chunk): - pass - - def commit(self) -> None: - for ignored in self.get_remaining_output(): - pass - build_hash_table(self.signature) - - -def delta_for_file(path: str, sig: 'SignatureCapsule') -> Iterator[memoryview]: - job = begin_create_delta(sig) - with open(path, 'rb') as f: - # see whole.c in librsync source for size calculations - yield from drive_job_on_file(f, job, input_buf_size=8 * IO_BUFFER_SIZE, output_buf_size=4 * IO_BUFFER_SIZE) - - -class PatchFile(StreamingJob): - - # see whole.c in librsync source for size calculations - expected_input_size = IO_BUFFER_SIZE - - def __init__(self, src_path: str, output_path: str = ''): - self.overwrite_src = not output_path - self.src_file = open(src_path, 'rb') - if self.overwrite_src: - self.dest_file: IO[bytes] = tempfile.NamedTemporaryFile(mode='wb', dir=os.path.dirname(os.path.abspath(os.path.realpath(src_path))), delete=False) - else: - self.dest_file = open(output_path, 'wb') - job = begin_patch(self.read_from_src) - super().__init__(job, output_buf_size=4 * IO_BUFFER_SIZE) - - def tell(self) -> int: - if self.dest_file.closed: - return os.path.getsize(self.src_file.name if self.overwrite_src else self.dest_file.name) - return self.dest_file.tell() - - def read_from_src(self, b: memoryview, pos: int) -> int: - self.src_file.seek(pos) - return self.src_file.readinto(b) - - def close(self) -> None: - if not self.src_file.closed: - self.get_remaining_output() - self.src_file.close() - count = 100 - while not self.finished and count > 0: - self() - count -= 1 - self.dest_file.close() - if self.overwrite_src: - os.replace(self.dest_file.name, self.src_file.name) - - def write(self, data: bytes) -> None: - for output in self(data): - self.dest_file.write(output) - - def __enter__(self) -> 'PatchFile': - return self - - def __exit__(self, *a: object) -> None: - self.close() - - -def develop() -> None: - import sys - src = sys.argv[-1] - sig_loader = LoadSignature() - with open(f'{src}.sig', 'wb') as f: - for chunk in signature_of_file(src): - sig_loader.add_chunk(chunk) - f.write(chunk) - sig_loader.commit() - with open(f'{src}.delta', 'wb') as f, PatchFile(src, f'{src}.output') as patcher: - for chunk in delta_for_file(src, sig_loader.signature): - f.write(chunk) - patcher.write(chunk) diff --git a/kittens/transfer/rsync.c b/kittens/transfer/rsync.c deleted file mode 100644 index c90b26a98..000000000 --- a/kittens/transfer/rsync.c +++ /dev/null @@ -1,251 +0,0 @@ -//go:build exclude -/* - * rsync.c - * Copyright (C) 2021 Kovid Goyal - * - * Distributed under terms of the GPL3 license. - */ - -#include "data-types.h" -#include - -#define SIGNATURE_CAPSULE "rs_signature_t" -#define JOB_WITH_CALLBACK_CAPSULE "rs_callback_job_t" -// See whole.c in the librsync source code for estimating IO_BUFFER_SIZE -#define IO_BUFFER_SIZE (64u * 1024u) -static PyObject *RsyncError = NULL; - -static void -free_job_with_callback_capsule(PyObject *capsule) { - if (PyCapsule_IsValid(capsule, JOB_WITH_CALLBACK_CAPSULE)) { - void *job = PyCapsule_GetPointer(capsule, JOB_WITH_CALLBACK_CAPSULE); - if (job && job != RsyncError) rs_job_free(job); - PyObject *callback = PyCapsule_GetContext(capsule); - Py_CLEAR(callback); - } -} - -static void -free_sig_capsule(PyObject *capsule) { - rs_signature_t *sig = PyCapsule_GetPointer(capsule, SIGNATURE_CAPSULE); - if (sig) rs_free_sumset(sig); -} - -#define CREATE_JOB(func, cb, ...) \ - PyObject *job_capsule = PyCapsule_New(RsyncError, JOB_WITH_CALLBACK_CAPSULE, free_job_with_callback_capsule); \ - if (job_capsule) { \ - rs_job_t *job = func(__VA_ARGS__); \ - if (job) { \ - if (PyCapsule_SetPointer(job_capsule, job) == 0) { \ - if (cb) { \ - if (PyCapsule_SetContext(job_capsule, cb) == 0) { Py_INCREF(cb); } \ - else { Py_CLEAR(job_capsule); } \ - } \ - } else { \ - rs_job_free(job); Py_CLEAR(job_capsule); \ - } \ - } else { \ - Py_CLEAR(job_capsule); \ - } \ - } - - -static PyObject* -begin_create_signature(PyObject *self UNUSED, PyObject *args) { - long long file_size = -1; - long sl = 0; - if (!PyArg_ParseTuple(args, "|Ll", &file_size, &sl)) return NULL; - rs_magic_number magic_number = 0; - size_t block_len = 0, strong_len = sl; -#ifdef KITTY_HAS_RS_SIG_ARGS - rs_result res = rs_sig_args(file_size, &magic_number, &block_len, &strong_len); - if (res != RS_DONE) { - PyErr_SetString(PyExc_ValueError, rs_strerror(res)); - return NULL; - } -#else - block_len = RS_DEFAULT_BLOCK_LEN; - strong_len = 8; - magic_number = RS_MD4_SIG_MAGIC; -#endif - CREATE_JOB(rs_sig_begin, NULL, block_len, strong_len, magic_number); - return Py_BuildValue("Nnn", job_capsule, (Py_ssize_t)block_len, (Py_ssize_t)strong_len); -} - -#define GET_JOB_FROM_CAPSULE \ - rs_job_t *job = PyCapsule_GetPointer(job_capsule, JOB_WITH_CALLBACK_CAPSULE); \ - if (!job) { PyErr_SetString(PyExc_TypeError, "Not a job capsule"); return NULL; } \ - - -static PyObject* -iter_job(PyObject *self UNUSED, PyObject *args) { - FREE_BUFFER_AFTER_FUNCTION Py_buffer input_buf = {0}; - FREE_BUFFER_AFTER_FUNCTION Py_buffer output_buf = {0}; - PyObject *job_capsule, *output_array; - if (!PyArg_ParseTuple(args, "O!y*O!", &PyCapsule_Type, &job_capsule, &input_buf, &PyByteArray_Type, &output_array)) return NULL; - GET_JOB_FROM_CAPSULE; - if (PyObject_GetBuffer(output_array, &output_buf, PyBUF_WRITE) != 0) return NULL; - int eof = input_buf.len > 0 ? 0 : 1; - rs_buffers_t buffer = { - .avail_in=input_buf.len, .next_in=input_buf.buf, .eof_in=eof, - .avail_out=output_buf.len, .next_out=output_buf.buf - }; - size_t before = buffer.avail_out; - rs_result result = rs_job_iter(job, &buffer); - Py_ssize_t output_size = before - buffer.avail_out; - if (result == RS_DONE || result == RS_BLOCKED) { - Py_ssize_t unused_input = buffer.avail_in; - return Py_BuildValue("Onn", result == RS_DONE ? Py_True : Py_False, unused_input, output_size); - } - PyErr_SetString(RsyncError, rs_strerror(result)); - return NULL; -} - -static PyObject* -begin_load_signature(PyObject *self UNUSED, PyObject *args UNUSED) { - rs_signature_t *sig = NULL; - CREATE_JOB(rs_loadsig_begin, NULL, &sig); - if (!job_capsule) { rs_free_sumset(sig); return NULL; } - PyObject *sc = PyCapsule_New(sig, SIGNATURE_CAPSULE, free_sig_capsule); - if (!sc) { Py_CLEAR(job_capsule); rs_free_sumset(sig); return NULL; } - return Py_BuildValue("NN", job_capsule, sc); -} - -#define GET_SIG_FROM_CAPSULE \ - rs_signature_t *sig = PyCapsule_GetPointer(sig_capsule, SIGNATURE_CAPSULE); \ - if (!sig) { PyErr_SetString(PyExc_TypeError, "Not a sig capsule"); return NULL; } - - -static PyObject* -build_hash_table(PyObject *self UNUSED, PyObject *args) { - PyObject *sig_capsule; - if (!PyArg_ParseTuple(args, "O!", &PyCapsule_Type, &sig_capsule)) return NULL; - GET_SIG_FROM_CAPSULE; - rs_result res = rs_build_hash_table(sig); - if (res != RS_DONE) { - PyErr_SetString(RsyncError, rs_strerror(res)); - return NULL; - } - Py_RETURN_NONE; -} - -static PyObject* -begin_create_delta(PyObject *self UNUSED, PyObject *args) { - PyObject *sig_capsule; - if (!PyArg_ParseTuple(args, "O!", &PyCapsule_Type, &sig_capsule)) return NULL; - GET_SIG_FROM_CAPSULE; - CREATE_JOB(rs_delta_begin, NULL, sig); - return job_capsule; -} - -static rs_result -copy_callback(void *opaque, rs_long_t pos, size_t *len, void **buf) { - PyObject *callback = opaque; - long long p = pos; - PyObject *mem = PyMemoryView_FromMemory(*buf, *len, PyBUF_WRITE); - if (!mem) { PyErr_Clear(); return RS_MEM_ERROR; } - PyObject *res = PyObject_CallFunction(callback, "OL", mem, p); - Py_DECREF(mem); - if (res == NULL) { PyErr_Print(); return RS_IO_ERROR; } - rs_result r = RS_DONE; - if (PyLong_Check(res)) { *len = PyLong_AsSize_t(res); } - else { r = RS_INTERNAL_ERROR; } - Py_DECREF(res); - return r; -} - -static PyObject* -begin_patch(PyObject *self UNUSED, PyObject *callback) { - if (!PyCallable_Check(callback)) { PyErr_SetString(PyExc_TypeError, "callback must be a callable"); return NULL; } - CREATE_JOB(rs_patch_begin, callback, copy_callback, callback); - return job_capsule; -} - -static bool -call_ftc_callback(PyObject *callback, char *src, Py_ssize_t key_start, Py_ssize_t key_length, Py_ssize_t val_start, Py_ssize_t val_length) { - while(src[key_start] == ';' && key_length > 0 ) { key_start++; key_length--; } - DECREF_AFTER_FUNCTION PyObject *k = PyMemoryView_FromMemory(src + key_start, key_length, PyBUF_READ); - if (!k) return false; - DECREF_AFTER_FUNCTION PyObject *v = PyMemoryView_FromMemory(src + val_start, val_length, PyBUF_READ); - if (!v) return false; - DECREF_AFTER_FUNCTION PyObject *ret = PyObject_CallFunctionObjArgs(callback, k, v, NULL); - return ret != NULL; -} - -static PyObject* -decode_utf8_buffer(PyObject *self UNUSED, PyObject *args) { - FREE_BUFFER_AFTER_FUNCTION Py_buffer buf = {0}; - if (!PyArg_ParseTuple(args, "s*", &buf)) return NULL; - return PyUnicode_FromStringAndSize(buf.buf, buf.len); -} - -static PyObject* -parse_ftc(PyObject *self UNUSED, PyObject *args) { - FREE_BUFFER_AFTER_FUNCTION Py_buffer buf = {0}; - PyObject *callback; - size_t i = 0, key_start = 0, key_length = 0, val_start = 0, val_length = 0; - if (!PyArg_ParseTuple(args, "s*O", &buf, &callback)) return NULL; - char *src = buf.buf; - size_t sz = buf.len; - if (!PyCallable_Check(callback)) { PyErr_SetString(PyExc_TypeError, "callback must be callable"); return NULL; } - for (i = 0; i < sz; i++) { - char ch = src[i]; - if (key_length == 0) { - if (ch == '=') { - key_length = i - key_start; - val_start = i + 1; - } - } else { - if (ch == ';') { - val_length = i - val_start; - if (!call_ftc_callback(callback, src, key_start, key_length, val_start, val_length)) return NULL; - key_length = 0; key_start = i + 1; val_start = 0; - } - } - } - if (key_length && val_start) { - val_length = sz - val_start; - if (!call_ftc_callback(callback, src, key_start, key_length, val_start, val_length)) return NULL; - } - Py_RETURN_NONE; -} - -static PyMethodDef module_methods[] = { - {"begin_create_signature", begin_create_signature, METH_VARARGS, ""}, - {"begin_load_signature", begin_load_signature, METH_NOARGS, ""}, - {"build_hash_table", build_hash_table, METH_VARARGS, ""}, - {"begin_patch", begin_patch, METH_O, ""}, - {"begin_create_delta", begin_create_delta, METH_VARARGS, ""}, - {"iter_job", iter_job, METH_VARARGS, ""}, - {"parse_ftc", parse_ftc, METH_VARARGS, ""}, - {"decode_utf8_buffer", decode_utf8_buffer, METH_VARARGS, ""}, - {NULL, NULL, 0, NULL} /* Sentinel */ -}; - -static int -exec_module(PyObject *m) { - RsyncError = PyErr_NewException("rsync.RsyncError", NULL, NULL); - if (RsyncError == NULL) return -1; - PyModule_AddObject(m, "RsyncError", RsyncError); - - - PyModule_AddIntMacro(m, IO_BUFFER_SIZE); - return 0; -} - -IGNORE_PEDANTIC_WARNINGS -static PyModuleDef_Slot slots[] = { {Py_mod_exec, (void*)exec_module}, {0, NULL} }; -END_IGNORE_PEDANTIC_WARNINGS - -static struct PyModuleDef module = { - .m_base = PyModuleDef_HEAD_INIT, - .m_name = "rsync", /* name of module */ - .m_doc = NULL, - .m_slots = slots, - .m_methods = module_methods -}; - -EXPORTED PyMODINIT_FUNC -PyInit_rsync(void) { - return PyModuleDef_Init(&module); -} diff --git a/kittens/transfer/rsync.pyi b/kittens/transfer/rsync.pyi index fb17ddcb0..ba99280fc 100644 --- a/kittens/transfer/rsync.pyi +++ b/kittens/transfer/rsync.pyi @@ -1,47 +1,3 @@ -from typing import Callable, Tuple, Union - -IO_BUFFER_SIZE: int - - -class JobCapsule: - pass - - -class SignatureCapsule: - pass - class RsyncError(Exception): pass - - -def begin_create_signature(file_size: int = -1, strong_len: int = 0) -> Tuple[JobCapsule, int, int]: - pass - - -def begin_load_signature() -> Tuple[JobCapsule, SignatureCapsule]: - pass - - -def build_hash_table(sig: SignatureCapsule) -> None: - pass - - -def begin_create_delta(sig: SignatureCapsule) -> JobCapsule: - pass - - -def begin_patch(callback: Callable[[memoryview, int], int]) -> JobCapsule: - pass - - -def iter_job(job_capsule: JobCapsule, input_data: bytes, output_buf: bytearray) -> Tuple[bool, int, int]: - pass - - -def parse_ftc(src: Union[str, bytes, memoryview], callback: Callable[[memoryview, memoryview], None]) -> None: - pass - - -def decode_utf8_buffer(src: Union[str, bytes, memoryview]) -> str: - pass diff --git a/setup.py b/setup.py index 026abb5dd..062e7df2f 100755 --- a/setup.py +++ b/setup.py @@ -299,24 +299,6 @@ def set_arches(flags: List[str], arches: Iterable[str] = ('x86_64', 'arm64')) -> flags.extend(('-arch', arch)) -def detect_librsync(cc: List[str], cflags: List[str], ldflags: List[str]) -> str: - if not test_compile( - cc, *cflags, libraries=('rsync',), ldflags=ldflags, show_stderr=True, - src='#include \nint main(void) { rs_strerror(0); return 0; }'): - raise SystemExit('The librsync library is required') - # check for rs_sig_args() which was added to librsync in Apr 2020 version 2.3.0 - if test_compile(cc, *cflags, libraries=('rsync',), ldflags=ldflags, src=''' -#include -int main(void) { - rs_magic_number magic_number = 0; - size_t block_len = 0, strong_len = 0; - rs_sig_args(1024, &magic_number, &block_len, &strong_len); - return 0; -}'''): - return '-DKITTY_HAS_RS_SIG_ARGS' - return '' - - def is_gcc(cc: Iterable[str]) -> bool: @lru_cache() @@ -434,10 +416,6 @@ def init_env( cflags.insert(0, f'-I{os.environ["DEVELOP_ROOT"]}/include') ldpaths.insert(0, f'-L{os.environ["DEVELOP_ROOT"]}/lib') - rs_cflag = detect_librsync(cc, cflags, ldflags + ldpaths) - if rs_cflag: - cflags.append(rs_cflag) - if build_universal_binary: set_arches(cflags) set_arches(ldflags) @@ -830,7 +808,7 @@ def compile_kittens(args: Options) -> None: return kitten, sources, headers, f'kittens/{kitten}/{output}', includes, libraries for kitten, sources, all_headers, dest, includes, libraries in ( - files('transfer', 'rsync', libraries=('rsync',)), + files('transfer', 'rsync', libraries=('xxhash',)), ): final_env = kenv.copy() final_env.cflags.extend(f'-I{x}' for x in includes) diff --git a/tools/rsync/algorithm.go b/tools/rsync/algorithm.go index 6bc566292..5f5cba2a8 100644 --- a/tools/rsync/algorithm.go +++ b/tools/rsync/algorithm.go @@ -226,7 +226,7 @@ type signature_iterator struct { index uint64 } -// ans is valid only iff err == nil +// ans is valid iff err == nil func (self *signature_iterator) next() (ans BlockHash, err error) { n, err := io.ReadAtLeast(self.src, self.buffer, cap(self.buffer)) switch err {