kirr has uploaded this change for review. ( https://gerrit.osmocom.org/c/osmocom-bb/+/40055?usp=email )
Change subject: trx_toolkit/udp_link: Optimize UDPLink.send ......................................................................
trx_toolkit/udp_link: Optimize UDPLink.send
As can be seen from http://navytux.spb.ru/~kirr/osmo/fake_trx/pyx-base.html (UDPLink_11send), UDPLink.send spends ~30% of its time in Python overhead and only ~70% in the sendto syscall itself. As fake_trx invokes send a lot, it makes sense to cut that overhead.
-> Do that:
- prepare the destination address in parsed form only once, instead of parsing it in the Python runtime at every call
- avoid the PyGetBuffer... dance: just support bytes|bytearray|str for data and use fast C-API macros to retrieve the underlying data pointer
- invoke the sendto syscall directly ourselves
- do not release/reacquire the GIL, to avoid the corresponding performance penalty. As Iaa675c95059ec8ccfad667f69984d5a7f608c249 (trx_toolkit/clck_gen: Don't use threads because Python GIL is latency killer) has shown, GIL-related functions can look small in the profile, yet harm latency a lot. We can skip releasing/reacquiring the GIL because we know UDPLink's socket is non-blocking.
Change-Id: I83204545066a925dadbcd0b72cbbc2e3407129fe --- M src/target/trx_toolkit/udp_link.pxd M src/target/trx_toolkit/udp_link.pyx 2 files changed, 64 insertions(+), 9 deletions(-)
git pull ssh://gerrit.osmocom.org:29418/osmocom-bb refs/changes/55/40055/1
diff --git a/src/target/trx_toolkit/udp_link.pxd b/src/target/trx_toolkit/udp_link.pxd index 2d02497..d50fe5f 100644 --- a/src/target/trx_toolkit/udp_link.pxd +++ b/src/target/trx_toolkit/udp_link.pxd @@ -1,6 +1,21 @@ # cython: language_level=3
+from libc.stdint cimport uint16_t, uint32_t + +cdef extern from "<arpa/inet.h>": + struct sockaddr: + pass + struct in_addr: + uint32_t s_addr + struct sockaddr_in: + int sin_family + uint16_t sin_port + in_addr sin_addr + cdef class UDPLink: cdef readonly object sock - cdef str remote_addr - cdef int remote_port + cdef int sock_fd + cdef sockaddr_in remote_addr + + +cdef _raise_oserr() diff --git a/src/target/trx_toolkit/udp_link.pyx b/src/target/trx_toolkit/udp_link.pyx index ae90021..d75e058 100644 --- a/src/target/trx_toolkit/udp_link.pyx +++ b/src/target/trx_toolkit/udp_link.pyx @@ -20,38 +20,78 @@
import socket
+from cpython cimport PyBytes_AS_STRING, PyBytes_GET_SIZE, PyUnicode_AsUTF8AndSize +from cpython.bytearray cimport PyByteArray_FromStringAndSize, PyByteArray_AS_STRING, PyByteArray_GET_SIZE + +from libc.errno cimport errno +from libc.string cimport strerror + +cdef extern from "<arpa/inet.h>": + int inet_pton(int af, const char *src, void *dst) + char *inet_ntoa(in_addr) + uint16_t htons(uint16_t) + +cdef extern from "<sys/socket.h>": + ssize_t sendto(int fd, const void *buf, size_t len, int flags, const sockaddr *dst_addr, int addrlen) + + cdef class UDPLink: def __init__(self, str remote_addr, int remote_port, bind_addr = '0.0.0.0', bind_port = 0): + # Save remote address in parsed form + self.remote_addr.sin_family = socket.AF_INET + self.remote_addr.sin_port = htons(remote_port) + err = inet_pton(socket.AF_INET, remote_addr.encode(), &self.remote_addr.sin_addr) + if err <= 0: + _raise_oserr() + self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) self.sock.bind((bind_addr, bind_port)) self.sock.setblocking(False)
- # Save remote info - self.remote_addr = remote_addr - self.remote_port = remote_port + self.sock_fd = self.sock.fileno()
def __del__(self): self.sock.close() + self.sock_fd = -1
def desc_local(self): (bind_addr, bind_port) = self.sock.getsockname() return "%s:%u" % (bind_addr, bind_port)
def desc_remote(self): - return "%s:%u" % (self.remote_addr, self.remote_port) + return "%s:%u" % (inet_ntoa(self.remote_addr.sin_addr), self.remote_addr.sin_port)
def desc_link(self): return "L:%s <-> R:%s" % (self.desc_local(), self.desc_remote())
def send(self, data): - if type(data) not in [bytearray, bytes]: - data = data.encode() + cdef const char *buf + cdef Py_ssize_t buflen + if type(data) is bytearray: + buf = PyByteArray_AS_STRING(data) + buflen = PyByteArray_GET_SIZE(data) + elif type(data) is bytes: + buf = PyBytes_AS_STRING(data) + buflen = PyBytes_GET_SIZE(data) + elif type(data) is str: + buf = PyUnicode_AsUTF8AndSize(data, &buflen) + else: + raise TypeError("send: accept only bytes|bytearray|str ; got %r" % type(data))
- self.sock.sendto(data, (self.remote_addr, self.remote_port)) + # NOTE we do not release/reacquire gil to save us from gil ping-pong performance penalty + # we can do that because the socket is non-blocking + n = sendto(self.sock_fd, buf, buflen, + 0, <sockaddr*>&self.remote_addr, sizeof(self.remote_addr)) + if n == -1: + _raise_oserr()
def sendto(self, data, remote): if type(data) not in [bytearray, bytes]: data = data.encode()
self.sock.sendto(data, remote) + + +cdef _raise_oserr(): + raise OSError(errno, strerror(errno))