-
Notifications
You must be signed in to change notification settings - Fork 37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
How to use ld_preload and ptrace to hook the program so that the program can use lwip API #19
Comments
To use LD_PRELOAD with https://github.com/yasukata/glue-lwip-dpdk-zpoline , please try the following: replace the content of glue-lwip-dpdk-zpoline/zpoline/main.c with the code below.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <dis-asm.h>
#include <sched.h>
#include <dlfcn.h>
extern void syscall_addr(void);
extern long enter_syscall(int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t);
extern void asm_syscall_hook(void);
/*
 * Holder for the hand-written enter_syscall routine.
 *
 * The asm body defines two global labels, enter_syscall and syscall_addr.
 * enter_syscall is declared above as taking seven int64_t arguments: the
 * first is moved into rax (the syscall number) and the remaining six are
 * shifted into the registers the `syscall` instruction reads.
 */
void ____asm_impl(void)
{
	/*
	 * enter_syscall triggers a kernel-space system call
	 */
	asm volatile (
	".globl enter_syscall \n\t"
	"enter_syscall: \n\t"
	/* 1st C argument -> rax */
	"movq %rdi, %rax \n\t"
	/* 2nd..4th C arguments -> rdi, rsi, rdx */
	"movq %rsi, %rdi \n\t"
	"movq %rdx, %rsi \n\t"
	"movq %rcx, %rdx \n\t"
	/* 5th and 6th C arguments -> r10, r8 */
	"movq %r8, %r10 \n\t"
	"movq %r9, %r8 \n\t"
	/* 7th C argument is read from the stack slot above the return address */
	"movq 8(%rsp),%r9 \n\t"
	".globl syscall_addr \n\t"
	"syscall_addr: \n\t"
	"syscall \n\t"
	"ret \n\t"
	);
}
/*
 * Function pointer that syscall_hook dispatches through. It starts out as
 * enter_syscall; load_hook_lib hands its address to the hook library's
 * __hook_init.
 */
static long (*hook_fn)(int64_t a1, int64_t a2, int64_t a3,
		       int64_t a4, int64_t a5, int64_t a6,
		       int64_t a7) = enter_syscall;
/*
 * Re-orders register-style arguments into the (number, arg1..arg6) layout
 * expected by hook_fn and forwards the call.
 *
 * The 4th system-call argument is taken from r10_on_stack, not from __rcx,
 * and the system-call number is taken from rax_on_stack.
 */
long syscall_hook(int64_t rdi, int64_t rsi,
		  int64_t rdx, int64_t __rcx __attribute__((unused)),
		  int64_t r8, int64_t r9,
		  int64_t r10_on_stack /* 4th arg for syscall */,
		  int64_t rax_on_stack,
		  int64_t retptr
		  __attribute__((unused))
		  )
{
	const int64_t number = rax_on_stack;
	const int64_t fourth = r10_on_stack;
	long result;

	result = hook_fn(number, rdi, rsi, rdx, fourth, r8, r9);
	return result;
}
/*
 * Loads the hook library named by the LIBZPHOOK environment variable into a
 * new link-map namespace and calls its __hook_init with the address of
 * hook_fn, so the library can replace the function pointer.
 *
 * Does nothing (beyond printing a note) when LIBZPHOOK is unset. Exits the
 * process with status 1 when the library cannot be loaded, lacks
 * __hook_init, or __hook_init reports failure. These checks are explicit
 * rather than assert()-based so that they, and the __hook_init call itself,
 * still happen in builds compiled with NDEBUG.
 */
static void load_hook_lib(void)
{
	const char *filename = getenv("LIBZPHOOK");
	int (*hook_init)(long, ...);
	void *handle;

	if (!filename) {
		printf("-- env LIBZPHOOK is empty, so skip to load a hook library\n");
		return;
	}

	printf("-- load %s\n", filename);
	handle = dlmopen(LM_ID_NEWLM, filename, RTLD_NOW | RTLD_LOCAL);
	if (!handle) {
		printf("\n");
		printf("dlmopen failed: %s\n", dlerror());
		printf("\n");
		printf("NOTE: this may occur when the compilation of your hook function library misses some specifications in LDFLAGS. or if you are using a C++ compiler, dlmopen may fail to find a symbol, and adding 'extern \"C\"' to the definition may resolve the issue.\n");
		exit(1);
	}

	hook_init = dlsym(handle, "__hook_init");
	if (!hook_init) {
		printf("dlsym(__hook_init) failed: %s\n", dlerror());
		exit(1);
	}

	printf("-- call hook init\n");
	if (hook_init(0, &hook_fn) != 0) {
		printf("__hook_init failed\n");
		exit(1);
	}
}
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <stdarg.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/epoll.h>
#include <pthread.h>
/*
 * read() replacement that routes __NR_read through syscall_hook.
 *
 * A negative result from the hook is treated as -errno: errno is set to the
 * positive error number and -1 is returned. Otherwise the byte count is
 * returned.
 */
ssize_t read(int fd, void *buf, size_t count)
{
	long ret = syscall_hook(fd, (int64_t) buf, count, 0, 0, 0, 0, __NR_read, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * write() replacement that routes __NR_write through syscall_hook.
 *
 * A negative result from the hook is treated as -errno: errno is set to the
 * positive error number and -1 is returned. Otherwise the byte count is
 * returned.
 */
ssize_t write(int fd, const void *buf, size_t count)
{
	long ret = syscall_hook(fd, (int64_t) buf, count, 0, 0, 0, 0, __NR_write, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * close() replacement that routes __NR_close through syscall_hook.
 *
 * Returns the hook's result, or -1 with errno set to the positive error
 * number when the hook returns a negative value.
 */
int close(int fd)
{
	long ret = syscall_hook(fd, 0, 0, 0, 0, 0, 0, __NR_close, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * ioctl() replacement that routes __NR_ioctl through syscall_hook.
 *
 * The optional third argument is fetched from the variadic list as a
 * pointer-sized value and forwarded as the third system-call argument, so
 * requests that carry an argument are no longer issued with 0 in its place.
 *
 * Returns the hook's result, or -1 with errno set to the positive error
 * number when the hook returns a negative value.
 */
int ioctl(int fd, unsigned long request, ...)
{
	va_list ap;
	void *arg;
	long ret;

	va_start(ap, request);
	arg = va_arg(ap, void *);
	va_end(ap);

	ret = syscall_hook(fd, request, (int64_t) arg, 0, 0, 0, 0, __NR_ioctl, 0);
	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * accept() replacement that routes __NR_accept through syscall_hook.
 *
 * A non-negative hook result is returned as the new descriptor; a negative
 * one is negated into errno and reported as -1.
 */
int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
{
	const long rc = syscall_hook(sockfd, (int64_t) addr, (int64_t) addrlen,
				     0, 0, 0, 0, __NR_accept, 0);

	if (rc >= 0)
		return rc;

	errno = -rc;
	return -1;
}
/*
 * accept4() replacement that routes __NR_accept4 through syscall_hook.
 *
 * flags is the 4th system-call argument, so it is passed in syscall_hook's
 * r10_on_stack position (7th parameter). The 4th parameter (__rcx) is
 * ignored by syscall_hook, which is why passing flags there dropped it.
 *
 * Returns the new descriptor, or -1 with errno set when the hook returns a
 * negative value.
 */
int accept4(int sockfd, struct sockaddr *addr,
	    socklen_t *addrlen, int flags)
{
	long ret = syscall_hook(sockfd, (int64_t) addr, (int64_t) addrlen, 0, 0, 0, flags, __NR_accept4, 0);

	if (ret < 0) {
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * bind() replacement that routes __NR_bind through syscall_hook.
 *
 * Returns the hook's result, or -1 with errno set to the positive error
 * number when the hook returns a negative value.
 */
int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
{
	long ret = syscall_hook(sockfd, (int64_t) addr, (int64_t) addrlen, 0, 0, 0, 0, __NR_bind, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * listen() replacement that routes __NR_listen through syscall_hook.
 *
 * Returns the hook's result, or -1 with errno set to the positive error
 * number when the hook returns a negative value.
 */
int listen(int sockfd, int backlog)
{
	long ret = syscall_hook(sockfd, backlog, 0, 0, 0, 0, 0, __NR_listen, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * getsockopt() replacement that routes __NR_getsockopt through syscall_hook.
 *
 * optval is passed in the r10_on_stack position (4th system-call argument)
 * and optlen in the r8 position (5th system-call argument).
 *
 * Returns the hook's result, or -1 with errno set to the positive error
 * number when the hook returns a negative value.
 */
int getsockopt(int sockfd, int level, int optname, void *optval, socklen_t *optlen)
{
	long ret = syscall_hook(sockfd, level, optname, 0, (int64_t) optlen, 0, (int64_t) optval, __NR_getsockopt, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * setsockopt() replacement that routes __NR_setsockopt through syscall_hook.
 *
 * optval is passed in the r10_on_stack position (4th system-call argument)
 * and optlen in the r8 position (5th system-call argument).
 *
 * Returns the hook's result, or -1 with errno set to the positive error
 * number when the hook returns a negative value.
 */
int setsockopt(int sockfd, int level, int optname, const void *optval, socklen_t optlen)
{
	long ret = syscall_hook(sockfd, level, optname, 0, (int64_t) optlen, 0, (int64_t) optval, __NR_setsockopt, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * fcntl() replacement that routes __NR_fcntl through syscall_hook.
 *
 * The optional third argument is fetched from the variadic list as a
 * pointer-sized value and forwarded as the third system-call argument, so
 * commands that carry an argument are no longer issued with 0 in its place.
 *
 * Returns the hook's result, or -1 with errno set to the positive error
 * number when the hook returns a negative value.
 */
int fcntl(int fd, int cmd, ... /* arg */ )
{
	va_list ap;
	void *arg;
	long ret;

	va_start(ap, cmd);
	arg = va_arg(ap, void *);
	va_end(ap);

	ret = syscall_hook(fd, cmd, (int64_t) arg, 0, 0, 0, 0, __NR_fcntl, 0);
	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * socket() replacement that routes __NR_socket through syscall_hook.
 *
 * Returns the new descriptor, or -1 with errno set to the positive error
 * number when the hook returns a negative value.
 */
int socket(int domain, int type, int protocol)
{
	long ret = syscall_hook(domain, type, protocol, 0, 0, 0, 0, __NR_socket, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * epoll_create() replacement that routes __NR_epoll_create through
 * syscall_hook.
 *
 * Returns the new descriptor, or -1 with errno set to the positive error
 * number when the hook returns a negative value.
 */
int epoll_create(int size)
{
	long ret = syscall_hook(size, 0, 0, 0, 0, 0, 0, __NR_epoll_create, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * epoll_create1() replacement that routes __NR_epoll_create1 through
 * syscall_hook.
 *
 * Returns the new descriptor, or -1 with errno set to the positive error
 * number when the hook returns a negative value.
 */
int epoll_create1(int flags)
{
	long ret = syscall_hook(flags, 0, 0, 0, 0, 0, 0, __NR_epoll_create1, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * epoll_wait() replacement that routes __NR_epoll_wait through syscall_hook.
 *
 * timeout is the 4th system-call argument and is therefore passed in the
 * r10_on_stack position.
 *
 * Returns the number of ready events, or -1 with errno set to the positive
 * error number when the hook returns a negative value.
 */
int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout)
{
	long ret = syscall_hook(epfd, (int64_t) events, maxevents, 0, 0, 0, timeout, __NR_epoll_wait, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * epoll_ctl() replacement that routes __NR_epoll_ctl through syscall_hook.
 *
 * event is the 4th system-call argument and is therefore passed in the
 * r10_on_stack position.
 *
 * Returns the hook's result, or -1 with errno set to the positive error
 * number when the hook returns a negative value.
 */
int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
{
	long ret = syscall_hook(epfd, op, fd, 0, 0, 0, (int64_t) event, __NR_epoll_ctl, 0);

	if (ret < 0) {
		/* ret is negative; errno must hold the positive error number */
		errno = (int) -ret;
		return -1;
	}
	return ret;
}
/*
 * Constructor (priority 0xffff) that calls load_hook_lib() to load the hook
 * library, if one is configured.
 */
__attribute__((constructor(0xffff))) static void __do_hook_init(void)
{
	load_hook_lib();
}
Supposedly, the program above can be compiled by
To use ptrace with https://github.com/yasukata/glue-lwip-dpdk-zpoline , please try the following: replace the content of glue-lwip-dpdk-zpoline/main.c with the code below.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdio.h>
#include <stddef.h>
#include <stdbool.h>
#include <unistd.h>
#include <fcntl.h>
#include <assert.h>
#include <wait.h>
#include <assert.h>
#include <syscall.h>
#include <sys/epoll.h>
#include <sys/uio.h>
#include <sys/user.h>
#include <sys/ptrace.h>
#include <arpa/inet.h>
#include <rte_common.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_bus_pci.h>
/* workaround to avoid conflicts between dpdk and lwip definitions */
#undef IP_DF
#undef IP_MF
#undef IP_RF
#undef IP_OFFMASK
#include <lwip/opt.h>
#include <lwip/init.h>
#include <lwip/pbuf.h>
#include <lwip/netif.h>
#include <lwip/etharp.h>
#include <lwip/tcpip.h>
#include <lwip/tcp.h>
#include <lwip/timeouts.h>
#include <lwip/prot/tcp.h>
#include <netif/ethernet.h>
/*
 * Transfers `count` bytes between this process and process `pid`.
 *
 * uaddr is the address inside the remote process and buf the local buffer.
 * When `to` is true the local buffer is written to the remote address,
 * otherwise the remote address is read into the local buffer.
 *
 * Returns the value reported by process_vm_writev/process_vm_readv; a
 * negative value trips the assertion.
 */
static ssize_t copy_between_user(pid_t pid, void *uaddr, void *buf, size_t count, bool to)
{
	const struct iovec local = { .iov_base = buf, .iov_len = count };
	const struct iovec remote = { .iov_base = uaddr, .iov_len = count };
	ssize_t moved;

	moved = to ? process_vm_writev(pid, &local, 1, &remote, 1, 0)
		   : process_vm_readv(pid, &local, 1, &remote, 1, 0);
	assert(moved >= 0);
	return moved;
}
static pid_t pid;
/* Writes `count` bytes from local `buf` to address `uaddr` in process `pid`. */
static ssize_t copy_to_user(void *uaddr, void *buf, size_t count)
{
	const bool into_remote = true;

	return copy_between_user(pid, uaddr, buf, count, into_remote);
}
/* Reads `count` bytes from address `uaddr` in process `pid` into local `buf`. */
static ssize_t copy_from_user(void *uaddr, void *buf, size_t count)
{
	const bool into_remote = false;

	return copy_between_user(pid, uaddr, buf, count, into_remote);
}
typedef long (*syscall_fn_t)(long, long, long, long, long, long, long);
static syscall_fn_t next_sys_call = NULL;
#define MAX_PKT_BURST (32)
#define NUM_SLOT (256)
#define MEMPOOL_CACHE_SIZE (256)
#define PACKET_BUF_SIZE (1518)
static struct rte_mempool *pktmbuf_pool = NULL;
static int tx_idx = 0;
static struct rte_mbuf *tx_mbufs[MAX_PKT_BURST] = { 0 };
/*
 * Hands every mbuf queued in tx_mbufs[0..tx_idx) to port 0 / queue 0,
 * calling rte_eth_tx_burst repeatedly until all of them have been accepted,
 * then resets the queue index.
 */
static void tx_flush(void)
{
	const int pending = tx_idx;
	int sent;

	for (sent = 0; sent != pending; ) {
		sent += rte_eth_tx_burst(0 /* port id */, 0 /* queue id */,
					 &tx_mbufs[sent], pending - sent);
	}
	tx_idx = 0;
}
static err_t low_level_output(struct netif *netif __attribute__((unused)), struct pbuf *p)
{
char buf[PACKET_BUF_SIZE];
void *bufptr, *largebuf = NULL;
if (sizeof(buf) < p->tot_len) {
largebuf = (char *) malloc(p->tot_len);
assert(largebuf);
bufptr = largebuf;
} else
bufptr = buf;
pbuf_copy_partial(p, bufptr, p->tot_len, 0);
assert((tx_mbufs[tx_idx] = rte_pktmbuf_alloc(pktmbuf_pool)) != NULL);
assert(p->tot_len <= RTE_MBUF_DEFAULT_BUF_SIZE);
rte_memcpy(rte_pktmbuf_mtod(tx_mbufs[tx_idx], void *), bufptr, p->tot_len);
rte_pktmbuf_pkt_len(tx_mbufs[tx_idx]) = rte_pktmbuf_data_len(tx_mbufs[tx_idx]) = p->tot_len;
if (++tx_idx == MAX_PKT_BURST)
tx_flush();
if (largebuf)
free(largebuf);
return ERR_OK;
}
#define MAX_ACCEPT_FD (512)
#define MAX_RXPBUF (512)
#define MAX_FD (1024)
/* Per-descriptor state for a descriptor served by lwIP; indexed by fd in lfd[]. */
struct lwip_fd {
	char used;			/* set to 1 when the slot is handed out */
	char close_posted;		/* set once lwip_syscall_close has queued this fd */
	unsigned short num_accept_fd;	/* number of valid entries in accept_fd[] */
	int accept_fd[MAX_ACCEPT_FD];	/* descriptors created by accept_handler, not yet returned by accept */
	size_t tmp_pbuf_off;		/* read offset into the first queued pbuf after a partial read */
	unsigned short num_rxpbuf;	/* number of valid entries in rxpbuf[] */
	struct pbuf *rxpbuf[MAX_RXPBUF];	/* received pbufs queued by tcp_recv_handler */
	struct tcp_pcb *tpcb;		/* lwIP protocol control block for this descriptor */
	int epfd;			/* epoll descriptor recorded by EPOLL_CTL_ADD; reset to 0 on EPOLL_CTL_DEL */
};
struct lwip_fd lfd[MAX_FD] = { 0 };
#define MAX_EPOLL_FD (512)
/* Per-descriptor state for an emulated epoll instance; indexed by fd in efd[]. */
struct epoll_fd {
	char used;		/* set to 1 by lwip_syscall_epoll_create */
	int num_fd;		/* number of valid entries in fd[] */
	int fd[MAX_EPOLL_FD];	/* descriptors registered through EPOLL_CTL_ADD */
};
struct epoll_fd efd[MAX_FD] = { 0 };
static struct netif _netif = { 0 };
static int close_post_cnt = 0;
static int close_post_queue[MAX_FD] = { 0 };
/*
 * Runs one iteration of the network loop: receives up to MAX_PKT_BURST
 * frames from port 0 / queue 0, copies each into an lwIP pbuf and feeds it
 * to the netif input function, then flushes pending transmissions and runs
 * lwIP timers.
 *
 * The pbuf allocation and the input call are made outside assert() so that
 * they also run in NDEBUG builds. A frame for which no pbuf can be
 * allocated is dropped; a pbuf that cannot be filled or that the input
 * function rejects is freed here. The length fields set by pbuf_alloc are
 * left untouched.
 */
static void dpdk_poll(void)
{
	struct rte_mbuf *rx_mbufs[MAX_PKT_BURST];
	unsigned short i, nb_rx = rte_eth_rx_burst(0 /* port id */, 0 /* queue id */, rx_mbufs, MAX_PKT_BURST);

	for (i = 0; i < nb_rx; i++) {
		struct rte_mbuf *m = rx_mbufs[i];
		struct pbuf *p = pbuf_alloc(PBUF_RAW, rte_pktmbuf_pkt_len(m), PBUF_POOL);

		if (p) {
			if (pbuf_take(p, rte_pktmbuf_mtod(m, void *), rte_pktmbuf_pkt_len(m)) != ERR_OK
			    || _netif.input(p, &_netif) != ERR_OK)
				pbuf_free(p);
		}
		rte_pktmbuf_free(m);
	}
	tx_flush();
	sys_check_timeouts();
}
static int lwip_syscall_close(int fd);
/*
 * lwIP ext-arg destroy callback. `data` carries the descriptor number.
 *
 * Frees every queued receive pbuf, posts a close for every accepted
 * descriptor that was never handed out, clears the lfd slot and finally
 * closes the descriptor itself.
 */
static void tcp_destroy_handeler(u8_t id __attribute__((unused)), void *data)
{
	const int fd = (int) ((uintptr_t) data);
	struct lwip_fd *slot = &lfd[fd];
	unsigned short k;

	for (k = 0; k < slot->num_rxpbuf; k++)
		pbuf_free(slot->rxpbuf[k]);

	for (k = 0; k < slot->num_accept_fd; k++)
		lwip_syscall_close(slot->accept_fd[k]);

	memset(slot, 0, sizeof(*slot));
	/* compiler barrier between clearing the slot and closing the descriptor */
	asm volatile ("" ::: "memory");
	close(fd);
}
static const struct tcp_ext_arg_callbacks tcp_ext_arg_cbs = {
.destroy = tcp_destroy_handeler,
};
/*
 * No-op ext-arg destroy callback. lwip_syscall_listen installs it (through
 * tcp_ext_arg_cbs_dummy) on the pcb just before calling tcp_listen.
 * NOTE(review): presumably so that the pcb replaced by tcp_listen does not
 * run the real destroy handler; confirm against lwIP's tcp_listen.
 */
static void tcp_destroy_handeler_dummy(u8_t id __attribute__((unused)),
				       void *data __attribute__((unused)))
{
}
static const struct tcp_ext_arg_callbacks tcp_ext_arg_cbs_dummy = {
.destroy = tcp_destroy_handeler_dummy,
};
/*
 * lwIP receive callback. `arg` carries the descriptor number.
 *
 * A NULL pbuf closes the pcb. Otherwise the pbuf is appended to the
 * descriptor's receive queue, to be consumed by lwip_syscall_read.
 *
 * Returns `err` unchanged when it is not ERR_OK, ERR_MEM when the receive
 * queue already holds MAX_RXPBUF entries (the pbuf is not taken, so the
 * array is never written out of bounds), and ERR_OK otherwise.
 */
static err_t tcp_recv_handler(void *arg, struct tcp_pcb *tpcb,
			      struct pbuf *p, err_t err)
{
	struct lwip_fd *slot = &lfd[(int)((uintptr_t) arg)];

	if (err != ERR_OK)
		return err;
	if (!p) {
		tcp_close(tpcb);
		return ERR_OK;
	}
	if (slot->num_rxpbuf >= MAX_RXPBUF)
		return ERR_MEM;
	slot->rxpbuf[slot->num_rxpbuf++] = p;
	return ERR_OK;
}
/*
 * lwIP accept callback. `arg` carries the listening descriptor number.
 *
 * Reserves a descriptor number for the new connection by opening /dev/null,
 * binds it to `tpcb`, queues it on the listener's accept_fd[] list and
 * configures receive handling, priority and keep-alive on the pcb.
 *
 * The open() call is made outside assert() so that it also runs in NDEBUG
 * builds. Returns `err` unchanged when it is not ERR_OK, ERR_MEM when the
 * accept queue is full or no descriptor below MAX_FD can be obtained, and
 * ERR_OK otherwise.
 */
static err_t accept_handler(void *arg, struct tcp_pcb *tpcb, err_t err)
{
	struct lwip_fd *listener = &lfd[(int)((uintptr_t) arg)];
	int newfd;

	if (err != ERR_OK)
		return err;
	if (listener->num_accept_fd >= MAX_ACCEPT_FD)
		return ERR_MEM;

	newfd = open("/dev/null", O_RDONLY);
	if (newfd < 0)
		return ERR_MEM;
	if (newfd >= MAX_FD) {
		/* lfd[] has no slot for this descriptor number */
		close(newfd);
		return ERR_MEM;
	}

	lfd[newfd].used = 1;
	lfd[newfd].tpcb = tpcb;
	tcp_arg(tpcb, (void *)((uintptr_t) newfd));
	listener->accept_fd[listener->num_accept_fd++] = newfd;

	tcp_recv(tpcb, tcp_recv_handler);
	tcp_setprio(tpcb, TCP_PRIO_MAX);
	tpcb->so_options |= SOF_KEEPALIVE;
	tpcb->keep_intvl = (60 * 1000);
	tpcb->keep_idle = (60 * 1000);
	tpcb->keep_cnt = 1;
	return ERR_OK;
}
static err_t if_init(struct netif *netif)
{
{
struct rte_ether_addr ports_eth_addr;
assert(rte_eth_macaddr_get(0 /* port id */, &ports_eth_addr) >= 0);
memcpy(netif->hwaddr, ports_eth_addr.addr_bytes, 6);
}
assert(rte_eth_dev_get_mtu(0 /* port id */, &netif->mtu) >= 0); assert(netif->mtu <= PACKET_BUF_SIZE);
netif->output = etharp_output;
netif->linkoutput = low_level_output;
netif->hwaddr_len = 6;
netif->flags = NETIF_FLAG_BROADCAST | NETIF_FLAG_ETHARP;
return ERR_OK;
}
static ssize_t lwip_syscall_read(int fd, char *buf, size_t count)
{
char _buf[0x2000];
if (count > sizeof(_buf))
count = sizeof(_buf);
if (!lfd[fd].num_rxpbuf) {
dpdk_poll();
if (!lfd[fd].num_rxpbuf)
return -EAGAIN;
}
{
unsigned short i; size_t c;
for (i = 0, c = 0; i < lfd[fd].num_rxpbuf && c < count; i++) {
struct pbuf *p = lfd[fd].rxpbuf[i];
size_t l = ((count - c) < (p->tot_len - lfd[fd].tmp_pbuf_off) ? (count - c) : (p->tot_len - lfd[fd].tmp_pbuf_off));
pbuf_copy_partial(p, &_buf[c], l, lfd[fd].tmp_pbuf_off);
c += l;
if (p->tot_len != l) {
assert(c == count);
lfd[fd].tmp_pbuf_off = l;
} else {
tcp_recved(lfd[fd].tpcb, p->tot_len);
pbuf_free(p);
}
}
memmove(&lfd[fd].rxpbuf[0],
&lfd[fd].rxpbuf[i - (lfd[fd].tmp_pbuf_off ? 1 : 0)],
(i - (lfd[fd].tmp_pbuf_off ? 1 : 0)) * sizeof(struct pbuf *));
lfd[fd].num_rxpbuf -= (i - (lfd[fd].tmp_pbuf_off ? 1 : 0));
copy_to_user(buf, _buf, c);
return c;
}
}
static ssize_t lwip_syscall_write(int fd, const char *buf, size_t count)
{
assert(tcp_sndbuf(lfd[fd].tpcb) >= count);
char _buf[0x2000];
assert(count < sizeof(_buf));
copy_from_user((void *) buf, _buf, count);
assert(tcp_write(lfd[fd].tpcb, _buf, count, TCP_WRITE_FLAG_COPY) == ERR_OK);
assert(tcp_output(lfd[fd].tpcb) == ERR_OK);
return count;
}
static int lwip_syscall_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
static int lwip_syscall_close(int fd)
{
if (!lfd[fd].close_posted) {
assert(!lwip_syscall_epoll_ctl(lfd[fd].epfd, EPOLL_CTL_DEL, fd, NULL));
close_post_queue[close_post_cnt++] = fd;
lfd[fd].close_posted = 1;
}
return 0;
}
static int lwip_syscall_socket(int domain, int type, int protocol)
{
if (domain == AF_INET
&& type == SOCK_STREAM
&& (protocol == 0 || protocol == IPPROTO_TCP)) {
int fd;
assert((fd = open("/dev/null", O_RDONLY)) != -1);
lfd[fd].used = 1;
assert((lfd[fd].tpcb = tcp_new()) != NULL);
tcp_arg(lfd[fd].tpcb, (void *)((uintptr_t) fd));
tcp_ext_arg_set_callbacks(lfd[fd].tpcb, 0, &tcp_ext_arg_cbs);
tcp_ext_arg_set(lfd[fd].tpcb, 0, (void *) ((uintptr_t) fd));
return fd;
} else
return next_sys_call(__NR_socket, domain, type, protocol, 0, 0, 0);
}
static int lwip_syscall_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
{
(void) addr;
(void) addrlen;
if (!lfd[sockfd].num_accept_fd) {
dpdk_poll();
if (!lfd[sockfd].num_accept_fd)
return -EAGAIN;
}
return lfd[sockfd].accept_fd[--lfd[sockfd].num_accept_fd];
}
static int lwip_syscall_bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
{
(void) addr;
(void) addrlen;
struct sockaddr_storage _addr;
copy_from_user((void *) addr, &_addr, addrlen);
assert(tcp_bind(lfd[sockfd].tpcb,
(const ip_addr_t *) &((const struct sockaddr_in *) &_addr)->sin_addr.s_addr,
ntohs(((const struct sockaddr_in *) &_addr)->sin_port)) == ERR_OK);
return 0;
}
/*
 * Emulates listen(): converts the descriptor's pcb into a listening pcb and
 * installs accept_handler on it. The backlog argument is ignored.
 *
 * The dummy destroy callbacks are installed around the tcp_listen call and
 * the real ones are installed on the resulting pcb afterwards. tcp_listen
 * is called outside assert() so that it also runs in NDEBUG builds.
 *
 * Returns 0 on success. When tcp_listen returns NULL, the real callbacks
 * are put back on the original pcb and -ENOBUFS is returned.
 */
static int lwip_syscall_listen(int sockfd, int backlog __attribute__((unused)))
{
	struct tcp_pcb *listen_pcb;

	tcp_ext_arg_set_callbacks(lfd[sockfd].tpcb, 0, &tcp_ext_arg_cbs_dummy);
	listen_pcb = tcp_listen(lfd[sockfd].tpcb);
	if (!listen_pcb) {
		tcp_ext_arg_set_callbacks(lfd[sockfd].tpcb, 0, &tcp_ext_arg_cbs);
		return -ENOBUFS;
	}

	lfd[sockfd].tpcb = listen_pcb;
	tcp_arg(listen_pcb, (void *)((uintptr_t) sockfd));
	tcp_ext_arg_set_callbacks(listen_pcb, 0, &tcp_ext_arg_cbs);
	tcp_accept(listen_pcb, accept_handler);
	return 0;
}
static int lwip_syscall_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event __attribute__((unused)))
{
switch (op) {
case EPOLL_CTL_ADD:
efd[epfd].fd[efd[epfd].num_fd++] = fd;
lfd[fd].epfd = epfd;
break;
case EPOLL_CTL_DEL:
{
int i;
for (i = 0; i < efd[epfd].num_fd; i++) {
if (efd[epfd].fd[i] == fd) {
efd[epfd].fd[i] = efd[epfd].fd[--efd[epfd].num_fd];
lfd[fd].epfd = 0;
break;
}
}
}
break;
default:
assert(0);
break;
}
return 0;
}
static int lwip_syscall_epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout)
{
struct epoll_event _events[2048];
if (maxevents > 2048)
maxevents = 2048;
int e = 0;
{
struct timespec start;
assert(!clock_gettime(CLOCK_REALTIME, &start));
while (1) {
{
int i;
for (i = 0; i < efd[epfd].num_fd && !e && e < maxevents; i++) {
if (lfd[efd[epfd].fd[i]].num_accept_fd || lfd[efd[epfd].fd[i]].num_rxpbuf) {
struct epoll_event *ev = &_events[e++];
ev->data.fd = efd[epfd].fd[i];
ev->events = EPOLLIN;
}
}
if (e)
break;
}
if (timeout >= 0) {
struct timespec now;
assert(!clock_gettime(CLOCK_REALTIME, &now));
if (((unsigned long) timeout * 1000) <
((now.tv_sec * 1000000000UL + now.tv_nsec) -
(start.tv_sec * 1000000000UL + start.tv_nsec))) {
break;
}
}
dpdk_poll();
}
}
if (e)
copy_to_user(events, _events, sizeof(struct epoll_event) * e);
return e;
}
/*
 * Dispatches a system call aimed at an lwIP-backed descriptor.
 *
 * a1 is the system-call number and a2..a4 its first three arguments.
 * ioctl, setsockopt, getsockopt and fcntl are accepted and report success
 * without doing anything; accept4 is handled like accept. An unexpected
 * number is logged and trips an assertion.
 */
static long lwip_syscall(long a1, long a2, long a3,
			 long a4,
			 long a5 __attribute__((unused)),
			 long a6 __attribute__((unused)),
			 long a7 __attribute__((unused)))
{
	switch (a1) {
	case __NR_read: // 0
		return lwip_syscall_read((int) a2, (void *) a3, (size_t) a4);
	case __NR_write: // 1
		return lwip_syscall_write((int) a2, (const void *) a3, (size_t) a4);
	case __NR_close: // 3
		return lwip_syscall_close((int) a2);
	case __NR_socket: // 41
		return lwip_syscall_socket((int) a2, (int) a3, (int) a4);
	case __NR_accept: // 43
	case __NR_accept4: // 288
		return lwip_syscall_accept((int) a2, (struct sockaddr *) a3, (socklen_t *) a4);
	case __NR_bind: // 49
		return lwip_syscall_bind((int) a2, (const struct sockaddr *) a3, (socklen_t) a4);
	case __NR_listen: // 50
		return lwip_syscall_listen((int) a2, (int) a3);
	case __NR_ioctl: // 16
	case __NR_setsockopt: // 54
	case __NR_getsockopt: // 55
	case __NR_fcntl: // 72
		return 0;
	default:
		printf("unhandled %lu\n", a1);
		assert(0);
		break;
	}
	return 0;
}
static int lwip_syscall_epoll_close(int fd)
{
memset(&efd[fd], 0, sizeof(efd[fd]));
asm volatile ("" ::: "memory");
close(fd);
return 0;
}
static int lwip_syscall_epoll_create(int size __attribute__((unused)))
{
int fd;
assert((fd = open("/dev/null", O_RDONLY)) != -1);
efd[fd].used = 1;
return fd;
}
/*
 * Dispatches a system call aimed at the emulated epoll layer.
 *
 * a1 is the system-call number and a2..a5 its first four arguments. The
 * "old" and current epoll_ctl/epoll_wait numbers share one implementation
 * each, as do epoll_create and epoll_create1. fcntl reports success without
 * doing anything. An unexpected number is logged and trips an assertion.
 */
static long epoll_syscall(long a1, long a2, long a3,
			  long a4, long a5,
			  long a6 __attribute__((unused)),
			  long a7 __attribute__((unused)))
{
	switch (a1) {
	case __NR_close: // 3
		return lwip_syscall_epoll_close((int) a2);
	case __NR_fcntl: // 72
		return 0;
	case __NR_epoll_create: // 213
	case __NR_epoll_create1: // 291
		return lwip_syscall_epoll_create((int) a2);
	case __NR_epoll_ctl_old: // 214
	case __NR_epoll_ctl: // 233
		return lwip_syscall_epoll_ctl((int) a2, (int) a3, (int) a4, (struct epoll_event *) a5);
	case __NR_epoll_wait_old: // 215
	case __NR_epoll_wait: // 232
		return lwip_syscall_epoll_wait((int) a2, (struct epoll_event *) a3, (int) a4, (int) a5);
	default:
		printf("unhandled %lu\n", a1);
		assert(0);
		break;
	}
	return 0;
}
static long hook_function(long a1, long a2, long a3,
long a4, long a5, long a6,
long a7)
{
switch (a1) {
case __NR_socket: // 41
return lwip_syscall(a1, a2, a3, a4, a5, a6, a7);
case __NR_close: // 3
case __NR_fcntl: // 72
if (lfd[a2].used)
return lwip_syscall(a1, a2, a3, a4, a5, a6, a7);
if (efd[a2].used)
return epoll_syscall(a1, a2, a3, a4, a5, a6, a7);
return next_sys_call(a1, a2, a3, a4, a5, a6, a7);
case __NR_read: // 0
case __NR_write: // 1
case __NR_ioctl: // 16
case __NR_accept: // 43
case __NR_bind: // 49
case __NR_listen: // 50
case __NR_setsockopt: // 54
case __NR_getsockopt: // 55
case __NR_accept4: //288
if (lfd[a2].used)
return lwip_syscall(a1, a2, a3, a4, a5, a6, a7);
return next_sys_call(a1, a2, a3, a4, a5, a6, a7);
case __NR_epoll_create: // 213
case __NR_epoll_ctl_old: // 214
case __NR_epoll_wait_old: // 215
case __NR_epoll_wait: // 232
case __NR_epoll_ctl: // 233
case __NR_epoll_create1: // 291
return epoll_syscall(a1, a2, a3, a4, a5, a6, a7);
default:
return next_sys_call(a1, a2, a3, a4, a5, a6, a7);
}
}
int __hook_init(long placeholder __attribute__((unused)),
void *sys_call_hook_ptr)
{
if (!getenv("NET_ADDR"))
return -1;
if (!getenv("NET_MASK"))
return -1;
if (!getenv("NET_GATE"))
return -1;
if (!getenv("DPDK_ARGS"))
return -1;
/* setting up dpdk */
{
{
int argc = 0;
char **argv = NULL;
char *argstr;
assert((argstr = strdup(getenv("DPDK_ARGS"))) != NULL);
{
size_t l = strlen(argstr);
int argvlen = 8;
assert((argv = realloc(argv, sizeof(*argv) * argvlen)) != NULL);
argv[argc++] = "app";
{
bool prev_space = true;
{
size_t i;
for (i = 0; i < l; i++) {
if (prev_space) {
if (argstr[i] != ' ') {
if (argvlen < argc + 2) {
argvlen += 16;
assert((argv = realloc(argv, sizeof(*argv) * argvlen)) != NULL);
}
argv[argc++] = &argstr[i];
prev_space = false;
} else
argstr[i] = '\0';
} else if (argstr[i] == ' ') {
argstr[i] = '\0';
prev_space = true;
}
}
}
argv[argc] = NULL;
}
}
assert(rte_eal_init(argc, argv) >= 0);
free(argv);
free(argstr);
}
{
uint16_t nb_rxd = NUM_SLOT;
uint16_t nb_txd = NUM_SLOT;
assert(rte_eth_dev_count_avail() == 1);
assert((pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool",
RTE_MAX(1 /* nb_ports */ * (nb_rxd + nb_txd + MAX_PKT_BURST + 1 * MEMPOOL_CACHE_SIZE), 8192),
MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
rte_socket_id())) != NULL);
{
struct rte_eth_dev_info dev_info;
struct rte_eth_conf local_port_conf = { 0 };
assert(rte_eth_dev_info_get(0 /* port id */, &dev_info) >= 0);
assert(rte_eth_dev_configure(0 /* port id */, 1 /* num queues */, 1 /* num queues */, &local_port_conf) >= 0);
assert(rte_eth_dev_adjust_nb_rx_tx_desc(0 /* port id */, &nb_rxd, &nb_txd) >= 0);
assert(rte_eth_rx_queue_setup(0 /* port id */, 0 /* queue */, nb_rxd,
rte_eth_dev_socket_id(0 /* port id */),
&dev_info.default_rxconf,
pktmbuf_pool) >= 0);
assert(rte_eth_tx_queue_setup(0 /* port id */, 0 /* queue */, nb_txd,
rte_eth_dev_socket_id(0 /* port id */),
&dev_info.default_txconf) >= 0);
assert(rte_eth_dev_start(0 /* port id */) >= 0);
assert(rte_eth_promiscuous_enable(0 /* port id */) >= 0);
}
}
}
/* setting up lwip */
{
lwip_init();
{
ip4_addr_t _addr, _mask, _gate;
inet_pton(AF_INET, getenv("NET_ADDR"), &_addr);
inet_pton(AF_INET, getenv("NET_MASK"), &_mask);
inet_pton(AF_INET, getenv("NET_GATE"), &_gate);
assert(netif_add(&_netif, &_addr, &_mask, &_gate, NULL, if_init, ethernet_input) != NULL);
}
netif_set_default(&_netif);
netif_set_link_up(&_netif);
netif_set_up(&_netif);
}
next_sys_call = *((syscall_fn_t *) sys_call_hook_ptr);
*((syscall_fn_t *) sys_call_hook_ptr) = hook_function;
return 0;
}
static bool should_skip(long rax, long rdi)
{
switch (rax) {
case __NR_socket: // 41
if (rdi == AF_INET)
return true;
else
return false;
case __NR_close: // 3
case __NR_fcntl: // 72
if (lfd[rdi].used)
return true;
if (efd[rdi].used)
return true;
return false;
case __NR_read: // 0
case __NR_write: // 1
case __NR_ioctl: // 16
case __NR_accept: // 43
case __NR_bind: // 49
case __NR_listen: // 50
case __NR_setsockopt: // 54
case __NR_getsockopt: // 55
case __NR_accept4: //288
if (lfd[rdi].used)
return true;
return false;
case __NR_epoll_create: // 213
case __NR_epoll_ctl_old: // 214
case __NR_epoll_wait_old: // 215
case __NR_epoll_wait: // 232
case __NR_epoll_ctl: // 233
case __NR_epoll_create1: // 291
return true;
default:
return false;
}
}
extern long enter_syscall(int64_t, int64_t, int64_t, int64_t, int64_t, int64_t, int64_t);
/*
 * Holder for the hand-written enter_syscall routine.
 *
 * The asm body defines two global labels, enter_syscall and syscall_addr.
 * enter_syscall is declared above as taking seven int64_t arguments: the
 * first is moved into rax (the syscall number) and the remaining six are
 * shifted into the registers the `syscall` instruction reads.
 */
void ____asm_impl(void)
{
	/*
	 * enter_syscall triggers a kernel-space system call
	 */
	asm volatile (
	".globl enter_syscall \n\t"
	"enter_syscall: \n\t"
	/* 1st C argument -> rax */
	"movq %rdi, %rax \n\t"
	/* 2nd..4th C arguments -> rdi, rsi, rdx */
	"movq %rsi, %rdi \n\t"
	"movq %rdx, %rsi \n\t"
	"movq %rcx, %rdx \n\t"
	/* 5th and 6th C arguments -> r10, r8 */
	"movq %r8, %r10 \n\t"
	"movq %r9, %r8 \n\t"
	/* 7th C argument is read from the stack slot above the return address */
	"movq 8(%rsp),%r9 \n\t"
	".globl syscall_addr \n\t"
	"syscall_addr: \n\t"
	"syscall \n\t"
	"ret \n\t"
	);
}
/*
 * System-call function pointer handed to __hook_init by main(). It starts
 * out as enter_syscall; __hook_init saves that value in next_sys_call and
 * overwrites the pointer with hook_function.
 */
static long (*hook_fn)(int64_t a1, int64_t a2, int64_t a3,
		       int64_t a4, int64_t a5, int64_t a6,
		       int64_t a7) = enter_syscall;
/*
 * ptrace-based launcher: runs argv[1..] as a traced child and emulates the
 * system calls selected by should_skip() inside this (tracer) process.
 *
 * For each system call of the child:
 *  - at the entry stop, the registers are read; if the call is to be
 *    emulated, its number is replaced with __NR_getpid so that the kernel
 *    executes a harmless call instead;
 *  - at the exit stop of an emulated call, hook_function() is run with the
 *    original number and arguments, and its result is written into the
 *    child's rax.
 *
 * The loop ends when the child exits or is killed by a signal. Every
 * ptrace() call is made outside assert(), with the assertion applied to its
 * saved result, so the calls still run in NDEBUG builds. A child whose
 * execvp() fails reports the error and exits with status 127.
 */
int main(int argc, char* const* argv)
{
	int status;
	long rc;

	assert(argc > 1);
	pid = fork();
	assert(pid >= 0);
	if (pid == 0) {
		rc = ptrace(PTRACE_TRACEME, 0L, 0L, 0L);
		assert(rc == 0);
		execvp(argv[1], &argv[1]);
		/* only reached when execvp failed */
		perror("execvp");
		_exit(127);
	}

	rc = __hook_init(0, &hook_fn);
	assert(rc == 0);

	pid = wait(&status);
	if (WIFEXITED(status) || WIFSIGNALED(status))
		return 1; /* child never reached its exec stop */
	rc = ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_EXITKILL);
	assert(rc == 0);
	rc = ptrace(PTRACE_SYSCALL, pid, 0, 0);
	assert(rc == 0);

	while (1) {
		bool skipped = false;
		struct user_regs_struct regs;

		/* system-call entry stop */
		pid = wait(&status);
		if (WIFEXITED(status) || WIFSIGNALED(status))
			break;
		rc = ptrace(PTRACE_GETREGS, pid, 0, &regs);
		assert(rc == 0);
		if (should_skip(regs.orig_rax, regs.rdi)) {
			rc = ptrace(PTRACE_POKEUSER, pid, offsetof(struct user_regs_struct, orig_rax), __NR_getpid);
			assert(rc == 0);
			skipped = true;
		}
		rc = ptrace(PTRACE_SYSCALL, pid, 0, 0);
		assert(rc == 0);

		/* system-call exit stop */
		pid = wait(&status);
		if (WIFEXITED(status) || WIFSIGNALED(status))
			break;
		if (skipped) {
			regs.rax = hook_function(
					regs.orig_rax,
					regs.rdi,
					regs.rsi,
					regs.rdx,
					regs.r10,
					regs.r8,
					regs.r9);
			rc = ptrace(PTRACE_SETREGS, pid, 0, &regs);
			assert(rc == 0);
		}
		rc = ptrace(PTRACE_SYSCALL, pid, 0, 0);
		assert(rc == 0);
	}
	(void) rc;
	return 0;
}
glue-lwip-dpdk-zpoline/Makefile
PROGS = ptracenet
CC = gcc
PKGCONF = pkg-config
DPDK_VER=22.11.1
LWIP_VER=2.1.3
CONTRIB_VER=2.1.0
CLEANFILES = $(PROGS) *.o *.d
SRCDIR ?= ./
NO_MAN=
CFLAGS = -O3 -pipe
CFLAGS += -Wall -Wunused-function
CFLAGS += -Wextra
LDFLAGS +=
C_SRCS = main.c
OBJS = $(C_SRCS:.c=.o)
# for dpdk
DPDK_DIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST))))/dpdk
DPDK_SRC_DIR = $(DPDK_DIR)/dpdk-$(DPDK_VER)
DPDK_INSTALL_DIR = $(DPDK_DIR)/install
DPDK_PKG_CONFIG_PATH=$(DPDK_INSTALL_DIR)/lib/x86_64-linux-gnu/pkgconfig
DPDK_PKG_CONFIG_FILE=$(DPDK_PKG_CONFIG_PATH)/libdpdk.pc
CFLAGS += $(shell PKG_CONFIG_PATH=$(DPDK_PKG_CONFIG_PATH) $(PKGCONF) --cflags libdpdk)
LDFLAGS += $(shell PKG_CONFIG_PATH=$(DPDK_PKG_CONFIG_PATH) $(PKGCONF) --libs libdpdk)
# for lwip
LWIP_DIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST))))/lwip
LWIP_SRC_DIR = $(LWIP_DIR)/lwip-$(LWIP_VER)
CONTRIB_SRC_DIR = $(LWIP_DIR)/contrib-$(CONTRIB_VER)
CFLAGS += -I$(LWIP_SRC_DIR)/src/include -I$(CONTRIB_SRC_DIR) -I$(CONTRIB_SRC_DIR)/ports/unix/port/include
LWIP_OBJS = $(LWIP_SRC_DIR)/src/api/api_lib.o \
$(LWIP_SRC_DIR)/src/api/api_msg.o \
$(LWIP_SRC_DIR)/src/api/err.o \
$(LWIP_SRC_DIR)/src/api/if_api.o \
$(LWIP_SRC_DIR)/src/api/netbuf.o \
$(LWIP_SRC_DIR)/src/api/netdb.o \
$(LWIP_SRC_DIR)/src/api/netifapi.o \
$(LWIP_SRC_DIR)/src/api/sockets.o \
$(LWIP_SRC_DIR)/src/api/tcpip.o \
$(LWIP_SRC_DIR)/src/core/altcp_alloc.o \
$(LWIP_SRC_DIR)/src/core/altcp.o \
$(LWIP_SRC_DIR)/src/core/altcp_tcp.o \
$(LWIP_SRC_DIR)/src/core/def.o \
$(LWIP_SRC_DIR)/src/core/dns.o \
$(LWIP_SRC_DIR)/src/core/inet_chksum.o \
$(LWIP_SRC_DIR)/src/core/init.o \
$(LWIP_SRC_DIR)/src/core/ip.o \
$(LWIP_SRC_DIR)/src/core/ipv4/autoip.o \
$(LWIP_SRC_DIR)/src/core/ipv4/dhcp.o \
$(LWIP_SRC_DIR)/src/core/ipv4/etharp.o \
$(LWIP_SRC_DIR)/src/core/ipv4/icmp.o \
$(LWIP_SRC_DIR)/src/core/ipv4/igmp.o \
$(LWIP_SRC_DIR)/src/core/ipv4/ip4_addr.o \
$(LWIP_SRC_DIR)/src/core/ipv4/ip4.o \
$(LWIP_SRC_DIR)/src/core/ipv4/ip4_frag.o \
$(LWIP_SRC_DIR)/src/core/ipv6/dhcp6.o \
$(LWIP_SRC_DIR)/src/core/ipv6/ethip6.o \
$(LWIP_SRC_DIR)/src/core/ipv6/icmp6.o \
$(LWIP_SRC_DIR)/src/core/ipv6/inet6.o \
$(LWIP_SRC_DIR)/src/core/ipv6/ip6_addr.o \
$(LWIP_SRC_DIR)/src/core/ipv6/ip6.o \
$(LWIP_SRC_DIR)/src/core/ipv6/ip6_frag.o \
$(LWIP_SRC_DIR)/src/core/ipv6/mld6.o \
$(LWIP_SRC_DIR)/src/core/ipv6/nd6.o \
$(LWIP_SRC_DIR)/src/core/mem.o \
$(LWIP_SRC_DIR)/src/core/memp.o \
$(LWIP_SRC_DIR)/src/core/netif.o \
$(LWIP_SRC_DIR)/src/core/pbuf.o \
$(LWIP_SRC_DIR)/src/core/raw.o \
$(LWIP_SRC_DIR)/src/core/stats.o \
$(LWIP_SRC_DIR)/src/core/sys.o \
$(LWIP_SRC_DIR)/src/core/tcp.o \
$(LWIP_SRC_DIR)/src/core/tcp_in.o \
$(LWIP_SRC_DIR)/src/core/tcp_out.o \
$(LWIP_SRC_DIR)/src/core/timeouts.o \
$(LWIP_SRC_DIR)/src/core/udp.o \
$(LWIP_SRC_DIR)/src/netif/ethernet.o \
$(CONTRIB_SRC_DIR)/ports/unix/port/sys_arch.o
OBJS += $(LWIP_OBJS)
CLEANFILES += $(LWIP_OBJS)
# Neither target names a file, so both are declared phony.
.PHONY: all clean
all: $(PROGS)

# Download the source archives.
$(DPDK_SRC_DIR).tar.xz:
	wget -P $(DPDK_DIR) https://fast.dpdk.org/rel/dpdk-$(DPDK_VER).tar.xz

$(CONTRIB_SRC_DIR).zip:
	wget -P $(LWIP_DIR) http://download.savannah.nongnu.org/releases/lwip/contrib-$(CONTRIB_VER).zip

$(LWIP_SRC_DIR).zip:
	wget -P $(LWIP_DIR) http://download.savannah.nongnu.org/releases/lwip/lwip-$(LWIP_VER).zip

# Unpack the source archives.
$(DPDK_SRC_DIR): $(DPDK_SRC_DIR).tar.xz
	mkdir -p $(DPDK_SRC_DIR)
	tar xvf $< -C $(DPDK_SRC_DIR) --strip-components 1

$(CONTRIB_SRC_DIR): $(CONTRIB_SRC_DIR).zip
	unzip -n $< -d $(LWIP_DIR)

$(LWIP_SRC_DIR): $(LWIP_SRC_DIR).zip
	unzip -n $< -d $(LWIP_DIR)

# Build DPDK and install it under $(DPDK_INSTALL_DIR); the installed
# pkg-config file is the target.
$(DPDK_PKG_CONFIG_FILE): $(DPDK_SRC_DIR)
	meson --prefix=$(DPDK_INSTALL_DIR) --libdir=lib/x86_64-linux-gnu $(DPDK_SRC_DIR)/build $(DPDK_SRC_DIR)
	ninja -C $(DPDK_SRC_DIR)/build
	ninja -C $(DPDK_SRC_DIR)/build install

# Objects depend on the unpacked lwIP sources and the installed DPDK.
$(OBJS): $(CONTRIB_SRC_DIR) $(LWIP_SRC_DIR) $(DPDK_PKG_CONFIG_FILE)

$(PROGS): $(OBJS)
	$(CC) -Werror $(CFLAGS) -o $@ $^ $(LDFLAGS)

clean:
	-@rm -rf $(CLEANFILES)
Supposedly, the program above can be compiled by
Thank you very much for your message.
Hello, I used the code of glue-lwip-dpdk-zpoline and successfully used zpoline to improve the efficiency of a program.
Then I want to test the efficiency improvement of the program after applying lwip API compared with other hook mechanisms.
I can test SUD and int3 signaling successfully, but when I tested ld_preload, it crashed. Can I know the source code of your program using ld_preload to use lwip API?
This is the hook code I wrote for LD_PRELOAD, it is compiled into libpreload.so and then loaded via LD_PRELOAD.
I basically did not modify main.c in glue-lwip-dpdk-zpoline, but replaced each place where next_sys_call appeared with the corresponding original system call.
This is the source code of main.c after I modified it: main.c
This is the command I use to start the program:
When the hooked object is redis-server, the following error occurs:
Accepting client connection: accept: Socket operation on non-socket
When the hooked object is a simple server program, the program crashes after connecting. After debugging it, I found that the accept return value is often 0xfffffff5.
The source code of the simple server program is at server program
In addition, I would like to know how you use ptrace to let the program apply lwip's API.
As we discussed before in #10 (comment), ptrace cannot skip the original system call. How do you avoid this problem?
Looking forward to your reply.
The text was updated successfully, but these errors were encountered: