1 year ago
#357628
JHAhn
Why does eBPF require two memory copies to look up an element?
I am reading the Linux 5.18 kernel source and found that the bpf(BPF_MAP_LOOKUP_ELEM, ...) system call performs two memory copies. BPF_MAP_LOOKUP_ELEM is handled by map_lookup_elem, defined in kernel/bpf/syscall.c. In map_lookup_elem, bpf_map_copy_value first copies the looked-up element into value, and later copy_to_user copies value to the user-provided pointer.
/*
 * map_lookup_elem - look up one map element on behalf of user space.
 * @attr: request attributes; this function reads map_fd, key, value and flags.
 *
 * Copies the key in from the user pointer attr->key, has
 * bpf_map_copy_value() fill a temporary kernel buffer for that key, and then
 * copies that buffer out to the user pointer attr->value.
 *
 * Return: 0 on success, otherwise a negative error code:
 *   -EINVAL  CHECK_ATTR() rejected the request, a flag other than BPF_F_LOCK
 *            was set, or BPF_F_LOCK was set for a map whose value has no
 *            spin lock;
 *   -EPERM   the map fd does not grant FMODE_CAN_READ;
 *   -ENOMEM  the temporary value buffer could not be allocated;
 *   -EFAULT  copying the value to (or, for Bloom filters, from) user space
 *            failed;
 *   or the error reported by __bpf_map_get(), __bpf_copy_key() or
 *   bpf_map_copy_value().
 */
static int map_lookup_elem(union bpf_attr *attr)
{
void __user *ukey = u64_to_user_ptr(attr->key);
void __user *uvalue = u64_to_user_ptr(attr->value);
int ufd = attr->map_fd;
struct bpf_map *map;
void *key, *value;
u32 value_size;
struct fd f;
int err;
if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
return -EINVAL;
/* BPF_F_LOCK is the only flag accepted by this command. */
if (attr->flags & ~BPF_F_LOCK)
return -EINVAL;
f = fdget(ufd);
map = __bpf_map_get(f);
/*
 * NOTE(review): this path returns without calling fdput(f); presumably
 * __bpf_map_get() releases the fd itself on failure - confirm.
 */
if (IS_ERR(map))
return PTR_ERR(map);
/* The caller must be allowed to read from this map. */
if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
err = -EPERM;
goto err_put;
}
/* BPF_F_LOCK is only valid when the map value contains a spin lock. */
if ((attr->flags & BPF_F_LOCK) &&
!map_value_has_spin_lock(map)) {
err = -EINVAL;
goto err_put;
}
/*
 * Obtain the key from the user pointer; the result is released with
 * kvfree() at free_key. (Presumably a kernel copy of key_size bytes -
 * confirm against __bpf_copy_key().)
 */
key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
}
value_size = bpf_map_value_size(map);
/* Preset the error so the allocation-failure path can jump to cleanup. */
err = -ENOMEM;
/* Temporary kernel buffer that stages the value between map and user. */
value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
/*
 * Bloom filter maps: the value is an input here. It is read from the
 * user buffer and handed to bpf_map_copy_value(); nothing is copied back
 * to user space, only the resulting error code is returned.
 */
if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
if (copy_from_user(value, uvalue, value_size))
err = -EFAULT;
else
err = bpf_map_copy_value(map, key, value, attr->flags);
goto free_value;
}
/* First copy: map element -> kernel staging buffer. */
err = bpf_map_copy_value(map, key, value, attr->flags);
if (err)
goto free_value;
/* Second copy: kernel staging buffer -> user buffer. */
err = -EFAULT;
if (copy_to_user(uvalue, value, value_size) != 0)
goto free_value;
err = 0;
/*
 * Cleanup labels release resources in reverse order of acquisition; each
 * label falls through to the ones below it.
 */
free_value:
kvfree(value);
free_key:
kvfree(key);
err_put:
fdput(f);
return err;
}
I cannot understand why eBPF requires both a kernel-side copy and a user-level copy to look up an element. I think this could be replaced with memory shared between the kernel and user space. For example, the kernel could create a mapping and pass the element's location to user space through the fd.
Specifically, why does map_lookup_elem first copy the element's value into value and then copy value to uvalue? Wouldn't it be enough to copy directly from the map's pointer to uvalue once? Why does the kernel first copy from the kernel pointer into value, and only then from value to user space?
linux
linux-kernel
ebpf
0 Answers
Your Answer