vm: address-space: implement copy-on-write address-space duplication

This commit is contained in:
2026-04-01 18:36:25 +01:00
parent c8202c6741
commit a0cab068da
2 changed files with 238 additions and 0 deletions
+232
View File
@@ -1206,6 +1206,238 @@ kern_status_t address_space_release(
return status;
}
/*
 * Allocate a new vm_area and copy the placement details of `area` into it.
 *
 * The protection flags, object offset, base and limit are copied. The
 * vm_object pointer is deliberately not copied; it is set to NULL so that
 * prepare_duplicate_areas() can tell which duplicates still need an object
 * attached.
 *
 * Returns the new vm_area, or NULL if the allocation failed.
 */
static struct vm_area *area_duplicate(struct vm_area *area)
{
	struct vm_area *out = vm_cache_alloc(&vm_area_cache, VM_NORMAL);
	if (!out) {
		return NULL;
	}

	out->vma_prot = area->vma_prot;
	out->vma_object_offset = area->vma_object_offset;
	out->vma_base = area->vma_base;
	out->vma_limit = area->vma_limit;

	/* prepare_duplicate_areas() tests this field before anything else
	 * writes it, so do not rely on the allocator returning zeroed
	 * memory. */
	out->vma_object = NULL;

	return out;
}
/*
 * Map every page currently held by `area`'s vm_object into both `src` and
 * `dest` at the same virtual address, with the write permission removed.
 *
 * Addresses for which vm_object_get_page() returns no page are left
 * untouched. An area without a vm_object is a no-op.
 *
 * Always returns KERN_OK.
 */
static kern_status_t update_area_pte_cow(
	struct address_space *src,
	struct address_space *dest,
	struct vm_area *area)
{
	if (!area->vma_object) {
		return KERN_OK;
	}

	/* the area's protection with the write bit cleared; the same value is
	 * used for every page in the area. */
	vm_prot_t ro_prot = area->vma_prot;
	ro_prot &= ~VM_PROT_WRITE;

	for (virt_addr_t va = area->vma_base; va < area->vma_limit;
	     va += VM_PAGE_SIZE) {
		/* translate the virtual address into an offset within the
		 * backing object. */
		off_t obj_offset = va - area->vma_base + area->vma_object_offset;
		struct vm_page *page = vm_object_get_page(
			area->vma_object,
			obj_offset,
			0,
			NULL);
		if (!page) {
			continue;
		}

		pmap_add(
			src->s_pmap,
			va,
			vm_page_get_pfn(page),
			ro_prot,
			PMAP_NORMAL);
		pmap_add(
			dest->s_pmap,
			va,
			vm_page_get_pfn(page),
			ro_prot,
			PMAP_NORMAL);
		tracek("PTE %zx -> %zx [%x]",
		       va,
		       vm_page_get_paddr(page),
		       ro_prot);
	}

	return KERN_OK;
}
static kern_status_t prepare_duplicate_areas(
struct address_space *src,
struct address_space *dest)
{
struct btree_node *cur_node = btree_first(&src->s_mappings);
while (cur_node) {
struct vm_area *tmp_area
= BTREE_CONTAINER(struct vm_area, vma_node, cur_node);
if (!tmp_area->vma_object) {
cur_node = btree_next(cur_node);
continue;
}
struct vm_object *src_vmo = tmp_area->vma_object;
vm_object_lock(src_vmo);
struct vm_object *dest_vmo = NULL;
struct queue_entry *cur_entry
= queue_first(&src_vmo->vo_mappings);
while (cur_entry) {
struct vm_area *src_area = QUEUE_CONTAINER(
struct vm_area,
vma_object_entry,
cur_entry);
if (src_area->vma_space != src) {
cur_entry = queue_next(cur_entry);
continue;
}
struct vm_area *dest_area = get_entry(
&dest->s_mappings,
src_area->vma_base,
GET_ENTRY_EXACT);
if (!dest_area) {
/* this shouldn't happen. the duplicate vm_areas
* were already created by
* address_space_duplicate */
panic("create_duplicate_vmo: corresponding "
"vm_area is missing");
}
if (dest_area->vma_object) {
cur_entry = queue_next(cur_entry);
continue;
}
if (!dest_vmo) {
tracek("[%zx-%zx %x] creating COW duplicate of "
"vmo %p",
src_area->vma_base,
src_area->vma_limit,
src_area->vma_prot,
src_vmo);
dest_vmo = vm_object_duplicate_cow(src_vmo);
tracek("[%zx-%zx %x] created COW duplicate of "
"vmo %p -> %p",
src_area->vma_base,
src_area->vma_limit,
src_area->vma_prot,
src_vmo,
dest_vmo);
}
dest_area->vma_object = dest_vmo;
update_area_pte_cow(src, dest, src_area);
cur_entry = queue_next(cur_entry);
}
vm_object_unlock(src_vmo);
cur_node = btree_next(cur_node);
}
return KERN_OK;
}
/*
 * Populate `dest` as a copy-on-write duplicate of `src`.
 *
 * Every vm_area in src->s_mappings and src->s_reserved is duplicated with
 * area_duplicate() and inserted into the corresponding tree of `dest`.
 * prepare_duplicate_areas() is then called to attach vm_objects to the new
 * mapped areas and to write-protect the pages.
 *
 * Returns the status reported by prepare_duplicate_areas(). Panics if a
 * vm_area cannot be allocated.
 */
kern_status_t address_space_duplicate(
	struct address_space *dest,
	struct address_space *src)
{
	/* clang-format off
	 * strategy for COW address space duplication:
	 * 1. duplicate each vm_area in the address space
	 *   a. all details except for the vm_object pointer are copied.
	 *   b. create a duplicate vm_object, where all the details are the
	 *      same, but don't copy the pages or vm_page pointers.
	 *   c. if the vm_object is attached to a vm_controller, don't inform
	 *      the controller yet.
	 *   d. for both the original and duplicate vm_area, duplicate the PTE
	 *      entries, changing all of them to read-only. increment the
	 *      p_cow_ref counters for all committed vm_pages.
	 *   e. use the vm_object's vm_area list, and the vm_area's vma_space
	 *      pointer, to ensure that only one duplicate is created for each
	 *      unique vm-object referenced by an address-space.
	 * 2. when a page fault occurs:
	 *   a. find the relevant vm_area as normal.
	 *   b. if the faulted page is present and the vm_area's prot flags
	 *      should allow the access, a COW is required.
	 *   c. if the relevant page is already present in the vm_area's
	 *      vm_object, this is the original vm_area. otherwise, this is the
	 *      clone vm_area.
	 *   d. if this is the source vm_area:
	 *     i.   decrement p_cow_ref in the page. if it is 0, skip to step v.
	 *     ii.  remove the relevant page from the vm_area
	 *     iii. allocate a new page and copy the data.
	 *     iv.  add the new page to the vm_object at the same offset.
	 *     v.   change the PTE entry to the proper protection flags.
	 *     vi.  resume the faulting task.
	 *   e. otherwise, if this is the clone vm_area:
	 *     i.   if the vm-object has a controller, send
	 *          PAGE_REQUEST_DUPLICATE to it. the controller needs to
	 *          prepare itself to receive page requests from this vm-object,
	 *          which includes providing it an equeue_key_t.
	 *     ii.  use the physical address stored in the PTE to find the
	 *          relevant vm_page.
	 *     iii. decrement p_cow_ref in the page.
	 *     iv.  if p_cow_ref is > 0, allocate a new page and copy the data.
	 *          otherwise, use the existing page as-is.
	 *     v.   add the page from step iv to the vm_object at the correct
	 *          offset.
	 *     vi.  change the PTE entry to the proper protection flags.
	 *     vii. resume the faulting task.
	 * 3. when destroying a vm_area:
	 *   a. for pages already present in a vm-object, handle as normal.
	 *   b. for pages not present in a vm-object, but for which a valid PTE
	 *      exists, use the PTE physical address to find the vm_page.
	 *   c. decrement p_cow_ref in this page.
	 *   d. if p_cow_ref == 0, de-allocate the page.
	 * clang-format on
	 */

	/* step 1a for the mapped areas */
	struct btree_node *cur = btree_first(&src->s_mappings);
	while (cur) {
		struct vm_area *src_area
			= BTREE_CONTAINER(struct vm_area, vma_node, cur);
		struct vm_area *dest_area = area_duplicate(src_area);
		if (!dest_area) {
			/* TODO unwind and return an error instead. until then,
			 * fail loudly rather than hand a NULL vm_area to
			 * put_entry(). */
			panic("address_space_duplicate: out of memory while "
			      "duplicating vm_area");
		}

		tracek("duplicated vm_area [%zx-%zx] %p -> %p",
		       src_area->vma_base,
		       src_area->vma_limit,
		       src_area,
		       dest_area);
		put_entry(&dest->s_mappings, dest_area);
		cur = btree_next(cur);
	}

	/* step 1a for the reserved areas */
	cur = btree_first(&src->s_reserved);
	while (cur) {
		struct vm_area *src_area
			= BTREE_CONTAINER(struct vm_area, vma_node, cur);
		struct vm_area *dest_area = area_duplicate(src_area);
		if (!dest_area) {
			/* TODO unwind and return an error instead; see the
			 * mapped-area loop in this function. */
			panic("address_space_duplicate: out of memory while "
			      "duplicating reserved vm_area");
		}

		tracek("duplicated vm_area [r] [%zx-%zx] %p -> %p",
		       src_area->vma_base,
		       src_area->vma_limit,
		       src_area,
		       dest_area);
		put_entry(&dest->s_reserved, dest_area);
		cur = btree_next(cur);
	}

	/* steps 1b-1e: attach objects and write-protect the pages */
	tracek("preparing duplicate areas");
	kern_status_t status = prepare_duplicate_areas(src, dest);
	tracek("prepared duplicate areas");

	return status;
}
bool address_space_validate_access(
struct address_space *region,
virt_addr_t ptr,