vm: address-space: implement copy-on-write address-space duplication
This commit is contained in:
@@ -105,6 +105,12 @@ extern kern_status_t address_space_release(
|
||||
virt_addr_t base,
|
||||
size_t length);
|
||||
|
||||
/* duplicate all of the mappings in `src` within `dest`. the duplication will use
|
||||
* copy-on-write; page data will not be copied until it is written to. */
|
||||
extern kern_status_t address_space_duplicate(
|
||||
struct address_space *dest,
|
||||
struct address_space *src);
|
||||
|
||||
extern bool address_space_validate_access(
|
||||
struct address_space *region,
|
||||
virt_addr_t base,
|
||||
|
||||
@@ -1206,6 +1206,238 @@ kern_status_t address_space_release(
|
||||
return status;
|
||||
}
|
||||
|
||||
static struct vm_area *area_duplicate(struct vm_area *area)
|
||||
{
|
||||
struct vm_area *out = vm_cache_alloc(&vm_area_cache, VM_NORMAL);
|
||||
if (!out) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
out->vma_prot = area->vma_prot;
|
||||
out->vma_object_offset = area->vma_object_offset;
|
||||
out->vma_base = area->vma_base;
|
||||
out->vma_limit = area->vma_limit;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
static kern_status_t update_area_pte_cow(
|
||||
struct address_space *src,
|
||||
struct address_space *dest,
|
||||
struct vm_area *area)
|
||||
{
|
||||
if (!area->vma_object) {
|
||||
return KERN_OK;
|
||||
}
|
||||
|
||||
for (virt_addr_t i = area->vma_base; i < area->vma_limit;
|
||||
i += VM_PAGE_SIZE) {
|
||||
off_t pg_offset = i - area->vma_base + area->vma_object_offset;
|
||||
struct vm_page *pg = vm_object_get_page(
|
||||
area->vma_object,
|
||||
pg_offset,
|
||||
0,
|
||||
NULL);
|
||||
vm_prot_t temp_prot = area->vma_prot;
|
||||
temp_prot &= ~VM_PROT_WRITE;
|
||||
|
||||
if (pg) {
|
||||
pmap_add(
|
||||
src->s_pmap,
|
||||
i,
|
||||
vm_page_get_pfn(pg),
|
||||
temp_prot,
|
||||
PMAP_NORMAL);
|
||||
pmap_add(
|
||||
dest->s_pmap,
|
||||
i,
|
||||
vm_page_get_pfn(pg),
|
||||
temp_prot,
|
||||
PMAP_NORMAL);
|
||||
tracek("PTE %zx -> %zx [%x]",
|
||||
i,
|
||||
vm_page_get_paddr(pg),
|
||||
temp_prot);
|
||||
}
|
||||
}
|
||||
|
||||
return KERN_OK;
|
||||
}
|
||||
|
||||
static kern_status_t prepare_duplicate_areas(
|
||||
struct address_space *src,
|
||||
struct address_space *dest)
|
||||
{
|
||||
struct btree_node *cur_node = btree_first(&src->s_mappings);
|
||||
while (cur_node) {
|
||||
struct vm_area *tmp_area
|
||||
= BTREE_CONTAINER(struct vm_area, vma_node, cur_node);
|
||||
if (!tmp_area->vma_object) {
|
||||
cur_node = btree_next(cur_node);
|
||||
continue;
|
||||
}
|
||||
|
||||
struct vm_object *src_vmo = tmp_area->vma_object;
|
||||
vm_object_lock(src_vmo);
|
||||
|
||||
struct vm_object *dest_vmo = NULL;
|
||||
struct queue_entry *cur_entry
|
||||
= queue_first(&src_vmo->vo_mappings);
|
||||
|
||||
while (cur_entry) {
|
||||
struct vm_area *src_area = QUEUE_CONTAINER(
|
||||
struct vm_area,
|
||||
vma_object_entry,
|
||||
cur_entry);
|
||||
if (src_area->vma_space != src) {
|
||||
cur_entry = queue_next(cur_entry);
|
||||
continue;
|
||||
}
|
||||
|
||||
struct vm_area *dest_area = get_entry(
|
||||
&dest->s_mappings,
|
||||
src_area->vma_base,
|
||||
GET_ENTRY_EXACT);
|
||||
if (!dest_area) {
|
||||
/* this shouldn't happen. the duplicate vm_areas
|
||||
* were already created by
|
||||
* address_space_duplicate */
|
||||
panic("create_duplicate_vmo: corresponding "
|
||||
"vm_area is missing");
|
||||
}
|
||||
|
||||
if (dest_area->vma_object) {
|
||||
cur_entry = queue_next(cur_entry);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!dest_vmo) {
|
||||
tracek("[%zx-%zx %x] creating COW duplicate of "
|
||||
"vmo %p",
|
||||
src_area->vma_base,
|
||||
src_area->vma_limit,
|
||||
src_area->vma_prot,
|
||||
src_vmo);
|
||||
dest_vmo = vm_object_duplicate_cow(src_vmo);
|
||||
tracek("[%zx-%zx %x] created COW duplicate of "
|
||||
"vmo %p -> %p",
|
||||
src_area->vma_base,
|
||||
src_area->vma_limit,
|
||||
src_area->vma_prot,
|
||||
src_vmo,
|
||||
dest_vmo);
|
||||
}
|
||||
|
||||
dest_area->vma_object = dest_vmo;
|
||||
update_area_pte_cow(src, dest, src_area);
|
||||
cur_entry = queue_next(cur_entry);
|
||||
}
|
||||
|
||||
vm_object_unlock(src_vmo);
|
||||
|
||||
cur_node = btree_next(cur_node);
|
||||
}
|
||||
|
||||
return KERN_OK;
|
||||
}
|
||||
|
||||
kern_status_t address_space_duplicate(
|
||||
struct address_space *dest,
|
||||
struct address_space *src)
|
||||
{
|
||||
// address_space_dump(src);
|
||||
/* clang-format off
|
||||
* strategy for COW address space duplication:
|
||||
* 1. duplicate each vm_area in the address space
|
||||
* a. all details except for the vm_object pointer are copied.
|
||||
* b. create a duplicate vm_object, where all the details are the
|
||||
* same, but don't copy the pages or vm_page pointers.
|
||||
* c. if the vm_object is attached to a vm_controller, don't inform
|
||||
* the controller yet.
|
||||
* d. for both the original and duplicate vm_area, duplicate the PTE
|
||||
* entries, changing all of them to read-only. increment the
|
||||
* p_cow_ref counters for all committed vm_pages.
|
||||
* e. use the vm_object's vm_area list, and the vm_area's vma_space
|
||||
* pointer, to ensure that only one duplicate is created for each
|
||||
* unique vm-object referenced by an address-space.
|
||||
* 2. when a page fault occurs:
|
||||
* a. find the relevant vm_area as normal.
|
||||
* b. if the faulted page is present and the vm_area's prot flags
|
||||
* should allow the access, a COW is required.
|
||||
* c. if the relevant page is already present in the vm_area's
|
||||
* vm_object, this is the original vm_area. otherwise, this is the
|
||||
* clone vm_area.
|
||||
* d. if this is the source vm_area:
|
||||
* i. decrement p_cow_ref in the page. if it is 0, skip to step v.
|
||||
* ii. remove the relevant page from the vm_area
|
||||
* iii. allocate a new page and copy the data.
|
||||
* iv. add the new page to the vm_object at the same offset.
|
||||
* v. change the PTE entry to the proper protection flags.
|
||||
* vi. resume the faulting task.
|
||||
* e. otherwise, if this is the clone vm_area:
|
||||
* i. if the vm-object has a controller, send
|
||||
* PAGE_REQUEST_DUPLICATE to it. the controller needs to
|
||||
* prepare itself to receive page requests from this vm-object,
|
||||
* which includes priving it an equeue_key_t.
|
||||
* i. use the physical address stored in the PTE to find the
|
||||
* relevant vm_page.
|
||||
* ii. decrement p_cow_ref in the page.
|
||||
* iii. if p_cow_ref is > 0, allocate a new page and copy the data.
|
||||
* otherwise, use the existing page as-is.
|
||||
* iv. add the page from step iii to the vm_object at the correct
|
||||
* offset.
|
||||
* v. change the PTE entry to the proper protection flags.
|
||||
* vi. resume the faulting task.
|
||||
* 3. when destroying a vm_area:
|
||||
* a. for pages already present in a vm-object, handle as normal.
|
||||
* b. for pages not present in a vm-object, but for which a valid PTE
|
||||
* exists, use the PTE physical address to find the vm_page.
|
||||
* c. decrement p_cow_ref in this page.
|
||||
* d. if p_cow_ref == 0, de-allocate the page.
|
||||
* clang-format on
|
||||
*/
|
||||
struct btree_node *cur = btree_first(&src->s_mappings);
|
||||
while (cur) {
|
||||
struct vm_area *src_area
|
||||
= BTREE_CONTAINER(struct vm_area, vma_node, cur);
|
||||
struct vm_area *dest_area = area_duplicate(src_area);
|
||||
tracek("duplicated vm_area [%zx-%zx] %p -> %p",
|
||||
src_area->vma_base,
|
||||
src_area->vma_limit,
|
||||
src_area,
|
||||
dest_area);
|
||||
/* TODO handle OOM */
|
||||
put_entry(&dest->s_mappings, dest_area);
|
||||
|
||||
cur = btree_next(cur);
|
||||
}
|
||||
|
||||
cur = btree_first(&src->s_reserved);
|
||||
while (cur) {
|
||||
struct vm_area *src_area
|
||||
= BTREE_CONTAINER(struct vm_area, vma_node, cur);
|
||||
struct vm_area *dest_area = area_duplicate(src_area);
|
||||
tracek("duplicated vm_area [r] [%zx-%zx] %p -> %p",
|
||||
src_area->vma_base,
|
||||
src_area->vma_limit,
|
||||
src_area,
|
||||
dest_area);
|
||||
/* TODO handle OOM */
|
||||
put_entry(&dest->s_reserved, dest_area);
|
||||
|
||||
cur = btree_next(cur);
|
||||
}
|
||||
|
||||
tracek("preparing duplicate areas");
|
||||
kern_status_t status = prepare_duplicate_areas(src, dest);
|
||||
tracek("prepared duplicate areas");
|
||||
if (status != KERN_OK) {
|
||||
return status;
|
||||
}
|
||||
|
||||
return KERN_OK;
|
||||
}
|
||||
|
||||
bool address_space_validate_access(
|
||||
struct address_space *region,
|
||||
virt_addr_t ptr,
|
||||
|
||||
Reference in New Issue
Block a user