/* memory.c — kernel/src/memory/memory.c (monolithium.git); commit: "Fix pin_memory." */
1 /*
2  * memory.c
3  *
4  * Copyright (C) 2016 Aleksandar Andrejevic <theflash@sdf.lonestar.org>
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Affero General Public License as
8  * published by the Free Software Foundation, either version 3 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Affero General Public License for more details.
15  *
16  * You should have received a copy of the GNU Affero General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19
20 #include <memory.h>
21 #include <log.h>
22 #include <exception.h>
23 #include <process.h>
24 #include <syscalls.h>
25 #include <heap.h>
26 #include <cpu.h>
27 #include <semaphore.h>
28
/* Free-frame stack: array of physical addresses; top of stack is num_free_pages. */
static void **physical_memory_stack = (void**)MEM_STACK_VIRT_ADDR;
static DECLARE_LOCK(phys_mem_stack_lock);           /* guards physical_memory_stack / num_free_pages */
static page_t *pages = NULL;                        /* per-frame descriptors, sorted by phys_addr (see get_page) */
static void *current_page_directory = INVALID_PAGE; /* physical address of the active page directory */
static memory_address_space_t kernel_address_space;
static memory_address_space_t mapping_space;        /* address space used by pin_memory() for pinned mappings */
static list_entry_t user_address_spaces = { &user_address_spaces, &user_address_spaces };
static dword_t total_physical_pages = 0;
static dword_t num_free_pages = 0;
static dword_t mem_tree_bitmap[TOTAL_PAGES / 32];   /* allocation bitmap for the memory_block_t array */
static DECLARE_LOCK(mem_tree_lock);                 /* guards mem_tree_bitmap */
static semaphore_t temporary_page_semaphore;        /* counts free temporary-mapping slots */
static bool_t evicting = FALSE;                     /* re-entrancy guard: evict_page() allocates pages itself */
static DECLARE_LIST(transition_pages);              /* evicted pages whose frames are still referenced */
static DECLARE_LIST(page_stores);                   /* backing stores (files) for evicted pages */
static DECLARE_LOCK(page_store_lock);               /* guards page_stores and store entry lists */

static void *evict_page(void);                      /* forward: called from alloc_physical_page() */
47
/*
 * Pop one free physical frame off the global stack.
 * When the pool has shrunk to EVICTION_THRESHOLD, first tries to evict a
 * page to a page store and reuse its frame directly.
 * Returns the frame's physical address, or INVALID_PAGE when exhausted.
 */
static inline void *alloc_physical_page(void)
{
    void *page = INVALID_PAGE;

    /* 'evicting' prevents recursion: evict_page() itself may allocate.
     * NOTE(review): the flag and num_free_pages are read without a lock —
     * presumably safe under this kernel's scheduling model; confirm. */
    if (!evicting && num_free_pages <= EVICTION_THRESHOLD)
    {
        evicting = TRUE;
        page = evict_page();
        evicting = FALSE;

        if (page != INVALID_PAGE) return page;
    }

    lock_acquire(&phys_mem_stack_lock);
    if (num_free_pages) page = physical_memory_stack[--num_free_pages];
    lock_release(&phys_mem_stack_lock);

    return page;
}
67
68 static inline void free_physical_page(void *address)
69 {
70     lock_acquire(&phys_mem_stack_lock);
71     physical_memory_stack[num_free_pages++] = address;
72     lock_release(&phys_mem_stack_lock);
73 }
74
75 static int compare_page(const void *a, const void *b)
76 {
77     const page_t *page_a = (const page_t*)a;
78     const page_t *page_b = (const page_t*)b;
79
80     if (page_a->phys_addr < page_b->phys_addr) return -1;
81     else if (page_a->phys_addr > page_b->phys_addr) return 1;
82     else return 0;
83 }
84
85 static page_t *get_page(void *physical)
86 {
87     page_t key = { .phys_addr = (uintptr_t)physical };
88     if (pages == NULL) return NULL;
89     return (page_t*)bsearch(&key, pages, total_physical_pages, sizeof(page_t), compare_page);
90 }
91
92 static inline dword_t reference_page(void *physical)
93 {
94     page_t *page = get_page(physical);
95     if (!page) return 0;
96
97     return ++page->ref_count;
98 }
99
100 static inline dword_t dereference_page(void *physical)
101 {
102     page_t *page = get_page(physical);
103     if (!page) return 0;
104
105     return --page->ref_count;
106 }
107
/*
 * Map one 4 KiB physical frame at 'virtual' in the current address space
 * with the given PTE flags (only the low 12 bits are honored).
 *
 * Relies on the self-referencing page directory: the directory is visible
 * at PAGE_DIRECTORY_ADDR and page table N at PAGE_TABLE_ADDR + (N << 12).
 * A page table is allocated and zeroed on demand.
 *
 * Returns ERR_SUCCESS, ERR_NOMEMORY (no frame for a new table), or
 * ERR_EXISTS if the virtual page is already mapped. Takes one reference
 * on the mapped frame (and one on a newly created table).
 */
static dword_t map_page(void *physical, void *virtual, dword_t flags)
{
    dword_t i;
    dword_t ret = ERR_SUCCESS;
    critical_t critical;
    dword_t phys_addr = PAGE_ALIGN((dword_t)physical);
    dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
    dword_t pd_index = ADDR_TO_PDE(virt_addr), pt_index = ADDR_TO_PTE(virt_addr);
    dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
    dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));

    flags &= 0x00000FFF;    /* keep only hardware flag bits */
    enter_critical(&critical);

    /* Create the page table on first use. */
    if (!(page_directory[pd_index] & PAGE_PRESENT))
    {
        void *table_page = alloc_physical_page();
        if (table_page == INVALID_PAGE)
        {
            ret = ERR_NOMEMORY;
            goto done;
        }

        reference_page(table_page);
        page_directory[pd_index] = (dword_t)table_page | PAGE_PRESENT | PAGE_WRITABLE;

        /* Flush the stale translation of the table's self-mapped window
         * before zeroing it through that window. */
        cpu_invalidate_tlb(page_table);
        for (i = 0; i < PAGE_SIZE / sizeof(dword_t); i++) page_table[i] = 0;
    }

    /* PDE flags accumulate the union of all PTE flags beneath it. */
    page_directory[pd_index] |= flags;
    if (page_table[pt_index] & PAGE_PRESENT)
    {
        ret = ERR_EXISTS;
        goto done;
    }

    reference_page((void*)phys_addr);   /* the new mapping holds a reference */
    page_table[pt_index] = phys_addr | flags | PAGE_PRESENT;
    cpu_invalidate_tlb(virtual);

done:
    leave_critical(&critical);
    return ret;
}
153
/*
 * Remove the mapping of one virtual page from the current address space
 * and drop the mapping's reference on the underlying frame. If the page
 * table becomes empty, the table itself is unlinked, dereferenced and,
 * on its last reference, returned to the free pool.
 *
 * The unmapped data frame is NOT freed here even at refcount zero — that
 * is the caller's responsibility (see free_page()).
 * Returns ERR_NOTFOUND when the page was not mapped.
 */
static dword_t unmap_page(void *virtual)
{
    dword_t i, ret = ERR_SUCCESS;
    critical_t critical;
    bool_t empty_dir = TRUE;
    dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
    dword_t pd_index = ADDR_TO_PDE(virt_addr), pt_index = ADDR_TO_PTE(virt_addr);
    dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
    dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));

    enter_critical(&critical);

    if (!(page_directory[pd_index] & PAGE_PRESENT))
    {
        ret = ERR_NOTFOUND;
        goto done;
    }

    if (!(page_table[pt_index] & PAGE_PRESENT))
    {
        ret = ERR_NOTFOUND;
        goto done;
    }

    /* Drop this mapping's reference on the frame and clear the PTE. */
    dereference_page((void*)PAGE_ALIGN(page_table[pt_index]));
    page_table[pt_index] = 0;
    cpu_invalidate_tlb((dword_t*)virt_addr);

    /* Scan the table; if no entries remain, release the table page. */
    for (i = 0; i < PAGE_SIZE / sizeof(dword_t); i++) if (page_table[i])
    {
        empty_dir = FALSE;
        break;
    }

    if (empty_dir)
    {
        void *table_page = (void*)PAGE_ALIGN(page_directory[pd_index]);
        page_directory[pd_index] = 0;
        cpu_invalidate_tlb(page_table);

        if (dereference_page(table_page) == 0)
        {
            free_physical_page(table_page);
        }
    }

done:
    leave_critical(&critical);
    return ret;
}
204
205 static dword_t get_page_flags(void *virtual)
206 {
207     dword_t virt_addr = PAGE_ALIGN((uintptr_t)virtual);
208     dword_t pd_index = ADDR_TO_PDE(virt_addr), pt_index = ADDR_TO_PTE(virt_addr);
209     dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
210     dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));
211
212     if (!(page_directory[pd_index] & PAGE_PRESENT)) return 0;
213     if (!(page_table[pt_index] & PAGE_PRESENT)) return 0;
214
215     return PAGE_OFFSET(page_table[pt_index]);
216 }
217
/*
 * Rewrite the flag bits (low 12) of an existing mapping in the current
 * address space. Returns ERR_NOTFOUND when the page is not mapped.
 * NOTE(review): flags are OR'd into the PDE (union of PTE flags) but the
 * PTE is rewritten outright, so PDE flags only ever accumulate — appears
 * intentional; confirm.
 */
static dword_t set_page_flags(void *virtual, dword_t flags)
{
    dword_t ret = ERR_SUCCESS;
    critical_t critical;
    dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
    dword_t pd_index = ADDR_TO_PDE(virt_addr), pt_index = ADDR_TO_PTE(virt_addr);
    dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
    dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));

    flags &= 0x00000FFF;    /* keep only hardware flag bits */
    enter_critical(&critical);

    if (!(page_directory[pd_index] & PAGE_PRESENT))
    {
        ret = ERR_NOTFOUND;
        goto done;
    }

    if (!(page_table[pt_index] & PAGE_PRESENT))
    {
        ret = ERR_NOTFOUND;
        goto done;
    }

    page_directory[pd_index] |= flags;
    page_table[pt_index] = PAGE_ALIGN(page_table[pt_index]) | flags | PAGE_PRESENT;
    cpu_invalidate_tlb((void*)virt_addr);

done:
    leave_critical(&critical);
    return ret;
}
250
/*
 * Map a physical frame into one of the TEMPORARY_PAGES scratch slots at
 * TEMPORARY_ADDR. The semaphore counts free slots, so the wait blocks
 * until a slot exists. Returns the slot's virtual address, or NULL.
 * Pair with unmap_temporary_page().
 */
static void *map_temporary_page(void *physical, dword_t flags)
{
    int i;
    wait_semaphore(&temporary_page_semaphore, 1, NO_TIMEOUT);

    /* Scan from the top slot downward for an unmapped address.
     * NOTE(review): reads temporary_page_semaphore.count without any
     * lock as the scan lower bound — presumably consistent under this
     * kernel's synchronization rules; confirm. */
    for (i = TEMPORARY_PAGES - 1; i >= temporary_page_semaphore.count ; i--)
    {
        void *address = (void*)(TEMPORARY_ADDR + i * PAGE_SIZE);

        if (get_physical_address(address) == INVALID_PAGE)
        {
            if (map_page(physical, address, flags) == ERR_SUCCESS) return address;
            break;
        }
    }

    return NULL;
}
269
/* Undo map_temporary_page(): unmap the scratch slot and release its
 * semaphore token so another caller can claim the slot. */
static void unmap_temporary_page(void *virtual)
{
    unmap_page(virtual);
    release_semaphore(&temporary_page_semaphore, 1);
}
275
276 static inline dword_t alloc_page(void *virtual, dword_t flags)
277 {
278     void *phys = alloc_physical_page();
279     if (phys == INVALID_PAGE) return ERR_NOMEMORY;
280
281     dword_t ret = map_page(phys, virtual, flags);
282     if (ret != ERR_SUCCESS) free_physical_page(phys);
283
284     return ret;
285 }
286
287 static inline dword_t free_page(void *virtual)
288 {
289     void *phys = get_physical_address(virtual);
290     if (phys == INVALID_PAGE) return ERR_INVALID;
291
292     unmap_page(virtual);
293
294     page_t *page = get_page(phys);
295     if (page != NULL && page->ref_count > 0) free_physical_page(phys);
296
297     return ERR_SUCCESS;
298 }
299
300 static void *evict_page_from_address_space(memory_address_space_t *space)
301 {
302     void *physical = INVALID_PAGE;
303     int chances = 2;
304     dword_t cached_directory[PAGE_SIZE / sizeof(dword_t)];
305     dword_t *table = NULL;
306
307     if (read_physical(space->page_directory, cached_directory, PAGE_SIZE) != ERR_SUCCESS)
308     {
309         return INVALID_PAGE;
310     }
311
312     if (!space->evict_blk_ptr) space->evict_blk_ptr = space->evictable_blocks.next;
313     memory_block_t *block = CONTAINER_OF(space->evict_blk_ptr, memory_block_t, evict_link);
314     dword_t prev_pd_index = (dword_t)-1;
315     dword_t address;
316     dword_t pd_index, pt_index;
317
318     while (chances)
319     {
320         address = (dword_t)block->address + space->evict_page_num * PAGE_SIZE;
321         pd_index = ADDR_TO_PDE(address);
322         pt_index = ADDR_TO_PTE(address);
323         if (!(cached_directory[pd_index] & PAGE_PRESENT)) goto next;
324
325         if (prev_pd_index != pd_index)
326         {
327             if (table) unmap_temporary_page(table);
328             table = map_temporary_page((void*)PAGE_ALIGN(cached_directory[pd_index]),
329                                        PAGE_PRESENT | PAGE_WRITABLE);
330             if (table == NULL) break;
331             prev_pd_index = pd_index;
332         }
333
334         if (table[pt_index])
335         {
336             if (!(table[pt_index] & PAGE_ACCESSED))
337             {
338                 physical = (void*)PAGE_ALIGN(table[pt_index]);
339                 break;
340             }
341
342             table[pt_index] &= ~PAGE_ACCESSED;
343         }
344
345 next:
346         space->evict_page_num++;
347
348         if (space->evict_page_num == (dword_t)block->size)
349         {
350             space->evict_page_num = 0;
351             space->evict_blk_ptr = space->evict_blk_ptr->next;
352
353             if (space->evict_blk_ptr == &space->evictable_blocks)
354             {
355                 space->evict_blk_ptr = space->evict_blk_ptr->next;
356                 chances--;
357             }
358
359             if (space->evict_blk_ptr == &space->evictable_blocks) break;
360             block = CONTAINER_OF(space->evict_blk_ptr, memory_block_t, evict_link);
361         }
362     }
363
364     if (physical == INVALID_PAGE) goto cleanup;
365
366     dword_t i;
367     list_entry_t *ptr;
368     page_store_t *store = NULL;
369     byte_t buffer[PAGE_SIZE];
370
371     dword_t ret = read_physical(physical, buffer, PAGE_SIZE);
372     if (ret != ERR_SUCCESS)
373     {
374         physical = INVALID_PAGE;
375         goto cleanup;
376     }
377
378     for (ptr = page_stores.next; ptr != &page_stores; ptr = ptr->next)
379     {
380         store = CONTAINER_OF(ptr, page_store_t, link);
381
382         for (i = 0; i < store->max_entries; i++) if (!test_bit(store->bitmap, i)) break;
383         if (i == store->max_entries) continue;
384     }
385
386     if (ptr == &page_stores)
387     {
388         physical = INVALID_PAGE;
389         goto cleanup;
390     }
391
392     page_store_entry_t *entry = (page_store_entry_t*)malloc(sizeof(page_store_entry_t));
393     if (entry == NULL)
394     {
395         physical = INVALID_PAGE;
396         goto cleanup;
397     }
398
399     space->stats.evicted += PAGE_SIZE;
400     entry->address = (void*)address;
401     entry->address_space = space;
402     entry->number = INVALID_STORE_NUMBER;
403     entry->physical = INVALID_PAGE;
404
405     if (dereference_page(physical) == 0)
406     {
407         entry->number = i;
408
409         dword_t bytes_written;
410         ret = syscall_write_file(store->file_handle, buffer, (qword_t)entry->number * (qword_t)PAGE_SIZE, PAGE_SIZE, &bytes_written);
411         if (ret != ERR_SUCCESS)
412         {
413             reference_page(physical);
414             free(entry);
415             physical = INVALID_PAGE;
416             goto cleanup;
417         }
418
419         set_bit(store->bitmap, i);
420         list_append(&store->entry_list, &entry->link);
421
422         for (ptr = transition_pages.next; ptr != &transition_pages; ptr = ptr->next)
423         {
424             page_store_entry_t *other_entry = CONTAINER_OF(ptr, page_store_entry_t, link);
425
426             if (other_entry->physical == physical)
427             {
428                 ASSERT(other_entry->number == INVALID_STORE_NUMBER);
429
430                 list_remove(&other_entry->link);
431                 list_append(&store->entry_list, &other_entry->link);
432
433                 other_entry->number = entry->number;
434                 other_entry->physical = INVALID_PAGE;
435             }
436         }
437     }
438     else
439     {
440         entry->physical = physical;
441         list_append(&transition_pages, &entry->link);
442         physical = INVALID_PAGE;
443     }
444
445     table[pt_index] = 0;
446     if (space->page_directory == get_page_directory()) cpu_invalidate_tlb((void*)address);
447
448 cleanup:
449     if (table) unmap_temporary_page(table);
450     return physical;
451 }
452
453 static void *evict_page(void)
454 {
455     if (pages == NULL) return INVALID_PAGE;
456
457     list_entry_t *ptr;
458
459     for (ptr = user_address_spaces.next; ptr != &user_address_spaces; ptr = ptr->next)
460     {
461         memory_address_space_t *space = CONTAINER_OF(ptr, memory_address_space_t, link);
462         void *page = evict_page_from_address_space(space);
463         if (page != INVALID_PAGE) return page;
464     }
465
466     return evict_page_from_address_space(&kernel_address_space);
467 }
468
/*
 * Allocate one memory_block_t from the fixed virtual array at
 * MEM_TREE_BLOCKS, tracked by mem_tree_bitmap (one bit per block).
 * Backing pages are mapped lazily the first time a block inside them is
 * handed out. Returns NULL when the bitmap is exhausted or no physical
 * memory is available.
 */
static memory_block_t *mem_tree_alloc(void)
{
    dword_t i;
    memory_block_t *block = NULL;

    lock_acquire(&mem_tree_lock);
    /* First-fit scan for a clear bit. */
    for (i = 0; i < TOTAL_PAGES; i++) if (!test_bit(mem_tree_bitmap, i)) break;

    if (i < TOTAL_PAGES)
    {
        block = (memory_block_t*)(MEM_TREE_BLOCKS + i * sizeof(memory_block_t));

        /* Use the slot if its page is already mapped; otherwise map one. */
        if ((get_physical_address(block) != INVALID_PAGE)
            || (alloc_page(block, PAGE_GLOBAL | PAGE_WRITABLE | PAGE_PRESENT) == ERR_SUCCESS))
        {
            set_bit(mem_tree_bitmap, i);
        }
        else
        {
            block = NULL;
        }
    }

    lock_release(&mem_tree_lock);
    return block;
}
495
/*
 * Return a memory_block_t to the bitmap allocator. When no other block
 * in the same 4 KiB page remains allocated, the backing page itself is
 * unmapped and freed.
 */
static void mem_tree_free(memory_block_t *block)
{
    dword_t index = ((dword_t)block - MEM_TREE_BLOCKS) / sizeof(memory_block_t);
    bool_t busy = FALSE;
    dword_t i, page = PAGE_ALIGN((dword_t)block);

    lock_acquire(&mem_tree_lock);
    clear_bit(mem_tree_bitmap, index);

    /* Scan every block slot that lives in this page. */
    for (i = page; i < page + PAGE_SIZE; i += sizeof(memory_block_t))
    {
        index = (i - MEM_TREE_BLOCKS) / sizeof(memory_block_t);
        if (test_bit(mem_tree_bitmap, index))
        {
            busy = TRUE;
            break;
        }
    }

    if (!busy) free_page((void*)page);
    lock_release(&mem_tree_lock);
}
518
519 static memory_block_t *find_block_by_addr_internal(memory_block_t *block, void *address)
520 {
521     qword_t key = (qword_t)(dword_t)address;
522     qword_t start_addr = block->address;
523     qword_t end_addr = start_addr + block->size * PAGE_SIZE;
524
525     if (key >= start_addr && key < end_addr) return block;
526
527     if (key < start_addr)
528     {
529         if (!block->by_addr_node.left) return NULL;
530
531         memory_block_t *left_block = CONTAINER_OF(block->by_addr_node.left, memory_block_t, by_addr_node);
532         return find_block_by_addr_internal(left_block, address);
533     }
534     else
535     {
536         if (!block->by_addr_node.right) return NULL;
537
538         memory_block_t *right_block = CONTAINER_OF(block->by_addr_node.right, memory_block_t, by_addr_node);
539         return find_block_by_addr_internal(right_block, address);
540     }
541 }
542
543 static memory_block_t *find_block_by_addr(memory_address_space_t *space, void *address)
544 {
545     if (!space->by_addr_tree.root) return NULL;
546     memory_block_t *root = CONTAINER_OF(space->by_addr_tree.root, memory_block_t, by_addr_node);
547     return find_block_by_addr_internal(root, address);
548 }
549
/*
 * Recursively clone 'block' and its entire by-addr subtree into 'space',
 * marking both original and clone MEMORY_BLOCK_COPY_ON_WRITE.
 * Returns FALSE on allocation failure after removing this node's clone.
 * NOTE(review): when a child call fails, clones already inserted by
 * completed sibling/descendant calls are not rolled back — possible
 * leak into the target space; confirm callers discard the space.
 */
static bool_t clone_blocks_recursive(memory_address_space_t *space, memory_block_t *block)
{
    memory_block_t *clone = mem_tree_alloc();
    if (clone == NULL) return FALSE;

    clone->address = block->address;
    clone->size = block->size;
    block->flags |= MEMORY_BLOCK_COPY_ON_WRITE;   /* source becomes COW too */
    clone->flags = block->flags;
    clone->address_space = space;
    clone->section = block->section;

    avl_tree_insert(&space->by_addr_tree, &clone->by_addr_node);
    avl_tree_insert(&space->by_size_tree, &clone->by_size_node);

    memory_block_t *left_block = CONTAINER_OF(block->by_addr_node.left, memory_block_t, by_addr_node);
    memory_block_t *right_block = CONTAINER_OF(block->by_addr_node.right, memory_block_t, by_addr_node);

    if ((block->by_addr_node.left && !clone_blocks_recursive(space, left_block))
        || (block->by_addr_node.right && !clone_blocks_recursive(space, right_block)))
    {
        avl_tree_remove(&space->by_addr_tree, &clone->by_addr_node);
        avl_tree_remove(&space->by_size_tree, &clone->by_size_node);
        mem_tree_free(clone);
        return FALSE;
    }

    return TRUE;
}
579
580 static inline void release_memory_block(memory_block_t *block)
581 {
582     dword_t page;
583     dword_t start_address = (dword_t)block->address;
584     dword_t end_address = start_address + (dword_t)block->size * PAGE_SIZE;
585
586     critical_t critical;
587     enter_critical(&critical);
588     void *old_page_dir = get_page_directory();
589     set_page_directory(block->address_space->page_directory);
590
591     for (page = start_address; page < end_address; page += PAGE_SIZE)
592     {
593         free_page((void*)page);
594     }
595
596     set_page_directory(old_page_dir);
597     leave_critical(&critical);
598
599     if (block->section)
600     {
601         dereference(&block->section->header);
602         block->section = NULL;
603     }
604
605     list_entry_t *i;
606
607     for (i = transition_pages.next; i != &transition_pages; i = i->next)
608     {
609         page_store_entry_t *entry = CONTAINER_OF(i, page_store_entry_t, link);
610
611         if (entry->address_space == block->address_space
612             && (dword_t)entry->address >= start_address
613             && ((dword_t)entry->address < end_address))
614         {
615             list_remove(&entry->link);
616             free(entry);
617         }
618     }
619
620     lock_acquire(&page_store_lock);
621
622     for (i = page_stores.next; i != &page_stores; i = i->next)
623     {
624         list_entry_t *j;
625         page_store_t *store = CONTAINER_OF(i, page_store_t, link);
626
627         for (j = store->entry_list.next; j != &store->entry_list; j = j->next)
628         {
629             page_store_entry_t *entry = CONTAINER_OF(j, page_store_entry_t, link);
630
631             if (entry->address_space == block->address_space
632                 && (dword_t)entry->address >= start_address
633                 && ((dword_t)entry->address < end_address))
634             {
635                 if (entry->number != INVALID_STORE_NUMBER) clear_bit(store->bitmap, entry->number);
636                 list_remove(&entry->link);
637                 free(entry);
638             }
639         }
640     }
641
642     lock_release(&page_store_lock);
643 }
644
/*
 * Tear down an entire by-addr subtree: release each block's resources,
 * recurse into both children, then return the node itself to the block
 * allocator. The node is freed only after its children were visited, so
 * the child links stay valid during traversal.
 */
static void free_blocks_recursive(memory_block_t *block)
{
    release_memory_block(block);

    if (block->by_addr_node.left)
    {
        memory_block_t *left_block = CONTAINER_OF(block->by_addr_node.left, memory_block_t, by_addr_node);
        free_blocks_recursive(left_block);
    }

    if (block->by_addr_node.right)
    {
        memory_block_t *right_block = CONTAINER_OF(block->by_addr_node.right, memory_block_t, by_addr_node);
        free_blocks_recursive(right_block);
    }

    mem_tree_free(block);
}
663
/*
 * Search the by-size AVL subtree for a FREE block of at least 'size'
 * pages, preferring smaller candidates (left subtree first). Equal-size
 * blocks are chained via next_equal. When 'address' is non-NULL the
 * block must fully contain [address, address + size*PAGE_SIZE).
 * Returns NULL when nothing fits.
 */
static memory_block_t *find_free_block_internal(memory_block_t *root, void *address, dword_t size)
{
    avl_node_t *ptr;

    /* Try strictly-smaller-keyed candidates first for a tighter fit. */
    if (root->by_size_node.left && (dword_t)root->size > size)
    {
        memory_block_t *left = CONTAINER_OF(root->by_size_node.left, memory_block_t, by_size_node);
        memory_block_t *block = find_free_block_internal(left, address, size);
        if (block) return block;
    }

    if ((dword_t)root->size >= size)
    {
        for (ptr = &root->by_size_node; ptr != NULL; ptr = ptr->next_equal)
        {
            memory_block_t *block = CONTAINER_OF(ptr, memory_block_t, by_size_node);

            if (!(block->flags & MEMORY_BLOCK_FREE)) continue;

            if (address != NULL)
            {
                dword_t block_start = (dword_t)block->address;
                dword_t block_end = block_start + ((dword_t)block->size * PAGE_SIZE) - 1;

                dword_t needed_start = (dword_t)address;
                dword_t needed_end = needed_start + (size * PAGE_SIZE) - 1;

                /* The requested range must lie entirely inside the block. */
                if ((needed_start < block_start) || (needed_end > block_end)) continue;
            }

            return block;
        }
    }

    if (!root->by_size_node.right) return NULL;
    memory_block_t *right = CONTAINER_OF(root->by_size_node.right, memory_block_t, by_size_node);
    return find_free_block_internal(right, address, size);
}
702
703 static memory_block_t *find_free_block(memory_address_space_t *address_space, void *address, dword_t size)
704 {
705     memory_block_t *root_block = CONTAINER_OF(address_space->by_size_tree.root, memory_block_t, by_size_node);
706     return find_free_block_internal(root_block, address, size);
707 }
708
709 static void *create_page_directory(void)
710 {
711     dword_t *current = (dword_t*)PAGE_DIRECTORY_ADDR;
712     dword_t new_dir_buffer[PAGE_SIZE / sizeof(dword_t)];
713
714     memset(&new_dir_buffer[USER_PAGE_START],
715            0,
716            (USER_PAGE_END - USER_PAGE_START + 1) * sizeof(dword_t));
717
718     memcpy(&new_dir_buffer[KERNEL_PAGE_START],
719            &current[KERNEL_PAGE_START],
720            (KERNEL_PAGE_END - KERNEL_PAGE_START + 1) * sizeof(dword_t));
721
722     void *directory = alloc_physical_page();
723     if (directory == NULL) return NULL;
724
725     new_dir_buffer[PAGEDIR_SELF_ENTRY] = (dword_t)directory | PAGE_PRESENT | PAGE_WRITABLE;
726     write_physical(directory, new_dir_buffer, PAGE_SIZE);
727
728     return directory;
729 }
730
/*
 * Sanitize the multiboot memory map in place: for every pair of entries
 * that overlap, trim the higher-based one so it begins at the end of the
 * lower one (length becomes 0 when it was fully contained). Each entry
 * is compared against all entries that precede it in the map.
 */
static void fix_overlapping_sections(multiboot_tag_mmap_t *mmap)
{
    multiboot_mmap_entry_t *entry;

    for (entry = (multiboot_mmap_entry_t*)(mmap + 1);
         (uintptr_t)entry < ((uintptr_t)mmap + mmap->size);
         entry = (multiboot_mmap_entry_t*)((uintptr_t)entry + mmap->entry_size))
    {
        multiboot_mmap_entry_t *ptr;

        for (ptr = (multiboot_mmap_entry_t*)(mmap + 1);
             (uintptr_t)ptr < (uintptr_t)entry;
             ptr = (multiboot_mmap_entry_t*)((uintptr_t)ptr + mmap->entry_size))
        {
            qword_t entry_end = entry->base + entry->length;
            qword_t ptr_end = ptr->base + ptr->length;

            if (entry->base > ptr->base && entry->base < ptr_end)
            {
                /* 'entry' starts inside 'ptr': push its base past ptr. */
                entry->base = ptr_end;
                if (entry->base >= entry_end) entry->length = 0;
                else entry->length = entry_end - entry->base;
            }
            else if (ptr->base > entry->base && ptr->base < entry_end)
            {
                /* 'ptr' starts inside 'entry': push its base past entry. */
                ptr->base = entry_end;
                if (ptr->base >= ptr_end) ptr->length = 0;
                else ptr->length = ptr_end - ptr->base;
            }
        }
    }
}
763
/*
 * Coalesce 'mem_block' with successive FREE neighbors in address order:
 * grow mem_block's size key to absorb each neighbor, then remove the
 * neighbor from both trees and return it to the block allocator.
 * Returns mem_block (unchanged identity).
 * NOTE(review): only the neighbor's FREE flag is checked, not exact
 * address adjacency — presumably by-addr neighbors in this allocator
 * are always contiguous; confirm.
 */
static inline memory_block_t *combine_blocks_forward(memory_block_t *mem_block)
{
    while (TRUE)
    {
        avl_node_t *next = avl_get_next_node(&mem_block->by_addr_node);
        if (!next) break;

        memory_block_t *next_block = CONTAINER_OF(next, memory_block_t, by_addr_node);
        if (!(next_block->flags & MEMORY_BLOCK_FREE)) break;

        size_t new_size = mem_block->size + next_block->size;
        avl_tree_change_key(&mem_block->address_space->by_size_tree, &mem_block->by_size_node, &new_size);

        avl_tree_remove(&mem_block->address_space->by_addr_tree, &next_block->by_addr_node);
        avl_tree_remove(&mem_block->address_space->by_size_tree, &next_block->by_size_node);
        mem_tree_free(next_block);
    }

    return mem_block;
}
784
/*
 * Mirror of combine_blocks_forward(): merge 'mem_block' into preceding
 * FREE neighbors. Here the PREVIOUS block survives (its size key grows)
 * and mem_block itself is removed and freed, so the merged block is
 * returned to the caller.
 * NOTE(review): as in the forward variant, address adjacency is assumed
 * rather than checked — confirm.
 */
static inline memory_block_t *combine_blocks_backward(memory_block_t *mem_block)
{
    while (TRUE)
    {
        avl_node_t *previous = avl_get_previous_node(&mem_block->by_addr_node);
        if (!previous) break;

        memory_block_t *prev_block = CONTAINER_OF(previous, memory_block_t, by_addr_node);
        if (!(prev_block->flags & MEMORY_BLOCK_FREE)) break;

        size_t new_size = prev_block->size + mem_block->size;
        avl_tree_change_key(&mem_block->address_space->by_size_tree, &prev_block->by_size_node, &new_size);

        avl_tree_remove(&mem_block->address_space->by_addr_tree, &mem_block->by_addr_node);
        avl_tree_remove(&mem_block->address_space->by_size_tree, &mem_block->by_size_node);
        mem_tree_free(mem_block);

        mem_block = prev_block;
    }

    return mem_block;
}
807
/* Object destructor for memory sections: drops the reference the section
 * holds on its backing file, if any. */
void memory_cleanup(object_t *obj)
{
    memory_section_t *section = (memory_section_t*)obj;
    if (section->file) dereference(&section->file->header);
}
813
/* Return the cached physical address of the active page directory
 * (maintained by set_page_directory()). */
void *get_page_directory(void)
{
    return current_page_directory;
}
818
/* Switch to a new page directory: update the cached pointer and load the
 * CPU's page-table base register with the physical address. */
void set_page_directory(void *phys_addr)
{
    current_page_directory = phys_addr;
    cpu_write_page_table_register((uintptr_t)phys_addr);
}
824
825 void *get_physical_address(void *virtual)
826 {
827     dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
828     dword_t pd_index = ADDR_TO_PDE(virt_addr), pt_index = ADDR_TO_PTE(virt_addr);
829     dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
830     dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));
831
832     if (!(page_directory[pd_index] & PAGE_PRESENT)) return INVALID_PAGE;
833     if (!(page_table[pt_index] & PAGE_PRESENT)) return INVALID_PAGE;
834
835     return (void*)(PAGE_ALIGN(page_table[pt_index]) + PAGE_OFFSET((dword_t)virtual));
836 }
837
838 dword_t map_memory_internal(void *physical, void *virtual, uintptr_t size, dword_t page_flags)
839 {
840     dword_t i, j;
841     dword_t phys_addr = PAGE_ALIGN((dword_t)physical);
842     dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
843     size = PAGE_ALIGN_UP(size);
844     page_flags &= 0xFFF;
845
846     for (i = 0; i < size; i += PAGE_SIZE)
847     {
848         dword_t ret = map_page((void*)(phys_addr + i), (void*)(virt_addr + i), page_flags);
849         if (ret != ERR_SUCCESS)
850         {
851             for (j = 0; j < i; j += PAGE_SIZE) unmap_page((void*)(virt_addr + j));
852             return ret;
853         }
854     }
855
856     return ERR_SUCCESS;
857 }
858
/*
 * Unmap a contiguous virtual range from the current address space.
 * 'size' is rounded up to whole pages.
 * NOTE(review): unmap_page() already dereferences the frame, and this
 * function calls dereference_page() on it again — looks like a double
 * dereference unless the corresponding map path took an extra
 * reference; confirm against map_memory_internal()/map_page().
 */
void unmap_memory_internal(void *virtual, dword_t size)
{
    dword_t i;
    dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
    size = PAGE_ALIGN_UP(size);

    for (i = 0; i < size; i += PAGE_SIZE)
    {
        void *page_addr = (void*)(virt_addr + i);
        void *physical = get_physical_address(page_addr);

        unmap_page(page_addr);
        dereference_page(physical);
    }
}
874
/*
 * Map a physical range into 'address_space', carving the mapping out of
 * a free block. *virtual may request a specific address (its page offset
 * must match the physical one) or be NULL to let the allocator choose;
 * on success it receives the chosen virtual address including the
 * original page offset.
 *
 * The chosen free block is split into up to three pieces: an optional
 * free prefix (before the requested address), the mapping itself, and an
 * optional free suffix; the free pieces are re-coalesced with their
 * neighbors. Returns ERR_INVALID, ERR_NOMEMORY, or a map error.
 */
dword_t map_memory_in_address_space(memory_address_space_t *address_space,
                                    void *physical,
                                    void **virtual,
                                    uintptr_t size,
                                    dword_t block_flags)
{
    dword_t ret;
    void *address = (void*)PAGE_ALIGN((uintptr_t)*virtual);
    uintptr_t aligned_physical = PAGE_ALIGN((uintptr_t)physical);
    if (*virtual != NULL && PAGE_OFFSET((uintptr_t)*virtual) != PAGE_OFFSET((uintptr_t)physical)) return ERR_INVALID;

    /* Convert byte size to a page count covering the physical range. */
    size = (PAGE_ALIGN_UP((uintptr_t)physical + size - 1) - aligned_physical) >> 12;
    lock_acquire(&address_space->lock);

    memory_block_t *block = find_free_block(address_space, address, size);
    if (block == NULL)
    {
        lock_release(&address_space->lock);
        return ERR_NOMEMORY;
    }

    dword_t flags = PAGE_GLOBAL;
    dword_t real_address = (address != NULL) ? (dword_t)address : (dword_t)block->address;

    /* Physical mappings are never evictable. */
    block_flags &= ~MEMORY_BLOCK_EVICTABLE;
    if (block_flags & MEMORY_BLOCK_ACCESSIBLE) flags |= PAGE_PRESENT;
    if (block_flags & MEMORY_BLOCK_WRITABLE) flags |= PAGE_WRITABLE;
    if (block_flags & MEMORY_BLOCK_USERMODE) flags |= PAGE_USERMODE;

    ret = map_memory_internal((void*)aligned_physical, (void*)real_address, size * PAGE_SIZE, flags);
    if (ret != ERR_SUCCESS)
    {
        lock_release(&address_space->lock);
        return ret;
    }

    /* Split off a free prefix when the mapping starts past block start. */
    if ((dword_t)block->address < (dword_t)address)
    {
        /* NOTE(review): mem_tree_alloc() can return NULL (out of blocks
         * or memory) and is dereferenced unchecked here — NULL deref on
         * exhaustion; needs a rollback path. */
        memory_block_t *new_block = mem_tree_alloc();
        new_block->flags = MEMORY_BLOCK_FREE;
        new_block->address = block->address;
        new_block->size = (size_t)(((dword_t)address - block->address) / PAGE_SIZE);
        new_block->address_space = address_space;
        new_block->section = NULL;

        size_t new_size = block->size - new_block->size;
        avl_tree_change_key(&address_space->by_size_tree, &block->by_size_node, &new_size);
        avl_tree_change_key(&address_space->by_addr_tree, &block->by_addr_node, &address);

        avl_tree_insert(&address_space->by_addr_tree, &new_block->by_addr_node);
        avl_tree_insert(&address_space->by_size_tree, &new_block->by_size_node);

        combine_blocks_backward(new_block);
    }

    /* Split off a free suffix when the block is larger than needed. */
    if (block->size > size)
    {
        /* NOTE(review): same unchecked mem_tree_alloc() as above. */
        memory_block_t *new_block = mem_tree_alloc();
        new_block->flags = MEMORY_BLOCK_FREE;
        new_block->address = (qword_t)(block->address + (size * PAGE_SIZE));
        new_block->size = (qword_t)((dword_t)block->size - size);
        new_block->address_space = address_space;
        new_block->section = NULL;

        avl_tree_change_key(&address_space->by_size_tree, &block->by_size_node, &size);

        avl_tree_insert(&address_space->by_addr_tree, &new_block->by_addr_node);
        avl_tree_insert(&address_space->by_size_tree, &new_block->by_size_node);

        combine_blocks_forward(new_block);
    }

    block->flags = block_flags;
    *virtual = (void*)((dword_t)block->address + PAGE_OFFSET((uintptr_t)physical));

    lock_release(&address_space->lock);
    return ERR_SUCCESS;
}
953
954 dword_t pin_memory(const void *virtual, void **pinned, uintptr_t size, bool_t lock_contents)
955 {
956     uintptr_t i;
957     uintptr_t virt_addr = PAGE_ALIGN((uintptr_t)virtual);
958     void *address = (void*)PAGE_ALIGN((uintptr_t)*pinned);
959     size = 1 + ((((uintptr_t)virtual + size - 1) - virt_addr) >> 12);
960     if (commit_pages((void*)virt_addr, size * PAGE_SIZE) != ERR_SUCCESS) return ERR_BADPTR;
961
962     memory_address_space_t *address_space = check_usermode(virtual, 1) ? &get_current_process()->memory_space : &kernel_address_space;
963     lock_acquire_shared(&address_space->lock);
964     lock_acquire(&mapping_space.lock);
965
966     memory_block_t *block = find_free_block(&mapping_space, address, size);
967     if (block == NULL)
968     {
969         lock_release(&address_space->lock);
970         lock_release(&mapping_space.lock);
971         return ERR_NOMEMORY;
972     }
973
974     dword_t real_address = (address != NULL) ? (dword_t)address : (dword_t)block->address;
975     dword_t new_flags = PAGE_PRESENT | PAGE_GLOBAL;
976     if (!lock_contents) new_flags |= PAGE_WRITABLE;
977
978     for (i = 0; i < size; i++)
979     {
980         void *virt_page = (void*)(virt_addr + i * PAGE_SIZE);
981         void *phys_page = get_physical_address(virt_page);
982         ASSERT(phys_page != INVALID_PAGE);
983
984         if (lock_contents)
985         {
986             memory_block_t *block = find_block_by_addr(address_space, (void*)(virt_addr + i));
987             ASSERT(block != NULL);
988             block->flags |= MEMORY_BLOCK_COPY_ON_WRITE;
989             set_page_flags(virt_page, get_page_flags(virt_page) & ~PAGE_WRITABLE);
990         }
991
992         dword_t ret = map_page(phys_page, (void*)(real_address + i * PAGE_SIZE), new_flags);
993         ASSERT(ret == ERR_SUCCESS);
994         reference_page(phys_page);
995     }
996
997     if ((dword_t)block->address < (dword_t)address)
998     {
999         memory_block_t *new_block = mem_tree_alloc();
1000         new_block->flags = MEMORY_BLOCK_FREE;
1001         new_block->address = block->address;
1002         new_block->size = (size_t)(((dword_t)address - block->address) / PAGE_SIZE);
1003         new_block->address_space = &mapping_space;
1004         new_block->section = NULL;
1005
1006         size_t new_size = block->size - new_block->size;
1007         avl_tree_change_key(&mapping_space.by_size_tree, &block->by_size_node, &new_size);
1008         avl_tree_change_key(&mapping_space.by_addr_tree, &block->by_addr_node, &address);
1009
1010         avl_tree_insert(&mapping_space.by_addr_tree, &new_block->by_addr_node);
1011         avl_tree_insert(&mapping_space.by_size_tree, &new_block->by_size_node);
1012
1013         combine_blocks_backward(new_block);
1014     }
1015
1016     if ((dword_t)block->size > size)
1017     {
1018         memory_block_t *new_block = mem_tree_alloc();
1019         new_block->flags = MEMORY_BLOCK_FREE;
1020         new_block->address = (qword_t)(block->address + (size * PAGE_SIZE));
1021         new_block->size = (qword_t)((dword_t)block->size - size);
1022         new_block->address_space = &mapping_space;
1023         new_block->section = NULL;
1024
1025         avl_tree_change_key(&mapping_space.by_size_tree, &block->by_size_node, &size);
1026
1027         avl_tree_insert(&mapping_space.by_addr_tree, &new_block->by_addr_node);
1028         avl_tree_insert(&mapping_space.by_size_tree, &new_block->by_size_node);
1029
1030         combine_blocks_forward(new_block);
1031     }
1032
1033     block->flags = MEMORY_BLOCK_ACCESSIBLE;
1034     if (!lock_contents) block->flags |= MEMORY_BLOCK_WRITABLE;
1035     *pinned = (void*)((dword_t)block->address) + PAGE_OFFSET((uintptr_t)virtual);
1036
1037     lock_release(&address_space->lock);
1038     lock_release(&mapping_space.lock);
1039     return ERR_SUCCESS;
1040 }
1041
1042 dword_t unmap_memory_in_address_space(memory_address_space_t *address_space, void *virtual)
1043 {
1044     lock_acquire(&mapping_space.lock);
1045     uintptr_t aligned_address = PAGE_ALIGN((uintptr_t)virtual);
1046
1047     avl_node_t *node = avl_tree_lookup(&mapping_space.by_addr_tree, &aligned_address);
1048     if (node == NULL)
1049     {
1050         lock_release(&mapping_space.lock);
1051         return ERR_INVALID;
1052     }
1053
1054     memory_block_t *mem_block = CONTAINER_OF(node, memory_block_t, by_addr_node);
1055     if (mem_block->flags & MEMORY_BLOCK_FREE)
1056     {
1057         lock_release(&mapping_space.lock);
1058         return ERR_INVALID;
1059     }
1060
1061     unmap_memory_internal((void*)((dword_t)mem_block->address), (dword_t)mem_block->size * PAGE_SIZE);
1062
1063     mem_block->flags = MEMORY_BLOCK_FREE;
1064     mem_block = combine_blocks_backward(mem_block);
1065     mem_block = combine_blocks_forward(mem_block);
1066
1067     lock_release(&mapping_space.lock);
1068     return ERR_SUCCESS;
1069 }
1070
/* Map a physical range into the dedicated global mapping space.
 * Thin convenience wrapper around map_memory_in_address_space(). */
dword_t map_memory(void *physical, void **virtual, uintptr_t size, dword_t block_flags)
{
    return map_memory_in_address_space(&mapping_space, physical, virtual, size, block_flags);
}
1075
/* Undo a map_memory() mapping in the global mapping space.
 * Thin convenience wrapper around unmap_memory_in_address_space(). */
dword_t unmap_memory(void *virtual)
{
    return unmap_memory_in_address_space(&mapping_space, virtual);
}
1080
/*
 * Allocate a block of virtual address space, optionally backed by a
 * memory section.
 *
 * address        - in: optional placement hint (page-aligned down),
 *                  out: base address of the allocated block
 * size           - requested size in bytes (rounded up to whole pages)
 * block_flags    - MEMORY_BLOCK_* attributes for the new block
 * section        - optional backing section (referenced on success path)
 * section_offset - offset of the block within the section
 *
 * Returns ERR_SUCCESS, ERR_INVALID for a zero-sized request, or
 * ERR_NOMEMORY if no suitable free block exists.
 */
dword_t alloc_memory_in_address_space(memory_address_space_t *address_space,
                                      void **address,
                                      dword_t size,
                                      dword_t block_flags,
                                      memory_section_t *section,
                                      qword_t section_offset)
{
    void *base_address = (void*)PAGE_ALIGN((uintptr_t)*address);

    /* Callers may not request the internal free/COW flags directly. */
    block_flags &= ~(MEMORY_BLOCK_FREE | MEMORY_BLOCK_COPY_ON_WRITE);
    size = PAGE_ALIGN_UP(size) >> 12;  /* bytes -> page count */
    if (size == 0) return ERR_INVALID;

    lock_acquire(&address_space->lock);

    memory_block_t *block = find_free_block(address_space, base_address, size);
    if (block == NULL)
    {
        lock_release(&address_space->lock);
        return ERR_NOMEMORY;
    }

    if (section)
    {
        reference(&section->header);
        block->section = section;
        block->section_offset = section_offset;

        /* Writable sections without direct-write semantics are mapped
         * copy-on-write so private modifications don't reach the section. */
        if ((section->flags & (MEMORY_SECTION_WRITABLE | MEMORY_SECTION_DIRECT_WRITE)) == MEMORY_SECTION_WRITABLE)
        {
            block_flags |= MEMORY_BLOCK_COPY_ON_WRITE;
        }
    }

    /* Split off the free space below the requested base address. */
    if ((dword_t)block->address < (dword_t)base_address)
    {
        memory_block_t *new_block = mem_tree_alloc();
        new_block->flags = MEMORY_BLOCK_FREE;
        new_block->address = block->address;
        new_block->size = (size_t)(((dword_t)base_address - block->address) / PAGE_SIZE);
        new_block->address_space = address_space;
        new_block->section = NULL;

        /* Shrink the found block and rekey it in both AVL indexes. */
        size_t new_size = block->size - new_block->size;
        avl_tree_change_key(&address_space->by_size_tree, &block->by_size_node, &new_size);
        avl_tree_change_key(&address_space->by_addr_tree, &block->by_addr_node, &base_address);

        avl_tree_insert(&address_space->by_addr_tree, &new_block->by_addr_node);
        avl_tree_insert(&address_space->by_size_tree, &new_block->by_size_node);

        combine_blocks_backward(new_block);
    }

    /* Split off the free space above the allocation. */
    if ((dword_t)block->size > size)
    {
        memory_block_t *new_block = mem_tree_alloc();
        new_block->flags = MEMORY_BLOCK_FREE;
        new_block->address = (qword_t)(block->address + (size * PAGE_SIZE));
        new_block->size = (qword_t)((dword_t)block->size - size);
        new_block->address_space = address_space;
        new_block->section = NULL;

        avl_tree_change_key(&address_space->by_size_tree, &block->by_size_node, &size);

        avl_tree_insert(&address_space->by_addr_tree, &new_block->by_addr_node);
        avl_tree_insert(&address_space->by_size_tree, &new_block->by_size_node);

        combine_blocks_forward(new_block);
    }

    block->flags = block_flags;
    *address = (void*)((dword_t)block->address);
    /* Evictable blocks are tracked for the page eviction scanner. */
    if (block_flags & MEMORY_BLOCK_EVICTABLE) list_append(&address_space->evictable_blocks, &block->evict_link);

    lock_release(&address_space->lock);
    return ERR_SUCCESS;
}
1158
/*
 * Free a block previously allocated with alloc_memory_in_address_space().
 * Releases the block's pages/section, removes it from the eviction list
 * if needed, marks it free and merges it with free neighbors.
 *
 * Returns ERR_SUCCESS, or ERR_INVALID if no allocated block starts at
 * the page containing 'address'.
 */
dword_t free_memory_in_address_space(memory_address_space_t *address_space, void *address)
{
    lock_acquire(&address_space->lock);
    uintptr_t aligned_address = PAGE_ALIGN((uintptr_t)address);

    avl_node_t *node = avl_tree_lookup(&address_space->by_addr_tree, &aligned_address);
    if (node == NULL)
    {
        lock_release(&address_space->lock);
        return ERR_INVALID;
    }

    memory_block_t *mem_block = CONTAINER_OF(node, memory_block_t, by_addr_node);
    if (mem_block->flags & MEMORY_BLOCK_FREE)
    {
        /* Double free / never allocated. */
        lock_release(&address_space->lock);
        return ERR_INVALID;
    }

    release_memory_block(mem_block);

    /* Detach from the eviction scanner before the flags are reset. */
    if (mem_block->flags & MEMORY_BLOCK_EVICTABLE) list_remove(&mem_block->evict_link);
    mem_block->flags = MEMORY_BLOCK_FREE;

    mem_block = combine_blocks_backward(mem_block);
    mem_block = combine_blocks_forward(mem_block);

    lock_release(&address_space->lock);
    return ERR_SUCCESS;
}
1189
1190 dword_t commit_pages(void *address, size_t size)
1191 {
1192     uintptr_t i;
1193     uintptr_t first_page = PAGE_ALIGN((uintptr_t)address);
1194     uintptr_t last_page = PAGE_ALIGN_UP(first_page + size - 1);
1195
1196     EH_TRY
1197     {
1198         for (i = first_page; i <= last_page; i += PAGE_SIZE)
1199         {
1200             volatile uintptr_t value = *(volatile uintptr_t*)i;
1201             UNUSED_PARAMETER(value);
1202         }
1203     }
1204     EH_CATCH
1205     {
1206         EH_ESCAPE(return ERR_BADPTR);
1207     }
1208     EH_DONE;
1209
1210     return ERR_SUCCESS;
1211 }
1212
1213 dword_t uncommit_pages(void *address, size_t size)
1214 {
1215     uintptr_t i;
1216     uintptr_t first_page = PAGE_ALIGN((uintptr_t)address);
1217     uintptr_t last_page = PAGE_ALIGN_UP(first_page + size - 1);
1218
1219     EH_TRY
1220     {
1221         for (i = first_page; i <= last_page; i += PAGE_SIZE)
1222         {
1223             volatile uintptr_t value = *(volatile uintptr_t*)i;
1224             UNUSED_PARAMETER(value);
1225
1226             dword_t ret = unmap_page((void*)i);
1227             if (ret != ERR_SUCCESS) return ret;
1228         }
1229     }
1230     EH_CATCH
1231     {
1232         EH_ESCAPE(return ERR_BADPTR);
1233     }
1234     EH_DONE;
1235
1236     return ERR_SUCCESS;
1237 }
1238
1239 dword_t read_physical(void *physical, void *buffer, dword_t size)
1240 {
1241     critical_t critical;
1242     dword_t ret = ERR_SUCCESS;
1243     dword_t page;
1244     dword_t first_page = PAGE_ALIGN((dword_t)physical);
1245     dword_t last_page = PAGE_ALIGN((dword_t)physical + size - 1);
1246     dword_t offset = PAGE_OFFSET((dword_t)physical);
1247
1248     enter_critical(&critical);
1249
1250     for (page = first_page; page <= last_page; page += PAGE_SIZE)
1251     {
1252         dword_t length = ((page == last_page) ? ((dword_t)physical + size - page) : PAGE_SIZE) - offset;
1253
1254         void *mapping = map_temporary_page((void*)page, PAGE_PRESENT);
1255         if (mapping == NULL) return ERR_NOMEMORY;
1256
1257         memcpy(buffer, (void*)((dword_t)mapping + offset), length);
1258         unmap_temporary_page(mapping);
1259
1260         buffer = (void*)((dword_t)buffer + length);
1261         offset = 0;
1262     }
1263
1264     leave_critical(&critical);
1265     return ret;
1266 }
1267
1268 dword_t write_physical(void *physical, void *buffer, dword_t size)
1269 {
1270     critical_t critical;
1271     dword_t ret = ERR_SUCCESS;
1272     dword_t page;
1273     dword_t first_page = PAGE_ALIGN((dword_t)physical);
1274     dword_t last_page = PAGE_ALIGN((dword_t)physical + size - 1);
1275     dword_t offset = PAGE_OFFSET((dword_t)physical);
1276
1277     enter_critical(&critical);
1278
1279     for (page = first_page; page <= last_page; page += PAGE_SIZE)
1280     {
1281         dword_t length = ((page == last_page) ? ((dword_t)physical + size - page) : PAGE_SIZE) - offset;
1282
1283         void *mapping = map_temporary_page((void*)page, PAGE_PRESENT | PAGE_WRITABLE);
1284         if (mapping == NULL) return ERR_NOMEMORY;
1285
1286         memcpy((void*)((dword_t)mapping + offset), buffer, length);
1287         unmap_temporary_page(mapping);
1288
1289         buffer = (void*)((dword_t)buffer + length);
1290         offset = 0;
1291     }
1292
1293     leave_critical(&critical);
1294     return ret;
1295 }
1296
/*
 * System call: allocate memory in a process's address space.
 *
 * process - target process handle, or INVALID_HANDLE for the caller
 * address - in/out: placement hint / resulting base address
 * size    - requested size in bytes
 * flags   - MEMORY_BLOCK_* flags (sanitized for user-mode callers)
 */
sysret_t syscall_alloc_memory(handle_t process, void **address, dword_t size, dword_t flags)
{
    process_t *proc;
    dword_t ret = ERR_SUCCESS;
    void *safe_address;
    void **local_address = address;

    if (get_previous_mode() == USER_MODE)
    {
        /* User mode may only request access/write bits; force usermode
         * and evictable so user allocations stay pageable. */
        flags &= MEMORY_BLOCK_WRITABLE | MEMORY_BLOCK_ACCESSIBLE;
        flags |= MEMORY_BLOCK_USERMODE | MEMORY_BLOCK_EVICTABLE;

        if (!check_usermode(address, sizeof(void*))) return ERR_BADPTR;

        /* Copy the hint out of user memory under exception protection. */
        EH_TRY
        {
            safe_address = *address;
            local_address = &safe_address;
        }
        EH_CATCH
        {
            EH_ESCAPE(return ERR_BADPTR);
        }
        EH_DONE;
    }

    if (process != INVALID_HANDLE)
    {
        if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
    }
    else
    {
        proc = get_current_process();
        reference(&proc->header);
    }

    ret = alloc_memory_in_address_space(&proc->memory_space, local_address, size, flags, NULL, 0ULL);

    if (get_previous_mode() == USER_MODE)
    {
        /* Best-effort write-back of the resulting address to user memory. */
        EH_TRY *address = safe_address;
        EH_DONE;
    }

    dereference(&proc->header);
    return ret;
}
1344
1345 sysret_t syscall_free_memory(handle_t process, void *address)
1346 {
1347     dword_t ret = ERR_SUCCESS;
1348     process_t *proc;
1349
1350     if (process != INVALID_HANDLE)
1351     {
1352         if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1353     }
1354     else
1355     {
1356         proc = get_current_process();
1357         reference(&proc->header);
1358     }
1359
1360     ret = free_memory_in_address_space(&proc->memory_space, address);
1361
1362     dereference(&proc->header);
1363     return ret;
1364 }
1365
1366 sysret_t syscall_commit_memory(handle_t process, void *address, dword_t size)
1367 {
1368     dword_t ret = ERR_SUCCESS;
1369     process_t *proc;
1370
1371     if (get_previous_mode() == USER_MODE && !check_usermode(address, size)) return ERR_BADPTR;
1372
1373     if (process == INVALID_HANDLE)
1374     {
1375         proc = get_current_process();
1376         reference(&proc->header);
1377     }
1378     else
1379     {
1380         if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1381     }
1382
1383     if (proc->terminating) return ERR_CANCELED;
1384     lock_acquire_shared(&proc->memory_space.lock);
1385
1386     process_t *prev_proc = switch_process(proc);
1387     ret = commit_pages(address, size);
1388     switch_process(prev_proc);
1389
1390     lock_release(&proc->memory_space.lock);
1391     dereference(&proc->header);
1392     return ret;
1393 }
1394
1395 sysret_t syscall_uncommit_memory(handle_t process, void *address, dword_t size)
1396 {
1397     dword_t ret = ERR_SUCCESS;
1398     process_t *proc;
1399
1400     if (get_previous_mode() == USER_MODE && !check_usermode(address, size)) return ERR_BADPTR;
1401
1402     if (process == INVALID_HANDLE)
1403     {
1404         proc = get_current_process();
1405         reference(&proc->header);
1406     }
1407     else
1408     {
1409         if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1410     }
1411
1412     if (proc->terminating) return ERR_CANCELED;
1413     lock_acquire_shared(&proc->memory_space.lock);
1414
1415     process_t *prev_proc = switch_process(proc);
1416     ret = uncommit_pages(address, size);
1417     switch_process(prev_proc);
1418
1419     lock_release(&proc->memory_space.lock);
1420     dereference(&proc->header);
1421     return ret;
1422 }
1423
/*
 * System call: change the protection/attribute flags of a memory block.
 *
 * process - target process handle, or INVALID_HANDLE for the caller
 * address - any address inside the block to modify
 * flags   - new MEMORY_BLOCK_* flags (sanitized for user-mode callers)
 */
sysret_t syscall_set_memory_flags(handle_t process, void *address, dword_t flags)
{
    dword_t ret = ERR_SUCCESS;
    process_t *proc;

    /* Internal flags cannot be set directly; user mode is forced to
     * usermode + evictable. */
    flags &= ~(MEMORY_BLOCK_FREE | MEMORY_BLOCK_COPY_ON_WRITE);
    if (get_previous_mode() == USER_MODE) flags |= MEMORY_BLOCK_USERMODE | MEMORY_BLOCK_EVICTABLE;

    if (process != INVALID_HANDLE)
    {
        if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
    }
    else
    {
        proc = get_current_process();
        reference(&proc->header);
    }

    /* Page-table updates must run with the target's page directory active. */
    process_t *prev_proc = switch_process(proc);
    lock_acquire(&proc->memory_space.lock);

    memory_block_t *block = find_block_by_addr(&proc->memory_space, address);
    if (block == NULL)
    {
        ret = ERR_INVALID;
        goto cleanup;
    }

    if (block->section)
    {
        /* A section mapping may not be made writable unless the section is. */
        if ((flags & MEMORY_BLOCK_WRITABLE) && !(block->section->flags & MEMORY_SECTION_WRITABLE))
        {
            ret = ERR_FORBIDDEN;
            goto cleanup;
        }
    }

    if (block->flags & MEMORY_BLOCK_FREE)
    {
        ret = ERR_INVALID;
        goto cleanup;
    }

    /* Translate block flags into hardware page flags and apply them to
     * every page of the block. */
    dword_t page;
    dword_t start_address = (dword_t)block->address;
    dword_t end_address = start_address + (dword_t)block->size * PAGE_SIZE;
    dword_t page_flags = 0;

    if (flags & MEMORY_BLOCK_ACCESSIBLE) page_flags |= PAGE_PRESENT;
    if (flags & MEMORY_BLOCK_WRITABLE) page_flags |= PAGE_WRITABLE;

    if (flags & MEMORY_BLOCK_USERMODE) page_flags |= PAGE_USERMODE;
    else page_flags |= PAGE_GLOBAL;

    for (page = start_address; page < end_address; page += PAGE_SIZE)
    {
        set_page_flags((void*)page, page_flags);
    }

    /* Keep the eviction list in sync with the evictable bit transition. */
    if (!(block->flags & MEMORY_BLOCK_EVICTABLE) && (flags & MEMORY_BLOCK_EVICTABLE))
    {
        list_append(&proc->memory_space.evictable_blocks, &block->evict_link);
    }
    else if ((block->flags & MEMORY_BLOCK_EVICTABLE) && !(flags & MEMORY_BLOCK_EVICTABLE))
    {
        list_remove(&block->evict_link);
    }

    /* Preserve only the COW bit, then apply the new flags. */
    block->flags &= MEMORY_BLOCK_COPY_ON_WRITE;
    block->flags |= flags;

cleanup:
    lock_release(&proc->memory_space.lock);
    switch_process(prev_proc);
    dereference(&proc->header);
    return ret;
}
1501
/*
 * System call: report address, size and flags of the memory block
 * containing 'address' in the target process.
 *
 * info is written under exception protection; a faulting user pointer
 * yields ERR_BADPTR.
 */
sysret_t syscall_query_memory(handle_t process, void *address, memory_block_info_t *info)
{
    dword_t ret = ERR_SUCCESS;
    process_t *proc;

    if ((get_previous_mode() == USER_MODE) && !check_usermode(info, sizeof(memory_block_info_t)))
    {
        return ERR_BADPTR;
    }

    if (process != INVALID_HANDLE)
    {
        if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
    }
    else
    {
        proc = get_current_process();
        reference(&proc->header);
    }

    /* Shared lock: we only read the block tree. */
    lock_acquire_shared(&proc->memory_space.lock);

    memory_block_t *block = find_block_by_addr(&proc->memory_space, address);
    if (block == NULL)
    {
        ret = ERR_INVALID;
        goto cleanup;
    }

    EH_TRY
    {
        info->address = block->address;
        info->size = block->size;
        info->flags = block->flags;
    }
    EH_CATCH
    {
        ret = ERR_BADPTR;
    }
    EH_DONE;

cleanup:
    lock_release(&proc->memory_space.lock);
    dereference(&proc->header);
    return ret;
}
1548
1549 sysret_t syscall_read_memory(handle_t process, void *address, void *buffer, dword_t size)
1550 {
1551     dword_t ret = ERR_SUCCESS;
1552     process_t *proc;
1553     byte_t page_cache[PAGE_SIZE];
1554
1555     if (get_previous_mode() == USER_MODE && !check_usermode(buffer, size)) return ERR_BADPTR;
1556
1557     if (process == INVALID_HANDLE)
1558     {
1559         EH_TRY
1560         {
1561             memmove(buffer, address, size);
1562             return ERR_SUCCESS;
1563         }
1564         EH_CATCH
1565         {
1566             EH_ESCAPE(return ERR_FORBIDDEN);
1567         }
1568         EH_DONE;
1569     }
1570
1571     if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1572     if (proc->terminating) return ERR_CANCELED;
1573
1574     lock_acquire_shared(&proc->memory_space.lock);
1575
1576     dword_t page;
1577     dword_t first_page = PAGE_ALIGN((dword_t)address);
1578     dword_t last_page = PAGE_ALIGN((dword_t)address + size - 1);
1579     dword_t offset = PAGE_OFFSET((dword_t)address);
1580
1581     for (page = first_page; page <= last_page; page += PAGE_SIZE)
1582     {
1583         dword_t length = ((page == last_page) ? ((dword_t)address + size - page) : PAGE_SIZE) - offset;
1584
1585         process_t *prev_proc = switch_process(proc);
1586
1587         EH_TRY memcpy(&page_cache[offset], (void*)(page + offset), length);
1588         EH_CATCH ret = ERR_FORBIDDEN;
1589         EH_DONE;
1590
1591         switch_process(prev_proc);
1592         if (ret != ERR_SUCCESS) break;
1593
1594         EH_TRY memcpy(buffer, &page_cache[offset], length);
1595         EH_CATCH ret = ERR_BADPTR;
1596         EH_DONE;
1597
1598         buffer = (void*)((dword_t)buffer + length);
1599         offset = 0;
1600         if (ret != ERR_SUCCESS) break;
1601     }
1602
1603     lock_release(&proc->memory_space.lock);
1604     dereference(&proc->header);
1605     return ret;
1606 }
1607
1608 sysret_t syscall_write_memory(handle_t process, void *address, void *buffer, dword_t size)
1609 {
1610     dword_t ret = ERR_SUCCESS;
1611     process_t *proc;
1612     byte_t page_cache[PAGE_SIZE];
1613
1614     if (get_previous_mode() == USER_MODE && !check_usermode(buffer, size)) return ERR_BADPTR;
1615
1616     if (process == INVALID_HANDLE)
1617     {
1618         EH_TRY
1619         {
1620             memmove(address, buffer, size);
1621             return ERR_SUCCESS;
1622         }
1623         EH_CATCH
1624         {
1625             EH_ESCAPE(return ERR_FORBIDDEN);
1626         }
1627         EH_DONE;
1628     }
1629
1630     if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1631     if (proc->terminating) return ERR_CANCELED;
1632
1633     lock_acquire(&proc->memory_space.lock);
1634
1635     dword_t page;
1636     dword_t first_page = PAGE_ALIGN((dword_t)address);
1637     dword_t last_page = PAGE_ALIGN((dword_t)address + size - 1);
1638     dword_t offset = PAGE_OFFSET((dword_t)address);
1639
1640     for (page = first_page; page <= last_page; page += PAGE_SIZE)
1641     {
1642         dword_t length = ((page == last_page) ? ((dword_t)address + size - page) : PAGE_SIZE) - offset;
1643
1644         EH_TRY memcpy(&page_cache[offset], buffer, length);
1645         EH_CATCH ret = ERR_BADPTR;
1646         EH_DONE;
1647
1648         if (ret != ERR_SUCCESS) break;
1649         process_t *prev_proc = switch_process(proc);
1650
1651         EH_TRY memcpy((void*)(page + offset), &page_cache[offset], length);
1652         EH_CATCH ret = ERR_FORBIDDEN;
1653         EH_DONE;
1654
1655         switch_process(prev_proc);
1656
1657         buffer = (void*)((dword_t)buffer + length);
1658         offset = 0;
1659         if (ret != ERR_SUCCESS) break;
1660     }
1661
1662     lock_release(&proc->memory_space.lock);
1663     dereference(&proc->header);
1664     return ret;
1665 }
1666
1667 void *alloc_pool(void *address, dword_t size, dword_t block_flags)
1668 {
1669     size = PAGE_ALIGN_UP(size);
1670     void *result = address;
1671
1672     if (alloc_memory_in_address_space(&kernel_address_space,
1673                                       &result,
1674                                       size,
1675                                       block_flags,
1676                                       NULL,
1677                                       0ULL) == ERR_SUCCESS)
1678     {
1679         return result;
1680     }
1681     else
1682     {
1683         return NULL;
1684     }
1685 }
1686
/* Return a pool block obtained from alloc_pool() to the kernel address space. */
void free_pool(void *address)
{
    free_memory_in_address_space(&kernel_address_space, address);
}
1691
1692 sysret_t syscall_create_memory_section(const char *name, handle_t file, size_t max_size, dword_t flags, handle_t *handle)
1693 {
1694     dword_t ret = ERR_SUCCESS;
1695     handle_t safe_handle;
1696     char *safe_name = NULL;
1697
1698     flags &= MEMORY_SECTION_WRITABLE | MEMORY_SECTION_DIRECT_WRITE;
1699     if (flags & MEMORY_SECTION_DIRECT_WRITE) flags |= MEMORY_SECTION_WRITABLE;
1700
1701     if (get_previous_mode() == USER_MODE)
1702     {
1703         dword_t name_length = 0;
1704
1705         EH_TRY name_length = strlen(name);
1706         EH_CATCH EH_ESCAPE(return ERR_BADPTR);
1707         EH_DONE;
1708
1709         if (!check_usermode(name, name_length + 1)) return ERR_BADPTR;
1710         if (!check_usermode(handle, sizeof(handle_t))) return ERR_BADPTR;
1711
1712         safe_name = copy_user_string(name);
1713         if (safe_name == NULL) return ERR_BADPTR;
1714     }
1715     else
1716     {
1717         safe_name = (char*)name;
1718     }
1719
1720     memory_section_t *section = (memory_section_t*)malloc(sizeof(memory_section_t));
1721     if (section == NULL)
1722     {
1723         ret = ERR_NOMEMORY;
1724         goto cleanup;
1725     }
1726
1727     file_instance_t *file_instance = NULL;
1728     if (file != INVALID_HANDLE)
1729     {
1730         if (!reference_by_handle(file, OBJECT_FILE_INSTANCE, (object_t**)&file_instance))
1731         {
1732             ret = ERR_INVALID;
1733             goto cleanup;
1734         }
1735     }
1736
1737     list_init(&section->page_list);
1738     section->flags = flags;
1739     section->size = max_size;
1740     section->file = file != INVALID_HANDLE ? file_instance : NULL;
1741
1742     init_object(&section->header, safe_name, OBJECT_MEMORY);
1743     ret = create_object(&section->header);
1744     if (ret != ERR_SUCCESS)
1745     {
1746         if (file_instance) dereference(&file_instance->header);
1747         if (section->header.name) free(section->header.name);
1748         free(section);
1749         section = NULL;
1750         goto cleanup;
1751     }
1752
1753     ret = open_object(&section->header, 0, &safe_handle);
1754     if (ret == ERR_SUCCESS)
1755     {
1756         EH_TRY
1757         {
1758             *handle = safe_handle;
1759         }
1760         EH_CATCH
1761         {
1762             syscall_close_object(safe_handle);
1763             ret = ERR_BADPTR;
1764         }
1765         EH_DONE;
1766     }
1767
1768 cleanup:
1769     if (section) dereference(&section->header);
1770     if (get_previous_mode() == USER_MODE) free(safe_name);
1771
1772     return ret;
1773 }
1774
1775 sysret_t syscall_open_memory_section(const char *name, handle_t *handle)
1776 {
1777     handle_t safe_handle;
1778     char *safe_name = NULL;
1779
1780     if (get_previous_mode() == USER_MODE)
1781     {
1782         dword_t name_length = 0;
1783
1784         EH_TRY name_length = strlen(name);
1785         EH_CATCH EH_ESCAPE(return ERR_BADPTR);
1786         EH_DONE;
1787
1788         if (!check_usermode(name, name_length + 1)) return ERR_BADPTR;
1789         if (!check_usermode(handle, sizeof(handle_t))) return ERR_BADPTR;
1790
1791         safe_name = copy_user_string(name);
1792         if (safe_name == NULL) return ERR_NOMEMORY;
1793     }
1794     else safe_name = (char*)name;
1795
1796     dword_t ret = open_object_by_name(safe_name, OBJECT_MEMORY, 0, &safe_handle);
1797
1798     EH_TRY
1799     {
1800         *handle = safe_handle;
1801     }
1802     EH_CATCH
1803     {
1804         syscall_close_object(safe_handle);
1805         ret = ERR_BADPTR;
1806     }
1807     EH_DONE;
1808
1809     if (get_previous_mode() == USER_MODE) free(safe_name);
1810     return ret;
1811 }
1812
/*
 * System call: map a memory section into a process's address space.
 *
 * process - target process handle, or INVALID_HANDLE for the caller
 * section - handle of the memory section object to map
 * address - in/out: placement hint / resulting base address
 * offset  - page-aligned offset into the section
 * size    - number of bytes to map
 * flags   - MEMORY_BLOCK_* flags for the mapping
 */
sysret_t syscall_map_memory_section(handle_t process, handle_t section, void **address, qword_t offset, size_t size, dword_t flags)
{
    dword_t ret = ERR_SUCCESS;
    process_t *proc = NULL;
    memory_section_t *mem_sec = NULL;
    void *safe_address;

    /* Section offsets must be whole pages. */
    if (PAGE_OFFSET(offset) != 0) return ERR_INVALID;

    if (process != INVALID_HANDLE)
    {
        if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc))
        {
            ret = ERR_INVALID;
            goto cleanup;
        }
    }
    else
    {
        proc = get_current_process();
        reference(&proc->header);
    }

    if (!reference_by_handle(section, OBJECT_MEMORY, (object_t**)&mem_sec))
    {
        ret = ERR_INVALID;
        goto cleanup;
    }

    if (get_previous_mode() == USER_MODE)
    {
        if (!check_usermode(address, sizeof(void*)))
        {
            ret = ERR_BADPTR;
            goto cleanup;
        }

        /* Copy the placement hint out of user memory under protection. */
        EH_TRY safe_address = *address;
        EH_CATCH ret = ERR_BADPTR;
        EH_DONE;

        if (ret != ERR_SUCCESS) goto cleanup;
    }
    else
    {
        safe_address = *address;
    }

    /* A read-only section cannot be mapped writable. */
    if ((flags & MEMORY_BLOCK_WRITABLE) && !(mem_sec->flags & MEMORY_SECTION_WRITABLE))
    {
        ret = ERR_FORBIDDEN;
        goto cleanup;
    }

    ret = alloc_memory_in_address_space(&proc->memory_space, &safe_address, size, flags, mem_sec, offset);
    if (ret != ERR_SUCCESS) goto cleanup;

    /* Best-effort write-back of the mapped address. */
    EH_TRY *address = safe_address;
    EH_DONE;

cleanup:
    if (proc) dereference(&proc->header);
    if (mem_sec) dereference(&mem_sec->header);
    return ret;
}
1878
1879 sysret_t syscall_flush_memory_section(handle_t process, void *address)
1880 {
1881     dword_t ret = ERR_SUCCESS;
1882     process_t *proc = NULL;
1883
1884     if (process != INVALID_HANDLE)
1885     {
1886         if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc))
1887         {
1888             ret = ERR_INVALID;
1889             goto cleanup;
1890         }
1891     }
1892     else
1893     {
1894         proc = get_current_process();
1895         reference(&proc->header);
1896     }
1897
1898     lock_acquire_shared(&proc->memory_space.lock);
1899
1900     memory_block_t *block = find_block_by_addr(&proc->memory_space, address);
1901     if (block == NULL || block->section == NULL)
1902     {
1903         ret = ERR_INVALID;
1904         goto cleanup;
1905     }
1906
1907     if (block->section->file == NULL) goto cleanup;
1908
1909     list_entry_t *ptr;
1910
1911     for (ptr = block->section->page_list.next; ptr != &block->section->page_list; ptr = ptr->next)
1912     {
1913         dword_t bytes_written;
1914         byte_t buffer[PAGE_SIZE];
1915         shared_page_t *shared = CONTAINER_OF(ptr, shared_page_t, link);
1916
1917         ret = read_physical(shared->physical, buffer, PAGE_SIZE);
1918         if (ret != ERR_SUCCESS) continue;
1919
1920         file_instance_t *file = block->section->file;
1921         lock_acquire(&file->global->volume->lock);
1922         ret = file->global->volume->driver->write_file(file, buffer, shared->offset, PAGE_SIZE, &bytes_written);
1923         lock_release(&file->global->volume->lock);
1924         if (ret != ERR_SUCCESS) break;
1925     }
1926
1927 cleanup:
1928     lock_release(&proc->memory_space.lock);
1929     dereference(&proc->header);
1930     return ret;
1931 }
1932
/*
 * Registers a new page file (swap store) backed by the file at `path`.
 *
 * path        - path of the page file to create/open.
 * max_entries - capacity of the store in pages; INVALID_STORE_NUMBER is
 *               reserved as a sentinel, so that value is decremented.
 *
 * Returns ERR_SUCCESS, ERR_FORBIDDEN, ERR_BADPTR, ERR_NOMEMORY, or the
 * error from opening the file.
 */
sysret_t syscall_add_page_file(const char *path, dword_t max_entries)
{
    dword_t ret;
    char *safe_path = NULL;
    /* INVALID_STORE_NUMBER is a sentinel entry index — keep capacity below it. */
    if (max_entries == INVALID_STORE_NUMBER) max_entries--;

    if (get_previous_mode() == USER_MODE)
    {
        /* Only privileged callers may change paging configuration. */
        if (!check_privileges(PRIVILEGE_SET_PAGE_FILE)) return ERR_FORBIDDEN;

        if (path)
        {
            dword_t path_length = 0;

            /* Probe the string under an exception handler, then verify the
             * whole range lies in user space before copying it. */
            EH_TRY path_length = strlen(path);
            EH_CATCH EH_ESCAPE(return ERR_BADPTR);
            EH_DONE;

            if (!check_usermode(path, path_length + 1)) return ERR_BADPTR;

            safe_path = copy_user_string(path);
            if (!safe_path) return ERR_NOMEMORY;
        }
    }
    else safe_path = (char*)path;

    page_store_t *store = (page_store_t*)malloc(sizeof(page_store_t));
    if (store == NULL)
    {
        ret = ERR_NOMEMORY;
        goto cleanup;
    }

    /* One bit per entry tracks which store slots are occupied. */
    store->bitmap = malloc((max_entries + 7) / 8);
    if (store->bitmap == NULL)
    {
        free(store);
        ret = ERR_NOMEMORY;
        goto cleanup;
    }

    memset(store->bitmap, 0, (max_entries + 7) / 8);
    store->num_entries = 0;
    store->max_entries = max_entries;
    list_init(&store->entry_list);

    /* Uncached so evicted pages hit the disk; delete-on-close so the page
     * file disappears when the store is removed. */
    ret = syscall(SYSCALL_OPEN_FILE,
                  safe_path,
                  &store->file_handle,
                  FILE_MODE_READ
                  | FILE_MODE_WRITE
                  | FILE_MODE_NO_CACHE
                  | FILE_MODE_DELETE_ON_CLOSE
                  | FILE_MODE_CREATE
                  | FILE_MODE_TRUNCATE,
                  0);
    if (ret != ERR_SUCCESS)
    {
        free(store->bitmap);
        free(store);
        goto cleanup;
    }

    /* Publish the store for the eviction path. */
    lock_acquire(&page_store_lock);
    list_append(&page_stores, &store->link);
    lock_release(&page_store_lock);

cleanup:
    /* safe_path is only a copy in user mode; kernel mode aliases `path`. */
    if (get_previous_mode() == USER_MODE) free(safe_path);
    return ret;
}
2004
2005 sysret_t syscall_remove_page_file(const char *path)
2006 {
2007     dword_t ret = ERR_SUCCESS;
2008     char *safe_path = NULL;
2009
2010     if (get_previous_mode() == USER_MODE)
2011     {
2012         if (!check_privileges(PRIVILEGE_SET_PAGE_FILE)) return ERR_FORBIDDEN;
2013
2014         if (path)
2015         {
2016             dword_t path_length = 0;
2017
2018             EH_TRY path_length = strlen(path);
2019             EH_CATCH EH_ESCAPE(return ERR_BADPTR);
2020             EH_DONE;
2021
2022             if (!check_usermode(path, path_length + 1)) return ERR_BADPTR;
2023
2024             safe_path = copy_user_string(path);
2025             if (!safe_path) return ERR_NOMEMORY;
2026         }
2027     }
2028     else safe_path = (char*)path;
2029
2030     list_entry_t *ptr;
2031     page_store_t *store;
2032
2033     lock_acquire(&page_store_lock);
2034
2035     for (ptr = page_stores.next; ptr != &page_stores; ptr = ptr->next)
2036     {
2037         store = CONTAINER_OF(ptr, page_store_t, link);
2038
2039         char *name_buffer = NULL;
2040         size_t name_buffer_size = 256;
2041
2042         while (TRUE)
2043         {
2044             char *name_buffer = malloc(name_buffer_size);
2045             if (!name_buffer) break;
2046
2047             ret = syscall(SYSCALL_QUERY_FILE, store->file_handle, name_buffer, name_buffer_size);
2048             if (ret != ERR_SUCCESS) free(name_buffer);
2049             if (ret != ERR_SMALLBUF) break;
2050
2051             name_buffer_size *= 2;
2052         }
2053
2054         if (ret == ERR_SUCCESS)
2055         {
2056             bool_t found = strcmp(name_buffer, safe_path) == 0;
2057             if (name_buffer) free(name_buffer);
2058             if (found) break;
2059         }
2060     }
2061
2062     if (ptr == &page_stores)
2063     {
2064         ret = ERR_NOTFOUND;
2065         lock_release(&page_store_lock);
2066         goto cleanup;
2067     }
2068
2069     list_remove(&store->link);
2070     lock_release(&page_store_lock);
2071
2072     for (ptr = store->entry_list.next; ptr != &store->entry_list; ptr = ptr->next)
2073     {
2074         process_t *old_process;
2075         byte_t buffer[PAGE_SIZE];
2076         dword_t bytes_read;
2077         dword_t page_flags = 0;
2078         page_store_entry_t *entry = CONTAINER_OF(ptr, page_store_entry_t, link);
2079
2080         ret = syscall_read_file(store->file_handle, buffer, (qword_t)entry->number * (qword_t)PAGE_SIZE, PAGE_SIZE, &bytes_read);
2081         if (ret != ERR_SUCCESS) break;
2082
2083         lock_acquire(&entry->address_space->lock);
2084         memory_block_t *block = find_block_by_addr(entry->address_space, entry->address);
2085
2086         if (block->flags & MEMORY_BLOCK_ACCESSIBLE) page_flags |= PAGE_PRESENT;
2087         if ((block->flags & (MEMORY_BLOCK_WRITABLE | MEMORY_BLOCK_COPY_ON_WRITE))
2088             == MEMORY_BLOCK_WRITABLE)
2089         {
2090             page_flags |= PAGE_WRITABLE;
2091         }
2092
2093         if (block->flags & MEMORY_BLOCK_USERMODE) page_flags |= PAGE_USERMODE;
2094         else page_flags |= PAGE_GLOBAL;
2095
2096         if (entry->address_space != &kernel_address_space)
2097         {
2098             old_process = switch_process(CONTAINER_OF(entry->address_space, process_t, memory_space));
2099         }
2100
2101         ret = alloc_page(entry->address, page_flags);
2102         if (ret != ERR_SUCCESS) goto loop_cleanup;
2103
2104         list_entry_t *p;
2105         for (p = store->entry_list.next; p != &store->entry_list; p = ptr->next)
2106         {
2107             page_store_entry_t *other_entry = CONTAINER_OF(ptr, page_store_entry_t, link);
2108
2109             if (entry != other_entry && other_entry->number == entry->number)
2110             {
2111                 list_remove(&other_entry->link);
2112                 list_append(&transition_pages, &other_entry->link);
2113
2114                 other_entry->physical = get_physical_address(entry->address);
2115                 other_entry->number = INVALID_STORE_NUMBER;
2116             }
2117         }
2118
2119         clear_bit(store->bitmap, entry->number);
2120         list_remove(&entry->link);
2121
2122         memcpy(entry->address, buffer, PAGE_SIZE);
2123         free(entry);
2124
2125 loop_cleanup:
2126         if (entry->address_space != &kernel_address_space) switch_process(old_process);
2127         lock_release(&entry->address_space->lock);
2128     }
2129
2130     free(store);
2131
2132 cleanup:
2133     if (ret != ERR_SUCCESS)
2134     {
2135         lock_acquire(&page_store_lock);
2136         list_append(&page_stores, &store->link);
2137         lock_release(&page_store_lock);
2138     }
2139
2140     if (get_previous_mode() == USER_MODE) free(safe_path);
2141     return ret;
2142 }
2143
/* Three-way comparison of two uintptr_t keys, used to order memory blocks
 * by base address in the AVL tree. */
static int compare_address(const void *key1, const void *key2)
{
    const uintptr_t lhs = *(const uintptr_t*)key1;
    const uintptr_t rhs = *(const uintptr_t*)key2;

    if (lhs == rhs) return 0;
    return (lhs < rhs) ? -1 : 1;
}
2153
/* Three-way comparison of two size_t keys, used to order free memory
 * blocks by size in the AVL tree. */
static int compare_size(const void *key1, const void *key2)
{
    const size_t lhs = *(const size_t*)key1;
    const size_t rhs = *(const size_t*)key2;

    if (lhs == rhs) return 0;
    return (lhs < rhs) ? -1 : 1;
}
2163
2164 dword_t create_address_space(void *base_address, dword_t page_count, memory_address_space_t *mem_space)
2165 {
2166     dword_t ret = ERR_NOMEMORY;
2167
2168     mem_space->pool_address = base_address;
2169     mem_space->pool_size = page_count;
2170     AVL_TREE_INIT(&mem_space->by_addr_tree, memory_block_t, by_addr_node, address, compare_address);
2171     AVL_TREE_INIT(&mem_space->by_size_tree, memory_block_t, by_size_node, size, compare_size);
2172     lock_init(&mem_space->lock);
2173     list_init(&mem_space->evictable_blocks);
2174     mem_space->evict_blk_ptr = NULL;
2175     mem_space->evict_page_num = 0;
2176     mem_space->stats.used_virtual = 0;
2177     mem_space->stats.committed = 0;
2178     mem_space->stats.evicted = 0;
2179     mem_space->stats.shared = 0;
2180
2181     if (get_page_directory() != INVALID_PAGE)
2182     {
2183         mem_space->page_directory = create_page_directory();
2184         if (mem_space->page_directory == NULL) return ret;
2185     }
2186     else
2187     {
2188         dword_t *boot_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
2189         mem_space->page_directory = (void*)PAGE_ALIGN(boot_directory[PAGEDIR_SELF_ENTRY]);
2190     }
2191
2192     memory_block_t *initial = mem_tree_alloc();
2193     if (initial != NULL)
2194     {
2195         initial->address = (uintptr_t)base_address;
2196         initial->size = page_count;
2197         initial->flags = MEMORY_BLOCK_FREE;
2198         initial->address_space = mem_space;
2199         initial->section = NULL;
2200
2201         avl_tree_insert(&mem_space->by_addr_tree, &initial->by_addr_node);
2202         avl_tree_insert(&mem_space->by_size_tree, &initial->by_size_node);
2203         ret = ERR_SUCCESS;
2204     }
2205
2206     if (mem_space != &kernel_address_space)
2207     {
2208         list_append(&user_address_spaces, &mem_space->link);
2209     }
2210
2211     return ret;
2212 }
2213
2214 dword_t clone_address_space(memory_address_space_t *original, memory_address_space_t *clone)
2215 {
2216     dword_t i;
2217     dword_t ret = ERR_SUCCESS;
2218
2219     lock_acquire_shared(&original->lock);
2220
2221     clone->pool_address = original->pool_address;
2222     clone->pool_size = original->pool_size;
2223     AVL_TREE_INIT(&clone->by_addr_tree, memory_block_t, by_addr_node, address, NULL);
2224     AVL_TREE_INIT(&clone->by_size_tree, memory_block_t, by_size_node, size, NULL);
2225     lock_init(&clone->lock);
2226     list_init(&clone->evictable_blocks);
2227     clone->evict_blk_ptr = NULL;
2228     clone->evict_page_num = 0;
2229     clone->stats.used_virtual = original->stats.used_virtual;
2230     clone->stats.committed = original->stats.committed;
2231     clone->stats.evicted = original->stats.evicted;
2232     clone->stats.shared = original->stats.committed;
2233
2234     if (original->by_addr_tree.root != NULL)
2235     {
2236         memory_block_t *root_block = CONTAINER_OF(original->by_addr_tree.root, memory_block_t, by_addr_node);
2237         if (!clone_blocks_recursive(clone, root_block))
2238         {
2239             ret = ERR_NOMEMORY;
2240             goto cleanup;
2241         }
2242     }
2243
2244     if (!(clone->page_directory = create_page_directory()))
2245     {
2246         ret = ERR_NOMEMORY;
2247         goto cleanup;
2248     }
2249
2250     dword_t *clone_dir = map_temporary_page(clone->page_directory, PAGE_PRESENT | PAGE_WRITABLE);
2251     bool_t this_directory = original->page_directory == get_page_directory();
2252
2253     dword_t *original_dir;
2254     if (this_directory) original_dir = (dword_t*)PAGE_DIRECTORY_ADDR;
2255     else original_dir = map_temporary_page(original->page_directory, PAGE_PRESENT | PAGE_WRITABLE);
2256
2257     for (i = USER_PAGE_START; i <= USER_PAGE_END; i++)
2258     {
2259         reference_page((void*)PAGE_ALIGN(original_dir[i]));
2260         original_dir[i] &= ~PAGE_WRITABLE;
2261         clone_dir[i] = original_dir[i];
2262         if (this_directory) cpu_invalidate_tlb((void*)(i << 12));
2263     }
2264
2265     if (!this_directory) unmap_temporary_page(original_dir);
2266     unmap_temporary_page(clone_dir);
2267     list_append(&user_address_spaces, &clone->link);
2268
2269 cleanup:
2270     lock_release(&original->lock);
2271     return ret;
2272 }
2273
2274 void bump_address_space(memory_address_space_t *mem_space)
2275 {
2276     list_remove(&mem_space->link);
2277     list_append(&user_address_spaces, &mem_space->link);
2278 }
2279
2280 void delete_address_space(memory_address_space_t *mem_space)
2281 {
2282     ASSERT(get_page_directory() != mem_space->page_directory);
2283     lock_acquire(&mem_space->lock);
2284
2285     if (mem_space->by_addr_tree.root)
2286     {
2287         memory_block_t *root = CONTAINER_OF(mem_space->by_addr_tree.root, memory_block_t, by_addr_node);
2288         free_blocks_recursive(root);
2289         mem_space->by_addr_tree.root = mem_space->by_size_tree.root = NULL;
2290     }
2291
2292     free_physical_page(mem_space->page_directory);
2293     mem_space->page_directory = NULL;
2294
2295     lock_release(&mem_space->lock);
2296 }
2297
2298 static bool_t find_evicted_page(memory_block_t *block, void *address, page_store_t **store, page_store_entry_t **entry)
2299 {
2300     list_entry_t *i;
2301
2302     for (i = transition_pages.next; i != &transition_pages; i = i->next)
2303     {
2304         *entry = CONTAINER_OF(i, page_store_entry_t, link);
2305
2306         if ((*entry)->address_space == block->address_space
2307             && PAGE_ALIGN((dword_t)(*entry)->address) == PAGE_ALIGN((dword_t)address))
2308         {
2309             return TRUE;
2310         }
2311     }
2312
2313     for (i = page_stores.next; i != &page_stores; i = i->next)
2314     {
2315         list_entry_t *j;
2316         *store = CONTAINER_OF(i, page_store_t, link);
2317
2318         for (j = (*store)->entry_list.next; j != &(*store)->entry_list; j = j->next)
2319         {
2320             *entry = CONTAINER_OF(j, page_store_entry_t, link);
2321
2322             if ((*entry)->address_space == block->address_space
2323                 && PAGE_ALIGN((dword_t)(*entry)->address) == PAGE_ALIGN((dword_t)address))
2324             {
2325                 return TRUE;
2326             }
2327         }
2328     }
2329
2330     return FALSE;
2331 }
2332
/*
 * Page fault handler. Resolves demand-paging faults (swap-in from a page
 * store, transition pages, file/section backing, zero-fill) and
 * copy-on-write faults (at both page table and page granularity).
 *
 * address - the faulting linear address (from CR2).
 * regs    - trap frame; regs->error_code distinguishes the fault kind.
 *
 * Returns TRUE if the fault was resolved and the instruction can be
 * retried, FALSE if it is a genuine access violation.
 */
bool_t memory_fault_handler(void *address, registers_t *regs)
{
    int i;
    page_error_t problem;
    dword_t aligned_address = PAGE_ALIGN((dword_t)address);
    dword_t pd_index = ADDR_TO_PDE((dword_t)address);
    dword_t pt_index = ADDR_TO_PTE((dword_t)address);
    /* Page directory / tables reached through the recursive self-mapping. */
    dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
    dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));
    process_t *proc = get_current_process();

    /* User addresses belong to the current process' space; everything
     * else is kernel space. */
    memory_address_space_t *address_space = (proc != NULL && check_usermode(address, 1))
                                            ? &proc->memory_space : &kernel_address_space;
    memory_block_t *block = find_block_by_addr(address_space, address);
    if (block == NULL) return FALSE;

    /* Classify the fault from the hardware error code. */
    if (!(regs->error_code & PAGE_ERROR_PRESENT_FLAG))
    {
        problem = PAGE_ERROR_NOTPRESENT;
    }
    else if (!(block->flags & MEMORY_BLOCK_USERMODE)
        && (regs->error_code & PAGE_ERROR_USERMODE_FLAG))
    {
        problem = PAGE_ERROR_UNPRIVILEGED;
    }
    else if (regs->error_code & PAGE_ERROR_WRITE_FLAG)
    {
        problem = PAGE_ERROR_READONLY;
    }
    else
    {
        KERNEL_CRASH_WITH_REGS("Unknown paging problem", regs);
    }

    /* Demand paging: the block allows access but no page is mapped. */
    if ((block->flags & MEMORY_BLOCK_ACCESSIBLE) && (problem == PAGE_ERROR_NOTPRESENT))
    {
        page_store_t *store = NULL;
        page_store_entry_t *entry = NULL;
        byte_t buffer[PAGE_SIZE];
        dword_t bytes_read;
        dword_t page_flags = 0;

        if (find_evicted_page(block, address, &store, &entry))
        {
            /* Rebuild the PTE flags from the block's attributes. */
            if (block->flags & MEMORY_BLOCK_ACCESSIBLE) page_flags |= PAGE_PRESENT;
            if ((block->flags & (MEMORY_BLOCK_WRITABLE | MEMORY_BLOCK_COPY_ON_WRITE))
                == MEMORY_BLOCK_WRITABLE)
            {
                page_flags |= PAGE_WRITABLE;
            }

            if (block->flags & MEMORY_BLOCK_USERMODE) page_flags |= PAGE_USERMODE;
            else page_flags |= PAGE_GLOBAL;

            if (entry->number != INVALID_STORE_NUMBER)
            {
                /* Page lives in a page store: read it back from disk.
                 * Interrupts are enabled around the blocking file read. */
                cpu_enable_interrupts();
                dword_t ret = syscall_read_file(store->file_handle, buffer, (qword_t)entry->number * (qword_t)PAGE_SIZE, PAGE_SIZE, &bytes_read);
                cpu_disable_interrupts();

                /* Another thread may have mapped the page while we slept. */
                if ((page_directory[pd_index] & PAGE_PRESENT) && (page_table[pt_index] & PAGE_PRESENT))
                {
                    return TRUE;
                }

                if (ret != ERR_SUCCESS) return FALSE;

                ret = alloc_page((void*)aligned_address, page_flags);
                if (ret != ERR_SUCCESS) return FALSE;

                /* Other entries sharing this store slot become transition
                 * pages pointing at the now-resident frame. */
                list_entry_t *ptr;
                for (ptr = store->entry_list.next; ptr != &store->entry_list; ptr = ptr->next)
                {
                    page_store_entry_t *other_entry = CONTAINER_OF(ptr, page_store_entry_t, link);

                    if (entry != other_entry && other_entry->number == entry->number)
                    {
                        list_remove(&other_entry->link);
                        list_append(&transition_pages, &other_entry->link);

                        other_entry->physical = get_physical_address((void*)aligned_address);
                        other_entry->number = INVALID_STORE_NUMBER;
                    }
                }

                clear_bit(store->bitmap, entry->number);
                list_remove(&entry->link);
                free(entry);

                memcpy((void*)aligned_address, buffer, PAGE_SIZE);
                address_space->stats.evicted -= PAGE_SIZE;
                return TRUE;
            }
            else
            {
                /* Transition page: the frame is still in memory — just
                 * re-establish the mapping. */
                if (map_page(entry->physical, entry->address, page_flags) == ERR_SUCCESS)
                {
                    list_remove(&entry->link);
                    free(entry);
                    address_space->stats.evicted -= PAGE_SIZE;
                    return TRUE;
                }
            }

            return FALSE;
        }
        else
        {
            /* First touch: section-backed or zero-fill page. */
            list_entry_t *ptr;
            shared_page_t *page = NULL;
            qword_t offset = block->section_offset + (qword_t)aligned_address - (qword_t)block->address;

            page_flags = PAGE_PRESENT;
            if (block->flags & MEMORY_BLOCK_WRITABLE) page_flags |= PAGE_WRITABLE;

            if (block->flags & MEMORY_BLOCK_USERMODE) page_flags |= PAGE_USERMODE;
            else page_flags |= PAGE_GLOBAL;

            if (block->section && offset < (qword_t)block->section->size)
            {
                ASSERT(PAGE_OFFSET(offset) == 0);

                /* If another mapping already materialized this section
                 * page, share its physical frame. */
                for (ptr = block->section->page_list.next; ptr != &block->section->page_list; ptr = ptr->next)
                {
                    page = CONTAINER_OF(ptr, shared_page_t, link);
                    if (page->offset == offset) break;
                }

                if (ptr != &block->section->page_list)
                {
                    return (map_page(page->physical, (void*)aligned_address, page_flags) == ERR_SUCCESS);
                }
            }

            memset(buffer, 0, PAGE_SIZE);

            /* File-backed: fill the buffer from the file; ERR_BEYOND means
             * reading past EOF, which leaves the tail zero-filled. */
            if (block->section && block->section->file && offset < (qword_t)block->section->size)
            {
                cpu_enable_interrupts();
                file_instance_t *file = block->section->file;
                lock_acquire_shared(&file->global->volume->lock);
                dword_t ret = file->global->volume->driver->read_file(file, buffer, offset, PAGE_SIZE, &bytes_read);
                lock_release(&file->global->volume->lock);
                cpu_disable_interrupts();
                if (ret != ERR_SUCCESS && ret != ERR_BEYOND) return FALSE;
            }

            /* Map writable just long enough to copy the contents in, then
             * drop to the block's real protection. */
            dword_t ret = alloc_page((void*)aligned_address, page_flags | PAGE_WRITABLE);
            if (ret != ERR_SUCCESS) return FALSE;

            memcpy((void*)aligned_address, buffer, PAGE_SIZE);
            set_page_flags((void*)aligned_address, page_flags);

            /* Register the new frame as the section's shared page. */
            if (block->section && offset < (qword_t)block->section->size)
            {
                page = (shared_page_t*)malloc(sizeof(shared_page_t));
                if (page == NULL)
                {
                    free_page((void*)aligned_address);
                    return FALSE;
                }

                page->physical = get_physical_address((void*)aligned_address);
                page->offset = offset;

                list_append(&block->section->page_list, &page->link);
            }

            address_space->stats.committed += PAGE_SIZE;
            return TRUE;
        }
    }

    /* Copy-on-write: write to a writable COW block with a read-only PTE. */
    if ((block->flags & (MEMORY_BLOCK_COPY_ON_WRITE | MEMORY_BLOCK_WRITABLE))
        == (MEMORY_BLOCK_COPY_ON_WRITE | MEMORY_BLOCK_WRITABLE)
        && (problem == PAGE_ERROR_READONLY))
    {
        /* First un-share the page table itself if needed. */
        if (!(page_directory[pd_index] & PAGE_WRITABLE))
        {
            void *table_phys = (void*)PAGE_ALIGN(page_directory[pd_index]);

            if (get_page(table_phys)->ref_count > 1)
            {
                /* Table shared with another process: duplicate it, with
                 * every PTE copied read-only and its frame referenced. */
                void *table_copy = alloc_physical_page();
                if (table_copy == NULL) return FALSE;

                dword_t *temporary = map_temporary_page(table_copy, PAGE_PRESENT | PAGE_WRITABLE);
                if (temporary == NULL)
                {
                    free_physical_page(table_copy);
                    return FALSE;
                }

                for (i = 0; i < PAGE_SIZE / sizeof(dword_t); i++)
                {
                    if (page_table[i])
                    {
                        reference_page((void*)PAGE_ALIGN(page_table[i]));
                        temporary[i] = page_table[i] & ~PAGE_WRITABLE;
                    }
                }

                unmap_temporary_page(temporary);

                reference_page(table_copy);
                dereference_page(table_phys);

                page_directory[pd_index] = PAGE_ALIGN((dword_t)table_copy)
                                           | PAGE_OFFSET(page_directory[pd_index])
                                           | PAGE_WRITABLE;
                cpu_invalidate_tlb(page_table);
            }
            else
            {
                /* Sole owner: make the table writable but demote all its
                 * PTEs to read-only so individual pages still COW. */
                page_directory[pd_index] |= PAGE_WRITABLE;
                cpu_invalidate_tlb(page_table);

                for (i = 0; i < PAGE_SIZE / sizeof(dword_t); i++)
                {
                    page_table[i] &= ~PAGE_WRITABLE;
                    cpu_invalidate_tlb((void*)((pd_index << 22) | (i << 12)));
                }
            }
        }

        /* Then un-share the page itself. */
        if (!(page_table[pt_index] & PAGE_WRITABLE))
        {
            void *phys = (void*)PAGE_ALIGN(page_table[pt_index]);

            if (get_page(phys)->ref_count > 1)
            {
                /* Frame shared: copy its contents into a private frame. */
                void *page_copy = alloc_physical_page();
                if (page_copy == NULL) return FALSE;

                write_physical(page_copy, (void*)PAGE_ALIGN((dword_t)address), PAGE_SIZE);
                reference_page(page_copy);
                dereference_page(phys);

                page_table[pt_index] = PAGE_ALIGN((dword_t)page_copy)
                                       | PAGE_OFFSET(page_table[pt_index])
                                       | PAGE_WRITABLE;
                cpu_invalidate_tlb((void*)aligned_address);
            }
            else
            {
                /* Sole owner: just restore write access. */
                page_table[pt_index] |= PAGE_WRITABLE;
                cpu_invalidate_tlb((void*)aligned_address);
            }
        }

        return TRUE;
    }

    return FALSE;
}
2588
/*
 * Boot-time memory manager initialization.
 *
 * Walks the multiboot memory map to build the free physical page stack,
 * allocates the page_t descriptor array, creates the kernel and mapping
 * address spaces, and finally unhooks the identity/user mappings left by
 * the boot loader.
 *
 * mmap            - multiboot memory map tag.
 * lowest_physical - lowest physical address available for allocation
 *                   (everything below is boot/kernel reserved).
 */
void memory_init(multiboot_tag_mmap_t *mmap, uintptr_t lowest_physical)
{
    dword_t i, j;
    dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;

    fix_overlapping_sections(mmap);

    log_write(LOG_NORMAL, "Memory map:\nBase\t\t\tLength\t\t\tType\n");
    log_write(LOG_NORMAL, "------------------------------------------------------------\n");

    multiboot_mmap_entry_t *entry;

    for (entry = (multiboot_mmap_entry_t*)(mmap + 1);
         (uintptr_t)entry < ((uintptr_t)mmap + mmap->size);
         entry = (multiboot_mmap_entry_t*)((uintptr_t)entry + mmap->entry_size))
    {
        log_write(LOG_NORMAL, "0x%08X%08X\t0x%08X%08X\t%s\n",
                  entry->base_high,
                  entry->base_low,
                  entry->length_high,
                  entry->length_low,
                  (entry->type == 1) ? "Usable" : "Not Usable");

        /* Only usable regions entirely below 4 GiB are considered. */
        if (entry->type == 1
            && entry->base_high == 0
            && entry->length_high == 0
            && entry->length_low < (0xFFFFFFFF - entry->base_low)
            && entry->length_low > 0)
        {
            dword_t start_addr = entry->base_low;
            if (start_addr < lowest_physical) start_addr = lowest_physical;
            start_addr = PAGE_ALIGN_UP(start_addr);
            dword_t end_addr = PAGE_ALIGN_UP(entry->base_low + entry->length_low);
            dword_t page = end_addr - PAGE_SIZE;

            /* Push pages onto the free stack from the top of the region
             * down; pages from the bottom (start_addr) are consumed as
             * needed to map the stack's own backing storage. */
            while (page >= start_addr)
            {
                dword_t stack_address = (dword_t)&physical_memory_stack[num_free_pages];
                dword_t pd_index = ADDR_TO_PDE(stack_address);
                dword_t pt_index = ADDR_TO_PTE(stack_address);
                dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + pd_index * PAGE_SIZE);

                /* Missing page table for the stack slot: take a frame from
                 * the bottom of this region and retry the same `page`. */
                if (!(page_directory[pd_index] & PAGE_PRESENT))
                {
                    page_directory[pd_index] = start_addr | PAGE_PRESENT | PAGE_WRITABLE | PAGE_GLOBAL;
                    start_addr += PAGE_SIZE;
                    cpu_invalidate_tlb(page_table);
                    memset(page_table, 0, PAGE_SIZE);
                    total_physical_pages++;
                    continue;
                }

                /* Missing stack page itself: same strategy. */
                if (!(page_table[pt_index] & PAGE_PRESENT))
                {
                    page_table[pt_index] = start_addr | PAGE_PRESENT | PAGE_WRITABLE | PAGE_GLOBAL;
                    start_addr += PAGE_SIZE;
                    cpu_invalidate_tlb((void*)stack_address);
                    total_physical_pages++;
                    continue;
                }

                free_physical_page((void*)page);
                page -= PAGE_SIZE;
            }
        }
    }

    log_write(LOG_NORMAL, "------------------------------------------------------------\n");
    total_physical_pages += num_free_pages;
    /* The page_t descriptor array lives just below the kernel pool. */
    pages = (page_t*)(KERNEL_POOL_START - total_physical_pages * sizeof(page_t));

    /* Map the virtual range backing the descriptor array. */
    for (i = PAGE_ALIGN((uintptr_t)pages); i < KERNEL_POOL_START; i += PAGE_SIZE)
    {
        dword_t pd_index = ADDR_TO_PDE(i);
        dword_t pt_index = ADDR_TO_PTE(i);
        dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + pd_index * PAGE_SIZE);

        if (!(page_directory[pd_index] & PAGE_PRESENT))
        {
            page_directory[pd_index] = (uintptr_t)alloc_physical_page() | PAGE_PRESENT | PAGE_WRITABLE | PAGE_GLOBAL;
            cpu_invalidate_tlb(page_table);
            memset(page_table, 0, PAGE_SIZE);
        }

        if (!(page_table[pt_index] & PAGE_PRESENT))
        {
            page_table[pt_index] = (uintptr_t)alloc_physical_page() | PAGE_PRESENT | PAGE_WRITABLE | PAGE_GLOBAL;
            cpu_invalidate_tlb((void*)i);
        }
    }

    /* Populate descriptors: first the free pages on the stack... */
    dword_t pages_inserted = 0;

    for (i = 0; i < num_free_pages; i++)
    {
        pages[pages_inserted].phys_addr = PAGE_ALIGN((dword_t)physical_memory_stack[i]);
        pages[pages_inserted].ref_count = 0;
        pages_inserted++;
    }

    /* ...then every frame already mapped into the kernel region. */
    for (i = KERNEL_PAGE_START; i <= KERNEL_PAGE_END; i++)
    {
        dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + i * PAGE_SIZE);
        if (!(page_directory[i] & PAGE_PRESENT)) continue;

        for (j = 0; j < PAGE_SIZE / sizeof(dword_t); j++)
        {
            /* Frames below lowest_physical are permanently reserved and
             * get no descriptor. */
            if (PAGE_ALIGN(page_table[j]) < lowest_physical) continue;

            if (page_table[j] & PAGE_PRESENT)
            {
                pages[pages_inserted].phys_addr = PAGE_ALIGN((dword_t)page_table[j]);
                pages[pages_inserted].ref_count = 0;
                pages_inserted++;
            }
        }
    }

    ASSERT(pages_inserted == total_physical_pages);
    /* Sort by physical address so get_page() can binary-search. */
    qsort(pages, total_physical_pages, sizeof(page_t), compare_page);

    init_semaphore(&temporary_page_semaphore, TEMPORARY_PAGES, TEMPORARY_PAGES);

    if (create_address_space((void*)KERNEL_POOL_START,
                             (KERNEL_POOL_END - KERNEL_POOL_START + PAGE_SIZE - 1) / PAGE_SIZE,
                             &kernel_address_space) != ERR_SUCCESS)
    {
        KERNEL_CRASH("Unable to create kernel address space");
    }

    if (create_address_space((void*)MAPPING_START,
                             (MAPPING_END - MAPPING_START + PAGE_SIZE - 1) / PAGE_SIZE,
                             &mapping_space) != ERR_SUCCESS)
    {
        KERNEL_CRASH("Unable to create mapping space");
    }

    /* Record the boot page directory via its recursive self-mapping. */
    set_page_directory((void*)PAGE_ALIGN(page_directory[PAGEDIR_SELF_ENTRY]));

    /* Take a reference on every mapped kernel frame so COW/eviction
     * bookkeeping is consistent from here on. */
    for (i = KERNEL_PAGE_START; i <= KERNEL_PAGE_END; i++)
    {
        dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + i * PAGE_SIZE);
        if (!(page_directory[i] & PAGE_PRESENT)) continue;

        for (j = 0; j < PAGE_SIZE / sizeof(dword_t); j++)
        {
            if (page_table[j] & PAGE_PRESENT) reference_page((void*)PAGE_ALIGN(page_table[j]));
        }
    }

    /* Drop the boot loader's user-range mappings and reload CR3 to flush
     * the whole TLB. */
    for (i = USER_PAGE_START; i <= USER_PAGE_END; i++) page_directory[i] = 0;
    set_page_directory(get_page_directory());
}