03458e3e1e367beb235d5a8788ac584428fedbad
[monolithium.git] / kernel / src / memory / memory.c
1 /*
2  * memory.c
3  *
4  * Copyright (C) 2016 Aleksandar Andrejevic <theflash@sdf.lonestar.org>
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU Affero General Public License as
8  * published by the Free Software Foundation, either version 3 of the
9  * License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Affero General Public License for more details.
15  *
16  * You should have received a copy of the GNU Affero General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19
20 #include <memory.h>
21 #include <exception.h>
22 #include <process.h>
23 #include <syscalls.h>
24 #include <heap.h>
25 #include <cpu.h>
26
/* Stack of free physical frames, mapped at a fixed virtual address. */
static void **physical_memory_stack = (void**)MEM_STACK_VIRT_ADDR;
static lock_t phys_mem_stack_lock = 0;
/* Sorted array of frame descriptors, searched by get_page(); NULL until init. */
static page_t *pages = NULL;
static void *current_page_directory = INVALID_PAGE;
static memory_address_space_t kernel_address_space;
/* Dedicated space used by the map_memory_* helpers. */
static memory_address_space_t mapping_space;
/* All user address spaces, scanned round-robin by the page evictor. */
static list_entry_t user_address_spaces = { &user_address_spaces, &user_address_spaces };
static dword_t total_physical_pages = 0;
static dword_t num_free_pages = 0;
/* Allocation bitmap for memory_block_t descriptor slots (mem_tree_alloc/free). */
static dword_t mem_tree_bitmap[TOTAL_PAGES / 32];
static lock_t mem_tree_lock = 0;
/* Counts free slots in the temporary-mapping window (TEMPORARY_PAGES). */
static semaphore_t temporary_page_semaphore;
/* TRUE while evict_page() runs, so eviction cannot recurse into itself. */
static bool_t evicting = FALSE;
/* Evicted pages still referenced elsewhere, awaiting a store slot. */
static DECLARE_LIST(transition_pages);
/* Registered page stores (swap backends), guarded by page_store_lock. */
static DECLARE_LIST(page_stores);
static lock_t page_store_lock = 0;

static void *evict_page(void);
45
/* Flush the TLB entry for one virtual page.  The "m" constraint makes GCC
 * emit a memory operand for INVLPG without actually dereferencing it. */
static inline void invalidate_tlb(dword_t *addr)
{
    asm volatile ("invlpg %0" :: "m"(*addr));
}
50
/*
 * Pop a free physical frame off the free-frame stack.  When free memory is
 * at or below EVICTION_THRESHOLD, first try to evict a resident page; the
 * 'evicting' flag stops eviction from recursing into this allocator.
 * Returns INVALID_PAGE when no frame could be produced.
 *
 * NOTE(review): num_free_pages and 'evicting' are read outside the lock
 * here -- presumably acceptable as a heuristic / single-CPU assumption;
 * confirm against the kernel's locking rules.
 */
static inline void *alloc_physical_page(void)
{
    void *page = INVALID_PAGE;

    if (!evicting && num_free_pages <= EVICTION_THRESHOLD)
    {
        evicting = TRUE;
        page = evict_page();
        evicting = FALSE;

        if (page != INVALID_PAGE) return page;
    }

    acquire_lock(&phys_mem_stack_lock);
    if (num_free_pages) page = physical_memory_stack[--num_free_pages];
    release_lock(&phys_mem_stack_lock);

    return page;
}
70
71 static inline void free_physical_page(void *address)
72 {
73     acquire_lock(&phys_mem_stack_lock);
74     physical_memory_stack[num_free_pages++] = address;
75     release_lock(&phys_mem_stack_lock);
76 }
77
78 static int compare_page(const void *a, const void *b)
79 {
80     const page_t *page_a = (const page_t*)a;
81     const page_t *page_b = (const page_t*)b;
82
83     if (page_a->phys_addr < page_b->phys_addr) return -1;
84     else if (page_a->phys_addr > page_b->phys_addr) return 1;
85     else return 0;
86 }
87
88 static page_t *get_page(void *physical)
89 {
90     page_t key = { .phys_addr = (uintptr_t)physical };
91     if (pages == NULL) return NULL;
92     return (page_t*)bsearch(&key, pages, total_physical_pages, sizeof(page_t), compare_page);
93 }
94
95 static inline dword_t reference_page(void *physical)
96 {
97     page_t *page = get_page(physical);
98     if (!page) return 0;
99
100     return ++page->ref_count;
101 }
102
103 static inline dword_t dereference_page(void *physical)
104 {
105     page_t *page = get_page(physical);
106     if (!page) return 0;
107
108     return --page->ref_count;
109 }
110
/*
 * Map one physical page at a virtual address with the given PTE flags (only
 * the low 12 hardware bits are honored).  A page table is allocated and
 * zeroed on demand via the recursive page-directory mapping.
 * Returns ERR_SUCCESS, ERR_NOMEMORY (no frame for a new page table) or
 * ERR_EXISTS (the virtual page is already mapped).
 * Runs with interrupts disabled for the duration.
 */
static dword_t map_page(void *physical, void *virtual, dword_t flags)
{
    dword_t i;
    dword_t ret = ERR_SUCCESS;
    critical_t critical;
    dword_t phys_addr = PAGE_ALIGN((dword_t)physical);
    dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
    dword_t pd_index = ADDR_TO_PDE(virt_addr), pt_index = ADDR_TO_PTE(virt_addr);
    dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
    dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));

    flags &= 0x00000FFF;
    enter_critical(&critical);

    if (!(page_directory[pd_index] & PAGE_PRESENT))
    {
        /* No page table covers this region yet -- allocate and clear one. */
        void *table_page = alloc_physical_page();
        if (table_page == INVALID_PAGE)
        {
            ret = ERR_NOMEMORY;
            goto done;
        }

        reference_page(table_page);
        page_directory[pd_index] = (dword_t)table_page | PAGE_PRESENT | PAGE_WRITABLE;

        /* The recursive-mapping window for this table just changed. */
        invalidate_tlb(page_table);
        for (i = 0; i < PAGE_SIZE / sizeof(dword_t); i++) page_table[i] = 0;
    }

    /* Directory flags only accumulate; real protection lives in the PTE. */
    page_directory[pd_index] |= flags;
    if (page_table[pt_index] & PAGE_PRESENT)
    {
        ret = ERR_EXISTS;
        goto done;
    }

    reference_page((void*)phys_addr);
    page_table[pt_index] = phys_addr | flags | PAGE_PRESENT;
    invalidate_tlb(virtual);

done:
    leave_critical(&critical);
    return ret;
}
156
/*
 * Remove the mapping of one virtual page, dropping the frame's reference.
 * If this leaves the whole page table empty, the table page itself is
 * released (and freed once its own reference count reaches zero).
 * Returns ERR_NOTFOUND when the page was not mapped.
 */
static dword_t unmap_page(void *virtual)
{
    dword_t i, ret = ERR_SUCCESS;
    critical_t critical;
    bool_t empty_dir = TRUE;
    dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
    dword_t pd_index = ADDR_TO_PDE(virt_addr), pt_index = ADDR_TO_PTE(virt_addr);
    dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
    dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));

    enter_critical(&critical);

    if (!(page_directory[pd_index] & PAGE_PRESENT))
    {
        ret = ERR_NOTFOUND;
        goto done;
    }

    if (!(page_table[pt_index] & PAGE_PRESENT))
    {
        ret = ERR_NOTFOUND;
        goto done;
    }

    /* Drop the mapping's reference and clear the PTE. */
    dereference_page((void*)PAGE_ALIGN(page_table[pt_index]));
    page_table[pt_index] = 0;
    invalidate_tlb((dword_t*)virt_addr);

    /* If no entry in this table is live anymore, the table can go too. */
    for (i = 0; i < PAGE_SIZE / sizeof(dword_t); i++) if (page_table[i])
    {
        empty_dir = FALSE;
        break;
    }

    if (empty_dir)
    {
        void *table_page = (void*)PAGE_ALIGN(page_directory[pd_index]);
        page_directory[pd_index] = 0;
        invalidate_tlb(page_table);

        if (dereference_page(table_page) == 0)
        {
            free_physical_page(table_page);
        }
    }

done:
    leave_critical(&critical);
    return ret;
}
207
208 static dword_t get_page_flags(void *virtual)
209 {
210     dword_t virt_addr = PAGE_ALIGN((uintptr_t)virtual);
211     dword_t pd_index = ADDR_TO_PDE(virt_addr), pt_index = ADDR_TO_PTE(virt_addr);
212     dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
213     dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));
214
215     if (!(page_directory[pd_index] & PAGE_PRESENT)) return 0;
216     if (!(page_table[pt_index] & PAGE_PRESENT)) return 0;
217
218     return PAGE_OFFSET(page_table[pt_index]);
219 }
220
/*
 * Replace the protection flags of an existing mapping (low 12 bits only);
 * the frame address is preserved and PAGE_PRESENT is re-asserted.  Flags in
 * the page directory only accumulate -- they are never weakened here.
 * Returns ERR_NOTFOUND when the page is not mapped.
 */
static dword_t set_page_flags(void *virtual, dword_t flags)
{
    dword_t ret = ERR_SUCCESS;
    critical_t critical;
    dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
    dword_t pd_index = ADDR_TO_PDE(virt_addr), pt_index = ADDR_TO_PTE(virt_addr);
    dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
    dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));

    flags &= 0x00000FFF;
    enter_critical(&critical);

    if (!(page_directory[pd_index] & PAGE_PRESENT))
    {
        ret = ERR_NOTFOUND;
        goto done;
    }

    if (!(page_table[pt_index] & PAGE_PRESENT))
    {
        ret = ERR_NOTFOUND;
        goto done;
    }

    page_directory[pd_index] |= flags;
    page_table[pt_index] = PAGE_ALIGN(page_table[pt_index]) | flags | PAGE_PRESENT;
    invalidate_tlb((void*)virt_addr);

done:
    leave_critical(&critical);
    return ret;
}
253
/*
 * Map 'physical' into a free slot of the temporary-mapping window.  The
 * semaphore throttles the number of outstanding temporary mappings; the
 * scan walks the window top-down looking for an unmapped slot.
 * Returns the slot's virtual address, or NULL when mapping failed.
 *
 * NOTE(review): 'i' (int) is compared against temporary_page_semaphore.count
 * -- presumably an unsigned field; verify the comparison cannot promote to
 * unsigned and misbehave near zero.
 */
static void *map_temporary_page(void *physical, dword_t flags)
{
    int i;
    wait_semaphore(&temporary_page_semaphore, 1, NO_TIMEOUT);

    for (i = TEMPORARY_PAGES - 1; i >= temporary_page_semaphore.count ; i--)
    {
        void *address = (void*)(TEMPORARY_ADDR + i * PAGE_SIZE);

        if (get_physical_address(address) == INVALID_PAGE)
        {
            if (map_page(physical, address, flags) == ERR_SUCCESS) return address;
            break;
        }
    }

    return NULL;
}
272
/* Undo map_temporary_page(): unmap the slot and release its semaphore unit. */
static void unmap_temporary_page(void *virtual)
{
    unmap_page(virtual);
    release_semaphore(&temporary_page_semaphore, 1);
}
278
279 static inline dword_t alloc_page(void *virtual, dword_t flags)
280 {
281     void *phys = alloc_physical_page();
282     if (phys == INVALID_PAGE) return ERR_NOMEMORY;
283
284     dword_t ret = map_page(phys, virtual, flags);
285     if (ret != ERR_SUCCESS) free_physical_page(phys);
286
287     return ret;
288 }
289
/*
 * Unmap a virtual page and, once nothing references its frame anymore (or
 * the frame has no descriptor at all), return the frame to the free stack.
 * Returns ERR_INVALID when the page is not mapped.
 */
static inline dword_t free_page(void *virtual)
{
    void *phys = get_physical_address(virtual);
    if (phys == INVALID_PAGE) return ERR_INVALID;

    unmap_page(virtual);

    /* unmap_page() already dropped the mapping's reference; only free the
     * frame if no other mapping still holds it. */
    page_t *page = get_page(phys);
    if (page == NULL || page->ref_count == 0) free_physical_page(phys);

    return ERR_SUCCESS;
}
302
303 static void *evict_page_from_address_space(memory_address_space_t *space)
304 {
305     void *physical = INVALID_PAGE;
306     int chances = 2;
307     dword_t cached_directory[PAGE_SIZE / sizeof(dword_t)];
308     dword_t *table = NULL;
309
310     if (read_physical(space->page_directory, cached_directory, PAGE_SIZE) != ERR_SUCCESS)
311     {
312         return INVALID_PAGE;
313     }
314
315     if (!space->evict_blk_ptr) space->evict_blk_ptr = space->evictable_blocks.next;
316     memory_block_t *block = CONTAINER_OF(space->evict_blk_ptr, memory_block_t, evict_link);
317     dword_t prev_pd_index = (dword_t)-1;
318     dword_t address;
319     dword_t pd_index, pt_index;
320
321     while (chances)
322     {
323         address = (dword_t)block->by_addr_tree.key + space->evict_page_num * PAGE_SIZE;
324         pd_index = ADDR_TO_PDE(address);
325         pt_index = ADDR_TO_PTE(address);
326         if (!(cached_directory[pd_index] & PAGE_PRESENT)) goto next;
327
328         if (prev_pd_index != pd_index)
329         {
330             if (table) unmap_temporary_page(table);
331             table = map_temporary_page((void*)PAGE_ALIGN(cached_directory[pd_index]),
332                                        PAGE_PRESENT | PAGE_WRITABLE);
333             if (table == NULL) break;
334             prev_pd_index = pd_index;
335         }
336
337         if (table[pt_index])
338         {
339             if (!(table[pt_index] & PAGE_ACCESSED))
340             {
341                 physical = (void*)PAGE_ALIGN(table[pt_index]);
342                 break;
343             }
344
345             table[pt_index] &= ~PAGE_ACCESSED;
346         }
347
348 next:
349         space->evict_page_num++;
350
351         if (space->evict_page_num == (dword_t)block->by_size_tree.key)
352         {
353             space->evict_page_num = 0;
354             space->evict_blk_ptr = space->evict_blk_ptr->next;
355
356             if (space->evict_blk_ptr == &space->evictable_blocks)
357             {
358                 space->evict_blk_ptr = space->evict_blk_ptr->next;
359                 chances--;
360             }
361
362             if (space->evict_blk_ptr == &space->evictable_blocks) break;
363             block = CONTAINER_OF(space->evict_blk_ptr, memory_block_t, evict_link);
364         }
365     }
366
367     if (physical == INVALID_PAGE) goto cleanup;
368
369     dword_t i;
370     list_entry_t *ptr;
371     page_store_t *store = NULL;
372     byte_t buffer[PAGE_SIZE];
373
374     dword_t ret = read_physical(physical, buffer, PAGE_SIZE);
375     if (ret != ERR_SUCCESS)
376     {
377         physical = INVALID_PAGE;
378         goto cleanup;
379     }
380
381     for (ptr = page_stores.next; ptr != &page_stores; ptr = ptr->next)
382     {
383         store = CONTAINER_OF(ptr, page_store_t, link);
384
385         for (i = 0; i < store->max_entries; i++) if (!test_bit(store->bitmap, i)) break;
386         if (i == store->max_entries) continue;
387     }
388
389     if (ptr == &page_stores)
390     {
391         physical = INVALID_PAGE;
392         goto cleanup;
393     }
394
395     page_store_entry_t *entry = (page_store_entry_t*)malloc(sizeof(page_store_entry_t));
396     if (entry == NULL)
397     {
398         physical = INVALID_PAGE;
399         goto cleanup;
400     }
401
402     space->stats.evicted += PAGE_SIZE;
403     entry->address = (void*)address;
404     entry->address_space = space;
405     entry->number = INVALID_STORE_NUMBER;
406     entry->physical = INVALID_PAGE;
407
408     if (dereference_page(physical) == 0)
409     {
410         entry->number = i;
411
412         dword_t bytes_written;
413         ret = syscall_write_file(store->file_handle, buffer, (qword_t)entry->number * (qword_t)PAGE_SIZE, PAGE_SIZE, &bytes_written);
414         if (ret != ERR_SUCCESS)
415         {
416             reference_page(physical);
417             free(entry);
418             physical = INVALID_PAGE;
419             goto cleanup;
420         }
421
422         set_bit(store->bitmap, i);
423         list_append(&store->entry_list, &entry->link);
424
425         for (ptr = transition_pages.next; ptr != &transition_pages; ptr = ptr->next)
426         {
427             page_store_entry_t *other_entry = CONTAINER_OF(ptr, page_store_entry_t, link);
428
429             if (other_entry->physical == physical)
430             {
431                 ASSERT(other_entry->number == INVALID_STORE_NUMBER);
432
433                 list_remove(&other_entry->link);
434                 list_append(&store->entry_list, &other_entry->link);
435
436                 other_entry->number = entry->number;
437                 other_entry->physical = INVALID_PAGE;
438             }
439         }
440     }
441     else
442     {
443         entry->physical = physical;
444         list_append(&transition_pages, &entry->link);
445         physical = INVALID_PAGE;
446     }
447
448     table[pt_index] = 0;
449     if (space->page_directory == get_page_directory()) invalidate_tlb((void*)address);
450
451 cleanup:
452     if (table) unmap_temporary_page(table);
453     return physical;
454 }
455
456 static void *evict_page(void)
457 {
458     if (pages == NULL) return INVALID_PAGE;
459
460     list_entry_t *ptr;
461
462     for (ptr = user_address_spaces.next; ptr != &user_address_spaces; ptr = ptr->next)
463     {
464         memory_address_space_t *space = CONTAINER_OF(ptr, memory_address_space_t, link);
465         void *page = evict_page_from_address_space(space);
466         if (page != INVALID_PAGE) return page;
467     }
468
469     return evict_page_from_address_space(&kernel_address_space);
470 }
471
/*
 * Allocate a memory_block_t descriptor from the fixed MEM_TREE_BLOCKS arena:
 * find the first free bitmap slot and make sure its backing page is mapped
 * (faulting one in on first use).  Returns NULL when the arena is exhausted
 * or the backing page cannot be allocated.
 */
static memory_block_t *mem_tree_alloc(void)
{
    dword_t i;
    memory_block_t *block = NULL;

    acquire_lock(&mem_tree_lock);
    for (i = 0; i < TOTAL_PAGES; i++) if (!test_bit(mem_tree_bitmap, i)) break;

    if (i < TOTAL_PAGES)
    {
        block = (memory_block_t*)(MEM_TREE_BLOCKS + i * sizeof(memory_block_t));

        /* Claim the slot only if its page is already mapped or can be. */
        if ((get_physical_address(block) != INVALID_PAGE)
            || (alloc_page(block, PAGE_GLOBAL | PAGE_WRITABLE | PAGE_PRESENT) == ERR_SUCCESS))
        {
            set_bit(mem_tree_bitmap, i);
        }
        else
        {
            block = NULL;
        }
    }

    release_lock(&mem_tree_lock);
    return block;
}
498
/*
 * Return a descriptor to the arena.  If no other live descriptor shares the
 * same backing page, the page itself is freed as well.
 */
static void mem_tree_free(memory_block_t *block)
{
    dword_t index = ((dword_t)block - MEM_TREE_BLOCKS) / sizeof(memory_block_t);
    bool_t busy = FALSE;
    dword_t i, page = PAGE_ALIGN((dword_t)block);

    acquire_lock(&mem_tree_lock);
    clear_bit(mem_tree_bitmap, index);

    /* Scan every slot that lives in the same page as this block. */
    for (i = page; i < page + PAGE_SIZE; i += sizeof(memory_block_t))
    {
        index = (i - MEM_TREE_BLOCKS) / sizeof(memory_block_t);
        if (test_bit(mem_tree_bitmap, index))
        {
            busy = TRUE;
            break;
        }
    }

    if (!busy) free_page((void*)page);
    release_lock(&mem_tree_lock);
}
521
522 static memory_block_t *find_block_by_addr_internal(memory_block_t *block, void *address)
523 {
524     qword_t key = (qword_t)(dword_t)address;
525     qword_t start_addr = block->by_addr_tree.key;
526     qword_t end_addr = start_addr + block->by_size_tree.key * PAGE_SIZE;
527
528     if (key >= start_addr && key < end_addr) return block;
529
530     if (key < start_addr)
531     {
532         if (!block->by_addr_tree.left) return NULL;
533
534         memory_block_t *left_block = CONTAINER_OF(block->by_addr_tree.left, memory_block_t, by_addr_tree);
535         return find_block_by_addr_internal(left_block, address);
536     }
537     else
538     {
539         if (!block->by_addr_tree.right) return NULL;
540
541         memory_block_t *right_block = CONTAINER_OF(block->by_addr_tree.right, memory_block_t, by_addr_tree);
542         return find_block_by_addr_internal(right_block, address);
543     }
544 }
545
546 static memory_block_t *find_block_by_addr(memory_address_space_t *space, void *address)
547 {
548     if (!space->by_addr_tree_root) return NULL;
549     memory_block_t *root = CONTAINER_OF(space->by_addr_tree_root, memory_block_t, by_addr_tree);
550     return find_block_by_addr_internal(root, address);
551 }
552
/*
 * Deep-copy the subtree of memory blocks rooted at 'block' into 'space',
 * marking both original and clone copy-on-write.  Returns FALSE (after
 * unlinking this level's clone) when a descriptor allocation fails.
 *
 * NOTE(review): when a later sibling subtree fails, clones already inserted
 * for earlier subtrees are not unwound here -- presumably the caller tears
 * down the whole target space on failure; confirm.
 */
static bool_t clone_blocks_recursive(memory_address_space_t *space, memory_block_t *block)
{
    memory_block_t *clone = mem_tree_alloc();
    if (clone == NULL) return FALSE;

    clone->by_addr_tree.key = block->by_addr_tree.key;
    clone->by_size_tree.key = block->by_size_tree.key;
    block->flags |= MEMORY_BLOCK_COPY_ON_WRITE;
    clone->flags = block->flags;
    clone->address_space = space;
    clone->section = block->section;

    avl_tree_insert(&space->by_addr_tree_root, &clone->by_addr_tree);
    avl_tree_insert(&space->by_size_tree_root, &clone->by_size_tree);

    memory_block_t *left_block = CONTAINER_OF(block->by_addr_tree.left, memory_block_t, by_addr_tree);
    memory_block_t *right_block = CONTAINER_OF(block->by_addr_tree.right, memory_block_t, by_addr_tree);

    if ((block->by_addr_tree.left && !clone_blocks_recursive(space, left_block))
        || (block->by_addr_tree.right && !clone_blocks_recursive(space, right_block)))
    {
        avl_tree_remove(&space->by_addr_tree_root, &clone->by_addr_tree);
        avl_tree_remove(&space->by_size_tree_root, &clone->by_size_tree);
        mem_tree_free(clone);
        return FALSE;
    }

    return TRUE;
}
582
583 static inline void release_memory_block(memory_block_t *block)
584 {
585     dword_t page;
586     dword_t start_address = (dword_t)block->by_addr_tree.key;
587     dword_t end_address = start_address + (dword_t)block->by_size_tree.key * PAGE_SIZE;
588
589     critical_t critical;
590     enter_critical(&critical);
591     void *old_page_dir = get_page_directory();
592     set_page_directory(block->address_space->page_directory);
593
594     for (page = start_address; page < end_address; page += PAGE_SIZE)
595     {
596         free_page((void*)page);
597     }
598
599     set_page_directory(old_page_dir);
600     leave_critical(&critical);
601
602     if (block->section)
603     {
604         dereference(&block->section->header);
605         block->section = NULL;
606     }
607
608     list_entry_t *i;
609
610     for (i = transition_pages.next; i != &transition_pages; i = i->next)
611     {
612         page_store_entry_t *entry = CONTAINER_OF(i, page_store_entry_t, link);
613
614         if (entry->address_space == block->address_space
615             && (dword_t)entry->address >= start_address
616             && ((dword_t)entry->address < end_address))
617         {
618             list_remove(&entry->link);
619             free(entry);
620         }
621     }
622
623     acquire_lock(&page_store_lock);
624
625     for (i = page_stores.next; i != &page_stores; i = i->next)
626     {
627         list_entry_t *j;
628         page_store_t *store = CONTAINER_OF(i, page_store_t, link);
629
630         for (j = store->entry_list.next; j != &store->entry_list; j = j->next)
631         {
632             page_store_entry_t *entry = CONTAINER_OF(j, page_store_entry_t, link);
633
634             if (entry->address_space == block->address_space
635                 && (dword_t)entry->address >= start_address
636                 && ((dword_t)entry->address < end_address))
637             {
638                 if (entry->number != INVALID_STORE_NUMBER) clear_bit(store->bitmap, entry->number);
639                 list_remove(&entry->link);
640                 free(entry);
641             }
642         }
643     }
644
645     release_lock(&page_store_lock);
646 }
647
648 static void free_blocks_recursive(memory_block_t *block)
649 {
650     release_memory_block(block);
651
652     if (block->by_addr_tree.left)
653     {
654         memory_block_t *left_block = CONTAINER_OF(block->by_addr_tree.left, memory_block_t, by_addr_tree);
655         free_blocks_recursive(left_block);
656     }
657
658     if (block->by_addr_tree.right)
659     {
660         memory_block_t *right_block = CONTAINER_OF(block->by_addr_tree.right, memory_block_t, by_addr_tree);
661         free_blocks_recursive(right_block);
662     }
663
664     mem_tree_free(block);
665 }
666
/*
 * In-order search of the by-size AVL tree for a free block of at least
 * 'size' pages; when 'address' is non-NULL the block must also fully
 * contain [address, address + size*PAGE_SIZE).  Equal-sized blocks are
 * chained through next_equal.  Returns NULL when nothing fits.
 */
static memory_block_t *find_free_block_internal(memory_block_t *root, void *address, dword_t size)
{
    avl_tree_t *ptr;

    /* Prefer smaller candidates: descend left while they can still fit. */
    if (root->by_size_tree.left && (dword_t)root->by_size_tree.key > size)
    {
        memory_block_t *left = CONTAINER_OF(root->by_size_tree.left, memory_block_t, by_size_tree);
        memory_block_t *block = find_free_block_internal(left, address, size);
        if (block) return block;
    }

    if ((dword_t)root->by_size_tree.key >= size)
    {
        for (ptr = &root->by_size_tree; ptr != NULL; ptr = ptr->next_equal)
        {
            memory_block_t *block = CONTAINER_OF(ptr, memory_block_t, by_size_tree);

            if (!(block->flags & MEMORY_BLOCK_FREE)) continue;

            if (address != NULL)
            {
                /* Fixed-address request: the block must cover the range. */
                dword_t block_start = (dword_t)block->by_addr_tree.key;
                dword_t block_end = block_start + ((dword_t)block->by_size_tree.key * PAGE_SIZE) - 1;

                dword_t needed_start = (dword_t)address;
                dword_t needed_end = needed_start + (size * PAGE_SIZE) - 1;

                if ((needed_start < block_start) || (needed_end > block_end)) continue;
            }

            return block;
        }
    }

    if (!root->by_size_tree.right) return NULL;
    memory_block_t *right = CONTAINER_OF(root->by_size_tree.right, memory_block_t, by_size_tree);
    return find_free_block_internal(right, address, size);
}
705
706 static memory_block_t *find_free_block(memory_address_space_t *address_space, void *address, dword_t size)
707 {
708     memory_block_t *root_block = CONTAINER_OF(address_space->by_size_tree_root, memory_block_t, by_size_tree);
709     return find_free_block_internal(root_block, address, size);
710 }
711
712 static void *create_page_directory(void)
713 {
714     dword_t *current = (dword_t*)PAGE_DIRECTORY_ADDR;
715     dword_t new_dir_buffer[PAGE_SIZE / sizeof(dword_t)];
716
717     memset(&new_dir_buffer[USER_PAGE_START],
718            0,
719            (USER_PAGE_END - USER_PAGE_START + 1) * sizeof(dword_t));
720
721     memcpy(&new_dir_buffer[KERNEL_PAGE_START],
722            &current[KERNEL_PAGE_START],
723            (KERNEL_PAGE_END - KERNEL_PAGE_START + 1) * sizeof(dword_t));
724
725     void *directory = alloc_physical_page();
726     if (directory == NULL) return NULL;
727
728     new_dir_buffer[PAGEDIR_SELF_ENTRY] = (dword_t)directory | PAGE_PRESENT | PAGE_WRITABLE;
729     write_physical(directory, new_dir_buffer, PAGE_SIZE);
730
731     return directory;
732 }
733
/*
 * Clip overlapping entries of the multiboot memory map in place: for every
 * ordered pair, whichever entry starts inside the other gets its base bumped
 * past the other's end, with its length shrunk (to zero when swallowed).
 */
static void fix_overlapping_sections(multiboot_mmap_t *mmap_addr, dword_t mmap_length)
{
    multiboot_mmap_t *mmap = mmap_addr;

    while ((dword_t)mmap < (dword_t)mmap_addr + mmap_length)
    {
        /* Compare 'mmap' against every entry that precedes it. */
        multiboot_mmap_t *ptr = (multiboot_mmap_t*)mmap_addr;

        while ((dword_t)ptr < (dword_t) mmap)
        {
            qword_t mmap_end = mmap->base + mmap->length;
            qword_t ptr_end = ptr->base + ptr->length;

            if (mmap->base > ptr->base && mmap->base < ptr_end)
            {
                mmap->base = ptr_end;
                if (mmap->base >= mmap_end) mmap->length = 0;
                else mmap->length = mmap_end - mmap->base;
            }
            else if (ptr->base > mmap->base && ptr->base < mmap_end)
            {
                ptr->base = mmap_end;
                if (ptr->base >= ptr_end) ptr->length = 0;
                else ptr->length = ptr_end - ptr->base;
            }

            /* Multiboot mmap entries are variable-sized; 'size' excludes
             * the size field itself. */
            ptr = (multiboot_mmap_t*)((dword_t)ptr + ptr->size + sizeof(dword_t));
        }

        mmap = (multiboot_mmap_t*)((dword_t)mmap + mmap->size + sizeof(dword_t));
    }
}
766
/*
 * Absorb every FREE block that immediately follows 'mem_block' by address:
 * grow mem_block's size key and discard the absorbed descriptors.  Stops at
 * the first non-free neighbor.  Returns mem_block (unchanged pointer).
 */
static inline memory_block_t *combine_blocks_forward(memory_block_t *mem_block)
{
    while (TRUE)
    {
        avl_tree_t *next = avl_get_next_node(&mem_block->by_addr_tree);
        if (!next) break;

        memory_block_t *next_block = CONTAINER_OF(next, memory_block_t, by_addr_tree);
        if (!(next_block->flags & MEMORY_BLOCK_FREE)) break;

        avl_tree_change_key(&mem_block->address_space->by_size_tree_root,
                            &mem_block->by_size_tree,
                            (dword_t)mem_block->by_size_tree.key
                            + (dword_t)next_block->by_size_tree.key);

        avl_tree_remove(&mem_block->address_space->by_addr_tree_root, &next_block->by_addr_tree);
        avl_tree_remove(&mem_block->address_space->by_size_tree_root, &next_block->by_size_tree);
        mem_tree_free(next_block);
    }

    return mem_block;
}
789
/*
 * Merge 'mem_block' into the FREE block immediately preceding it by address,
 * repeatedly: the predecessor's size key grows and mem_block's descriptor is
 * discarded.  Returns the surviving (earliest) block, which may differ from
 * the argument.
 */
static inline memory_block_t *combine_blocks_backward(memory_block_t *mem_block)
{
    while (TRUE)
    {
        avl_tree_t *previous = avl_get_previous_node(&mem_block->by_addr_tree);
        if (!previous) break;

        memory_block_t *prev_block = CONTAINER_OF(previous, memory_block_t, by_addr_tree);
        if (!(prev_block->flags & MEMORY_BLOCK_FREE)) break;

        avl_tree_change_key(&mem_block->address_space->by_size_tree_root,
                            &prev_block->by_size_tree,
                            (dword_t)prev_block->by_size_tree.key
                            + (dword_t)mem_block->by_size_tree.key);

        avl_tree_remove(&mem_block->address_space->by_addr_tree_root, &mem_block->by_addr_tree);
        avl_tree_remove(&mem_block->address_space->by_size_tree_root, &mem_block->by_size_tree);
        mem_tree_free(mem_block);

        /* Continue merging from the survivor. */
        mem_block = prev_block;
    }

    return mem_block;
}
814
815 void memory_cleanup(object_t *obj)
816 {
817     memory_section_t *section = (memory_section_t*)obj;
818     if (section->file) dereference(&section->file->header);
819 }
820
/* Return the physical address of the page directory last loaded via
 * set_page_directory(). */
void *get_page_directory(void)
{
    return current_page_directory;
}
825
826 void set_page_directory(void *phys_addr)
827 {
828     current_page_directory = phys_addr;
829
830     asm volatile ("mov %0, %%eax\n\
831                    mov %%eax, %%cr3" :: "r"(phys_addr));
832 }
833
834 void *get_physical_address(void *virtual)
835 {
836     dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
837     dword_t pd_index = ADDR_TO_PDE(virt_addr), pt_index = ADDR_TO_PTE(virt_addr);
838     dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
839     dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));
840
841     if (!(page_directory[pd_index] & PAGE_PRESENT)) return INVALID_PAGE;
842     if (!(page_table[pt_index] & PAGE_PRESENT)) return INVALID_PAGE;
843
844     return (void*)PAGE_ALIGN(page_table[pt_index]);
845 }
846
/*
 * Map a physically contiguous range at 'virtual' (both aligned down), page
 * by page with the given flags.  On failure every page mapped so far is
 * unmapped and map_page()'s error is returned.
 *
 * NOTE(review): the failure path releases mapping_space.resource, which
 * this function never acquires -- presumably a caller outside this view
 * (map_memory?) holds it; confirm before touching this asymmetry.
 */
dword_t map_memory_internal(void *physical, void *virtual, uintptr_t size, dword_t page_flags)
{
    dword_t i, j;
    dword_t phys_addr = PAGE_ALIGN((dword_t)physical);
    dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
    size = PAGE_ALIGN_UP(size);
    page_flags &= 0xFFF;

    for (i = 0; i < size; i += PAGE_SIZE)
    {
        dword_t ret = map_page((void*)(phys_addr + i), (void*)(virt_addr + i), page_flags);
        if (ret != ERR_SUCCESS)
        {
            /* Roll back everything mapped so far. */
            for (j = 0; j < i; j += PAGE_SIZE) unmap_page((void*)(virt_addr + j));
            release_resource(&mapping_space.resource);
            return ret;
        }
    }

    return ERR_SUCCESS;
}
868
/*
 * Unmap 'size' bytes starting at 'virtual' (range is page-aligned).
 *
 * NOTE(review): unmap_page() itself already dereferences the frame; the
 * extra dereference_page() here presumably balances a reference taken when
 * the range was mapped -- verify against map_memory_internal/map_page
 * before changing.
 */
void unmap_memory_internal(void *virtual, dword_t size)
{
    dword_t i;
    dword_t virt_addr = PAGE_ALIGN((dword_t)virtual);
    size = PAGE_ALIGN_UP(size);

    for (i = 0; i < size; i += PAGE_SIZE)
    {
        void *page_addr = (void*)(virt_addr + i);
        void *physical = get_physical_address(page_addr);

        unmap_page(page_addr);
        dereference_page(physical);
    }
}
884
/*
 * Reserve a block in 'address_space' and map 'size' pages of physically
 * contiguous memory into it.  *virtual may suggest a fixed address (or be
 * NULL for "anywhere"); on success it receives the chosen address.
 * The chosen free block is split as needed: a free remainder before the
 * requested address and/or after the requested size is carved off and
 * re-inserted into the trees.
 * Returns ERR_NOMEMORY when no block fits, or map_memory_internal()'s error.
 *
 * NOTE(review): 'size' is treated as a page count here (block size keys are
 * in pages); the mem_tree_alloc() results below are not checked for NULL --
 * an arena exhaustion would dereference NULL.  Flagged, not fixed, as the
 * unwind path (already-mapped pages, modified trees) is non-trivial.
 */
dword_t map_memory_in_address_space(memory_address_space_t *address_space,
                                    void *physical,
                                    void **virtual,
                                    uintptr_t size,
                                    dword_t block_flags)
{
    dword_t ret;
    void *address = (void*)PAGE_ALIGN((uintptr_t)*virtual);

    acquire_resource_exclusive(&address_space->resource);

    memory_block_t *block = find_free_block(address_space, address, size);
    if (block == NULL)
    {
        release_resource(&address_space->resource);
        return ERR_NOMEMORY;
    }

    /* Translate block flags into hardware page flags. */
    dword_t flags = PAGE_GLOBAL;
    dword_t real_address = (address != NULL) ? (dword_t)address : (dword_t)block->by_addr_tree.key;

    block_flags &= ~MEMORY_BLOCK_EVICTABLE;
    if (block_flags & MEMORY_BLOCK_ACCESSIBLE) flags |= PAGE_PRESENT;
    if (block_flags & MEMORY_BLOCK_WRITABLE) flags |= PAGE_WRITABLE;
    if (block_flags & MEMORY_BLOCK_USERMODE) flags |= PAGE_USERMODE;

    ret = map_memory_internal(physical, (void*)real_address, size, flags);
    if (ret != ERR_SUCCESS)
    {
        release_resource(&address_space->resource);
        return ret;
    }

    /* Split off the free space before the requested address, if any. */
    if ((dword_t)block->by_addr_tree.key < (dword_t)address)
    {
        memory_block_t *new_block = mem_tree_alloc();
        new_block->flags = MEMORY_BLOCK_FREE;
        new_block->by_addr_tree.key = block->by_addr_tree.key;
        new_block->by_size_tree.key = (qword_t)(((dword_t)address - block->by_addr_tree.key) / PAGE_SIZE);
        new_block->address_space = address_space;
        new_block->section = NULL;

        avl_tree_change_key(&address_space->by_size_tree_root,
                            &block->by_size_tree,
                            (dword_t)block->by_size_tree.key - (dword_t)new_block->by_size_tree.key);
        avl_tree_change_key(&address_space->by_addr_tree_root, &block->by_addr_tree, (dword_t)address);

        avl_tree_insert(&address_space->by_addr_tree_root, &new_block->by_addr_tree);
        avl_tree_insert(&address_space->by_size_tree_root, &new_block->by_size_tree);

        combine_blocks_backward(new_block);
    }

    /* Split off the free space after the requested size, if any. */
    if ((dword_t)block->by_size_tree.key > size)
    {
        memory_block_t *new_block = mem_tree_alloc();
        new_block->flags = MEMORY_BLOCK_FREE;
        new_block->by_addr_tree.key = (qword_t)(block->by_addr_tree.key + (size * PAGE_SIZE));
        new_block->by_size_tree.key = (qword_t)((dword_t)block->by_size_tree.key - size);
        new_block->address_space = address_space;
        new_block->section = NULL;

        avl_tree_change_key(&address_space->by_size_tree_root, &block->by_size_tree, size);

        avl_tree_insert(&address_space->by_addr_tree_root, &new_block->by_addr_tree);
        avl_tree_insert(&address_space->by_size_tree_root, &new_block->by_size_tree);

        combine_blocks_forward(new_block);
    }

    block->flags = block_flags;
    *virtual = (void*)((dword_t)block->by_addr_tree.key);

    release_resource(&address_space->resource);
    return ERR_SUCCESS;
}
961
/*
 * Creates a second mapping, inside the global mapping_space, of pages that
 * are already mapped at `virtual`, so they remain reachable regardless of the
 * current address space. Each underlying physical page gains a reference.
 *
 * virtual       - source address (kernel or usermode; decided below)
 * pinned        - in: requested address inside mapping_space (or NULL for
 *                 "any"); out: the pinned alias, with the source's page
 *                 offset re-applied
 * size          - size in bytes; page-aligned up below
 * lock_contents - when TRUE the source block is marked copy-on-write and its
 *                 pages become read-only, and the alias is mapped read-only;
 *                 when FALSE the alias is writable
 *
 * Returns ERR_SUCCESS or ERR_NOMEMORY.
 */
dword_t pin_memory(const void *virtual, void **pinned, uintptr_t size, bool_t lock_contents)
{
    uintptr_t i;
    uintptr_t virt_addr = PAGE_ALIGN((uintptr_t)virtual);
    void *address = (void*)PAGE_ALIGN((uintptr_t)*pinned);
    size = PAGE_ALIGN_UP(size);

    /* Pick the source space by where `virtual` points. */
    memory_address_space_t *address_space = check_usermode(virtual, 1) ? &get_current_process()->memory_space : &kernel_address_space;
    acquire_resource_shared(&address_space->resource);
    acquire_resource_exclusive(&mapping_space.resource);

    memory_block_t *block = find_free_block(&mapping_space, address, size);
    if (block == NULL)
    {
        release_resource(&address_space->resource);
        release_resource(&mapping_space.resource);
        return ERR_NOMEMORY;
    }

    dword_t real_address = (address != NULL) ? (dword_t)address : (dword_t)block->by_addr_tree.key;
    dword_t new_flags = PAGE_PRESENT | PAGE_GLOBAL;
    if (!lock_contents) new_flags |= PAGE_WRITABLE;

    /* Alias every source page into the mapping space, referencing each
     * physical page so it cannot be freed while pinned. */
    for (i = 0; i < size; i += PAGE_SIZE)
    {
        void *virt_page = (void*)(virt_addr + i);
        void *phys_page = get_physical_address(virt_page);

        if (lock_contents)
        {
            /* NOTE: this inner `block` intentionally shadows the outer one —
             * it is the source block, not the mapping-space block. */
            memory_block_t *block = find_block_by_addr(address_space, (void*)(virt_addr + i));
            ASSERT(block != NULL);
            block->flags |= MEMORY_BLOCK_COPY_ON_WRITE;
            set_page_flags(virt_page, get_page_flags(virt_page) & ~PAGE_WRITABLE);
        }

        dword_t ret = map_page(phys_page, (void*)(real_address + i), new_flags);
        ASSERT(ret == ERR_SUCCESS);
        reference_page(phys_page);
    }

    /* Split off the leading part of the free block if it starts before the
     * requested address (same scheme as map_memory_in_address_space). */
    if ((dword_t)block->by_addr_tree.key < (dword_t)address)
    {
        memory_block_t *new_block = mem_tree_alloc();
        new_block->flags = MEMORY_BLOCK_FREE;
        new_block->by_addr_tree.key = block->by_addr_tree.key;
        new_block->by_size_tree.key = (qword_t)(((dword_t)address - block->by_addr_tree.key) / PAGE_SIZE);
        new_block->address_space = &mapping_space;
        new_block->section = NULL;

        avl_tree_change_key(&mapping_space.by_size_tree_root,
                            &block->by_size_tree,
                            (dword_t)block->by_size_tree.key - (dword_t)new_block->by_size_tree.key);
        avl_tree_change_key(&mapping_space.by_addr_tree_root, &block->by_addr_tree, (dword_t)address);

        avl_tree_insert(&mapping_space.by_addr_tree_root, &new_block->by_addr_tree);
        avl_tree_insert(&mapping_space.by_size_tree_root, &new_block->by_size_tree);

        combine_blocks_backward(new_block);
    }

    /* Split off the trailing remainder if the free block is bigger than
     * needed. */
    if ((dword_t)block->by_size_tree.key > size)
    {
        memory_block_t *new_block = mem_tree_alloc();
        new_block->flags = MEMORY_BLOCK_FREE;
        new_block->by_addr_tree.key = (qword_t)(block->by_addr_tree.key + (size * PAGE_SIZE));
        new_block->by_size_tree.key = (qword_t)((dword_t)block->by_size_tree.key - size);
        new_block->address_space = &mapping_space;
        new_block->section = NULL;

        avl_tree_change_key(&mapping_space.by_size_tree_root, &block->by_size_tree, size);

        avl_tree_insert(&mapping_space.by_addr_tree_root, &new_block->by_addr_tree);
        avl_tree_insert(&mapping_space.by_size_tree_root, &new_block->by_size_tree);

        combine_blocks_forward(new_block);
    }

    block->flags = MEMORY_BLOCK_ACCESSIBLE;
    if (!lock_contents) block->flags |= MEMORY_BLOCK_WRITABLE;
    *pinned = (void*)((dword_t)block->by_addr_tree.key) + PAGE_OFFSET((uintptr_t)virtual);

    release_resource(&address_space->resource);
    release_resource(&mapping_space.resource);
    return ERR_SUCCESS;
}
1048
1049 dword_t unmap_memory_in_address_space(memory_address_space_t *address_space, void *virtual)
1050 {
1051     acquire_resource_exclusive(&mapping_space.resource);
1052
1053     avl_tree_t *node = avl_tree_lookup(mapping_space.by_addr_tree_root, (dword_t)virtual);
1054     if (node == NULL)
1055     {
1056         release_resource(&mapping_space.resource);
1057         return ERR_INVALID;
1058     }
1059
1060     memory_block_t *mem_block = CONTAINER_OF(node, memory_block_t, by_addr_tree);
1061     if (mem_block->flags & MEMORY_BLOCK_FREE)
1062     {
1063         release_resource(&mapping_space.resource);
1064         return ERR_INVALID;
1065     }
1066
1067     unmap_memory_internal((void*)((dword_t)mem_block->by_addr_tree.key), (dword_t)mem_block->by_size_tree.key);
1068
1069     mem_block->flags = MEMORY_BLOCK_FREE;
1070     mem_block = combine_blocks_backward(mem_block);
1071     mem_block = combine_blocks_forward(mem_block);
1072
1073     release_resource(&mapping_space.resource);
1074     return ERR_SUCCESS;
1075 }
1076
1077 dword_t map_memory(void *physical, void **virtual, uintptr_t size, dword_t block_flags)
1078 {
1079     return map_memory_in_address_space(&mapping_space, physical, virtual, size, block_flags);
1080 }
1081
1082 dword_t unmap_memory(void *virtual)
1083 {
1084     return unmap_memory_in_address_space(&mapping_space, virtual);
1085 }
1086
1087 dword_t alloc_memory_in_address_space(memory_address_space_t *address_space,
1088                                       void **address,
1089                                       dword_t size,
1090                                       dword_t block_flags,
1091                                       memory_section_t *section,
1092                                       qword_t section_offset)
1093 {
1094     void *base_address = (void*)PAGE_ALIGN((uintptr_t)*address);
1095
1096     block_flags &= ~(MEMORY_BLOCK_FREE | MEMORY_BLOCK_COPY_ON_WRITE);
1097     size = PAGE_ALIGN_UP(size) >> 12;
1098     if (size == 0) return ERR_INVALID;
1099
1100     acquire_resource_exclusive(&address_space->resource);
1101
1102     memory_block_t *block = find_free_block(address_space, base_address, size);
1103     if (block == NULL)
1104     {
1105         release_resource(&address_space->resource);
1106         return ERR_NOMEMORY;
1107     }
1108
1109     if (section)
1110     {
1111         reference(&section->header);
1112         block->section = section;
1113         block->section_offset = section_offset;
1114
1115         if ((section->flags & (MEMORY_SECTION_WRITABLE | MEMORY_SECTION_DIRECT_WRITE)) == MEMORY_SECTION_WRITABLE)
1116         {
1117             block_flags |= MEMORY_BLOCK_COPY_ON_WRITE;
1118         }
1119     }
1120
1121     if ((dword_t)block->by_addr_tree.key < (dword_t)base_address)
1122     {
1123         memory_block_t *new_block = mem_tree_alloc();
1124         new_block->flags = MEMORY_BLOCK_FREE;
1125         new_block->by_addr_tree.key = block->by_addr_tree.key;
1126         new_block->by_size_tree.key = (qword_t)(((dword_t)base_address - block->by_addr_tree.key) / PAGE_SIZE);
1127         new_block->address_space = address_space;
1128         new_block->section = NULL;
1129
1130         avl_tree_change_key(&address_space->by_size_tree_root,
1131                             &block->by_size_tree,
1132                             (dword_t)block->by_size_tree.key - (dword_t)new_block->by_size_tree.key);
1133         avl_tree_change_key(&address_space->by_addr_tree_root, &block->by_addr_tree, (dword_t)base_address);
1134
1135         avl_tree_insert(&address_space->by_addr_tree_root, &new_block->by_addr_tree);
1136         avl_tree_insert(&address_space->by_size_tree_root, &new_block->by_size_tree);
1137
1138         combine_blocks_backward(new_block);
1139     }
1140
1141     if ((dword_t)block->by_size_tree.key > size)
1142     {
1143         memory_block_t *new_block = mem_tree_alloc();
1144         new_block->flags = MEMORY_BLOCK_FREE;
1145         new_block->by_addr_tree.key = (qword_t)(block->by_addr_tree.key + (size * PAGE_SIZE));
1146         new_block->by_size_tree.key = (qword_t)((dword_t)block->by_size_tree.key - size);
1147         new_block->address_space = address_space;
1148         new_block->section = NULL;
1149
1150         avl_tree_change_key(&address_space->by_size_tree_root, &block->by_size_tree, size);
1151
1152         avl_tree_insert(&address_space->by_addr_tree_root, &new_block->by_addr_tree);
1153         avl_tree_insert(&address_space->by_size_tree_root, &new_block->by_size_tree);
1154
1155         combine_blocks_forward(new_block);
1156     }
1157
1158     block->flags = block_flags;
1159     *address = (void*)((dword_t)block->by_addr_tree.key);
1160     if (block_flags & MEMORY_BLOCK_EVICTABLE) list_append(&address_space->evictable_blocks, &block->evict_link);
1161
1162     release_resource(&address_space->resource);
1163     return ERR_SUCCESS;
1164 }
1165
1166 dword_t free_memory_in_address_space(memory_address_space_t *address_space, void *address)
1167 {
1168     acquire_resource_exclusive(&address_space->resource);
1169
1170     avl_tree_t *node = avl_tree_lookup(address_space->by_addr_tree_root, (dword_t)address);
1171     if (node == NULL)
1172     {
1173         release_resource(&address_space->resource);
1174         return ERR_INVALID;
1175     }
1176
1177     memory_block_t *mem_block = CONTAINER_OF(node, memory_block_t, by_addr_tree);
1178     if (mem_block->flags & MEMORY_BLOCK_FREE)
1179     {
1180         release_resource(&address_space->resource);
1181         return ERR_INVALID;
1182     }
1183
1184     release_memory_block(mem_block);
1185
1186     if (mem_block->flags & MEMORY_BLOCK_EVICTABLE) list_remove(&mem_block->evict_link);
1187     mem_block->flags = MEMORY_BLOCK_FREE;
1188
1189     mem_block = combine_blocks_backward(mem_block);
1190     mem_block = combine_blocks_forward(mem_block);
1191
1192     release_resource(&address_space->resource);
1193     return ERR_SUCCESS;
1194 }
1195
1196 dword_t commit_pages(void *address, size_t size)
1197 {
1198     uintptr_t i;
1199     uintptr_t first_page = PAGE_ALIGN((uintptr_t)address);
1200     uintptr_t last_page = PAGE_ALIGN_UP(first_page + size - 1);
1201
1202     EH_TRY
1203     {
1204         for (i = first_page; i <= last_page; i += PAGE_SIZE)
1205         {
1206             volatile uintptr_t value = *(volatile uintptr_t*)i;
1207             UNUSED_PARAMETER(value);
1208         }
1209     }
1210     EH_CATCH
1211     {
1212         EH_ESCAPE(return ERR_BADPTR);
1213     }
1214     EH_DONE;
1215
1216     return ERR_SUCCESS;
1217 }
1218
1219 dword_t uncommit_pages(void *address, size_t size)
1220 {
1221     uintptr_t i;
1222     uintptr_t first_page = PAGE_ALIGN((uintptr_t)address);
1223     uintptr_t last_page = PAGE_ALIGN_UP(first_page + size - 1);
1224
1225     EH_TRY
1226     {
1227         for (i = first_page; i <= last_page; i += PAGE_SIZE)
1228         {
1229             volatile uintptr_t value = *(volatile uintptr_t*)i;
1230             UNUSED_PARAMETER(value);
1231
1232             dword_t ret = unmap_page((void*)i);
1233             if (ret != ERR_SUCCESS) return ret;
1234         }
1235     }
1236     EH_CATCH
1237     {
1238         EH_ESCAPE(return ERR_BADPTR);
1239     }
1240     EH_DONE;
1241
1242     return ERR_SUCCESS;
1243 }
1244
1245 dword_t read_physical(void *physical, void *buffer, dword_t size)
1246 {
1247     critical_t critical;
1248     dword_t ret = ERR_SUCCESS;
1249     dword_t page;
1250     dword_t first_page = PAGE_ALIGN((dword_t)physical);
1251     dword_t last_page = PAGE_ALIGN((dword_t)physical + size - 1);
1252     dword_t offset = PAGE_OFFSET((dword_t)physical);
1253
1254     enter_critical(&critical);
1255
1256     for (page = first_page; page <= last_page; page += PAGE_SIZE)
1257     {
1258         dword_t length = ((page == last_page) ? ((dword_t)physical + size - page) : PAGE_SIZE) - offset;
1259
1260         void *mapping = map_temporary_page((void*)page, PAGE_PRESENT);
1261         if (mapping == NULL) return ERR_NOMEMORY;
1262
1263         memcpy(buffer, (void*)((dword_t)mapping + offset), length);
1264         unmap_temporary_page(mapping);
1265
1266         buffer = (void*)((dword_t)buffer + length);
1267         offset = 0;
1268     }
1269
1270     leave_critical(&critical);
1271     return ret;
1272 }
1273
1274 dword_t write_physical(void *physical, void *buffer, dword_t size)
1275 {
1276     critical_t critical;
1277     dword_t ret = ERR_SUCCESS;
1278     dword_t page;
1279     dword_t first_page = PAGE_ALIGN((dword_t)physical);
1280     dword_t last_page = PAGE_ALIGN((dword_t)physical + size - 1);
1281     dword_t offset = PAGE_OFFSET((dword_t)physical);
1282
1283     enter_critical(&critical);
1284
1285     for (page = first_page; page <= last_page; page += PAGE_SIZE)
1286     {
1287         dword_t length = ((page == last_page) ? ((dword_t)physical + size - page) : PAGE_SIZE) - offset;
1288
1289         void *mapping = map_temporary_page((void*)page, PAGE_PRESENT | PAGE_WRITABLE);
1290         if (mapping == NULL) return ERR_NOMEMORY;
1291
1292         memcpy((void*)((dword_t)mapping + offset), buffer, length);
1293         unmap_temporary_page(mapping);
1294
1295         buffer = (void*)((dword_t)buffer + length);
1296         offset = 0;
1297     }
1298
1299     leave_critical(&critical);
1300     return ret;
1301 }
1302
1303 sysret_t syscall_alloc_memory(handle_t process, void **address, dword_t size, dword_t flags)
1304 {
1305     process_t *proc;
1306     dword_t ret = ERR_SUCCESS;
1307     void *safe_address;
1308     void **local_address = address;
1309
1310     if (get_previous_mode() == USER_MODE)
1311     {
1312         flags &= MEMORY_BLOCK_WRITABLE | MEMORY_BLOCK_ACCESSIBLE;
1313         flags |= MEMORY_BLOCK_USERMODE | MEMORY_BLOCK_EVICTABLE;
1314
1315         if (!check_usermode(address, sizeof(void*))) return ERR_BADPTR;
1316
1317         EH_TRY
1318         {
1319             safe_address = *address;
1320             local_address = &safe_address;
1321         }
1322         EH_CATCH
1323         {
1324             EH_ESCAPE(return ERR_BADPTR);
1325         }
1326         EH_DONE;
1327     }
1328
1329     if (process != INVALID_HANDLE)
1330     {
1331         if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1332     }
1333     else
1334     {
1335         proc = get_current_process();
1336         reference(&proc->header);
1337     }
1338
1339     ret = alloc_memory_in_address_space(&proc->memory_space, local_address, size, flags, NULL, 0ULL);
1340
1341     if (get_previous_mode() == USER_MODE)
1342     {
1343         EH_TRY *address = safe_address;
1344         EH_DONE;
1345     }
1346
1347     dereference(&proc->header);
1348     return ret;
1349 }
1350
1351 sysret_t syscall_free_memory(handle_t process, void *address)
1352 {
1353     dword_t ret = ERR_SUCCESS;
1354     process_t *proc;
1355
1356     if (process != INVALID_HANDLE)
1357     {
1358         if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1359     }
1360     else
1361     {
1362         proc = get_current_process();
1363         reference(&proc->header);
1364     }
1365
1366     ret = free_memory_in_address_space(&proc->memory_space, address);
1367
1368     dereference(&proc->header);
1369     return ret;
1370 }
1371
1372 sysret_t syscall_commit_memory(handle_t process, void *address, dword_t size)
1373 {
1374     dword_t ret = ERR_SUCCESS;
1375     process_t *proc;
1376
1377     if (get_previous_mode() == USER_MODE && !check_usermode(address, size)) return ERR_BADPTR;
1378
1379     if (process == INVALID_HANDLE)
1380     {
1381         proc = get_current_process();
1382         reference(&proc->header);
1383     }
1384     else
1385     {
1386         if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1387     }
1388
1389     if (proc->terminating) return ERR_CANCELED;
1390     acquire_resource_shared(&proc->memory_space.resource);
1391
1392     process_t *prev_proc = switch_process(proc);
1393     ret = commit_pages(address, size);
1394     switch_process(prev_proc);
1395
1396     release_resource(&proc->memory_space.resource);
1397     dereference(&proc->header);
1398     return ret;
1399 }
1400
1401 sysret_t syscall_uncommit_memory(handle_t process, void *address, dword_t size)
1402 {
1403     dword_t ret = ERR_SUCCESS;
1404     process_t *proc;
1405
1406     if (get_previous_mode() == USER_MODE && !check_usermode(address, size)) return ERR_BADPTR;
1407
1408     if (process == INVALID_HANDLE)
1409     {
1410         proc = get_current_process();
1411         reference(&proc->header);
1412     }
1413     else
1414     {
1415         if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1416     }
1417
1418     if (proc->terminating) return ERR_CANCELED;
1419     acquire_resource_shared(&proc->memory_space.resource);
1420
1421     process_t *prev_proc = switch_process(proc);
1422     ret = uncommit_pages(address, size);
1423     switch_process(prev_proc);
1424
1425     release_resource(&proc->memory_space.resource);
1426     dereference(&proc->header);
1427     return ret;
1428 }
1429
/*
 * System call: changes the protection attributes of the block containing
 * `address` in the target process, updating both the block flags and the
 * page table entries of every page in the block.
 */
sysret_t syscall_set_memory_flags(handle_t process, void *address, dword_t flags)
{
    dword_t ret = ERR_SUCCESS;
    process_t *proc;

    /* Callers may never set FREE or COPY_ON_WRITE directly; user mode is
     * additionally forced to usermode + evictable. */
    flags &= ~(MEMORY_BLOCK_FREE | MEMORY_BLOCK_COPY_ON_WRITE);
    if (get_previous_mode() == USER_MODE) flags |= MEMORY_BLOCK_USERMODE | MEMORY_BLOCK_EVICTABLE;

    if (process != INVALID_HANDLE)
    {
        if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
    }
    else
    {
        proc = get_current_process();
        reference(&proc->header);
    }

    /* Switch into the target's address space so set_page_flags below acts
     * on its page tables. */
    process_t *prev_proc = switch_process(proc);
    acquire_resource_exclusive(&proc->memory_space.resource);

    memory_block_t *block = find_block_by_addr(&proc->memory_space, address);
    if (block == NULL)
    {
        ret = ERR_INVALID;
        goto cleanup;
    }

    /* A section-backed block cannot be made writable unless its section is
     * writable. */
    if (block->section)
    {
        if ((flags & MEMORY_BLOCK_WRITABLE) && !(block->section->flags & MEMORY_SECTION_WRITABLE))
        {
            ret = ERR_FORBIDDEN;
            goto cleanup;
        }
    }

    if (block->flags & MEMORY_BLOCK_FREE)
    {
        ret = ERR_INVALID;
        goto cleanup;
    }

    dword_t page;
    dword_t start_address = (dword_t)block->by_addr_tree.key;
    dword_t end_address = start_address + (dword_t)block->by_size_tree.key * PAGE_SIZE;
    dword_t page_flags = 0;

    /* Translate block attributes into page table entry flags. */
    if (flags & MEMORY_BLOCK_ACCESSIBLE) page_flags |= PAGE_PRESENT;
    if (flags & MEMORY_BLOCK_WRITABLE) page_flags |= PAGE_WRITABLE;

    if (flags & MEMORY_BLOCK_USERMODE) page_flags |= PAGE_USERMODE;
    else page_flags |= PAGE_GLOBAL;

    for (page = start_address; page < end_address; page += PAGE_SIZE)
    {
        set_page_flags((void*)page, page_flags);
    }

    /* Keep the eviction list in sync with the EVICTABLE transition. */
    if (!(block->flags & MEMORY_BLOCK_EVICTABLE) && (flags & MEMORY_BLOCK_EVICTABLE))
    {
        list_append(&proc->memory_space.evictable_blocks, &block->evict_link);
    }
    else if ((block->flags & MEMORY_BLOCK_EVICTABLE) && !(flags & MEMORY_BLOCK_EVICTABLE))
    {
        list_remove(&block->evict_link);
    }

    /* Preserve only the COPY_ON_WRITE bit from the old flags, then apply
     * the new attributes on top. */
    block->flags &= MEMORY_BLOCK_COPY_ON_WRITE;
    block->flags |= flags;

cleanup:
    release_resource(&proc->memory_space.resource);
    switch_process(prev_proc);
    dereference(&proc->header);
    return ret;
}
1507
1508 sysret_t syscall_query_memory(handle_t process, void *address, memory_block_info_t *info)
1509 {
1510     dword_t ret = ERR_SUCCESS;
1511     process_t *proc;
1512
1513     if ((get_previous_mode() == USER_MODE) && !check_usermode(info, sizeof(memory_block_info_t)))
1514     {
1515         return ERR_BADPTR;
1516     }
1517
1518     if (process != INVALID_HANDLE)
1519     {
1520         if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1521     }
1522     else
1523     {
1524         proc = get_current_process();
1525         reference(&proc->header);
1526     }
1527
1528     acquire_resource_shared(&proc->memory_space.resource);
1529
1530     memory_block_t *block = find_block_by_addr(&proc->memory_space, address);
1531     if (block == NULL)
1532     {
1533         ret = ERR_INVALID;
1534         goto cleanup;
1535     }
1536
1537     EH_TRY
1538     {
1539         info->address = block->by_addr_tree.key;
1540         info->size = block->by_size_tree.key;
1541         info->flags = block->flags;
1542     }
1543     EH_CATCH
1544     {
1545         ret = ERR_BADPTR;
1546     }
1547     EH_DONE;
1548
1549 cleanup:
1550     release_resource(&proc->memory_space.resource);
1551     dereference(&proc->header);
1552     return ret;
1553 }
1554
1555 sysret_t syscall_read_memory(handle_t process, void *address, void *buffer, dword_t size)
1556 {
1557     dword_t ret = ERR_SUCCESS;
1558     process_t *proc;
1559     byte_t page_cache[PAGE_SIZE];
1560
1561     if (get_previous_mode() == USER_MODE && !check_usermode(buffer, size)) return ERR_BADPTR;
1562
1563     if (process == INVALID_HANDLE)
1564     {
1565         EH_TRY
1566         {
1567             memmove(buffer, address, size);
1568             return ERR_SUCCESS;
1569         }
1570         EH_CATCH
1571         {
1572             EH_ESCAPE(return ERR_FORBIDDEN);
1573         }
1574         EH_DONE;
1575     }
1576
1577     if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1578     if (proc->terminating) return ERR_CANCELED;
1579
1580     acquire_resource_shared(&proc->memory_space.resource);
1581
1582     dword_t page;
1583     dword_t first_page = PAGE_ALIGN((dword_t)address);
1584     dword_t last_page = PAGE_ALIGN((dword_t)address + size - 1);
1585     dword_t offset = PAGE_OFFSET((dword_t)address);
1586
1587     for (page = first_page; page <= last_page; page += PAGE_SIZE)
1588     {
1589         dword_t length = ((page == last_page) ? ((dword_t)address + size - page) : PAGE_SIZE) - offset;
1590
1591         process_t *prev_proc = switch_process(proc);
1592
1593         EH_TRY memcpy(&page_cache[offset], (void*)(page + offset), length);
1594         EH_CATCH ret = ERR_FORBIDDEN;
1595         EH_DONE;
1596
1597         switch_process(prev_proc);
1598         if (ret != ERR_SUCCESS) break;
1599
1600         EH_TRY memcpy(buffer, &page_cache[offset], length);
1601         EH_CATCH ret = ERR_BADPTR;
1602         EH_DONE;
1603
1604         buffer = (void*)((dword_t)buffer + length);
1605         offset = 0;
1606         if (ret != ERR_SUCCESS) break;
1607     }
1608
1609     release_resource(&proc->memory_space.resource);
1610     dereference(&proc->header);
1611     return ret;
1612 }
1613
1614 sysret_t syscall_write_memory(handle_t process, void *address, void *buffer, dword_t size)
1615 {
1616     dword_t ret = ERR_SUCCESS;
1617     process_t *proc;
1618     byte_t page_cache[PAGE_SIZE];
1619
1620     if (get_previous_mode() == USER_MODE && !check_usermode(buffer, size)) return ERR_BADPTR;
1621
1622     if (process == INVALID_HANDLE)
1623     {
1624         EH_TRY
1625         {
1626             memmove(address, buffer, size);
1627             return ERR_SUCCESS;
1628         }
1629         EH_CATCH
1630         {
1631             EH_ESCAPE(return ERR_FORBIDDEN);
1632         }
1633         EH_DONE;
1634     }
1635
1636     if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc)) return ERR_INVALID;
1637     if (proc->terminating) return ERR_CANCELED;
1638
1639     acquire_resource_exclusive(&proc->memory_space.resource);
1640
1641     dword_t page;
1642     dword_t first_page = PAGE_ALIGN((dword_t)address);
1643     dword_t last_page = PAGE_ALIGN((dword_t)address + size - 1);
1644     dword_t offset = PAGE_OFFSET((dword_t)address);
1645
1646     for (page = first_page; page <= last_page; page += PAGE_SIZE)
1647     {
1648         dword_t length = ((page == last_page) ? ((dword_t)address + size - page) : PAGE_SIZE) - offset;
1649
1650         EH_TRY memcpy(&page_cache[offset], buffer, length);
1651         EH_CATCH ret = ERR_BADPTR;
1652         EH_DONE;
1653
1654         if (ret != ERR_SUCCESS) break;
1655         process_t *prev_proc = switch_process(proc);
1656
1657         EH_TRY memcpy((void*)(page + offset), &page_cache[offset], length);
1658         EH_CATCH ret = ERR_FORBIDDEN;
1659         EH_DONE;
1660
1661         switch_process(prev_proc);
1662
1663         buffer = (void*)((dword_t)buffer + length);
1664         offset = 0;
1665         if (ret != ERR_SUCCESS) break;
1666     }
1667
1668     release_resource(&proc->memory_space.resource);
1669     dereference(&proc->header);
1670     return ret;
1671 }
1672
1673 void *alloc_pool(void *address, dword_t size, dword_t block_flags)
1674 {
1675     size = PAGE_ALIGN_UP(size);
1676     void *result = address;
1677
1678     if (alloc_memory_in_address_space(&kernel_address_space,
1679                                       &result,
1680                                       size,
1681                                       block_flags,
1682                                       NULL,
1683                                       0ULL) == ERR_SUCCESS)
1684     {
1685         return result;
1686     }
1687     else
1688     {
1689         return NULL;
1690     }
1691 }
1692
/*
 * Frees a kernel-space block previously obtained from alloc_pool.
 * The status from free_memory_in_address_space is intentionally discarded.
 */
void free_pool(void *address)
{
    free_memory_in_address_space(&kernel_address_space, address);
}
1697
1698 sysret_t syscall_create_memory_section(const char *name, handle_t file, size_t max_size, dword_t flags, handle_t *handle)
1699 {
1700     dword_t ret = ERR_SUCCESS;
1701     handle_t safe_handle;
1702     char *safe_name = NULL;
1703
1704     flags &= MEMORY_SECTION_WRITABLE | MEMORY_SECTION_DIRECT_WRITE;
1705     if (flags & MEMORY_SECTION_DIRECT_WRITE) flags |= MEMORY_SECTION_WRITABLE;
1706
1707     if (get_previous_mode() == USER_MODE)
1708     {
1709         dword_t name_length = 0;
1710
1711         EH_TRY name_length = strlen(name);
1712         EH_CATCH EH_ESCAPE(return ERR_BADPTR);
1713         EH_DONE;
1714
1715         if (!check_usermode(name, name_length + 1)) return ERR_BADPTR;
1716         if (!check_usermode(handle, sizeof(handle_t))) return ERR_BADPTR;
1717
1718         safe_name = copy_user_string(name);
1719         if (safe_name == NULL) return ERR_BADPTR;
1720     }
1721     else
1722     {
1723         safe_name = (char*)name;
1724     }
1725
1726     memory_section_t *section = (memory_section_t*)malloc(sizeof(memory_section_t));
1727     if (section == NULL)
1728     {
1729         ret = ERR_NOMEMORY;
1730         goto cleanup;
1731     }
1732
1733     file_instance_t *file_instance = NULL;
1734     if (file != INVALID_HANDLE)
1735     {
1736         if (!reference_by_handle(file, OBJECT_FILE_INSTANCE, (object_t**)&file_instance))
1737         {
1738             ret = ERR_INVALID;
1739             goto cleanup;
1740         }
1741     }
1742
1743     list_init(&section->page_list);
1744     section->flags = flags;
1745     section->size = max_size;
1746     section->file = file != INVALID_HANDLE ? file_instance : NULL;
1747
1748     init_object(&section->header, safe_name, OBJECT_MEMORY);
1749     ret = create_object(&section->header);
1750     if (ret != ERR_SUCCESS)
1751     {
1752         if (file_instance) dereference(&file_instance->header);
1753         if (section->header.name) free(section->header.name);
1754         free(section);
1755         section = NULL;
1756         goto cleanup;
1757     }
1758
1759     ret = open_object(&section->header, 0, &safe_handle);
1760     if (ret == ERR_SUCCESS)
1761     {
1762         EH_TRY
1763         {
1764             *handle = safe_handle;
1765         }
1766         EH_CATCH
1767         {
1768             syscall_close_object(safe_handle);
1769             ret = ERR_BADPTR;
1770         }
1771         EH_DONE;
1772     }
1773
1774 cleanup:
1775     if (section) dereference(&section->header);
1776     if (get_previous_mode() == USER_MODE) free(safe_name);
1777
1778     return ret;
1779 }
1780
1781 sysret_t syscall_open_memory_section(const char *name, handle_t *handle)
1782 {
1783     handle_t safe_handle;
1784     char *safe_name = NULL;
1785
1786     if (get_previous_mode() == USER_MODE)
1787     {
1788         dword_t name_length = 0;
1789
1790         EH_TRY name_length = strlen(name);
1791         EH_CATCH EH_ESCAPE(return ERR_BADPTR);
1792         EH_DONE;
1793
1794         if (!check_usermode(name, name_length + 1)) return ERR_BADPTR;
1795         if (!check_usermode(handle, sizeof(handle_t))) return ERR_BADPTR;
1796
1797         safe_name = copy_user_string(name);
1798         if (safe_name == NULL) return ERR_NOMEMORY;
1799     }
1800     else safe_name = (char*)name;
1801
1802     dword_t ret = open_object_by_name(safe_name, OBJECT_MEMORY, 0, &safe_handle);
1803
1804     EH_TRY
1805     {
1806         *handle = safe_handle;
1807     }
1808     EH_CATCH
1809     {
1810         syscall_close_object(safe_handle);
1811         ret = ERR_BADPTR;
1812     }
1813     EH_DONE;
1814
1815     if (get_previous_mode() == USER_MODE) free(safe_name);
1816     return ret;
1817 }
1818
/*
 * System call: maps a memory section into the target process' address space
 * at a page-aligned section offset, reporting the chosen base address back
 * through *address.
 */
sysret_t syscall_map_memory_section(handle_t process, handle_t section, void **address, qword_t offset, size_t size, dword_t flags)
{
    dword_t ret = ERR_SUCCESS;
    process_t *proc = NULL;
    memory_section_t *mem_sec = NULL;
    void *safe_address;

    /* Section mappings must start on a page boundary within the section. */
    if (PAGE_OFFSET(offset) != 0) return ERR_INVALID;

    if (process != INVALID_HANDLE)
    {
        if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc))
        {
            ret = ERR_INVALID;
            goto cleanup;
        }
    }
    else
    {
        proc = get_current_process();
        reference(&proc->header);
    }

    if (!reference_by_handle(section, OBJECT_MEMORY, (object_t**)&mem_sec))
    {
        ret = ERR_INVALID;
        goto cleanup;
    }

    if (get_previous_mode() == USER_MODE)
    {
        if (!check_usermode(address, sizeof(void*)))
        {
            ret = ERR_BADPTR;
            goto cleanup;
        }

        /* Capture the requested address from user memory under the fault
         * guard. */
        EH_TRY safe_address = *address;
        EH_CATCH ret = ERR_BADPTR;
        EH_DONE;

        if (ret != ERR_SUCCESS) goto cleanup;
    }
    else
    {
        safe_address = *address;
    }

    /* A writable mapping requires a writable section. */
    if ((flags & MEMORY_BLOCK_WRITABLE) && !(mem_sec->flags & MEMORY_SECTION_WRITABLE))
    {
        ret = ERR_FORBIDDEN;
        goto cleanup;
    }

    ret = alloc_memory_in_address_space(&proc->memory_space, &safe_address, size, flags, mem_sec, offset);
    if (ret != ERR_SUCCESS) goto cleanup;

    /* Best effort: report the resulting base address back to the caller. */
    EH_TRY *address = safe_address;
    EH_DONE;

cleanup:
    if (proc) dereference(&proc->header);
    if (mem_sec) dereference(&mem_sec->header);
    return ret;
}
1884
1885 sysret_t syscall_flush_memory_section(handle_t process, void *address)
1886 {
1887     dword_t ret = ERR_SUCCESS;
1888     process_t *proc = NULL;
1889
1890     if (process != INVALID_HANDLE)
1891     {
1892         if (!reference_by_handle(process, OBJECT_PROCESS, (object_t**)&proc))
1893         {
1894             ret = ERR_INVALID;
1895             goto cleanup;
1896         }
1897     }
1898     else
1899     {
1900         proc = get_current_process();
1901         reference(&proc->header);
1902     }
1903
1904     acquire_resource_shared(&proc->memory_space.resource);
1905
1906     memory_block_t *block = find_block_by_addr(&proc->memory_space, address);
1907     if (block == NULL || block->section == NULL)
1908     {
1909         ret = ERR_INVALID;
1910         goto cleanup;
1911     }
1912
1913     if (block->section->file == NULL) goto cleanup;
1914
1915     list_entry_t *ptr;
1916
1917     for (ptr = block->section->page_list.next; ptr != &block->section->page_list; ptr = ptr->next)
1918     {
1919         dword_t bytes_written;
1920         byte_t buffer[PAGE_SIZE];
1921         shared_page_t *shared = CONTAINER_OF(ptr, shared_page_t, link);
1922
1923         ret = read_physical(shared->physical, buffer, PAGE_SIZE);
1924         if (ret != ERR_SUCCESS) continue;
1925
1926         file_instance_t *file = block->section->file;
1927         acquire_resource_exclusive(&file->global->volume->resource);
1928         ret = file->global->volume->driver->write_file(file, buffer, shared->offset, PAGE_SIZE, &bytes_written);
1929         release_resource(&file->global->volume->resource);
1930         if (ret != ERR_SUCCESS) break;
1931     }
1932
1933 cleanup:
1934     release_resource(&proc->memory_space.resource);
1935     dereference(&proc->header);
1936     return ret;
1937 }
1938
1939 sysret_t syscall_add_page_file(const char *path, dword_t max_entries)
1940 {
1941     dword_t ret;
1942     char *safe_path = NULL;
1943     if (max_entries == INVALID_STORE_NUMBER) max_entries--;
1944
1945     if (get_previous_mode() == USER_MODE)
1946     {
1947         if (!check_privileges(PRIVILEGE_SET_PAGE_FILE)) return ERR_FORBIDDEN;
1948
1949         if (path)
1950         {
1951             dword_t path_length = 0;
1952
1953             EH_TRY path_length = strlen(path);
1954             EH_CATCH EH_ESCAPE(return ERR_BADPTR);
1955             EH_DONE;
1956
1957             if (!check_usermode(path, path_length + 1)) return ERR_BADPTR;
1958
1959             safe_path = copy_user_string(path);
1960             if (!safe_path) return ERR_NOMEMORY;
1961         }
1962     }
1963     else safe_path = (char*)path;
1964
1965     page_store_t *store = (page_store_t*)malloc(sizeof(page_store_t));
1966     if (store == NULL)
1967     {
1968         ret = ERR_NOMEMORY;
1969         goto cleanup;
1970     }
1971
1972     store->bitmap = malloc((max_entries + 7) / 8);
1973     if (store->bitmap == NULL)
1974     {
1975         free(store);
1976         ret = ERR_NOMEMORY;
1977         goto cleanup;
1978     }
1979
1980     memset(store->bitmap, 0, (max_entries + 7) / 8);
1981     store->num_entries = 0;
1982     store->max_entries = max_entries;
1983     list_init(&store->entry_list);
1984
1985     ret = syscall(SYSCALL_OPEN_FILE,
1986                   safe_path,
1987                   &store->file_handle,
1988                   FILE_MODE_READ
1989                   | FILE_MODE_WRITE
1990                   | FILE_MODE_NO_CACHE
1991                   | FILE_MODE_DELETE_ON_CLOSE
1992                   | FILE_MODE_CREATE
1993                   | FILE_MODE_TRUNCATE,
1994                   0);
1995     if (ret != ERR_SUCCESS)
1996     {
1997         free(store->bitmap);
1998         free(store);
1999         goto cleanup;
2000     }
2001
2002     acquire_lock(&page_store_lock);
2003     list_append(&page_stores, &store->link);
2004     release_lock(&page_store_lock);
2005
2006 cleanup:
2007     if (get_previous_mode() == USER_MODE) free(safe_path);
2008     return ret;
2009 }
2010
2011 sysret_t syscall_remove_page_file(const char *path)
2012 {
2013     dword_t ret = ERR_SUCCESS;
2014     char *safe_path = NULL;
2015
2016     if (get_previous_mode() == USER_MODE)
2017     {
2018         if (!check_privileges(PRIVILEGE_SET_PAGE_FILE)) return ERR_FORBIDDEN;
2019
2020         if (path)
2021         {
2022             dword_t path_length = 0;
2023
2024             EH_TRY path_length = strlen(path);
2025             EH_CATCH EH_ESCAPE(return ERR_BADPTR);
2026             EH_DONE;
2027
2028             if (!check_usermode(path, path_length + 1)) return ERR_BADPTR;
2029
2030             safe_path = copy_user_string(path);
2031             if (!safe_path) return ERR_NOMEMORY;
2032         }
2033     }
2034     else safe_path = (char*)path;
2035
2036     list_entry_t *ptr;
2037     page_store_t *store;
2038
2039     acquire_lock(&page_store_lock);
2040
2041     for (ptr = page_stores.next; ptr != &page_stores; ptr = ptr->next)
2042     {
2043         store = CONTAINER_OF(ptr, page_store_t, link);
2044
2045         char *name_buffer = NULL;
2046         size_t name_buffer_size = 256;
2047
2048         while (TRUE)
2049         {
2050             char *name_buffer = malloc(name_buffer_size);
2051             if (!name_buffer) break;
2052
2053             ret = syscall(SYSCALL_QUERY_FILE, store->file_handle, name_buffer, name_buffer_size);
2054             if (ret != ERR_SUCCESS) free(name_buffer);
2055             if (ret != ERR_SMALLBUF) break;
2056
2057             name_buffer_size *= 2;
2058         }
2059
2060         if (ret == ERR_SUCCESS)
2061         {
2062             bool_t found = strcmp(name_buffer, safe_path) == 0;
2063             if (name_buffer) free(name_buffer);
2064             if (found) break;
2065         }
2066     }
2067
2068     if (ptr == &page_stores)
2069     {
2070         ret = ERR_NOTFOUND;
2071         release_lock(&page_store_lock);
2072         goto cleanup;
2073     }
2074
2075     list_remove(&store->link);
2076     release_lock(&page_store_lock);
2077
2078     for (ptr = store->entry_list.next; ptr != &store->entry_list; ptr = ptr->next)
2079     {
2080         process_t *old_process;
2081         byte_t buffer[PAGE_SIZE];
2082         dword_t bytes_read;
2083         dword_t page_flags = 0;
2084         page_store_entry_t *entry = CONTAINER_OF(ptr, page_store_entry_t, link);
2085
2086         ret = syscall_read_file(store->file_handle, buffer, (qword_t)entry->number * (qword_t)PAGE_SIZE, PAGE_SIZE, &bytes_read);
2087         if (ret != ERR_SUCCESS) break;
2088
2089         acquire_resource_exclusive(&entry->address_space->resource);
2090         memory_block_t *block = find_block_by_addr(entry->address_space, entry->address);
2091
2092         if (block->flags & MEMORY_BLOCK_ACCESSIBLE) page_flags |= PAGE_PRESENT;
2093         if ((block->flags & (MEMORY_BLOCK_WRITABLE | MEMORY_BLOCK_COPY_ON_WRITE))
2094             == MEMORY_BLOCK_WRITABLE)
2095         {
2096             page_flags |= PAGE_WRITABLE;
2097         }
2098
2099         if (block->flags & MEMORY_BLOCK_USERMODE) page_flags |= PAGE_USERMODE;
2100         else page_flags |= PAGE_GLOBAL;
2101
2102         if (entry->address_space != &kernel_address_space)
2103         {
2104             old_process = switch_process(CONTAINER_OF(entry->address_space, process_t, memory_space));
2105         }
2106
2107         ret = alloc_page(entry->address, page_flags);
2108         if (ret != ERR_SUCCESS) goto loop_cleanup;
2109
2110         list_entry_t *p;
2111         for (p = store->entry_list.next; p != &store->entry_list; p = ptr->next)
2112         {
2113             page_store_entry_t *other_entry = CONTAINER_OF(ptr, page_store_entry_t, link);
2114
2115             if (entry != other_entry && other_entry->number == entry->number)
2116             {
2117                 list_remove(&other_entry->link);
2118                 list_append(&transition_pages, &other_entry->link);
2119
2120                 other_entry->physical = get_physical_address(entry->address);
2121                 other_entry->number = INVALID_STORE_NUMBER;
2122             }
2123         }
2124
2125         clear_bit(store->bitmap, entry->number);
2126         list_remove(&entry->link);
2127
2128         memcpy(entry->address, buffer, PAGE_SIZE);
2129         free(entry);
2130
2131 loop_cleanup:
2132         if (entry->address_space != &kernel_address_space) switch_process(old_process);
2133         release_resource(&entry->address_space->resource);
2134     }
2135
2136     free(store);
2137
2138 cleanup:
2139     if (ret != ERR_SUCCESS)
2140     {
2141         acquire_lock(&page_store_lock);
2142         list_append(&page_stores, &store->link);
2143         release_lock(&page_store_lock);
2144     }
2145
2146     if (get_previous_mode() == USER_MODE) free(safe_path);
2147     return ret;
2148 }
2149
2150 dword_t create_address_space(void *base_address, dword_t page_count, memory_address_space_t *mem_space)
2151 {
2152     dword_t ret = ERR_NOMEMORY;
2153
2154     mem_space->pool_address = base_address;
2155     mem_space->pool_size = page_count;
2156     mem_space->by_addr_tree_root = mem_space->by_size_tree_root = NULL;
2157     mem_space->resource = 0;
2158     list_init(&mem_space->evictable_blocks);
2159     mem_space->evict_blk_ptr = NULL;
2160     mem_space->evict_page_num = 0;
2161     mem_space->stats.used_virtual = 0;
2162     mem_space->stats.committed = 0;
2163     mem_space->stats.evicted = 0;
2164     mem_space->stats.shared = 0;
2165
2166     if (get_page_directory() != INVALID_PAGE)
2167     {
2168         mem_space->page_directory = create_page_directory();
2169         if (mem_space->page_directory == NULL) return ret;
2170     }
2171     else
2172     {
2173         dword_t *boot_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
2174         mem_space->page_directory = (void*)PAGE_ALIGN(boot_directory[PAGEDIR_SELF_ENTRY]);
2175     }
2176
2177     memory_block_t *initial = mem_tree_alloc();
2178     if (initial != NULL)
2179     {
2180         initial->by_addr_tree.key = (qword_t)((dword_t)base_address);
2181         initial->by_size_tree.key = (qword_t)page_count;
2182         initial->flags = MEMORY_BLOCK_FREE;
2183         initial->address_space = mem_space;
2184         initial->section = NULL;
2185
2186         avl_tree_insert(&mem_space->by_addr_tree_root, &initial->by_addr_tree);
2187         avl_tree_insert(&mem_space->by_size_tree_root, &initial->by_size_tree);
2188         ret = ERR_SUCCESS;
2189     }
2190
2191     if (mem_space != &kernel_address_space)
2192     {
2193         list_append(&user_address_spaces, &mem_space->link);
2194     }
2195
2196     return ret;
2197 }
2198
/*
 * Clones an address space for copy-on-write sharing (fork-style).
 *
 * Copies the block trees, creates a new page directory, and shares all
 * user-range page tables between the two directories: each shared table
 * gets an extra reference and is made read-only in the original so the
 * first write in either space triggers the COW path in the fault
 * handler.
 *
 * Returns ERR_SUCCESS or ERR_NOMEMORY.
 *
 * NOTE(review): on the ERR_NOMEMORY paths, blocks already copied by
 * clone_blocks_recursive() do not appear to be torn down here — verify
 * whether the caller is expected to delete the partial clone.
 */
dword_t clone_address_space(memory_address_space_t *original, memory_address_space_t *clone)
{
    dword_t i;
    dword_t ret = ERR_SUCCESS;

    /* Hold the source shared so its layout cannot change mid-copy. */
    acquire_resource_shared(&original->resource);

    clone->pool_address = original->pool_address;
    clone->pool_size = original->pool_size;
    clone->by_addr_tree_root = clone->by_size_tree_root = NULL;
    clone->resource = 0;
    list_init(&clone->evictable_blocks);
    clone->evict_blk_ptr = NULL;
    clone->evict_page_num = 0;
    clone->stats.used_virtual = original->stats.used_virtual;
    clone->stats.committed = original->stats.committed;
    clone->stats.evicted = original->stats.evicted;
    /* NOTE(review): shared is seeded from committed, not from
     * original->stats.shared — confirm this is intentional (all
     * committed pages become shared after the COW split below). */
    clone->stats.shared = original->stats.committed;

    if (original->by_addr_tree_root != NULL)
    {
        memory_block_t *root_block = CONTAINER_OF(original->by_addr_tree_root, memory_block_t, by_addr_tree);
        if (!clone_blocks_recursive(clone, root_block))
        {
            ret = ERR_NOMEMORY;
            goto cleanup;
        }
    }

    if (!(clone->page_directory = create_page_directory()))
    {
        ret = ERR_NOMEMORY;
        goto cleanup;
    }

    dword_t *clone_dir = map_temporary_page(clone->page_directory, PAGE_PRESENT | PAGE_WRITABLE);
    bool_t this_directory = original->page_directory == get_page_directory();

    /* The active directory is reachable through its self-mapping;
     * any other directory must be mapped in temporarily. */
    dword_t *original_dir;
    if (this_directory) original_dir = (dword_t*)PAGE_DIRECTORY_ADDR;
    else original_dir = map_temporary_page(original->page_directory, PAGE_PRESENT | PAGE_WRITABLE);

    /* Share every user-range page table: bump its refcount, strip the
     * write bit in the original (COW arming), and copy the entry. */
    for (i = USER_PAGE_START; i <= USER_PAGE_END; i++)
    {
        reference_page((void*)PAGE_ALIGN(original_dir[i]));
        original_dir[i] &= ~PAGE_WRITABLE;
        clone_dir[i] = original_dir[i];
        /* Flush the stale writable translation if we edited the live
         * directory. */
        if (this_directory) invalidate_tlb((void*)(i << 12));
    }

    if (!this_directory) unmap_temporary_page(original_dir);
    unmap_temporary_page(clone_dir);
    list_append(&user_address_spaces, &clone->link);

cleanup:
    release_resource(&original->resource);
    return ret;
}
2257
/*
 * Moves an address space to the tail of the global user address space
 * list. NOTE(review): presumably this marks the space as most recently
 * used so eviction scans (which start at the head) pick it last —
 * confirm against the eviction code.
 */
void bump_address_space(memory_address_space_t *mem_space)
{
    list_remove(&mem_space->link);
    list_append(&user_address_spaces, &mem_space->link);
}
2263
2264 void delete_address_space(memory_address_space_t *mem_space)
2265 {
2266     ASSERT(get_page_directory() != mem_space->page_directory);
2267     acquire_resource_exclusive(&mem_space->resource);
2268
2269     if (mem_space->by_addr_tree_root)
2270     {
2271         memory_block_t *root = CONTAINER_OF(mem_space->by_addr_tree_root, memory_block_t, by_addr_tree);
2272         free_blocks_recursive(root);
2273         mem_space->by_addr_tree_root = NULL;
2274         mem_space->by_size_tree_root = NULL;
2275     }
2276
2277     free_physical_page(mem_space->page_directory);
2278     mem_space->page_directory = NULL;
2279
2280     release_resource(&mem_space->resource);
2281 }
2282
2283 static bool_t find_evicted_page(memory_block_t *block, void *address, page_store_t **store, page_store_entry_t **entry)
2284 {
2285     list_entry_t *i;
2286
2287     for (i = transition_pages.next; i != &transition_pages; i = i->next)
2288     {
2289         *entry = CONTAINER_OF(i, page_store_entry_t, link);
2290
2291         if ((*entry)->address_space == block->address_space
2292             && PAGE_ALIGN((dword_t)(*entry)->address) == PAGE_ALIGN((dword_t)address))
2293         {
2294             return TRUE;
2295         }
2296     }
2297
2298     for (i = page_stores.next; i != &page_stores; i = i->next)
2299     {
2300         list_entry_t *j;
2301         *store = CONTAINER_OF(i, page_store_t, link);
2302
2303         for (j = (*store)->entry_list.next; j != &(*store)->entry_list; j = j->next)
2304         {
2305             *entry = CONTAINER_OF(j, page_store_entry_t, link);
2306
2307             if ((*entry)->address_space == block->address_space
2308                 && PAGE_ALIGN((dword_t)(*entry)->address) == PAGE_ALIGN((dword_t)address))
2309             {
2310                 return TRUE;
2311             }
2312         }
2313     }
2314
2315     return FALSE;
2316 }
2317
/*
 * Page fault handler. Resolves demand-paging, page-store/transition
 * page-in, section-backed file reads, and copy-on-write splits.
 *
 * address - faulting linear address (from CR2).
 * regs    - trap frame; regs->error_code carries the x86 page fault
 *           error bits.
 *
 * Returns TRUE if the fault was resolved and the instruction can be
 * retried, FALSE if it is a genuine access violation the caller must
 * turn into an exception.
 */
bool_t memory_fault_handler(void *address, registers_t *regs)
{
    int i;
    page_error_t problem;
    dword_t aligned_address = PAGE_ALIGN((dword_t)address);
    dword_t pd_index = ADDR_TO_PDE((dword_t)address);
    dword_t pt_index = ADDR_TO_PTE((dword_t)address);
    /* The current directory and its tables are visible through the
     * recursive self-mapping. */
    dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;
    dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + (pd_index << 12));
    process_t *proc = get_current_process();

    /* User-range addresses belong to the current process' space;
     * everything else is resolved against the kernel space. */
    memory_address_space_t *address_space = (proc != NULL && check_usermode(address, 1))
                                            ? &proc->memory_space : &kernel_address_space;
    memory_block_t *block = find_block_by_addr(address_space, address);
    if (block == NULL) return FALSE;

    /* Classify the fault from the hardware error code. */
    if (!(regs->error_code & PAGE_ERROR_PRESENT_FLAG))
    {
        problem = PAGE_ERROR_NOTPRESENT;
    }
    else if (!(block->flags & MEMORY_BLOCK_USERMODE)
        && (regs->error_code & PAGE_ERROR_USERMODE_FLAG))
    {
        problem = PAGE_ERROR_UNPRIVILEGED;
    }
    else if (regs->error_code & PAGE_ERROR_WRITE_FLAG)
    {
        problem = PAGE_ERROR_READONLY;
    }
    else
    {
        KERNEL_CRASH_WITH_REGS("Unknown paging problem", regs);
    }

    /* Case 1: not-present fault in an accessible block — page it in. */
    if ((block->flags & MEMORY_BLOCK_ACCESSIBLE) && (problem == PAGE_ERROR_NOTPRESENT))
    {
        page_store_t *store = NULL;
        page_store_entry_t *entry = NULL;
        byte_t buffer[PAGE_SIZE];
        dword_t bytes_read;
        dword_t page_flags = 0;

        if (find_evicted_page(block, address, &store, &entry))
        {
            /* Rebuild the PTE flags from the block's attributes;
             * copy-on-write blocks stay read-only for now. */
            if (block->flags & MEMORY_BLOCK_ACCESSIBLE) page_flags |= PAGE_PRESENT;
            if ((block->flags & (MEMORY_BLOCK_WRITABLE | MEMORY_BLOCK_COPY_ON_WRITE))
                == MEMORY_BLOCK_WRITABLE)
            {
                page_flags |= PAGE_WRITABLE;
            }

            if (block->flags & MEMORY_BLOCK_USERMODE) page_flags |= PAGE_USERMODE;
            else page_flags |= PAGE_GLOBAL;

            if (entry->number != INVALID_STORE_NUMBER)
            {
                /* The page content lives in the page store file; read
                 * it with interrupts enabled (file I/O may block). */
                enable_ints();
                dword_t ret = syscall_read_file(store->file_handle, buffer, (qword_t)entry->number * (qword_t)PAGE_SIZE, PAGE_SIZE, &bytes_read);
                disable_ints();

                /* Another thread may have paged it in while we slept. */
                if ((page_directory[pd_index] & PAGE_PRESENT) && (page_table[pt_index] & PAGE_PRESENT))
                {
                    return TRUE;
                }

                if (ret != ERR_SUCCESS) return FALSE;

                ret = alloc_page((void*)aligned_address, page_flags);
                if (ret != ERR_SUCCESS) return FALSE;

                /* Other entries sharing this store slot now reference
                 * the restored frame as transition pages. */
                list_entry_t *ptr;
                for (ptr = store->entry_list.next; ptr != &store->entry_list; ptr = ptr->next)
                {
                    page_store_entry_t *other_entry = CONTAINER_OF(ptr, page_store_entry_t, link);

                    if (entry != other_entry && other_entry->number == entry->number)
                    {
                        list_remove(&other_entry->link);
                        list_append(&transition_pages, &other_entry->link);

                        other_entry->physical = get_physical_address((void*)aligned_address);
                        other_entry->number = INVALID_STORE_NUMBER;
                    }
                }

                clear_bit(store->bitmap, entry->number);
                list_remove(&entry->link);
                free(entry);

                memcpy((void*)aligned_address, buffer, PAGE_SIZE);
                address_space->stats.evicted -= PAGE_SIZE;
                return TRUE;
            }
            else
            {
                /* Transition page: the frame still holds the data, so
                 * just map it back in — no file I/O needed. */
                if (map_page(entry->physical, entry->address, page_flags) == ERR_SUCCESS)
                {
                    list_remove(&entry->link);
                    free(entry);
                    address_space->stats.evicted -= PAGE_SIZE;
                    return TRUE;
                }
            }

            return FALSE;
        }
        else
        {
            /* Never-evicted page: demand-zero, or populate from the
             * block's backing section. */
            list_entry_t *ptr;
            shared_page_t *page = NULL;
            qword_t offset = block->section_offset + (qword_t)aligned_address - (qword_t)block->by_addr_tree.key;

            page_flags = PAGE_PRESENT;
            if (block->flags & MEMORY_BLOCK_WRITABLE) page_flags |= PAGE_WRITABLE;

            if (block->flags & MEMORY_BLOCK_USERMODE) page_flags |= PAGE_USERMODE;
            else page_flags |= PAGE_GLOBAL;

            if (block->section && offset < (qword_t)block->section->size)
            {
                ASSERT(PAGE_OFFSET(offset) == 0);

                /* If the section already has a shared frame for this
                 * offset, map it instead of reading the file again. */
                for (ptr = block->section->page_list.next; ptr != &block->section->page_list; ptr = ptr->next)
                {
                    page = CONTAINER_OF(ptr, shared_page_t, link);
                    if (page->offset == offset) break;
                }

                if (ptr != &block->section->page_list)
                {
                    return (map_page(page->physical, (void*)aligned_address, page_flags) == ERR_SUCCESS);
                }
            }

            memset(buffer, 0, PAGE_SIZE);

            if (block->section && block->section->file && offset < (qword_t)block->section->size)
            {
                /* Fill the buffer from the backing file; ERR_BEYOND
                 * (read past EOF) still yields a zero-filled tail. */
                enable_ints();
                file_instance_t *file = block->section->file;
                acquire_resource_shared(&file->global->volume->resource);
                dword_t ret = file->global->volume->driver->read_file(file, buffer, offset, PAGE_SIZE, &bytes_read);
                release_resource(&file->global->volume->resource);
                disable_ints();
                if (ret != ERR_SUCCESS && ret != ERR_BEYOND) return FALSE;
            }

            /* Map writable temporarily so we can copy the data in,
             * then drop to the block's real flags. */
            dword_t ret = alloc_page((void*)aligned_address, page_flags | PAGE_WRITABLE);
            if (ret != ERR_SUCCESS) return FALSE;

            memcpy((void*)aligned_address, buffer, PAGE_SIZE);
            set_page_flags((void*)aligned_address, page_flags);

            if (block->section && offset < (qword_t)block->section->size)
            {
                /* Register the new frame as the section's shared page
                 * for this offset so other mappings can reuse it. */
                page = (shared_page_t*)malloc(sizeof(shared_page_t));
                if (page == NULL)
                {
                    free_page((void*)aligned_address);
                    return FALSE;
                }

                page->physical = get_physical_address((void*)aligned_address);
                page->offset = offset;

                list_append(&block->section->page_list, &page->link);
            }

            address_space->stats.committed += PAGE_SIZE;
            return TRUE;
        }
    }

    /* Case 2: write fault in a copy-on-write block — split the page
     * (and, if shared, the page table) into a private copy. */
    if ((block->flags & (MEMORY_BLOCK_COPY_ON_WRITE | MEMORY_BLOCK_WRITABLE))
        == (MEMORY_BLOCK_COPY_ON_WRITE | MEMORY_BLOCK_WRITABLE)
        && (problem == PAGE_ERROR_READONLY))
    {
        if (!(page_directory[pd_index] & PAGE_WRITABLE))
        {
            void *table_phys = (void*)PAGE_ALIGN(page_directory[pd_index]);

            if (get_page(table_phys)->ref_count > 1)
            {
                /* The page table itself is shared: clone it, with every
                 * entry referenced and made read-only (arming per-page
                 * COW inside the new table). */
                void *table_copy = alloc_physical_page();
                if (table_copy == NULL) return FALSE;

                dword_t *temporary = map_temporary_page(table_copy, PAGE_PRESENT | PAGE_WRITABLE);
                if (temporary == NULL)
                {
                    free_physical_page(table_copy);
                    return FALSE;
                }

                for (i = 0; i < PAGE_SIZE / sizeof(dword_t); i++)
                {
                    if (page_table[i])
                    {
                        reference_page((void*)PAGE_ALIGN(page_table[i]));
                        temporary[i] = page_table[i] & ~PAGE_WRITABLE;
                    }
                }

                unmap_temporary_page(temporary);

                reference_page(table_copy);
                dereference_page(table_phys);

                /* Point the directory at the private copy, preserving
                 * the low flag bits and making it writable. */
                page_directory[pd_index] = PAGE_ALIGN((dword_t)table_copy)
                                           | PAGE_OFFSET(page_directory[pd_index])
                                           | PAGE_WRITABLE;
                invalidate_tlb(page_table);
            }
            else
            {
                /* Sole owner of the table: make it writable and push
                 * the read-only protection down to each PTE. */
                page_directory[pd_index] |= PAGE_WRITABLE;
                invalidate_tlb(page_table);

                for (i = 0; i < PAGE_SIZE / sizeof(dword_t); i++)
                {
                    page_table[i] &= ~PAGE_WRITABLE;
                    invalidate_tlb((void*)((pd_index << 22) | (i << 12)));
                }
            }
        }

        if (!(page_table[pt_index] & PAGE_WRITABLE))
        {
            void *phys = (void*)PAGE_ALIGN(page_table[pt_index]);

            if (get_page(phys)->ref_count > 1)
            {
                /* Shared data page: copy it into a private frame. */
                void *page_copy = alloc_physical_page();
                if (page_copy == NULL) return FALSE;

                write_physical(page_copy, (void*)PAGE_ALIGN((dword_t)address), PAGE_SIZE);
                reference_page(page_copy);
                dereference_page(phys);

                page_table[pt_index] = PAGE_ALIGN((dword_t)page_copy)
                                       | PAGE_OFFSET(page_table[pt_index])
                                       | PAGE_WRITABLE;
                invalidate_tlb((void*)aligned_address);
            }
            else
            {
                /* Last reference: simply restore the write bit. */
                page_table[pt_index] |= PAGE_WRITABLE;
                invalidate_tlb((void*)aligned_address);
            }
        }

        return TRUE;
    }

    return FALSE;
}
2573
/*
 * Boot-time initialization of the physical and virtual memory manager.
 *
 * Walks the multiboot memory map, builds the physical page stack and
 * the page descriptor array, creates the kernel and mapping address
 * spaces, detaches from the boot page directory's user range, and
 * enables global pages when the CPU supports them.
 *
 * mmap_addr   - multiboot memory map as passed by the bootloader.
 * mmap_length - total size of the map in bytes.
 */
void memory_init(multiboot_mmap_t *mmap_addr, dword_t mmap_length)
{
    dword_t i, j;
    multiboot_mmap_t *mmap = mmap_addr;
    dword_t *page_directory = (dword_t*)PAGE_DIRECTORY_ADDR;

    fix_overlapping_sections(mmap_addr, mmap_length);

    puts("\nMemory map:\n\nBase\t\t\tLength\t\t\tType");
    puts("------------------------------------------------------------");

    while ((dword_t)mmap < (dword_t)mmap_addr + mmap_length)
    {
        printf("0x%08X%08X\t0x%08X%08X\t%s\n",
               mmap->base_high,
               mmap->base_low,
               mmap->length_high,
               mmap->length_low,
               (mmap->type == 1) ? "Usable" : "Not Usable");

        /* Only usable regions entirely below 4 GiB are managed. */
        if (mmap->type == 1
            && mmap->base_high == 0
            && mmap->length_high == 0
            && mmap->length_low < (0xFFFFFFFF - mmap->base_low)
            && mmap->length_low > 0)
        {
            dword_t start_addr = mmap->base_low;
            if (start_addr < MEM_FIRST_PHYS_ADDR) start_addr = MEM_FIRST_PHYS_ADDR;
            start_addr = PAGE_ALIGN_UP(start_addr);
            dword_t end_addr = PAGE_ALIGN_UP(mmap->base_low + mmap->length_low);
            dword_t page = end_addr - PAGE_SIZE;

            /* Push the region's pages (top-down) onto the physical
             * stack. Pages at the low end (start_addr) are consumed
             * as needed to build the page tables that back the stack
             * itself — those iterations `continue` without pushing. */
            while (page >= start_addr)
            {
                dword_t stack_address = (dword_t)&physical_memory_stack[num_free_pages];
                dword_t pd_index = ADDR_TO_PDE(stack_address);
                dword_t pt_index = ADDR_TO_PTE(stack_address);
                dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + pd_index * PAGE_SIZE);

                if (!(page_directory[pd_index] & PAGE_PRESENT))
                {
                    /* Consume one frame as a new page table. */
                    page_directory[pd_index] = start_addr | PAGE_PRESENT | PAGE_WRITABLE | PAGE_GLOBAL;
                    start_addr += PAGE_SIZE;
                    invalidate_tlb(page_table);
                    memset(page_table, 0, PAGE_SIZE);
                    total_physical_pages++;
                    continue;
                }

                if (!(page_table[pt_index] & PAGE_PRESENT))
                {
                    /* Consume one frame to back the stack page. */
                    page_table[pt_index] = start_addr | PAGE_PRESENT | PAGE_WRITABLE | PAGE_GLOBAL;
                    start_addr += PAGE_SIZE;
                    invalidate_tlb((void*)stack_address);
                    total_physical_pages++;
                    continue;
                }

                free_physical_page((void*)page);
                page -= PAGE_SIZE;
            }
        }

        /* mmap->size does not include the size field itself. */
        mmap = (multiboot_mmap_t*)((dword_t)mmap + mmap->size + sizeof(dword_t));
    }

    puts("------------------------------------------------------------");
    total_physical_pages += num_free_pages;
    /* Place the page descriptor array just below the kernel pool. */
    pages = (page_t*)(KERNEL_POOL_START - total_physical_pages * sizeof(page_t));

    /* Map backing pages for the descriptor array. */
    for (i = PAGE_ALIGN((uintptr_t)pages); i < KERNEL_POOL_START; i += PAGE_SIZE)
    {
        dword_t pd_index = ADDR_TO_PDE(i);
        dword_t pt_index = ADDR_TO_PTE(i);
        dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + pd_index * PAGE_SIZE);

        if (!(page_directory[pd_index] & PAGE_PRESENT))
        {
            page_directory[pd_index] = (uintptr_t)alloc_physical_page() | PAGE_PRESENT | PAGE_WRITABLE | PAGE_GLOBAL;
            invalidate_tlb(page_table);
            memset(page_table, 0, PAGE_SIZE);
        }

        if (!(page_table[pt_index] & PAGE_PRESENT))
        {
            page_table[pt_index] = (uintptr_t)alloc_physical_page() | PAGE_PRESENT | PAGE_WRITABLE | PAGE_GLOBAL;
            invalidate_tlb((void*)i);
        }
    }

    dword_t pages_inserted = 0;

    /* Descriptors for every free page still on the stack... */
    for (i = 0; i < num_free_pages; i++)
    {
        pages[pages_inserted].phys_addr = PAGE_ALIGN((dword_t)physical_memory_stack[i]);
        pages[pages_inserted].ref_count = 0;
        pages_inserted++;
    }

    /* ...plus descriptors for pages already mapped in the kernel range. */
    for (i = KERNEL_PAGE_START; i <= KERNEL_PAGE_END; i++)
    {
        dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + i * PAGE_SIZE);
        if (!(page_directory[i] & PAGE_PRESENT)) continue;

        for (j = 0; j < PAGE_SIZE / sizeof(dword_t); j++)
        {
            if (PAGE_ALIGN(page_table[j]) < MEM_FIRST_PHYS_ADDR) continue;

            if (page_table[j] & PAGE_PRESENT)
            {
                pages[pages_inserted].phys_addr = PAGE_ALIGN((dword_t)page_table[j]);
                pages[pages_inserted].ref_count = 0;
                pages_inserted++;
            }
        }
    }

    ASSERT(pages_inserted == total_physical_pages);
    /* Sort by physical address so get_page() can binary-search. */
    qsort(pages, total_physical_pages, sizeof(page_t), compare_page);

    init_semaphore(&temporary_page_semaphore, TEMPORARY_PAGES, TEMPORARY_PAGES);

    if (create_address_space((void*)KERNEL_POOL_START,
                             (KERNEL_POOL_END - KERNEL_POOL_START + PAGE_SIZE - 1) / PAGE_SIZE,
                             &kernel_address_space) != ERR_SUCCESS)
    {
        KERNEL_CRASH("Unable to create kernel address space");
    }

    if (create_address_space((void*)MAPPING_START,
                             (MAPPING_END - MAPPING_START + PAGE_SIZE - 1) / PAGE_SIZE,
                             &mapping_space) != ERR_SUCCESS)
    {
        KERNEL_CRASH("Unable to create mapping space");
    }

    set_page_directory((void*)PAGE_ALIGN(page_directory[PAGEDIR_SELF_ENTRY]));

    /* Take a reference on every mapped kernel page so they are never
     * handed back to the allocator. */
    for (i = KERNEL_PAGE_START; i <= KERNEL_PAGE_END; i++)
    {
        dword_t *page_table = (dword_t*)(PAGE_TABLE_ADDR + i * PAGE_SIZE);
        if (!(page_directory[i] & PAGE_PRESENT)) continue;

        for (j = 0; j < PAGE_SIZE / sizeof(dword_t); j++)
        {
            if (page_table[j] & PAGE_PRESENT) reference_page((void*)PAGE_ALIGN(page_table[j]));
        }
    }

    /* Drop the boot-time user-range mappings and reload CR3 to flush. */
    for (i = USER_PAGE_START; i <= USER_PAGE_END; i++) page_directory[i] = 0;
    set_page_directory(get_page_directory());

    /* Enable global pages (CR4.PGE, bit 7) if the CPU supports them. */
    if (cpu_features[0] & CPU_FEATURE_PGE)
    {
        asm volatile ("movl %cr4, %eax\n"
                      "orl $0x80, %eax\n"
                      "movl %eax, %cr4\n");
    }
}