"
[chai.git] / re.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <memory.h>
4 #include <sys/mman.h>
5 #include <stdbool.h>
6 #include <unistd.h>
7
8 #include "shim.h"
9 #include "jobs.h"
10 #include "memory.h"
11 #include "../oolong/chai-notes.h"
12
/*
 * GPU address written into both the MFBD (heap_free_address) and the replay
 * payload (tiler_heap_free).
 */
#define HEAP_FREE_ADDRESS 0x1DABE0000

/*
 * Running atom counter: it is pre-incremented to number each submitted atom,
 * and its current value is used for job indices and dependency atom IDs.
 */
int atom_count = 0;

/* Address OR'd into the external-resource list of the fragment and replay atoms. */
uint64_t framebuffer;

/*
 * Addresses of the most recently built fragment and tiler job descriptors;
 * job_chain_replay() copies them into its payload.
 */
uint64_t last_fragment, last_tiler;
20 uint64_t last_tiler;
21
22 void *set_value_helper(int fd) {
23         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_set_value));
24         void* magic = galloc(16);
25
26         struct job_descriptor_header header = {
27                 .exception_status = JOB_NOT_STARTED,
28                 .first_incomplete_task = 0,
29                 .fault_pointer = 0,
30                 .job_descriptor_size = 1, /* 64-bit */
31                 .job_type = JOB_TYPE_SET_VALUE,
32                 .job_barrier = /* 1 */ 0, /* set for first in chain? */
33                 .job_index = atom_count,
34                 .job_dependency_index_1 = 0,
35                 .job_dependency_index_2 = 0,
36                 .next_job = 0 
37         };
38
39         struct payload_set_value payload = {
40                 .out = (uint32_t) magic,
41                 .unknown = 0x03
42         };
43
44         memcpy(packet, &header, sizeof(header));
45         memcpy(packet + sizeof(header), &payload, sizeof(payload));
46
47         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
48         return packet;
49 }
50
51 uint64_t make_mfbd(bool tiler)
52 {
53         struct tentative_mfbd *mfbd = galloc(sizeof(struct tentative_mfbd));
54         memset(mfbd, 0, sizeof(struct tentative_mfbd));
55
56         /* zeroes */
57         mfbd->block2[0] = (uint32_t) galloc(64);
58         mfbd->block2[1] = (uint32_t) galloc(64);
59         mfbd->ugaT = (uint32_t) galloc(64);
60         mfbd->unknown_gpu_address = (uint32_t) galloc(64);
61
62         /* Unknown contents -- it's a mystery! */
63         mfbd->unknown2 = (uint32_t) galloc(64);
64         mfbd->unknown_gpu_addressN = (uint32_t) galloc(64);
65
66         /* Match traces. TODO decode */
67         mfbd->flags = 0xF0;
68         mfbd->heap_free_address = HEAP_FREE_ADDRESS;
69         mfbd->blah = 0x1F00000000;
70         mfbd->unknown3 = tiler ? 0 : 0xFFFFF8C0;
71         mfbd->unknown1 = 0x1600;
72
73         mfbd->block1[4] = 0x02D801C2;
74         mfbd->block1[6] = 0x02D801C2;
75         mfbd->block1[7] = tiler ? 0x04001080 : 0x01001080;
76         mfbd->block1[8] = tiler ? 0x000000FF : 0xC0210000;
77         mfbd->block1[9] = tiler ? 0x3F800000 : 0x00000000;
78
79         mfbd->block3[0] = 0x00000158;
80         mfbd->block3[1] = 0x00000420;
81         mfbd->block3[14] = 0x04000000;
82         mfbd->block3[15] = 0x880A8899;
83
84         return (uint64_t ) (uint32_t) mfbd | MFBD | (tiler ? FBD_VERTEX_TILER : FBD_FRAGMENT);
85 }
86
87 void free_mfbd(struct tentative_mfbd *mfbd) {
88         gfree((void*) (uint32_t) mfbd->block2[0]);
89         gfree((void*) (uint32_t) mfbd->block2[1]);
90         gfree((void*) (uint32_t) mfbd->ugaT);
91         gfree((void*) (uint32_t) mfbd->unknown_gpu_address);
92         gfree((void*) (uint32_t) mfbd->unknown_gpu_addressN);
93         gfree((void*) (uint32_t) mfbd->unknown2);
94         gfree((void*) mfbd);
95 }
96
97 int job_chain_fragment(int fd) {
98         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_fragment));
99
100         struct job_descriptor_header header = {
101                 .exception_status = JOB_NOT_STARTED,
102                 .first_incomplete_task = 0,
103                 .fault_pointer = 0,
104                 .job_descriptor_size = JOB_32_BIT,
105                 .job_type = JOB_TYPE_FRAGMENT,
106                 .job_barrier = 0, 
107                 .job_index = atom_count,
108                 .job_dependency_index_1 = 0,
109                 .job_dependency_index_2 = 0,
110                 .next_job = 0 
111         };
112
113         struct payload_fragment payload = {
114                 .min_tile_coord = MAKE_TILE_COORD(0, 0, 0),
115                 .max_tile_coord = MAKE_TILE_COORD(29, 45, 0),
116                 .fragment_fbd = make_mfbd(false)
117         };
118
119         memcpy(packet, &header, sizeof(header));
120         memcpy(packet + sizeof(header), &payload, sizeof(payload));
121         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
122
123         struct base_dependency depNoDep = {
124                 .atom_id = 0,
125                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
126         };
127
128         struct base_dependency depTiler = {
129                 .atom_id = atom_count /* last one */,
130                 .dependency_type = BASE_JD_DEP_TYPE_DATA
131         };
132
133         printf("Framebuffer: %LX\n", framebuffer);
134         uint64_t* resource = calloc(sizeof(u64), 1);
135         resource[0] = framebuffer | BASE_EXT_RES_ACCESS_EXCLUSIVE;
136
137         /* TODO: free resource */
138
139         struct base_jd_atom_v2 job = {
140                 .jc = (uint32_t) packet,
141                 .extres_list = resource,
142                 .nr_extres = 1,
143                 .core_req = BASE_JD_REQ_EXTERNAL_RESOURCES | BASE_JD_REQ_FS,
144                 .atom_number = ++atom_count,
145                 .prio = BASE_JD_PRIO_MEDIUM,
146                 .device_nr = 0
147         };
148
149         job.pre_dep[0] = depTiler;
150         job.pre_dep[1] = depNoDep;
151
152         submit_job(fd, job);
153
154         last_fragment = (uint32_t) packet;
155
156         //free_mfbd(mfbd);
157
158         return 0;
159 }
160
161 uint64_t import_shader(int fd, uint8_t *shader, size_t sz, bool fragment)
162 {
163         int pages = 1 + (sz >> PAGE_SHIFT);
164
165         uint64_t gpu = alloc_gpu_pages(fd, pages, BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_EX);
166         uint8_t *cpu = mmap_gpu(fd, gpu, pages);
167
168         memcpy(cpu, shader, sz);
169         sync_gpu(fd, cpu, gpu, sz);
170
171         /* TODO: munmap */
172
173         return gpu | (fragment ? 9 : 5); /* Unknown flag */
174 }
175
176 void* vertex_tiler_helper(int fd, bool tiler)
177 {
178         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_vertex_tiler32));
179
180         struct job_descriptor_header header = {
181                 .exception_status = JOB_NOT_STARTED,
182                 .first_incomplete_task = 0,
183                 .fault_pointer = 0,
184                 .job_descriptor_size = JOB_32_BIT,
185                 .job_type = tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
186                 .job_barrier = 0, 
187                 .job_index = atom_count,
188                 .job_dependency_index_1 = 0,
189                 .job_dependency_index_2 = 0,
190                 .next_job = 0 
191         };
192
193         /* TODO */
194         uint32_t mode_gooks = tiler ? 0x1403000C : 0x14000000;
195         uint32_t other_gook = tiler ? 0x00000003 : 0x00000000;
196
197         struct payload_vertex_tiler32 payload = {
198                 .block1 = {
199                         0x00000003, 0x28000000, mode_gooks, 0x00000000,
200                         0x00000000, other_gook, 0x00000000, 0x00000000,
201                         0x00000005, 0x00000000, 0x00000000
202                 },
203                 .zeroes = (uint32_t) galloc(64),
204                 .unknown1 = (uint32_t) galloc(16),
205                 .null1 = 0,
206                 .null2 = 0,
207                 .unknown2 = (uint32_t) galloc(32),
208                 .shader = (uint32_t) galloc(sizeof(struct shader_meta)),
209                 .vertices = (uint32_t) galloc(sizeof(struct vertex_buffer)),
210                 .unknown4 = (uint32_t) galloc(16),
211                 .unknown5 = (uint32_t) galloc(32),
212                 .unknown6 = (uint32_t) galloc(64),
213                 .nullForVertex = tiler ? (uint32_t) galloc(64) : 0,
214                 .null4 = 0,
215                 .fbd = (uint32_t) make_mfbd(true),
216                 .unknown7 = tiler ? 0 : ((uint32_t) galloc(64) | 1) /* TODO */
217         };
218
219         struct shader_meta *shader = (struct shader_meta*) payload.shader;
220
221         /* TODO: Integrate an assembler */
222 #include "../shader_hex.h"
223         shader->shader = import_shader(fd,
224                         (uint8_t*) (tiler ? fragment_shader : vertex_shader),
225                         tiler ? sizeof(fragment_shader) : sizeof(vertex_shader),
226                         tiler);
227
228         if(!tiler) {
229                 uint32_t ni[] = {
230                         0x43200000, 0x42F00000, 0x3F000000, 0x00000000,
231                         0x43200000, 0x42F00000, 0x3F000000, 0x00000000
232                 };
233
234                 memcpy((void*) payload.unknown2, ni, sizeof(ni));
235         }
236
237         if(tiler) {
238                 /* Lose precision... on purpose? */
239                 payload.unknown7 = (uint32_t) shader->shader;
240         }
241
242         payload.unknown7 = tiler ? 0xDEADBA00 : 0xDEADFA00;
243
244         /* TODO: Decode me! */
245
246         if(tiler) {
247                 shader->unknown1 = 0x0007000000000000;
248                 shader->unknown2 = 0x0000000000020602;
249         } else {
250                 shader->unknown1 = 0x0005000100000000;
251                 shader->unknown2 = 0x0000000000420002;
252         }
253
254         /* I have *no* idea */
255
256         uint32_t *p = (uint32_t*) payload.unknown4;
257         *p = 0x2DEA2200;
258
259         uint64_t pi[] = {
260                 0x0000000017E49000, 0x0000000017E49000, 
261                 0x0000000017E49000, 0x0000000017E49000, 
262                 0x00000000179A2200, 0x0000000017E49000, 
263                 0x0000000017E49000
264         };
265
266         memcpy((void*) payload.unknown6, pi, sizeof(pi));
267
268         if(tiler) {
269                 uint32_t ni[] = {
270                         0xFF800000, 0xFF800000,
271                         0x7F800000, 0x7F800000,
272                         0x00000000, 0x3F800000,
273                         0x00000000, 0x00EF013F,
274                         0x00000000, 0x0000001F,
275                         0x02020000, 0x00000001
276                 };
277
278                 memcpy((void*) payload.nullForVertex, ni, sizeof(ni));
279         }
280
281         /* TODO: Vertices should be parametric */
282         float vertices[] = {
283                 0.0, 0.0, 0.0,
284                 0.5, 1.0, 0.0,
285                 1.0, 0.0, 0.0
286         };
287
288         struct vertex_buffer *vb = (struct vertex_buffer*) payload.vertices;
289         vb->vertices = (uint64_t) (uint32_t) galloc(sizeof(vertices));
290         memcpy((void*) (uint32_t) vb->vertices, vertices, sizeof(vertices));
291         vb->vertex_size = sizeof(float) * 3;
292         vb->size = sizeof(vertices);
293
294         vb->vertices |= 1; /* TODO flags */
295         
296         /* Use some magic numbers from the traces */
297         uint64_t* unk1 = (uint64_t*) payload.unknown1;
298         unk1[0] = /*0x000000B296271001*/ 0x000000B296271001;
299         unk1[1] = /*0x000000B296273000*/ 0x000000B296273000;
300
301         uint32_t writeBuffer = (uint32_t) galloc(64);
302
303         uint64_t* unk5 = (uint64_t*) payload.unknown5;
304         unk5[0] = ((uint64_t) (tiler ? 0xDB : 0xA3) << 56) | writeBuffer | 1;
305         unk5[1] = 0x0000004000000010;
306
307         if(tiler) {
308                 uint32_t ni[] = {
309                         0x00000001, 0x00000000,
310                         0x00070000, 0x00020602,
311                         0x00000000, 0x00000000,
312                         0x00000000, 0x3712FFFF,
313                         0x44F0FFFF, 0x0007FF00,
314                         0x0007FF00, 0x00000000,
315                         0x00000000, 0x00000000,
316                         0x00000000, 0x00000200,
317                         0x00000000, 0xF0122122,
318                         0x00000000, 0x00000000,
319                         0x00000000, 0xF0122122,
320                         0x00000000, 0xFF800000,
321                         0xFF800000, 0x7F800000,
322                         0x7F800000, 0x00000000,
323                         0x3F800000, 0x00000000,
324                         0xEF013F00, 0x00000000,
325                         0x0000001F, 0x02020000,
326                         0x00000001, 0x00000000
327                 };
328
329                 memcpy(payload.block2, ni, sizeof(ni));
330         } else {
331                 uint32_t ni[] = {
332                         0x00000000, 0x0000000C, 0x00000030, 0x2DEA2200,
333                         0x00000000, 0x00000000, 0x00000000, /* Address to 1 */ 0xCAFEDA01,
334                         0x57000000, 0x00000010, 0x00000040, 0x17E49000,
335                         0x00000000, 0x17E49000, 0x00000000, 0x17E49000,
336                         0x00000000, 0x17E49000, 0x00000000, 0x179A2200,
337                         0x00000000, 0x17E49000, 0x00000000, 0x17E49000,
338                         0x00000000, 0x00000000, 0x00000000, 0x43200000,
339                         0x42F00000, 0x3F000000, 0x00000000, 0x43200000,
340                         0x42F00000, 0x3F000000, 0x00000000, 0x00000000
341                 };
342
343                 memcpy(payload.block2, ni, sizeof(ni));
344
345         }
346
347         memcpy(packet, &header, sizeof(header));
348         memcpy(packet + sizeof(header), &payload, sizeof(payload));
349         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
350
351         return packet;
352 }
353
354 int job_chain_vertex_tiler(int fd) {
355         void *set = set_value_helper(fd);
356         void *vertex = vertex_tiler_helper(fd, false);
357         void *tiler = vertex_tiler_helper(fd, true);
358
359         ((struct job_descriptor_header*) set)->next_job = (uint32_t) vertex;
360         ((struct job_descriptor_header*) vertex)->next_job = (uint32_t) tiler;
361
362         struct base_dependency depNoDep = {
363                 .atom_id = 0,
364                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
365         };
366
367         struct base_jd_atom_v2 job = {
368                 .jc = (uint32_t) set,
369                 .extres_list = NULL,
370                 .nr_extres = 0,
371                 .core_req = BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | BASE_JD_REQ_COHERENT_GROUP,
372                 .atom_number = ++atom_count,
373                 .prio = BASE_JD_PRIO_MEDIUM,
374                 .device_nr = 0
375         };
376
377         job.pre_dep[0] = depNoDep;
378         job.pre_dep[1] = depNoDep;
379
380         submit_job(fd, job);
381
382         last_tiler = (uint32_t) tiler;
383
384         return 0;
385 }
386
387 void job_chain_replay(int fd)
388 {
389         struct base_jd_replay_payload *payload;
390
391         payload = (struct base_jd_replay_payload*) galloc(sizeof(*payload));
392
393         payload->tiler_jc_list = last_tiler;
394         payload->fragment_jc = last_fragment;
395         payload->tiler_heap_free = HEAP_FREE_ADDRESS;
396         payload->fragment_hierarchy_mask = 0;
397         payload->tiler_hierarchy_mask = 0;
398         payload->hierarchy_default_weight = 0x10000;
399         payload->tiler_core_req = BASE_JD_REQ_T | BASE_JD_REQ_COHERENT_GROUP;
400         payload->fragment_core_req = BASE_JD_REQ_FS;
401
402         struct base_dependency depNoDep = {
403                 .atom_id = 0,
404                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
405         };
406
407         struct base_dependency depFragment = {
408                 .atom_id = atom_count,
409                 .dependency_type = BASE_JD_DEP_TYPE_DATA
410         };
411
412         printf("Framebuffer: %LX\n", framebuffer);
413         uint64_t* resource = calloc(sizeof(u64), 1);
414         resource[0] = framebuffer | BASE_EXT_RES_ACCESS_EXCLUSIVE;
415
416         struct base_jd_atom_v2 job = {
417                 .jc = (uint32_t) payload,
418                 .extres_list = resource,
419                 .nr_extres = 1,
420                 .core_req = BASE_JD_REQ_EXTERNAL_RESOURCES | BASE_JD_REQ_SOFT_REPLAY,
421                 .atom_number = ++atom_count,
422                 .prio = BASE_JD_PRIO_LOW,
423                 .device_nr = 0
424         };
425
426         job.pre_dep[0] = depFragment;
427         job.pre_dep[1] = depNoDep;
428
429         submit_job(fd, job);
430 }
431
432 extern uint32_t cbma_bottom;
433 extern uint32_t cbma_top;
434
435 int main()
436 {
437         int fd = open_kernel_module();
438
439         init_cbma(fd);
440
441         //size_t fb_size = 29 * 16 * 45 * 16 * 4 * 2;
442
443         // framebuffer = (uint64_t) (uint32_t) galloc(fb_size);
444
445         /* Fake framebuffer to trap accesses */
446         framebuffer = 0x1CAFE0000;
447         printf("Framebuffer: %LX\n", framebuffer);
448
449         job_chain_vertex_tiler(fd);
450         job_chain_fragment(fd);
451         job_chain_replay(fd);
452         sync_gpu(fd, (uint8_t*) cbma_top, cbma_top, cbma_bottom - cbma_top);
453         flush_job_queue(fd);
454
455         sleep(3);
456         printf("Writing\n");
457
458         /* Dump framebuffer to a file */
459         /*uint8_t *fb = (uint8_t*) (uint32_t) framebuffer;
460         FILE *fp = fopen("framebuffer.bin", "wb");
461         fwrite(fb, 1, fb_size, fp);
462         fclose(fp);*/
463
464         /* Hang to prevent the tracer from going bananas */
465
466         while(1);
467
468         return 0;
469 }