d96903b98d004bcdde303b74fefae61c935fde5b
[chai.git] / re.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <memory.h>
4 #include <sys/mman.h>
5 #include <stdbool.h>
6 #include <unistd.h>
7
8 #include "shim.h"
9 #include "jobs.h"
10 #include "memory.h"
11 #include "../oolong/chai-notes.h"
12
/* Address written into mfbd->heap_free_address and into the replay
 * payload's tiler_heap_free field. */
#define HEAP_FREE_ADDRESS 0xDABE0000

/* Number of atoms submitted so far. Each job_chain_* function pre-increments
 * it to obtain the next atom number, and reads it beforehand to name the
 * previously submitted atom as a dependency. */
int atom_count = 0;

/* Address of the render target; main() sets it before any job is built. */
uint64_t framebuffer;

/* Addresses of the most recently built tiler and fragment job packets,
 * recorded by the job_chain_* functions and consumed by job_chain_replay(). */
uint64_t last_tiler;
uint64_t last_fragment;
21
22 void *set_value_helper(int fd) {
23         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_set_value));
24         void* magic = galloc(16);
25
26         struct job_descriptor_header header = {
27                 .exception_status = JOB_NOT_STARTED,
28                 .first_incomplete_task = 0,
29                 .fault_pointer = 0,
30                 .job_descriptor_size = 1, /* 64-bit */
31                 .job_type = JOB_TYPE_SET_VALUE,
32                 .job_barrier = /* 1 */ 0, /* set for first in chain? */
33                 .job_index = atom_count,
34                 .job_dependency_index_1 = 0,
35                 .job_dependency_index_2 = 0,
36                 .next_job = 0 
37         };
38
39         struct payload_set_value payload = {
40                 .out = (uint32_t) magic,
41                 .unknown = 0x03
42         };
43
44         memcpy(packet, &header, sizeof(header));
45         memcpy(packet + sizeof(header), &payload, sizeof(payload));
46
47         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
48         return packet;
49 }
50
51 uint64_t make_mfbd(bool tiler)
52 {
53         struct tentative_mfbd *mfbd = galloc(sizeof(struct tentative_mfbd));
54         memset(mfbd, 0, sizeof(struct tentative_mfbd));
55
56         /* zeroes */
57         mfbd->block2[0] = (uint32_t) galloc(64);
58         mfbd->block2[1] = (uint32_t) galloc(64);
59         mfbd->ugaT = (uint32_t) galloc(64);
60         mfbd->unknown_gpu_address = (uint32_t) galloc(64);
61
62         /* Unknown contents -- it's a mystery! */
63         mfbd->unknown2 = (uint32_t) galloc(64);
64         mfbd->unknown_gpu_addressN = (uint32_t) galloc(64);
65
66         /* Match traces. TODO decode */
67         mfbd->flags = 0xF0;
68         mfbd->heap_free_address = HEAP_FREE_ADDRESS;
69         mfbd->blah = 0x1F00000000;
70         mfbd->unknown3 = tiler ? 0 : 0xFFFFF8C0;
71         mfbd->unknown1 = 0x1600;
72
73         mfbd->block1[4] = 0x02D801C2;
74         mfbd->block1[6] = 0x02D801C2;
75         mfbd->block1[7] = tiler ? 0x04001080 : 0x01001080;
76         mfbd->block1[8] = tiler ? 0x000000FF : 0xC0210000;
77         mfbd->block1[9] = tiler ? 0x3F800000 : 0x00000000;
78
79         mfbd->block3[0] = 0x00000158;
80         mfbd->block3[1] = 0x00000420;
81         mfbd->block3[14] = 0x04000000;
82         mfbd->block3[15] = 0x880A8899;
83
84         return (uint64_t ) (uint32_t) mfbd | MFBD | (tiler ? FBD_VERTEX_TILER : FBD_FRAGMENT);
85 }
86
87 void free_mfbd(struct tentative_mfbd *mfbd) {
88         gfree((void*) (uint32_t) mfbd->block2[0]);
89         gfree((void*) (uint32_t) mfbd->block2[1]);
90         gfree((void*) (uint32_t) mfbd->ugaT);
91         gfree((void*) (uint32_t) mfbd->unknown_gpu_address);
92         gfree((void*) (uint32_t) mfbd->unknown_gpu_addressN);
93         gfree((void*) (uint32_t) mfbd->unknown2);
94         gfree((void*) mfbd);
95 }
96
97 int job_chain_fragment(int fd) {
98         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_fragment));
99
100         struct job_descriptor_header header = {
101                 .exception_status = JOB_NOT_STARTED,
102                 .first_incomplete_task = 0,
103                 .fault_pointer = 0,
104                 .job_descriptor_size = JOB_32_BIT,
105                 .job_type = JOB_TYPE_FRAGMENT,
106                 .job_barrier = 0, 
107                 .job_index = atom_count,
108                 .job_dependency_index_1 = 0,
109                 .job_dependency_index_2 = 0,
110                 .next_job = 0 
111         };
112
113         struct payload_fragment payload = {
114                 .min_tile_coord = MAKE_TILE_COORD(0, 0, 0),
115                 .max_tile_coord = MAKE_TILE_COORD(29, 45, 0),
116                 .fragment_fbd = make_mfbd(false)
117         };
118
119         memcpy(packet, &header, sizeof(header));
120         memcpy(packet + sizeof(header), &payload, sizeof(payload));
121         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
122
123         struct base_dependency depNoDep = {
124                 .atom_id = 0,
125                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
126         };
127
128         struct base_dependency depTiler = {
129                 .atom_id = atom_count /* last one */,
130                 .dependency_type = BASE_JD_DEP_TYPE_DATA
131         };
132
133         printf("Framebuffer: %LX\n", framebuffer);
134         uint64_t* resource = calloc(sizeof(u64), 1);
135         resource[0] = framebuffer | BASE_EXT_RES_ACCESS_EXCLUSIVE;
136
137         /* TODO: free resource */
138
139         struct base_jd_atom_v2 job = {
140                 .jc = (uint32_t) packet,
141                 .extres_list = resource,
142                 .nr_extres = 1,
143                 .core_req = BASE_JD_REQ_EXTERNAL_RESOURCES | BASE_JD_REQ_FS,
144                 .atom_number = ++atom_count,
145                 .prio = BASE_JD_PRIO_MEDIUM,
146                 .device_nr = 0
147         };
148
149         job.pre_dep[0] = depTiler;
150         job.pre_dep[1] = depNoDep;
151
152         submit_job(fd, job);
153
154         last_fragment = (uint32_t) packet;
155
156         //free_mfbd(mfbd);
157
158         return 0;
159 }
160
161 uint64_t import_shader(int fd, uint8_t *shader, size_t sz, bool fragment)
162 {
163         int pages = 1 + (sz >> PAGE_SHIFT);
164
165         uint64_t gpu = alloc_gpu_pages(fd, pages, BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_EX);
166         uint8_t *cpu = mmap_gpu(fd, gpu, pages);
167
168         memcpy(cpu, shader, sz);
169         sync_gpu(fd, cpu, gpu, sz);
170
171         /* TODO: munmap */
172
173         return gpu | (fragment ? 9 : 5); /* Unknown flag */
174 }
175
176 void* vertex_tiler_helper(int fd, bool tiler)
177 {
178         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_vertex_tiler32));
179
180         struct job_descriptor_header header = {
181                 .exception_status = JOB_NOT_STARTED,
182                 .first_incomplete_task = 0,
183                 .fault_pointer = 0,
184                 .job_descriptor_size = JOB_32_BIT,
185                 .job_type = tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
186                 .job_barrier = 0, 
187                 .job_index = atom_count,
188                 .job_dependency_index_1 = 0,
189                 .job_dependency_index_2 = 0,
190                 .next_job = 0 
191         };
192
193         /* TODO */
194         uint32_t mode_gooks = tiler ? 0x1403000C : 0x14000000;
195         uint32_t other_gook = tiler ? 0x00000003 : 0x00000000;
196
197         struct payload_vertex_tiler32 payload = {
198                 .block1 = {
199                         0x00000003, 0x28000000, mode_gooks, 0x00000000,
200                         0x00000000, other_gook, 0x00000000, 0x00000000,
201                         0x00000005, 0x00000000, 0x00000000
202                 },
203                 .zeroes = (uint32_t) galloc(64),
204                 .unknown1 = (uint32_t) galloc(16),
205                 .null1 = 0,
206                 .null2 = 0,
207                 .unknown2 = (uint32_t) galloc(32),
208                 .shader = (uint32_t) galloc(sizeof(struct shader_meta)),
209                 .vertices = (uint32_t) galloc(sizeof(struct vertex_buffer)),
210                 .unknown4 = (uint32_t) galloc(16),
211                 .unknown5 = (uint32_t) galloc(32),
212                 .unknown6 = (uint32_t) galloc(64),
213                 .nullForVertex = tiler ? (uint32_t) galloc(64) : 0,
214                 .null4 = 0,
215                 .fbd = (uint32_t) make_mfbd(true),
216                 .unknown7 = tiler ? 0 : ((uint32_t) galloc(64) | 1) /* TODO */
217         };
218
219         struct shader_meta *shader = (struct shader_meta*) payload.shader;
220
221         /* TODO: Integrate an assembler */
222 #include "../shader_hex.h"
223         shader->shader = import_shader(fd,
224                         (uint8_t*) (tiler ? fragment_shader : vertex_shader),
225                         tiler ? sizeof(fragment_shader) : sizeof(vertex_shader),
226                         tiler);
227
228         if(!tiler) {
229                 uint32_t ni[] = {
230                         0x43200000, 0x42F00000, 0x3F000000, 0x00000000,
231                         0x43200000, 0x42F00000, 0x3F000000, 0x00000000
232                 };
233
234                 memcpy((void*) payload.unknown2, ni, sizeof(ni));
235         }
236
237         if(tiler) {
238                 /* Lose precision... on purpose? */
239                 payload.unknown7 = (uint32_t) shader->shader;
240         }
241
242         payload.unknown7 = tiler ? 0xDEADBA00 : 0xDEADFA00;
243
244         /* TODO: Decode me! */
245
246         if(tiler) {
247                 shader->unknown1 = 0x0007000000000000;
248                 shader->unknown2 = 0x0000000000020602;
249         } else {
250                 shader->unknown1 = 0x0005000100000000;
251                 shader->unknown2 = 0x0000000000420002;
252         }
253
254         /* I have *no* idea */
255
256         uint32_t *p = (uint32_t*) payload.unknown4;
257         *p = 0x2DEA2200;
258
259         uint64_t pi[] = {
260                 0x0000000017E49000, 0x0000000017E49000, 
261                 0x0000000017E49000, 0x0000000017E49000, 
262                 0x00000000179A2200, 0x0000000017E49000, 
263                 0x0000000017E49000
264         };
265
266         memcpy((void*) payload.unknown6, pi, sizeof(pi));
267
268         if(tiler) {
269                 uint32_t ni[] = {
270                         0xFF800000, 0xFF800000,
271                         0x7F800000, 0x7F800000,
272                         0x00000000, 0x3F800000,
273                         0x00000000, 0x00EF013F,
274                         0x00000000, 0x0000001F,
275                         0x02020000, 0x00000001
276                 };
277
278                 memcpy((void*) payload.nullForVertex, ni, sizeof(ni));
279         }
280
281         /* TODO: Vertices should be parametric */
282         float vertices[] = {
283                 0.0, 0.0, 0.0,
284                 0.5, 1.0, 0.0,
285                 1.0, 0.0, 0.0
286         };
287
288         struct vertex_buffer *vb = (struct vertex_buffer*) payload.vertices;
289         vb->vertices = (uint64_t) (uint32_t) galloc(sizeof(vertices));
290         memcpy((void*) (uint32_t) vb->vertices, vertices, sizeof(vertices));
291         vb->vertex_size = sizeof(float) * 3;
292         vb->size = sizeof(vertices);
293
294         vb->vertices |= 1; /* TODO flags */
295         
296         /* Use some magic numbers from the traces */
297         uint64_t* unk1 = (uint64_t*) payload.unknown1;
298         unk1[0] = /*0x000000B296271001*/ 0x000000B296271001;
299         unk1[1] = /*0x000000B296273000*/ 0x000000B296273000;
300
301         uint32_t writeBuffer = (uint32_t) galloc(64);
302
303         uint64_t* unk5 = (uint64_t*) payload.unknown5;
304         unk5[0] = ((uint64_t) (tiler ? 0xDB : 0xA3) << 56) | writeBuffer | 1;
305         unk5[1] = 0x0000004000000010;
306
307         if(tiler) {
308                 uint32_t ni[] = {
309                         0x00000001, 0x00000000,
310                         0x00070000, 0x00020602,
311                         0x00000000, 0x00000000,
312                         0x00000000, 0x3712FFFF,
313                         0x44F0FFFF, 0x0007FF00,
314                         0x0007FF00, 0x00000000,
315                         0x00000000, 0x00000000,
316                         0x00000000, 0x00000200,
317                         0x00000000, 0xF0122122,
318                         0x00000000, 0x00000000,
319                         0x00000000, 0xF0122122,
320                         0x00000000, 0xFF800000,
321                         0xFF800000, 0x7F800000,
322                         0x7F800000, 0x00000000,
323                         0x3F800000, 0x00000000,
324                         0xEF013F00, 0x00000000,
325                         0x0000001F, 0x02020000,
326                         0x00000001, 0x00000000
327                 };
328
329                 memcpy(payload.block2, ni, sizeof(ni));
330         }
331
332         memcpy(packet, &header, sizeof(header));
333         memcpy(packet + sizeof(header), &payload, sizeof(payload));
334         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
335
336         return packet;
337 }
338
339 int job_chain_vertex_tiler(int fd) {
340         void *set = set_value_helper(fd);
341         void *vertex = vertex_tiler_helper(fd, false);
342         void *tiler = vertex_tiler_helper(fd, true);
343
344         ((struct job_descriptor_header*) set)->next_job = (uint32_t) vertex;
345         ((struct job_descriptor_header*) vertex)->next_job = (uint32_t) tiler;
346
347         struct base_dependency depNoDep = {
348                 .atom_id = 0,
349                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
350         };
351
352         struct base_jd_atom_v2 job = {
353                 .jc = (uint32_t) set,
354                 .extres_list = NULL,
355                 .nr_extres = 0,
356                 .core_req = BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | BASE_JD_REQ_COHERENT_GROUP,
357                 .atom_number = ++atom_count,
358                 .prio = BASE_JD_PRIO_MEDIUM,
359                 .device_nr = 0
360         };
361
362         job.pre_dep[0] = depNoDep;
363         job.pre_dep[1] = depNoDep;
364
365         submit_job(fd, job);
366
367         last_tiler = (uint32_t) tiler;
368
369         return 0;
370 }
371
372 void job_chain_replay(int fd)
373 {
374         struct base_jd_replay_payload *payload;
375
376         payload = (struct base_jd_replay_payload*) galloc(sizeof(*payload));
377
378         payload->tiler_jc_list = last_tiler;
379         payload->fragment_jc = last_fragment;
380         payload->tiler_heap_free = HEAP_FREE_ADDRESS;
381         payload->fragment_hierarchy_mask = 0;
382         payload->tiler_hierarchy_mask = 0;
383         payload->hierarchy_default_weight = 0x10000;
384         payload->tiler_core_req = BASE_JD_REQ_T | BASE_JD_REQ_COHERENT_GROUP;
385         payload->fragment_core_req = BASE_JD_REQ_FS;
386
387         struct base_dependency depNoDep = {
388                 .atom_id = 0,
389                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
390         };
391
392         struct base_dependency depFragment = {
393                 .atom_id = atom_count,
394                 .dependency_type = BASE_JD_DEP_TYPE_DATA
395         };
396
397         printf("Framebuffer: %LX\n", framebuffer);
398         uint64_t* resource = calloc(sizeof(u64), 1);
399         resource[0] = framebuffer | BASE_EXT_RES_ACCESS_EXCLUSIVE;
400
401         struct base_jd_atom_v2 job = {
402                 .jc = (uint32_t) payload,
403                 .extres_list = resource,
404                 .nr_extres = 1,
405                 .core_req = BASE_JD_REQ_EXTERNAL_RESOURCES | BASE_JD_REQ_SOFT_REPLAY,
406                 .atom_number = ++atom_count,
407                 .prio = BASE_JD_PRIO_LOW,
408                 .device_nr = 0
409         };
410
411         job.pre_dep[0] = depFragment;
412         job.pre_dep[1] = depNoDep;
413
414         submit_job(fd, job);
415 }
416
417 extern uint32_t cbma_bottom;
418 extern uint32_t cbma_top;
419
420 int main()
421 {
422         int fd = open_kernel_module();
423
424         init_cbma(fd);
425
426         //size_t fb_size = 29 * 16 * 45 * 16 * 4 * 2;
427
428         // framebuffer = (uint64_t) (uint32_t) galloc(fb_size);
429
430         /* Fake framebuffer to trap accesses */
431         framebuffer = 0xCAFE0000;
432         printf("Framebuffer: %LX\n", framebuffer);
433
434         job_chain_vertex_tiler(fd);
435         job_chain_fragment(fd);
436         job_chain_replay(fd);
437         sync_gpu(fd, (uint8_t*) cbma_top, cbma_top, cbma_bottom - cbma_top);
438         flush_job_queue(fd);
439
440         sleep(3);
441         printf("Writing\n");
442
443         /* Dump framebuffer to a file */
444         /*uint8_t *fb = (uint8_t*) (uint32_t) framebuffer;
445         FILE *fp = fopen("framebuffer.bin", "wb");
446         fwrite(fb, 1, fb_size, fp);
447         fclose(fp);*/
448
449         /* Hang to prevent the tracer from going bananas */
450
451         while(1);
452
453         return 0;
454 }