Unify FBD
[chai.git] / re.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <memory.h>
4 #include <sys/mman.h>
5 #include <stdbool.h>
6 #include <unistd.h>
7
8 #include "shim.h"
9 #include "jobs.h"
10 #include "memory.h"
11 #include "../oolong/chai-notes.h"
12
/* Address written to both mfbd->heap_free_address (make_mfbd) and the
 * replay payload's tiler_heap_free (job_chain_replay). */
#define HEAP_FREE_ADDRESS 0x1DABE0000

/* Number of atoms submitted so far. Each job_chain_* function pre-increments
 * it to obtain the next atom_number; job headers copy the current value into
 * job_index before that increment. */
int atom_count = 0;

/* Address attached (OR'd with BASE_EXT_RES_ACCESS_EXCLUSIVE) as the single
 * external resource of the fragment and replay atoms. Assigned in main(). */
uint64_t framebuffer;

/* Addresses of the most recently built fragment and tiler job packets,
 * recorded by job_chain_fragment() / job_chain_vertex_tiler() and read back
 * by job_chain_replay(). */
uint64_t last_fragment;
uint64_t last_tiler;
22 void *set_value_helper(int fd, uint32_t out) {
23         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_set_value));
24
25         struct job_descriptor_header header = {
26                 .exception_status = JOB_NOT_STARTED,
27                 .first_incomplete_task = 0,
28                 .fault_pointer = 0,
29                 .job_descriptor_size = 1, /* 64-bit */
30                 .job_type = JOB_TYPE_SET_VALUE,
31                 .job_barrier = /* 1 */ 0, /* set for first in chain? */
32                 .job_index = atom_count,
33                 .job_dependency_index_1 = 0,
34                 .job_dependency_index_2 = 0,
35                 .next_job = 0 
36         };
37
38         struct payload_set_value payload = {
39                 .out = out,
40                 .unknown = 0x03
41         };
42
43         memcpy(packet, &header, sizeof(header));
44         memcpy(packet + sizeof(header), &payload, sizeof(payload));
45
46         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
47         return packet;
48 }
49
50 uint64_t make_mfbd(bool tiler, uint32_t sv_magic)
51 {
52         struct tentative_mfbd *mfbd = galloc(sizeof(struct tentative_mfbd));
53         memset(mfbd, 0, sizeof(struct tentative_mfbd));
54
55         /* zeroes */
56         mfbd->block2[0] = sv_magic;
57         mfbd->block2[1] = (uint32_t) galloc(64);
58         mfbd->ugaT = (uint32_t) galloc(64);
59         mfbd->unknown_gpu_address = (uint32_t) galloc(64);
60
61         /* Unknown contents -- it's a mystery! */
62         mfbd->unknown2 = (uint32_t) galloc(64);
63         mfbd->unknown_gpu_addressN = (uint32_t) galloc(64);
64
65         /* Match traces. TODO decode */
66         mfbd->flags = 0xF0;
67         mfbd->heap_free_address = HEAP_FREE_ADDRESS;
68         mfbd->blah = 0x1F00000000;
69         mfbd->unknown3 = tiler ? 0 : 0xFFFFF8C0;
70         mfbd->unknown1 = 0x1600;
71
72         mfbd->block1[4] = 0x02D801C2;
73         mfbd->block1[6] = 0x02D801C2;
74         mfbd->block1[7] = tiler ? 0x04001080 : 0x01001080;
75         mfbd->block1[8] = tiler ? 0x000000FF : 0xC0210000;
76         mfbd->block1[9] = tiler ? 0x3F800000 : 0x00000000;
77
78         mfbd->block3[0] = 0x00000158;
79         mfbd->block3[1] = 0x00000420;
80         mfbd->block3[14] = 0x04000000;
81         mfbd->block3[15] = 0x880A8899;
82
83         return (uint64_t ) (uint32_t) mfbd | MFBD | (tiler ? FBD_VERTEX_TILER : FBD_FRAGMENT);
84 }
85
86 int job_chain_fragment(int fd, uint32_t sv_magic) {
87         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_fragment));
88
89         struct job_descriptor_header header = {
90                 .exception_status = JOB_NOT_STARTED,
91                 .first_incomplete_task = 0,
92                 .fault_pointer = 0,
93                 .job_descriptor_size = JOB_32_BIT,
94                 .job_type = JOB_TYPE_FRAGMENT,
95                 .job_barrier = 0, 
96                 .job_index = atom_count,
97                 .job_dependency_index_1 = 0,
98                 .job_dependency_index_2 = 0,
99                 .next_job = 0 
100         };
101
102         struct payload_fragment payload = {
103                 .min_tile_coord = MAKE_TILE_COORD(0, 0, 0),
104                 .max_tile_coord = MAKE_TILE_COORD(29, 45, 0),
105                 .fragment_fbd = make_mfbd(false, sv_magic)
106         };
107
108         memcpy(packet, &header, sizeof(header));
109         memcpy(packet + sizeof(header), &payload, sizeof(payload));
110         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
111
112         struct base_dependency depNoDep = {
113                 .atom_id = 0,
114                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
115         };
116
117         struct base_dependency depTiler = {
118                 .atom_id = atom_count /* last one */,
119                 .dependency_type = BASE_JD_DEP_TYPE_DATA
120         };
121
122         printf("Framebuffer: %LX\n", framebuffer);
123         uint64_t* resource = calloc(sizeof(u64), 1);
124         resource[0] = framebuffer | BASE_EXT_RES_ACCESS_EXCLUSIVE;
125
126         /* TODO: free resource */
127
128         struct base_jd_atom_v2 job = {
129                 .jc = (uint32_t) packet,
130                 .extres_list = resource,
131                 .nr_extres = 1,
132                 .core_req = BASE_JD_REQ_EXTERNAL_RESOURCES | BASE_JD_REQ_FS,
133                 .atom_number = ++atom_count,
134                 .prio = BASE_JD_PRIO_MEDIUM,
135                 .device_nr = 0
136         };
137
138         job.pre_dep[0] = depTiler;
139         job.pre_dep[1] = depNoDep;
140
141         submit_job(fd, job);
142
143         last_fragment = (uint32_t) packet;
144
145         return 0;
146 }
147
148 uint64_t import_shader(int fd, uint8_t *shader, size_t sz, bool fragment)
149 {
150         int pages = 1 + (sz >> PAGE_SHIFT);
151
152         uint64_t gpu = alloc_gpu_pages(fd, pages, BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_EX);
153         uint8_t *cpu = mmap_gpu(fd, gpu, pages);
154
155         memcpy(cpu, shader, sz);
156         sync_gpu(fd, cpu, gpu, sz);
157
158         /* TODO: munmap */
159
160         return gpu | (fragment ? 9 : 5); /* Unknown flag */
161 }
162
163 uint32_t upload_vertices()
164 {
165         /* TODO: Vertices should be parametric */
166         float vertices[] = {
167                 0.0, 0.0, 0.0,
168                 0.5, 1.0, 0.0,
169                 1.0, 0.0, 0.0
170         };
171
172         struct vertex_buffer *vb;
173         vb = (struct vertex_buffer*) galloc(sizeof(*vb));
174         
175         vb->vertices = (uint64_t) (uint32_t) galloc(sizeof(vertices));
176         memcpy((void*) (uint32_t) vb->vertices, vertices, sizeof(vertices));
177
178         vb->vertex_size = sizeof(float) * 3;
179         vb->size = sizeof(vertices);
180
181         vb->vertices |= 1; /* TODO flags */
182         
183         return (uint32_t) vb;
184 }
185
186 void* vertex_tiler_helper(int fd, bool tiler, uint32_t fbd, uint32_t vertex_buffer)
187 {
188         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_vertex_tiler32));
189
190         struct job_descriptor_header header = {
191                 .exception_status = JOB_NOT_STARTED,
192                 .first_incomplete_task = 0,
193                 .fault_pointer = 0,
194                 .job_descriptor_size = JOB_32_BIT,
195                 .job_type = tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
196                 .job_barrier = 0, 
197                 .job_index = atom_count,
198                 .job_dependency_index_1 = 0,
199                 .job_dependency_index_2 = 0,
200                 .next_job = 0 
201         };
202
203         /* TODO */
204         uint32_t mode = CHAI_TRIANGLE_FAN;
205         uint32_t mode_gooks = 0x14000000 | (tiler ? (0x030000 | mode) : 0);
206         uint32_t other_gook = tiler ? 0x00000003 : 0x00000000;
207
208         struct payload_vertex_tiler32 payload = {
209                 .block1 = {
210                         0x00000003, 0x28000000, mode_gooks, 0x00000000,
211                         0x00000000, other_gook, 0x00000000, 0x00000000,
212                         0x00000005, 0x00000000, 0x00000000
213                 },
214                 .zeroes = (uint32_t) galloc(64),
215                 .unknown1 = (uint32_t) galloc(16),
216                 .null1 = 0,
217                 .null2 = 0,
218                 .unknown2 = (uint32_t) galloc(32),
219                 .shader = (uint32_t) galloc(sizeof(struct shader_meta)),
220                 .vertices = vertex_buffer,
221                 .unknown4 = (uint32_t) galloc(16),
222                 .unknown5 = (uint32_t) galloc(32),
223                 .unknown6 = (uint32_t) galloc(64),
224                 .nullForVertex = tiler ? (uint32_t) galloc(64) : 0,
225                 .null4 = 0,
226                 .fbd = fbd,
227                 .unknown7 = tiler ? 0 : ((uint32_t) galloc(64) | 1) /* TODO */
228         };
229
230         struct shader_meta *shader = (struct shader_meta*) payload.shader;
231
232         /* TODO: Integrate an assembler */
233 #include "../shader_hex.h"
234         shader->shader = import_shader(fd,
235                         (uint8_t*) (tiler ? fragment_shader : vertex_shader),
236                         tiler ? sizeof(fragment_shader) : sizeof(vertex_shader),
237                         tiler);
238
239         if(!tiler) {
240                 uint32_t ni[] = {
241                         0x43200000, 0x42F00000, 0x3F000000, 0x00000000,
242                         0x43200000, 0x42F00000, 0x3F000000, 0x00000000
243                 };
244
245                 memcpy((void*) payload.unknown2, ni, sizeof(ni));
246         }
247
248         if(tiler) {
249                 /* Lose precision... on purpose? */
250                 payload.unknown7 = (uint32_t) shader->shader;
251         }
252
253         payload.unknown7 = tiler ? 0xDEADBA00 : 0xDEADFA00;
254
255         /* TODO: Decode me! */
256
257         if(tiler) {
258                 shader->unknown1 = 0x0007000000000000;
259                 shader->unknown2 = 0x0000000000020602;
260         } else {
261                 shader->unknown1 = 0x0005000100000000;
262                 shader->unknown2 = 0x0000000000420002;
263         }
264
265         /* I have *no* idea */
266
267         uint32_t *p = (uint32_t*) payload.unknown4;
268         *p = 0x2DEA2200;
269
270         uint64_t pi[] = {
271                 0x0000000017E49000, 0x0000000017E49000, 
272                 0x0000000017E49000, 0x0000000017E49000, 
273                 0x00000000179A2200, 0x0000000017E49000, 
274                 0x0000000017E49000
275         };
276
277         memcpy((void*) payload.unknown6, pi, sizeof(pi));
278
279         if(tiler) {
280                 uint32_t ni[] = {
281                         0xFF800000, 0xFF800000,
282                         0x7F800000, 0x7F800000,
283                         0x00000000, 0x3F800000,
284                         0x00000000, 0x00EF013F,
285                         0x00000000, 0x0000001F,
286                         0x02020000, 0x00000001
287                 };
288
289                 memcpy((void*) payload.nullForVertex, ni, sizeof(ni));
290         }
291
292         /* Use some magic numbers from the traces */
293         uint64_t* unk1 = (uint64_t*) payload.unknown1;
294         unk1[0] = 0x000000B296271001;
295         unk1[1] = 0x000000B296273000;
296
297         uint32_t writeBuffer = (uint32_t) galloc(64);
298
299         uint64_t* unk5 = (uint64_t*) payload.unknown5;
300         unk5[0] = ((uint64_t) (tiler ? 0xDB : 0xA3) << 56) | writeBuffer | 1;
301         unk5[1] = 0x0000004000000010;
302
303         if(tiler) {
304                 uint32_t ni[] = {
305                         0x00000001, 0x00000000, 0x00070000, 0x00020602,
306                         0x00000000, 0x00000000, 0x00000000, 0x3712FFFF,
307                         0x44F0FFFF, 0x0007FF00, 0x0007FF00, 0x00000000,
308                         0x00000000, 0x00000000, 0x00000000, 0x00000200,
309                         0x00000000, 0xF0122122, 0x00000000, 0x00000000,
310                         0x00000000, 0xF0122122, 0x00000000, 0xFF800000,
311                         0xFF800000, 0x7F800000, 0x7F800000, 0x00000000,
312                         0x3F800000, 0x00000000, 0xEF013F00, 0x00000000,
313                         0x0000001F, 0x02020000, 0x00000001, 0x00000000
314                 };
315
316                 memcpy(payload.block2, ni, sizeof(ni));
317         } else {
318                 uint32_t ni[] = {
319                         0x00000000, 0x0000000C, 0x00000030, 0x2DEA2200,
320                         0x00000000, 0x00000000, 0x00000000, /* Address to 1 */ 0xCAFEDA01,
321                         0x57000000, 0x00000010, 0x00000040, 0x17E49000,
322                         0x00000000, 0x17E49000, 0x00000000, 0x17E49000,
323                         0x00000000, 0x17E49000, 0x00000000, 0x179A2200,
324                         0x00000000, 0x17E49000, 0x00000000, 0x17E49000,
325                         0x00000000, 0x00000000, 0x00000000, 0x43200000,
326                         0x42F00000, 0x3F000000, 0x00000000, 0x43200000,
327                         0x42F00000, 0x3F000000, 0x00000000, 0x00000000
328                 };
329
330                 memcpy(payload.block2, ni, sizeof(ni));
331         }
332
333         memcpy(packet, &header, sizeof(header));
334         memcpy(packet + sizeof(header), &payload, sizeof(payload));
335         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
336
337         return packet;
338 }
339
340 int job_chain_vertex_tiler(int fd, uint32_t sv_magic) {
341         uint32_t vertex_buffer = upload_vertices();
342         uint32_t vertex_fbd = (uint32_t) make_mfbd(true, sv_magic);
343
344         void *set = set_value_helper(fd, sv_magic);
345         void *vertex = vertex_tiler_helper(fd, false, vertex_fbd, vertex_buffer);
346         void *tiler = vertex_tiler_helper(fd, true, vertex_fbd, vertex_buffer);
347
348         ((struct job_descriptor_header*) set)->next_job = (uint32_t) vertex;
349         ((struct job_descriptor_header*) vertex)->next_job = (uint32_t) tiler;
350
351         struct base_dependency depNoDep = {
352                 .atom_id = 0,
353                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
354         };
355
356         struct base_jd_atom_v2 job = {
357                 .jc = (uint32_t) set,
358                 .extres_list = NULL,
359                 .nr_extres = 0,
360                 .core_req = BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | BASE_JD_REQ_COHERENT_GROUP,
361                 .atom_number = ++atom_count,
362                 .prio = BASE_JD_PRIO_MEDIUM,
363                 .device_nr = 0
364         };
365
366         job.pre_dep[0] = depNoDep;
367         job.pre_dep[1] = depNoDep;
368
369         submit_job(fd, job);
370
371         last_tiler = (uint32_t) tiler;
372
373         return 0;
374 }
375
376 void job_chain_replay(int fd)
377 {
378         struct base_jd_replay_payload *payload;
379
380         payload = (struct base_jd_replay_payload*) galloc(sizeof(*payload));
381
382         payload->tiler_jc_list = last_tiler;
383         payload->fragment_jc = last_fragment;
384         payload->tiler_heap_free = HEAP_FREE_ADDRESS;
385         payload->fragment_hierarchy_mask = 0;
386         payload->tiler_hierarchy_mask = 0;
387         payload->hierarchy_default_weight = 0x10000;
388         payload->tiler_core_req = BASE_JD_REQ_T | BASE_JD_REQ_COHERENT_GROUP;
389         payload->fragment_core_req = BASE_JD_REQ_FS;
390
391         struct base_dependency depNoDep = {
392                 .atom_id = 0,
393                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
394         };
395
396         struct base_dependency depFragment = {
397                 .atom_id = atom_count,
398                 .dependency_type = BASE_JD_DEP_TYPE_DATA
399         };
400
401         uint64_t* resource = malloc(sizeof(u64) * 1);
402         resource[0] = framebuffer | BASE_EXT_RES_ACCESS_EXCLUSIVE;
403
404         struct base_jd_atom_v2 job = {
405                 .jc = (uint32_t) payload,
406                 .extres_list = resource,
407                 .nr_extres = 1,
408                 .core_req = BASE_JD_REQ_EXTERNAL_RESOURCES | BASE_JD_REQ_SOFT_REPLAY,
409                 .atom_number = ++atom_count,
410                 .prio = BASE_JD_PRIO_LOW,
411                 .device_nr = 0
412         };
413
414         job.pre_dep[0] = depFragment;
415         job.pre_dep[1] = depNoDep;
416
417         submit_job(fd, job);
418 }
419
420 extern uint32_t cbma_bottom;
421 extern uint32_t cbma_top;
422
423 int main()
424 {
425         int fd = open_kernel_module();
426
427         init_cbma(fd);
428
429         //size_t fb_size = 29 * 16 * 45 * 16 * 4 * 2;
430
431         // framebuffer = (uint64_t) (uint32_t) galloc(fb_size);
432
433         /* Fake framebuffer to trap accesses */
434         framebuffer = 0x1CAFE0000;
435         printf("Framebuffer: %LX\n", framebuffer);
436
437         uint32_t sv_magic = (uint32_t) galloc(16);
438         job_chain_vertex_tiler(fd, sv_magic);
439         job_chain_fragment(fd, sv_magic);
440         job_chain_replay(fd);
441         sync_gpu(fd, (uint8_t*) cbma_top, cbma_top, cbma_bottom - cbma_top);
442         flush_job_queue(fd);
443
444         sleep(3);
445
446         /* Dump framebuffer to a file */
447         /*uint8_t *fb = (uint8_t*) (uint32_t) framebuffer;
448         FILE *fp = fopen("framebuffer.bin", "wb");
449         fwrite(fb, 1, fb_size, fp);
450         fclose(fp);*/
451
452         /* Hang to prevent the tracer from going bananas */
453
454         while(1);
455
456         return 0;
457 }