Set dependencies
[chai.git] / re.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <memory.h>
4 #include <sys/mman.h>
5 #include <stdbool.h>
6 #include <unistd.h>
7
8 #include "shim.h"
9 #include "jobs.h"
10 #include "memory.h"
11 #include "../oolong/chai-notes.h"
12
/* Running atom counter: pre-incremented to number each submitted atom and
 * copied into the job_index of every descriptor header built in this file. */
int atom_count = 0;

/* Address of the render target; assigned in main() from a galloc() buffer. */
uint64_t framebuffer = 0;

/* Addresses of the most recently built fragment and tiler job descriptors,
 * recorded by the job_chain_* functions and read by job_chain_replay(). */
uint64_t last_fragment = 0;
uint64_t last_tiler = 0;
19
20 void *set_value_helper(int fd) {
21         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_set_value));
22         void* magic = galloc(16);
23
24         struct job_descriptor_header header = {
25                 .exception_status = JOB_NOT_STARTED,
26                 .first_incomplete_task = 0,
27                 .fault_pointer = 0,
28                 .job_descriptor_size = 1, /* 64-bit */
29                 .job_type = JOB_TYPE_SET_VALUE,
30                 .job_barrier = /* 1 */ 0, /* set for first in chain? */
31                 .job_index = atom_count,
32                 .job_dependency_index_1 = 0,
33                 .job_dependency_index_2 = 0,
34                 .next_job = 0 
35         };
36
37         struct payload_set_value payload = {
38                 .out = (uint32_t) magic,
39                 .unknown = 0x03
40         };
41
42         memcpy(packet, &header, sizeof(header));
43         memcpy(packet + sizeof(header), &payload, sizeof(payload));
44
45         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
46         return packet;
47 }
48
49 int job_chain_set_value(int fd) {
50         void *packet = set_value_helper(fd);
51
52         struct base_dependency depNoDep = {
53                 .atom_id = 0,
54                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
55         };
56
57         struct base_jd_atom_v2 job = {
58                 .jc = (uint32_t) packet, /* 0x80 aligned */
59                 .extres_list = NULL,
60                 .nr_extres = 0,
61                 .core_req = BASE_JD_REQ_V,
62                 .atom_number = ++atom_count,
63                 .prio = BASE_JD_PRIO_LOW,
64                 .device_nr = 0
65         };
66
67         job.pre_dep[0] = depNoDep;
68         job.pre_dep[1] = depNoDep;
69
70         submit_job(fd, job);
71
72         return 0;
73 }
74
75 uint64_t make_mfbd(bool tiler)
76 {
77         struct tentative_mfbd *mfbd = galloc(sizeof(struct tentative_mfbd));
78         memset(mfbd, 0, sizeof(struct tentative_mfbd));
79
80         /* zeroes */
81         mfbd->block2[0] = (uint32_t) galloc(64);
82         mfbd->block2[1] = (uint32_t) galloc(64);
83         mfbd->ugaT = (uint32_t) galloc(64);
84         mfbd->unknown_gpu_address = (uint32_t) galloc(64);
85
86         /* Unknown contents -- it's a mystery! */
87         mfbd->unknown2 = (uint32_t) galloc(64);
88         mfbd->unknown_gpu_addressN = (uint32_t) galloc(64);
89
90         /* Match traces. TODO decode */
91         mfbd->flags = 0xF0;
92         mfbd->heap_free_address = 0x102000000;
93         mfbd->blah = 0x1F00000000;
94         mfbd->unknown3 = tiler ? 0 : 0xFFFFF8C0;
95         mfbd->unknown1 = 0x200;
96
97         mfbd->block1[4] = 0x02D801C2;
98         mfbd->block1[6] = 0x02D801C2;
99         mfbd->block1[7] = tiler ? 0x04001080 : 0x01001080;
100         mfbd->block1[8] = tiler ? 0x000000FF : 0xC0210000;
101         mfbd->block1[9] = tiler ? 0x3F800000 : 0x00000000;
102
103         mfbd->block3[0] = 0x00000158;
104         mfbd->block3[1] = 0x00000420;
105         mfbd->block3[14] = 0x04000000;
106         mfbd->block3[15] = 0x880A8899;
107
108         return (uint64_t ) (uint32_t) mfbd | MFBD | (tiler ? FBD_VERTEX_TILER : FBD_FRAGMENT);
109 }
110
111 void free_mfbd(struct tentative_mfbd *mfbd) {
112         gfree((void*) (uint32_t) mfbd->block2[0]);
113         gfree((void*) (uint32_t) mfbd->block2[1]);
114         gfree((void*) (uint32_t) mfbd->ugaT);
115         gfree((void*) (uint32_t) mfbd->unknown_gpu_address);
116         gfree((void*) (uint32_t) mfbd->unknown_gpu_addressN);
117         gfree((void*) (uint32_t) mfbd->unknown2);
118         gfree((void*) mfbd);
119 }
120
121 int job_chain_fragment(int fd) {
122         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_fragment));
123
124         struct job_descriptor_header header = {
125                 .exception_status = JOB_NOT_STARTED,
126                 .first_incomplete_task = 0,
127                 .fault_pointer = 0,
128                 .job_descriptor_size = JOB_32_BIT,
129                 .job_type = JOB_TYPE_FRAGMENT,
130                 .job_barrier = 0, 
131                 .job_index = atom_count,
132                 .job_dependency_index_1 = 0,
133                 .job_dependency_index_2 = 0,
134                 .next_job = 0 
135         };
136
137         struct payload_fragment payload = {
138                 .min_tile_coord = MAKE_TILE_COORD(0, 0, 0),
139                 .max_tile_coord = MAKE_TILE_COORD(29, 45, 0),
140                 .fragment_fbd = make_mfbd(false)
141         };
142
143         memcpy(packet, &header, sizeof(header));
144         memcpy(packet + sizeof(header), &payload, sizeof(payload));
145         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
146
147         struct base_dependency depNoDep = {
148                 .atom_id = 0,
149                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
150         };
151
152         struct base_dependency depTiler = {
153                 .atom_id = atom_count /* last one */,
154                 .dependency_type = BASE_JD_DEP_TYPE_DATA
155         };
156
157         printf("Framebuffer: %LX\n", framebuffer);
158         uint64_t* resource = calloc(sizeof(u64), 1);
159         resource[0] = framebuffer | BASE_EXT_RES_ACCESS_EXCLUSIVE;
160
161         /* TODO: free resource */
162
163         struct base_jd_atom_v2 job = {
164                 .jc = (uint32_t) packet,
165                 .extres_list = resource,
166                 .nr_extres = 1,
167                 .core_req = BASE_JD_REQ_EXTERNAL_RESOURCES | BASE_JD_REQ_FS,
168                 .atom_number = ++atom_count,
169                 .prio = BASE_JD_PRIO_MEDIUM,
170                 .device_nr = 0
171         };
172
173         job.pre_dep[0] = depTiler;
174         job.pre_dep[1] = depNoDep;
175
176         submit_job(fd, job);
177
178         last_fragment = (uint32_t) packet;
179
180         //free_mfbd(mfbd);
181
182         return 0;
183 }
184
185 uint64_t import_shader(int fd, uint8_t *shader, size_t sz, bool fragment)
186 {
187         int pages = 1 + (sz >> PAGE_SHIFT);
188
189         uint64_t gpu = alloc_gpu_pages(fd, pages, BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_EX);
190         uint8_t *cpu = mmap_gpu(fd, gpu, pages);
191
192         memcpy(cpu, shader, sz);
193         sync_gpu(fd, cpu, gpu, sz);
194
195         /* TODO: munmap */
196
197         return gpu | (fragment ? 9 : 5); /* Unknown flag */
198 }
199
200 void* vertex_tiler_helper(int fd, bool tiler)
201 {
202         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_vertex_tiler32));
203
204         struct job_descriptor_header header = {
205                 .exception_status = JOB_NOT_STARTED,
206                 .first_incomplete_task = 0,
207                 .fault_pointer = 0,
208                 .job_descriptor_size = JOB_32_BIT,
209                 .job_type = tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
210                 .job_barrier = 0, 
211                 .job_index = atom_count,
212                 .job_dependency_index_1 = 0,
213                 .job_dependency_index_2 = 0,
214                 .next_job = 0 
215         };
216
217         /* TODO */
218         uint32_t mode_gooks = tiler ? 0x1403000C : 0x14000000;
219         uint32_t other_gook = tiler ? 0x00000003 : 0x00000000;
220
221         struct payload_vertex_tiler32 payload = {
222                 .block1 = {
223                         0x00000003, 0x28000000, mode_gooks, 0x00000000,
224                         0x00000000, other_gook, 0x00000000, 0x00000000,
225                         0x00000005, 0x00000000, 0x00000000
226                 },
227                 .zeroes = (uint32_t) galloc(64),
228                 .unknown1 = (uint32_t) galloc(16),
229                 .null1 = 0,
230                 .null2 = 0,
231                 .unknown2 = (uint32_t) galloc(32),
232                 .shader = (uint32_t) galloc(sizeof(struct shader_meta)),
233                 .vertices = (uint32_t) galloc(sizeof(struct vertex_buffer)),
234                 .unknown4 = (uint32_t) galloc(16),
235                 .unknown5 = (uint32_t) galloc(32),
236                 .unknown6 = (uint32_t) galloc(64),
237                 .nullForVertex = tiler ? (uint32_t) galloc(64) : 0,
238                 .null4 = 0,
239                 .fbd = (uint32_t) make_mfbd(true),
240                 .unknown7 = tiler ? 0 : (uint32_t) galloc(64)
241         };
242
243         struct shader_meta *shader = (struct shader_meta*) payload.shader;
244
245         /* TODO: Integrate an assembler */
246 #include "../shader_hex.h"
247         shader->shader = import_shader(fd,
248                         (uint8_t*) (tiler ? fragment_shader : vertex_shader),
249                         tiler ? sizeof(fragment_shader) : sizeof(vertex_shader),
250                         tiler);
251
252         if(tiler) {
253                 /* Lose precision... on purpose? */
254                 payload.unknown7 = (uint32_t) shader->shader;
255         }
256
257         /* TODO: Decode me! */
258
259         if(tiler) {
260                 shader->unknown1 = 0x0007000000000000;
261                 shader->unknown2 = 0x0000000000020602;
262         } else {
263                 shader->unknown1 = 0x0005000100000000;
264                 shader->unknown2 = 0x0000000000420002;
265         }
266
267         /* I have *no* idea */
268
269         if(tiler) {
270                 uint32_t *p = (uint32_t*) payload.unknown4;
271                 p[0] = 0x2DEA2200;
272
273                 uint64_t pi[] = {
274                         0x0000000017E49000, 0x0000000017E49000, 
275                         0x0000000017E49000, 0x0000000017E49000, 
276                         0x00000000179A2200, 0x0000000017E49000, 
277                         0x0000000017E49000
278                 };
279
280                 memcpy((void*) payload.unknown6, pi, sizeof(pi));
281
282                 uint32_t ni[] = {
283                         0xFF800000, 0xFF800000,
284                         0x7F800000, 0x7F800000,
285                         0x00000000, 0x3F800000,
286                         0x00000000, 0x00EF013F,
287                         0x00000000, 0x0000001F,
288                         0x02020000, 0x00000001
289                 };
290
291                 memcpy((void*) payload.nullForVertex, ni, sizeof(ni));
292         }
293
294         /* TODO: Vertices should be parametric */
295         float vertices[] = {
296                 0.0, 0.0, 0.0,
297                 0.5, 1.0, 0.0,
298                 1.0, 0.0, 0.0
299         };
300
301         struct vertex_buffer *vb = (struct vertex_buffer*) payload.vertices;
302         vb->vertices = (uint64_t) (uint32_t) galloc(sizeof(vertices));
303         memcpy((void*) (uint32_t) vb->vertices, vertices, sizeof(vertices));
304         vb->vertex_size = sizeof(float) * 3;
305         vb->size = sizeof(vertices);
306
307         vb->vertices |= 1; /* TODO flags */
308         
309         /* Use some magic numbers from the traces */
310         uint64_t* unk1 = (uint64_t*) payload.unknown1;
311         unk1[0] = 0x000000B296271001;
312         unk1[1] = 0x000000B296273000;
313
314         uint32_t writeBuffer = (uint32_t) galloc(64);
315
316         uint64_t* unk5 = (uint64_t*) payload.unknown5;
317         unk5[0] = ((uint64_t) (tiler ? 0xDB : 0xA3) << 56) | writeBuffer | 1;
318         unk5[1] = 0x0000004000000010;
319
320         if(tiler) {
321                 uint32_t ni[] = {
322                         0x00000001, 0x00000000,
323                         0x00070000, 0x00020602,
324                         0x00000000, 0x00000000,
325                         0x00000000, 0x3712FFFF,
326                         0x44F0FFFF, 0x0007FF00,
327                         0x0007FF00, 0x00000000,
328                         0x00000000, 0x00000000,
329                         0x00000000, 0x00000200,
330                         0x00000000, 0xF0122122,
331                         0x00000000, 0x00000000,
332                         0x00000000, 0xF0122122,
333                         0x00000000, 0xFF800000,
334                         0xFF800000, 0x7F800000,
335                         0x7F800000, 0x00000000,
336                         0x3F800000, 0x00000000,
337                         0xEF013F00, 0x00000000,
338                         0x0000001F, 0x02020000,
339                         0x00000001, 0x00000000
340                 };
341
342                 memcpy(payload.block2, ni, sizeof(ni));
343         }
344
345         memcpy(packet, &header, sizeof(header));
346         memcpy(packet + sizeof(header), &payload, sizeof(payload));
347         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
348
349         return packet;
350 }
351
352 int job_chain_vertex_tiler(int fd) {
353         void *set = set_value_helper(fd);
354         void *vertex = vertex_tiler_helper(fd, false);
355         void *tiler = vertex_tiler_helper(fd, true);
356
357         ((struct job_descriptor_header*) set)->next_job = (uint32_t) vertex;
358         ((struct job_descriptor_header*) vertex)->next_job = (uint32_t) tiler;
359
360         struct base_dependency depNoDep = {
361                 .atom_id = 0,
362                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
363         };
364
365         struct base_jd_atom_v2 job = {
366                 .jc = (uint32_t) set,
367                 .extres_list = NULL,
368                 .nr_extres = 0,
369                 .core_req = BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | BASE_JD_REQ_COHERENT_GROUP,
370                 .atom_number = ++atom_count,
371                 .prio = BASE_JD_PRIO_MEDIUM,
372                 .device_nr = 0
373         };
374
375         job.pre_dep[0] = depNoDep;
376         job.pre_dep[1] = depNoDep;
377
378         submit_job(fd, job);
379
380         last_tiler = (uint32_t) tiler;
381
382         return 0;
383 }
384
385 void job_chain_replay(int fd)
386 {
387         struct base_jd_replay_payload *payload;
388
389         payload = (struct base_jd_replay_payload*) galloc(sizeof(*payload));
390
391         payload->tiler_jc_list = last_tiler;
392         payload->fragment_jc = last_fragment;
393         payload->tiler_heap_free = 0x102000000;
394         payload->fragment_hierarchy_mask = 0;
395         payload->tiler_hierarchy_mask = 0;
396         payload->hierarchy_default_weight = 0x10000;
397         payload->tiler_core_req = BASE_JD_REQ_T | BASE_JD_REQ_COHERENT_GROUP;
398         payload->fragment_core_req = BASE_JD_REQ_FS;
399
400         struct base_dependency depNoDep = {
401                 .atom_id = 0,
402                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
403         };
404
405         struct base_dependency depFragment = {
406                 .atom_id = atom_count,
407                 .dependency_type = BASE_JD_DEP_TYPE_DATA
408         };
409
410         printf("Framebuffer: %LX\n", framebuffer);
411         uint64_t* resource = calloc(sizeof(u64), 1);
412         resource[0] = framebuffer | BASE_EXT_RES_ACCESS_EXCLUSIVE;
413
414         struct base_jd_atom_v2 job = {
415                 .jc = (uint32_t) payload,
416                 .extres_list = resource,
417                 .nr_extres = 1,
418                 .core_req = BASE_JD_REQ_EXTERNAL_RESOURCES | BASE_JD_REQ_SOFT_REPLAY,
419                 .atom_number = ++atom_count,
420                 .prio = BASE_JD_PRIO_LOW,
421                 .device_nr = 0
422         };
423
424         job.pre_dep[0] = depFragment;
425         job.pre_dep[1] = depNoDep;
426
427         submit_job(fd, job);
428 }
429
430 int main()
431 {
432         int fd = open_kernel_module();
433
434         init_cbma(fd);
435
436         size_t fb_size = 29 * 16 * 45 * 16 * 4 * 2;
437
438         framebuffer = (uint64_t) (uint32_t) galloc(fb_size);
439         printf("Framebuffer: %LX\n", framebuffer);
440         job_chain_vertex_tiler(fd);
441         job_chain_fragment(fd);
442         job_chain_replay(fd);
443         flush_job_queue(fd);
444
445         sleep(3);
446         printf("Writing\n");
447
448         /* Dump framebuffer to a file */
449         uint8_t *fb = (uint8_t*) (uint32_t) framebuffer;
450         FILE *fp = fopen("framebuffer.bin", "wb");
451         fwrite(fb, 1, fb_size, fp);
452         fclose(fp);
453
454         /* Hang to prevent the tracer from going bananas */
455
456         while(1);
457
458         return 0;
459 }