Adjust parameters to match
[chai.git] / re.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <memory.h>
4 #include <sys/mman.h>
5 #include <stdbool.h>
6 #include <unistd.h>
7
8 #include "shim.h"
9 #include "jobs.h"
10 #include "memory.h"
11 #include "../oolong/chai-notes.h"
12
/* Running atom counter: every job_chain_* function in this file
 * pre-increments it to obtain the atom_number of the atom it submits, and
 * the descriptor helpers copy its current value into job_index. */
13 int atom_count = 0;
14
/* Address of the render target allocated in main(), stored widened to 64
 * bits; OR-ed with BASE_EXT_RES_ACCESS_EXCLUSIVE to form an external
 * resource entry, and dumped to framebuffer.bin by main(). */
15 uint64_t framebuffer;
16
17 void *set_value_helper(int fd) {
18         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_set_value));
19         void* magic = galloc(16);
20
21         struct job_descriptor_header header = {
22                 .exception_status = JOB_NOT_STARTED,
23                 .first_incomplete_task = 0,
24                 .fault_pointer = 0,
25                 .job_descriptor_size = 1, /* 64-bit */
26                 .job_type = JOB_TYPE_SET_VALUE,
27                 .job_barrier = /* 1 */ 0, /* set for first in chain? */
28                 .job_index = atom_count,
29                 .job_dependency_index_1 = 0,
30                 .job_dependency_index_2 = 0,
31                 .next_job = 0 
32         };
33
34         struct payload_set_value payload = {
35                 .out = (uint32_t) magic,
36                 .unknown = 0x03
37         };
38
39         memcpy(packet, &header, sizeof(header));
40         memcpy(packet + sizeof(header), &payload, sizeof(payload));
41
42         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
43         return packet;
44 }
45
46 int job_chain_set_value(int fd) {
47         void *packet = set_value_helper(fd);
48
49         struct base_dependency depNoDep = {
50                 .atom_id = 0,
51                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
52         };
53
54         struct base_jd_atom_v2 job = {
55                 .jc = (uint32_t) packet, /* 0x80 aligned */
56                 .extres_list = NULL,
57                 .nr_extres = 0,
58                 .core_req = BASE_JD_REQ_V,
59                 .atom_number = ++atom_count,
60                 .prio = BASE_JD_PRIO_LOW,
61                 .device_nr = 0
62         };
63
64         job.pre_dep[0] = depNoDep;
65         job.pre_dep[1] = depNoDep;
66
67         submit_job(fd, job);
68
69         return 0;
70 }
71
72 uint64_t make_mfbd(bool tiler)
73 {
74         struct tentative_mfbd *mfbd = galloc(sizeof(struct tentative_mfbd));
75         memset(mfbd, 0, sizeof(struct tentative_mfbd));
76
77         /* zeroes */
78         mfbd->block2[0] = (uint32_t) galloc(64);
79         mfbd->block2[1] = (uint32_t) galloc(64);
80         mfbd->ugaT = (uint32_t) galloc(64);
81         mfbd->unknown_gpu_address = (uint32_t) galloc(64);
82
83         /* Unknown contents -- it's a mystery! */
84         mfbd->unknown2 = (uint32_t) galloc(64);
85         mfbd->unknown_gpu_addressN = (uint32_t) galloc(64);
86
87         /* Match traces. TODO decode */
88         mfbd->flags = 0xF0;
89         mfbd->heap_free_address = 0x102000000;
90         mfbd->blah = 0x1F00000000;
91         mfbd->unknown3 = tiler ? 0 : 0xFFFFF8C0;
92         mfbd->unknown1 = 0x200;
93
94         mfbd->block1[4] = 0x02D801C2;
95         mfbd->block1[6] = 0x02D801C2;
96         mfbd->block1[7] = tiler ? 0x04001080 : 0x01001080;
97         mfbd->block1[8] = tiler ? 0x000000FF : 0xC0210000;
98         mfbd->block1[9] = tiler ? 0x3F800000 : 0x00000000;
99
100         mfbd->block3[0] = 0x00000158;
101         mfbd->block3[1] = 0x00000420;
102         mfbd->block3[14] = 0x04000000;
103         mfbd->block3[15] = 0x880A8899;
104
105         return (uint64_t ) (uint32_t) mfbd | MFBD | (tiler ? FBD_VERTEX_TILER : FBD_FRAGMENT);
106 }
107
108 void free_mfbd(struct tentative_mfbd *mfbd) {
109         gfree((void*) (uint32_t) mfbd->block2[0]);
110         gfree((void*) (uint32_t) mfbd->block2[1]);
111         gfree((void*) (uint32_t) mfbd->ugaT);
112         gfree((void*) (uint32_t) mfbd->unknown_gpu_address);
113         gfree((void*) (uint32_t) mfbd->unknown_gpu_addressN);
114         gfree((void*) (uint32_t) mfbd->unknown2);
115         gfree((void*) mfbd);
116 }
117
118 int job_chain_fragment(int fd) {
119         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_fragment));
120
121         struct job_descriptor_header header = {
122                 .exception_status = JOB_NOT_STARTED,
123                 .first_incomplete_task = 0,
124                 .fault_pointer = 0,
125                 .job_descriptor_size = JOB_32_BIT,
126                 .job_type = JOB_TYPE_FRAGMENT,
127                 .job_barrier = 0, 
128                 .job_index = atom_count,
129                 .job_dependency_index_1 = 0,
130                 .job_dependency_index_2 = 0,
131                 .next_job = 0 
132         };
133
134         struct payload_fragment payload = {
135                 .min_tile_coord = MAKE_TILE_COORD(0, 0, 0),
136                 .max_tile_coord = MAKE_TILE_COORD(29, 45, 0),
137                 .fragment_fbd = make_mfbd(false)
138         };
139
140         memcpy(packet, &header, sizeof(header));
141         memcpy(packet + sizeof(header), &payload, sizeof(payload));
142         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
143
144         struct base_dependency depNoDep = {
145                 .atom_id = 0,
146                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
147         };
148
149         struct base_dependency depTiler = {
150                 .atom_id = atom_count /* last one */,
151                 .dependency_type = BASE_JD_DEP_TYPE_DATA
152         };
153
154         uint64_t resource = framebuffer | BASE_EXT_RES_ACCESS_EXCLUSIVE;
155
156         struct base_jd_atom_v2 job = {
157                 .jc = (uint32_t) packet,
158                 .extres_list = &resource,
159                 .nr_extres = 1,
160                 .core_req = BASE_JD_REQ_EXTERNAL_RESOURCES | BASE_JD_REQ_FS,
161                 .atom_number = ++atom_count,
162                 .prio = BASE_JD_PRIO_MEDIUM,
163                 .device_nr = 0
164         };
165
166         job.pre_dep[0] = depTiler;
167         job.pre_dep[1] = depNoDep;
168
169         submit_job(fd, job);
170
171         gfree(packet);
172
173         //free_mfbd(mfbd);
174
175         return 0;
176 }
177
178 uint64_t import_shader(int fd, uint8_t *shader, size_t sz, bool fragment)
179 {
180         int pages = 1 + (sz >> PAGE_SHIFT);
181
182         uint64_t gpu = alloc_gpu_pages(fd, pages, BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_EX);
183         uint8_t *cpu = mmap_gpu(fd, gpu, pages);
184
185         memcpy(cpu, shader, sz);
186         sync_gpu(fd, cpu, gpu, sz);
187
188         /* TODO: munmap */
189
190         return gpu | (fragment ? 9 : 5); /* Unknown flag */
191 }
192
193 void* vertex_tiler_helper(int fd, bool tiler)
194 {
195         void* packet = galloc(sizeof(struct job_descriptor_header) + sizeof(struct payload_vertex_tiler32));
196
197         struct job_descriptor_header header = {
198                 .exception_status = JOB_NOT_STARTED,
199                 .first_incomplete_task = 0,
200                 .fault_pointer = 0,
201                 .job_descriptor_size = JOB_32_BIT,
202                 .job_type = tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
203                 .job_barrier = 0, 
204                 .job_index = atom_count,
205                 .job_dependency_index_1 = 0,
206                 .job_dependency_index_2 = 0,
207                 .next_job = 0 
208         };
209
210         /* TODO */
211         uint32_t mode_gooks = tiler ? 0x1403000C : 0x14000000;
212         uint32_t other_gook = tiler ? 0x00000003 : 0x00000000;
213
214         struct payload_vertex_tiler32 payload = {
215                 .block1 = {
216                         0x00000003, 0x28000000, mode_gooks, 0x00000000,
217                         0x00000000, other_gook, 0x00000000, 0x00000000,
218                         0x00000005, 0x00000000, 0x00000000
219                 },
220                 .zeroes = (uint32_t) galloc(64),
221                 .unknown1 = (uint32_t) galloc(16),
222                 .null1 = 0,
223                 .null2 = 0,
224                 .unknown2 = (uint32_t) galloc(32),
225                 .shader = (uint32_t) galloc(sizeof(struct shader_meta)),
226                 .vertices = (uint32_t) galloc(sizeof(struct vertex_buffer)),
227                 .unknown4 = (uint32_t) galloc(16),
228                 .unknown5 = (uint32_t) galloc(32),
229                 .unknown6 = (uint32_t) galloc(64),
230                 .nullForVertex = tiler ? (uint32_t) galloc(64) : 0,
231                 .null4 = 0,
232                 .fbd = (uint32_t) make_mfbd(true),
233                 .unknown7 = tiler ? 0 : (uint32_t) galloc(64)
234         };
235
236         struct shader_meta *shader = (struct shader_meta*) payload.shader;
237
238         /* TODO: Integrate an assembler */
239 #include "../shader_hex.h"
240         shader->shader = import_shader(fd,
241                         (uint8_t*) (tiler ? fragment_shader : vertex_shader),
242                         tiler ? sizeof(fragment_shader) : sizeof(vertex_shader),
243                         tiler);
244
245         if(tiler) {
246                 /* Lose precision... on purpose? */
247                 payload.unknown7 = (uint32_t) shader->shader;
248         }
249
250         /* TODO: Decode me! */
251
252         if(tiler) {
253                 shader->unknown1 = 0x0007000000000000;
254                 shader->unknown2 = 0x0000000000020602;
255         } else {
256                 shader->unknown1 = 0x0005000100000000;
257                 shader->unknown2 = 0x0000000000420002;
258         }
259
260         /* I have *no* idea */
261
262         if(tiler) {
263                 uint32_t *p = (uint32_t*) payload.unknown4;
264                 p[0] = 0x2DEA2200;
265
266                 uint64_t pi[] = {
267                         0x0000000017E49000, 0x0000000017E49000, 
268                         0x0000000017E49000, 0x0000000017E49000, 
269                         0x00000000179A2200, 0x0000000017E49000, 
270                         0x0000000017E49000
271                 };
272
273                 memcpy((void*) payload.unknown6, pi, sizeof(pi));
274
275                 uint32_t ni[] = {
276                         0xFF800000, 0xFF800000,
277                         0x7F800000, 0x7F800000,
278                         0x00000000, 0x3F800000,
279                         0x00000000, 0x00EF013F,
280                         0x00000000, 0x0000001F,
281                         0x02020000, 0x00000001
282                 };
283
284                 memcpy((void*) payload.nullForVertex, ni, sizeof(ni));
285         }
286
287         /* TODO: Vertices should be parametric */
288         float vertices[] = {
289                 0.0, 0.0, 0.0,
290                 0.5, 1.0, 0.0,
291                 1.0, 0.0, 0.0
292         };
293
294         struct vertex_buffer *vb = (struct vertex_buffer*) payload.vertices;
295         vb->vertices = (uint64_t) (uint32_t) galloc(sizeof(vertices));
296         memcpy((void*) (uint32_t) vb->vertices, vertices, sizeof(vertices));
297         vb->vertex_size = sizeof(float) * 3;
298         vb->size = sizeof(vertices);
299
300         vb->vertices |= 1; /* TODO flags */
301         
302         /* Use some magic numbers from the traces */
303         uint64_t* unk1 = (uint64_t*) payload.unknown1;
304         unk1[0] = 0x000000B296271001;
305         unk1[1] = 0x000000B296273000;
306
307         uint32_t writeBuffer = (uint32_t) galloc(64);
308
309         uint64_t* unk5 = (uint64_t*) payload.unknown5;
310         unk5[0] = ((uint64_t) (tiler ? 0xDB : 0xA3) << 56) | writeBuffer | 1;
311         unk5[1] = 0x0000004000000010;
312
313         if(tiler) {
314                 uint32_t ni[] = {
315                         0x00000001, 0x00000000,
316                         0x00070000, 0x00020602,
317                         0x00000000, 0x00000000,
318                         0x00000000, 0x3712FFFF,
319                         0x44F0FFFF, 0x0007FF00,
320                         0x0007FF00, 0x00000000,
321                         0x00000000, 0x00000000,
322                         0x00000000, 0x00000200,
323                         0x00000000, 0xF0122122,
324                         0x00000000, 0x00000000,
325                         0x00000000, 0xF0122122,
326                         0x00000000, 0xFF800000,
327                         0xFF800000, 0x7F800000,
328                         0x7F800000, 0x00000000,
329                         0x3F800000, 0x00000000,
330                         0xEF013F00, 0x00000000,
331                         0x0000001F, 0x02020000,
332                         0x00000001, 0x00000000
333                 };
334
335                 memcpy(payload.block2, ni, sizeof(ni));
336         }
337
338         memcpy(packet, &header, sizeof(header));
339         memcpy(packet + sizeof(header), &payload, sizeof(payload));
340         sync_gpu(fd, packet, (uint32_t) packet, sizeof(header) + sizeof(payload));
341
342         return packet;
343 }
344
345 int job_chain_vertex_tiler(int fd) {
346         void *set = set_value_helper(fd);
347         void *vertex = vertex_tiler_helper(fd, false);
348         void *tiler = vertex_tiler_helper(fd, true);
349
350         ((struct job_descriptor_header*) set)->next_job = (uint32_t) vertex;
351         ((struct job_descriptor_header*) vertex)->next_job = (uint32_t) tiler;
352
353         struct base_dependency depNoDep = {
354                 .atom_id = 0,
355                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
356         };
357
358         struct base_jd_atom_v2 job = {
359                 .jc = (uint32_t) set,
360                 .extres_list = NULL,
361                 .nr_extres = 0,
362                 .core_req = BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | BASE_JD_REQ_COHERENT_GROUP,
363                 .atom_number = ++atom_count,
364                 .prio = BASE_JD_PRIO_MEDIUM
365                 .device_nr = 0
366         };
367
368         job.pre_dep[0] = depNoDep;
369         job.pre_dep[1] = depNoDep;
370
371         submit_job(fd, job);
372
373         return 0;
374 }
375
376 void job_chain_replay(int fd)
377 {
378         struct base_dependency depNoDep = {
379                 .atom_id = 0,
380                 .dependency_type = BASE_JD_DEP_TYPE_INVALID
381         };
382
383         uint64_t resource = framebuffer | BASE_EXT_RES_ACCESS_EXCLUSIVE;
384
385         struct base_jd_atom_v2 job = {
386                 .jc = /* TBD */0,
387                 .extres_list = &resource,
388                 .nr_extres = 1,
389                 .core_req = BASE_JD_REQ_EXTERNAL_RESOURCES | BASE_JD_REQ_SOFT_REPLAY,
390                 .atom_number = ++atom_count,
391                 .prio = BASE_JD_PRIO_LOW,
392                 .device_nr = 0
393         };
394
395         job.pre_dep[0] = depNoDep;
396         job.pre_dep[1] = depNoDep;
397
398         submit_job(fd, job);
399 }
400
401 int main()
402 {
403         int fd = open_kernel_module();
404
405         init_cbma(fd);
406
407         size_t fb_size = 29 * 16 * 45 * 16 * 4 * 2;
408
409         framebuffer = (uint64_t) (uint32_t) galloc(fb_size);
410         job_chain_vertex_tiler(fd);
411         job_chain_fragment(fd);
412         flush_job_queue(fd);
413         //job_chain_replay(fd);
414
415         /* Dump framebuffer to a file */
416         uint8_t *fb = (uint8_t*) (uint32_t) framebuffer;
417         FILE *fp = fopen("framebuffer.bin", "wb");
418         fwrite(fb, 1, fb_size, fp);
419         fclose(fp);
420
421         /* Hang to prevent the tracer from going bananas */
422
423         while(1);
424
425         return 0;
426 }