96e10405e78021318379bacd553e887db5218638
[chai.git] / src / synthesise.c
1 /*
2  *
3  * Copyright (C) 2017 Cafe Beverage. All rights reserved.
4  *
5  * This program is free software and is provided to you under the terms of the
6  * GNU General Public License version 2 as published by the Free Software
7  * Foundation, and any use by you of this program is subject to the terms
8  * of such GNU licence.
9  *
10  * A copy of the licence is included with the program, and can also be obtained
11  * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12  * Boston, MA  02110-1301, USA.
13  *
14  */
15
16 #include "synthesise.h"
17 #include <mali-ioctl.h>
18
19 #include <stdlib.h>
20 #include <memory.h>
21
/* Byte offset added to the scratchpad address. In this file it is used for
 * the set-value job's output address and for the first block2 entry of the
 * descriptor built by make_mfbd(). */
#define SV_OFFSET (0x4000)

/* Number of floats per vertex; upload_vertices() multiplies it by
 * sizeof(float) to get the attribute element size. */
#define XYZ_COMPONENT_COUNT 3

/* job_index written into the fragment job descriptor header. */
#define INDEX_FRAGMENT 1

/* Number of the most recently submitted atom. Every job_chain_* function in
 * this file pre-increments it to obtain its own atom_number;
 * job_chain_fragment() and job_chain_replay() read the value before the
 * increment to name the atom they depend on. */
int atom_count = 0;

/* Filler for an unused pre_dep slot: atom 0 with an invalid dependency
 * type. */
struct mali_jd_dependency no_dependency = {
        .atom_id = 0,
        .dependency_type = MALI_JD_DEP_TYPE_INVALID
};
34
35 struct job_descriptor_header* set_value_helper(int fd, uint64_t out)
36 {
37         void* packet = galloc(sizeof(struct job_descriptor_header) +
38                         sizeof(struct payload_set_value));
39
40         struct job_descriptor_header header = {
41                 .exception_status = JOB_NOT_STARTED,
42                 .job_descriptor_size = JOB_64_BIT,
43                 .job_type = JOB_TYPE_SET_VALUE
44         };
45
46         struct payload_set_value payload = {
47                 .out = out,
48                 .unknown = 0x03
49         };
50
51         memcpy(packet, &header, sizeof(header));
52         memcpy(packet + sizeof(header), &payload, sizeof(payload));
53
54         return packet;
55 }
56
57 uint64_t make_mfbd(bool tiler, uint64_t heap_free_address, uint64_t scratchpad)
58 {
59         struct tentative_mfbd *mfbd = galloc(sizeof(struct tentative_mfbd));
60
61         mfbd->block2[0] = scratchpad + SV_OFFSET;
62         mfbd->block2[1] = scratchpad + SV_OFFSET + 0x200;
63         mfbd->ugaT = scratchpad;
64         mfbd->unknown2 = heap_free_address | 0x8000000;
65         mfbd->flags = 0xF0;
66         mfbd->heap_free_address = heap_free_address;
67         mfbd->blah = 0x1F00000000;
68         mfbd->unknown1 = 0x1600;
69
70         if(!tiler)
71                 mfbd->unknown3 = 0xFFFFF8C0;
72
73         mfbd->block1[4] = 0x02D801C2;
74         mfbd->block1[6] = 0x02D801C2;
75
76         /* This might not a tiler issue so much as a which-frame issue.
77          * First tiler is 0xFF form. Rest of C021. All fragment C021.
78          * TODO: Investigate!
79          */
80
81         mfbd->block1[7] = tiler ? 0x04001080 : 0x01001080;
82         mfbd->block1[8] = tiler ? 0x000000FF : 0xC0210000;
83         mfbd->block1[9] = tiler ? 0x3F800000 : 0x00000000;
84
85         uint64_t sab0 = 0x5ABA5ABA;
86
87         uint64_t block3[] = {
88                 0x0000000000000000,
89                 0x0000000000030005,
90                 sab0,
91                 mfbd->block2[0],
92                 0x0000000000000003,
93                 0x0000000000000000,
94                 0x0000000000000000,
95                 0x0000000000000000,
96                 sab0 + 0x300,
97         };
98
99         memcpy(mfbd->block3, block3, sizeof(block3));
100
101         return (uint32_t) mfbd | MFBD | (tiler ? FBD_TILER : FBD_FRAGMENT);
102 }
103
104 uint32_t job_chain_fragment(int fd, uint64_t framebuffer,
105                 uint64_t heap_free_address, uint64_t scratchpad)
106 {
107         void* packet = galloc(sizeof(struct job_descriptor_header)
108                         + sizeof(struct payload_fragment));
109
110         struct job_descriptor_header header = {
111                 .exception_status = JOB_NOT_STARTED,
112                 .job_descriptor_size = JOB_32_BIT,
113                 .job_type = JOB_TYPE_FRAGMENT,
114                 .job_index = INDEX_FRAGMENT,
115         };
116
117         struct payload_fragment payload = {
118                 .min_tile_coord = MAKE_TILE_COORD(0, 0, 0),
119                 .max_tile_coord = MAKE_TILE_COORD(29, 45, 0),
120                 .fragment_fbd = make_mfbd(false, heap_free_address, scratchpad)
121         };
122
123         memcpy(packet, &header, sizeof(header));
124         memcpy(packet + sizeof(header), &payload, sizeof(payload));
125
126         struct mali_jd_dependency depTiler = {
127                 .atom_id = atom_count /* last one */,
128                 .dependency_type = MALI_JD_DEP_TYPE_DATA
129         };
130
131         uint64_t* resource = calloc(sizeof(u64), 1);
132         resource[0] = framebuffer | MALI_EXT_RES_ACCESS_EXCLUSIVE;
133
134         /* TODO: free resource */
135
136         struct mali_jd_atom_v2 job = {
137                 .jc = (uint32_t) packet,
138                 .ext_res_list = (struct mali_external_resource*) resource /* TODO */,
139                 .nr_ext_res = 1,
140                 .core_req = MALI_JD_REQ_EXTERNAL_RESOURCES | MALI_JD_REQ_FS,
141                 .atom_number = ++atom_count,
142                 .prio = MALI_JD_PRIO_MEDIUM,
143                 .device_nr = 0,
144                 .pre_dep = { depTiler, no_dependency }
145         };
146
147         submit_job(fd, job);
148
149         return (uint32_t) packet;
150 }
151
152 uint64_t import_shader(int fd, uint8_t *shader, size_t sz, bool fragment)
153 {
154         int pages = 1 + (sz >> PAGE_SHIFT);
155
156         uint64_t gpu = alloc_gpu_pages(fd, pages, MALI_MEM_PROT_CPU_RD |
157                         MALI_MEM_PROT_CPU_WR | MALI_MEM_PROT_GPU_RD |
158                         MALI_MEM_PROT_GPU_EX);
159
160         uint8_t *cpu = mmap_gpu(fd, gpu, pages);
161
162         memcpy(cpu, shader, sz);
163
164         /* TODO: munmap */
165
166         return gpu | SHADER | (fragment ? SHADER_FRAGMENT : SHADER_VERTEX);
167 }
168
169 uint32_t upload_vertices(float *vertices, size_t sz)
170 {
171         struct attribute_buffer *vb;
172         vb = (struct attribute_buffer*) galloc(sizeof(*vb));
173         
174         float *verts = (float*) galloc(sz);
175         memcpy(verts, vertices, sz);
176         vb->elements = (uint64_t) (uintptr_t) verts;
177
178         vb->element_size = sizeof(float) * XYZ_COMPONENT_COUNT; 
179         vb->total_size = sz;
180
181         vb->elements |= 1; /* TODO flags */
182         
183         return (uint32_t) vb;
184 }
185
186 struct job_descriptor_header* vertex_tiler_helper(int fd, bool tiler,
187                 uint32_t fbd, uint32_t vertex_buffer,
188                 uint32_t zero_buffer, uint32_t mode,
189                 void *shader, size_t shader_size)
190 {
191         void* packet = galloc(sizeof(struct job_descriptor_header)
192                         + sizeof(struct payload_vertex_tiler32));
193
194         struct job_descriptor_header header = {
195                 .exception_status = JOB_NOT_STARTED,
196                 .job_descriptor_size = JOB_32_BIT,
197                 .job_type = tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX
198         };
199
200         /* TODO */
201         uint32_t mode_gooks = 0x14000000 | (tiler ? (0x030000 | mode) : 0);
202         uint32_t other_gook = tiler ? 0x00000003 : 0x00000000;
203
204         struct payload_vertex_tiler32 payload = {
205                 .block1 = {
206                         0x00000003, 0x28000000, mode_gooks, 0x00000000,
207                         0x00000000, other_gook, 0x00000000, 0x00000000,
208                         0x00000005, 0x00000000, 0x00000000
209                 },
210                 .zeroes = zero_buffer,
211                 .unknown1 = (uint32_t) galloc(16),
212                 .null1 = 0,
213                 .null2 = 0,
214                 .unknown2 = (uint32_t) galloc(32),
215                 .shader = (uint32_t) galloc(sizeof(struct shader_meta)),
216                 .attributes = vertex_buffer,
217                 .attribute_meta = (uint32_t) galloc(16), /* TODO */
218                 .unknown5 = (uint32_t) galloc(32),
219                 .unknown6 = (uint32_t) galloc(64),
220                 .nullForVertex = tiler ? (uint32_t) galloc(64) : 0,
221                 .null4 = 0,
222                 .fbd = fbd,
223                 .unknown7 = tiler ? 0 : ((uint32_t) galloc(64) | 1) /* TODO */
224         };
225
226         struct shader_meta *s = (struct shader_meta*) payload.shader;
227         s->shader = import_shader(fd, shader, shader_size, tiler);
228
229         if(!tiler) {
230                 uint32_t ni[] = {
231                         0x43200000, 0x42F00000, 0x3F000000, 0x00000000,
232                         0x43200000, 0x42F00000, 0x3F000000, 0x00000000
233                 };
234
235                 memcpy((void*) payload.unknown2, ni, sizeof(ni));
236         }
237
238         if(tiler) {
239                 /* Lose precision... on purpose? */
240                 payload.unknown7 = (uint32_t) s->shader;
241         }
242
243         payload.unknown7 = tiler ? 0xDEADBA00 : 0xDEADFA00;
244
245         /* TODO: Decode me! */
246
247         if(tiler) {
248                 s->unknown1 = 0x0007000000000000;
249                 s->unknown2 = 0x0000000000020602;
250         } else {
251                 s->unknown1 = 0x0005000100000000;
252                 s->unknown2 = 0x0000000000420002;
253         }
254
255         /* TODO: Generate on the fly (see trace.c) */
256         uint32_t *p = (uint32_t*) payload.attribute_meta;
257         *p = 0x2DEA2200;
258
259         /* I have *no* idea */
260
261         uint64_t pi[] = {
262                 0x0000000017E49000, 0x0000000017E49000, 
263                 0x0000000017E49000, 0x0000000017E49000, 
264                 0x00000000179A2200, 0x0000000017E49000, 
265                 0x0000000017E49000
266         };
267
268         memcpy((void*) payload.unknown6, pi, sizeof(pi));
269
270         if(tiler) {
271                 uint32_t ni[] = {
272                         0xFF800000, 0xFF800000,
273                         0x7F800000, 0x7F800000,
274                         0x00000000, 0x3F800000,
275                         0x00000000, 0x00EF013F,
276                         0x00000000, 0x0000001F,
277                         0x02020000, 0x00000001
278                 };
279
280                 memcpy((void*) payload.nullForVertex, ni, sizeof(ni));
281         }
282
283         /* Use some magic numbers from the traces */
284         uint64_t* unk1 = (uint64_t*) payload.unknown1;
285         /* unk1[0] = 0x000000B296271001;
286         unk1[1] = 0x000000B296273000; */
287
288         unk1[0] = 0x5a5a5a5a5a5a1001;
289         unk1[1] = 0x5a5a5a5a5a5a3000;
290
291         uint32_t writeBuffer = (uint32_t) galloc(64);
292
293         uint64_t* unk5 = (uint64_t*) payload.unknown5;
294         unk5[0] = ((uint64_t) (tiler ? 0xDB : 0x7A) << 56) | writeBuffer | 1;
295         unk5[1] = 0x0000004000000010;
296
297         if(tiler) {
298                 uint32_t ni[] = {
299                         0x00000001, 0x00000000, 0x00070000, 0x00020602,
300                         0x00000000, 0x00000000, 0x00000000, 0x3712FFFF,
301                         0x44F0FFFF, 0x0007FF00, 0x0007FF00, 0x00000000,
302                         0x00000000, 0x00000000, 0x00000000, 0x00000200,
303                         0x00000000, 0xF0122122, 0x00000000, 0x00000000,
304                         0x00000000, 0xF0122122, 0x00000000, 0xFF800000,
305                         0xFF800000, 0x7F800000, 0x7F800000, 0x00000000,
306                         0x3F800000, 0x00000000, 0xEF013F00, 0x00000000,
307                         0x0000001F, 0x02020000, 0x00000001, 0x00000000
308                 };
309
310                 memcpy(payload.block2, ni, sizeof(ni));
311         } else {
312                 uint32_t ni[] = {
313                         0x00000000, 0x0000000C, 0x00000030, 0x2DEA2200,
314                         0x00000000, 0x00000000, 0x00000000, /* Address to 1 */ 0xCAFEDA01,
315                         0x57000000, 0x00000010, 0x00000040, 0x17E49000,
316                         0x00000000, 0x17E49000, 0x00000000, 0x17E49000,
317                         0x00000000, 0x17E49000, 0x00000000, 0x179A2200,
318                         0x00000000, 0x17E49000, 0x00000000, 0x17E49000,
319                         0x00000000, 0x00000000, 0x00000000, 0x43200000,
320                         0x42F00000, 0x3F000000, 0x00000000, 0x43200000,
321                         0x42F00000, 0x3F000000, 0x00000000, 0x00000000
322                 };
323
324                 memcpy(payload.block2, ni, sizeof(ni));
325         }
326
327         /* Trap tiler job execution */
328
329         if(tiler) {
330                 payload.shader = 0x5AB00A05;
331
332                 /* Hit second */
333                 //payload.zeroes = 0x5AB01A00;
334
335                 payload.unknown1 = 0x5AB02A00;
336                 payload.unknown2 = 0x5AB03A00;
337                 payload.attributes = 0x5AB04A00;
338                 payload.attribute_meta = 0x5AB05A00;
339                 payload.unknown5 = 0x5AB06A00;
340                 payload.unknown6 = 0x5AB07A00;
341                 payload.unknown7 = 0x5AB0DA00;
342
343                 /* Hit third */
344                 //payload.fbd    = 0x5AB09A00;
345
346                 /* Hit first */
347                 // payload.nullForVertex = 0x5AB08A00;
348         }
349
350         memcpy(packet, &header, sizeof(header));
351         memcpy(packet + sizeof(header), &payload, sizeof(payload));
352
353         return packet;
354 }
355
356 uint32_t job_chain_vertex_tiler(int fd,
357                 float *vertices, size_t vertex_size, int mode,
358                 void* vertex_shader, size_t vs_sz,
359                 void *fragment_shader, size_t fs_sz,
360                 uint64_t heap_free_address, uint64_t scratchpad)
361 {
362         uint32_t vertex_buffer = upload_vertices(vertices, vertex_size);
363         uint32_t vertex_fbd = (uint32_t) make_mfbd(true, heap_free_address, scratchpad);
364
365         uint32_t zero_buffer = (uint32_t) alloc_gpu_pages(fd, 0x20,
366                         0x3800 | MALI_MEM_PROT_CPU_RD |
367                         MALI_MEM_PROT_CPU_WR | MALI_MEM_PROT_GPU_RD);
368
369         struct job_descriptor_header *set = set_value_helper(fd, scratchpad + SV_OFFSET);
370
371         struct job_descriptor_header *vertex =
372                 vertex_tiler_helper(fd, false,
373                                 vertex_fbd, vertex_buffer,
374                                 zero_buffer, mode,
375                                 vertex_shader, vs_sz);
376
377         struct job_descriptor_header *tiler =
378                 vertex_tiler_helper(fd, true,
379                                 vertex_fbd, vertex_buffer,
380                                 zero_buffer, mode,
381                                 fragment_shader, fs_sz);
382
383         set->next_job._32 = (uint32_t) vertex;
384         vertex->next_job._32 = (uint32_t) tiler;
385
386         /* TODO: Determine if these numbers are meaningful */
387         set->job_index = 3;
388         vertex->job_index = 1;
389         tiler->job_index = 2;
390
391         vertex->job_dependency_index_2 = set->job_index;
392         tiler->job_dependency_index_1 = vertex->job_index;
393
394         struct mali_jd_atom_v2 job = {
395                 .jc = (uint32_t) set,
396                 .ext_res_list = NULL,
397                 .nr_ext_res = 0,
398                 .core_req = MALI_JD_REQ_CS | MALI_JD_REQ_T
399                         | MALI_JD_REQ_CF | MALI_JD_REQ_COHERENT_GROUP,
400                 .atom_number = ++atom_count,
401                 .prio = MALI_JD_PRIO_MEDIUM,
402                 .device_nr = 0,
403                 .pre_dep = { no_dependency, no_dependency }
404         };
405
406         submit_job(fd, job);
407
408         return (uint32_t) tiler;
409 }
410
411 void job_chain_replay(int fd, uint32_t tiler_jc, uint32_t fragment_jc,
412                 uint64_t heap_free_address, uint64_t framebuffer)
413 {
414         struct mali_jd_replay_payload *payload;
415
416         payload = (struct mali_jd_replay_payload*) galloc(sizeof(*payload));
417
418         payload->tiler_jc_list = tiler_jc;
419         payload->fragment_jc = fragment_jc;
420         payload->tiler_heap_free = heap_free_address;
421         payload->fragment_hierarchy_mask = 0;
422         payload->tiler_hierarchy_mask = 0;
423         payload->hierarchy_default_weight = 0x10000;
424         payload->tiler_core_req = MALI_JD_REQ_T | MALI_JD_REQ_COHERENT_GROUP;
425         payload->fragment_core_req = MALI_JD_REQ_FS;
426
427         struct mali_jd_dependency depFragment = {
428                 .atom_id = atom_count,
429                 .dependency_type = MALI_JD_DEP_TYPE_DATA
430         };
431
432         uint64_t* resource = malloc(sizeof(u64) * 1);
433         resource[0] = framebuffer | MALI_EXT_RES_ACCESS_EXCLUSIVE;
434
435         struct mali_jd_atom_v2 job = {
436                 .jc = (uint32_t) payload,
437                 .ext_res_list = (struct mali_external_resource*)resource,
438                 .nr_ext_res = 1,
439                 .core_req = MALI_JD_REQ_EXTERNAL_RESOURCES | MALI_JD_REQ_SOFT_REPLAY,
440                 .atom_number = ++atom_count,
441                 .prio = MALI_JD_PRIO_LOW,
442                 .device_nr = 0,
443                 .pre_dep = { depFragment, no_dependency }
444         };
445
446         submit_job(fd, job);
447 }