/*
 *
 * Copyright (C) 2017 Cafe Beverage. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 *
 */

#include "shim.h"

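/* Hex dump helper: thin wrapper over panwrap_log_hexdump. The label
 * argument is accepted for call-site context but is not currently printed. */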
static void formatted_hex_dump(char *array, uint8_t *buffer, size_t s)
{
        if (!buffer) return;

        panwrap_log_hexdump(buffer, s, "\t\t");
}

/* Assert that synthesised command stream is bit-identical with trace */

static void assert_gpu_same(uint64_t addr, size_t s, uint8_t *synth)
{
        uint8_t *buffer = fetch_mapped_gpu(addr, s);
        size_t i;

        if (!buffer) {
                panwrap_log("Bad allocation in assert %LX\n", addr);
                return;
        }

        for (i = 0; i < s; ++i) {
                if (buffer[i] != synth[i]) {
                        panwrap_log("At %LX, expected:\n", addr);
                        formatted_hex_dump("b", synth, s);
                        panwrap_log("Instead got:\n");
                        formatted_hex_dump("b", buffer, s);

                        break;
                }
        }

        relax_mapped_gpu(buffer);
}

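/* Assert that a mapped GPU region contains only zeroes */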
static void assert_gpu_zeroes(uint64_t addr, size_t s)
{
        uint8_t *zero = calloc(s, 1);

        assert_gpu_same(addr, s, zero);
        free(zero);
}

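/* Dump a small GPU region as raw hex, with no decoding */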
static void quick_dump_gpu(uint64_t addr, size_t s)
{
        uint8_t *buf;

        if (!addr) {
                panwrap_log("Null quick dump\n");
                return;
        }

        buf = fetch_mapped_gpu(addr, s);

        panwrap_log("Quick GPU dump (%LX)\n", addr);

        if (!buf) {
                panwrap_log("Not found\n");
                return;
        }

        formatted_hex_dump("a", buf, s);
        relax_mapped_gpu(buf);
}

#include "chai-notes.h"

#define DEFINE_CASE(label) case label: return #label;

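/* Stringify a job type from the job descriptor header (see chai-notes.h) */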
static char *chai_job_type_name(int type)
{
        switch (type) {
                DEFINE_CASE(JOB_NOT_STARTED)
                DEFINE_CASE(JOB_TYPE_NULL)
                DEFINE_CASE(JOB_TYPE_SET_VALUE)
                DEFINE_CASE(JOB_TYPE_CACHE_FLUSH)
                DEFINE_CASE(JOB_TYPE_COMPUTE)
                DEFINE_CASE(JOB_TYPE_VERTEX)
                DEFINE_CASE(JOB_TYPE_TILER)
                DEFINE_CASE(JOB_TYPE_FUSED)
                DEFINE_CASE(JOB_TYPE_FRAGMENT)

                default:
                        panwrap_log("Requested job type %X\n", type);
                        return "UNKNOWN";
        }
}

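/* Stringify the GL-style draw mode byte found in tiler job payloads */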
static char *chai_gl_mode_name(uint8_t b)
{
        switch (b) {
                DEFINE_CASE(CHAI_POINTS)
                DEFINE_CASE(CHAI_LINES)
                DEFINE_CASE(CHAI_TRIANGLES)
                DEFINE_CASE(CHAI_TRIANGLE_STRIP)
                DEFINE_CASE(CHAI_TRIANGLE_FAN)

                default:
                        panwrap_log("Unknown mode %X\n", b);
                        return "GL_UNKNOWN";
        }
}

/* TODO: Figure out what "fbd" means */
/* TODO: Corresponding SFBD decode (don't assume MFBD) */

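/* Decode a (tentatively understood) MFBD and hex dump the buffers it
 * references. Most fields are still unknown, so this is largely raw dumps
 * plus assertions that the regions believed to be zero really are zero. */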
static void chai_trace_fbd(uint32_t fbd)
{
        struct tentative_mfbd *mfbd = (struct tentative_mfbd *)
                fetch_mapped_gpu(fbd & FBD_POINTER_MASK,
                                 sizeof(struct tentative_mfbd));
        uint8_t *buf;
        uint32_t *buf32;

        if (!mfbd) {
                panwrap_log("Could not map FBD %X\n", fbd);
                return;
        }

        panwrap_log("MFBD @ %X (%X)\n", fbd & FBD_POINTER_MASK, fbd & ~FBD_POINTER_MASK);
        panwrap_log("MFBD flags %X, heap free address %LX\n",
                        mfbd->flags, mfbd->heap_free_address);

        formatted_hex_dump("Block 1", (uint8_t *) mfbd->block1, sizeof(mfbd->block1));

        panwrap_log("unk2\n");
        buf = fetch_mapped_gpu(mfbd->unknown2, 64);
        formatted_hex_dump("B", buf, 64);
        relax_mapped_gpu(buf);

        assert_gpu_zeroes(mfbd->block2[0], 64);
        assert_gpu_zeroes(mfbd->block2[1], 64);
        assert_gpu_zeroes(mfbd->ugaT, 64);
        assert_gpu_zeroes(mfbd->unknown_gpu_address, 64);

        /* Somehow maybe sort of kind of framebufferish?
         * It changes predictably in the same way as the FB.
         * Unclear what exactly it is, though.
         *
         * Where the framebuffer is: 1A 33 00 00
         * This is: 71 B3 03 71 6C 4D 87 46
         * Where the framebuffer is: 1A 33 1A 00
         * This is: AB E4 43 9C E8 D6 D1 25
         *
         * It repeats, too, but with everything 8 bytes rather than 4.
         *
         * It is a function of the colour painted. But the exact details
         * are elusive.
         *
         * Also, this is an output, not an input.
         * Assuming the framebuffer works as intended, RE may be
         * pointless.
         */

        panwrap_log("ugaT %LX, uga %LX\n", mfbd->ugaT, mfbd->unknown_gpu_address);
        panwrap_log("ugan %LX\n", mfbd->unknown_gpu_addressN);
        buf = fetch_mapped_gpu(mfbd->unknown_gpu_addressN, 64);
        formatted_hex_dump("B", buf, 64);
        relax_mapped_gpu(buf);

        panwrap_log("unk1 %X, b1 %LX, b2 %LX, unk2 %LX, unk3 %LX, blah %LX\n",
                        mfbd->unknown1,
                        mfbd->block2[0],
                        mfbd->block2[1],
                        mfbd->unknown2,
                        mfbd->unknown3,
                        mfbd->blah);

        panwrap_log("Weights [ %X, %X, %X, %X, %X, %X, %X, %X ]\n",
                        mfbd->weights[0], mfbd->weights[1],
                        mfbd->weights[2], mfbd->weights[3],
                        mfbd->weights[4], mfbd->weights[5],
                        mfbd->weights[6], mfbd->weights[7]);

        formatted_hex_dump("Block 3", (uint8_t *) mfbd->block3, sizeof(mfbd->block3));
        panwrap_log("---\n");
        formatted_hex_dump("Block 4", (uint8_t *) mfbd->block4, sizeof(mfbd->block4));

        panwrap_log("--- (seriously though) --- %X\n", mfbd->block3[4]);
        buf32 = fetch_mapped_gpu(mfbd->block3[4], 128);

        if (buf32) {
                formatted_hex_dump("a", (uint8_t *) buf32, 128);

                quick_dump_gpu(buf32[6], 64);
                quick_dump_gpu(buf32[20], 64);
                quick_dump_gpu(buf32[23], 64);
                quick_dump_gpu(buf32[24], 64);
                quick_dump_gpu(buf32[25], 64);
                quick_dump_gpu(buf32[26], 64);
                quick_dump_gpu(buf32[27], 64);
                quick_dump_gpu(buf32[28], 64);
                quick_dump_gpu(buf32[31], 64);

                relax_mapped_gpu(buf32);
        }

        quick_dump_gpu(mfbd->block3[16], 128);

        relax_mapped_gpu(mfbd);
}

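/* Print a 1-4 component float vector in angle-bracket form */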
static void chai_trace_vecN(float *p, size_t count)
{
        if (count == 1)
                panwrap_log("\t<%f>,\n", p[0]);
        else if (count == 2)
                panwrap_log("\t<%f, %f>,\n", p[0], p[1]);
        else if (count == 3)
                panwrap_log("\t<%f, %f, %f>,\n", p[0], p[1], p[2]);
        else if (count == 4)
                panwrap_log("\t<%f, %f, %f, %f>,\n", p[0], p[1], p[2], p[3]);
        else
                panwrap_log("Cannot print vec%zu\n", count);
}

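/* Decode an attribute buffer descriptor and print each vertex as a vector.
 * The low bits of the elements pointer appear to carry flags, so they are
 * masked off before mapping the vertex data. */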
static void chai_trace_attribute(uint64_t address)
{
        uint64_t raw;
        uint64_t flags;
        int vertex_count;
        int component_count;
        float *v;
        float *p;
        int i;

        struct attribute_buffer *vb =
                (struct attribute_buffer *) fetch_mapped_gpu(
                                address,
                                sizeof(struct attribute_buffer));
        if (!vb) return;

        vertex_count = vb->total_size / vb->element_size;
        component_count = vb->element_size / sizeof(float);

        raw = vb->elements & ~3;
        flags = vb->elements ^ raw;

        p = v = fetch_mapped_gpu(raw, vb->total_size);

        if (!v) {
                relax_mapped_gpu(vb);
                return;
        }

        panwrap_log("attribute vec%d mem %LX flag %LX = {\n", component_count, raw, flags);

        for (i = 0; i < vertex_count; i++, p += component_count) {
                chai_trace_vecN(p, component_count);

                /* I don't like these verts... let's add some flare!
                 * (Presumably: perturbing the positions makes it obvious on
                 * screen whether this really is the attribute buffer.) */

                p[0] += (float) (rand() & 0xFF) / 1024.0f;
                p[1] += (float) (rand() & 0xFF) / 1024.0f;
                p[2] += (float) (rand() & 0xFF) / 1024.0f;
        }

        panwrap_log("}\n");

        relax_mapped_gpu(v);
        relax_mapped_gpu(vb);
}

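/* Trace a hardware job chain: decode the shared job descriptor header, then
 * the type-specific payload, and recurse into the next job in the chain. */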
static void chai_trace_hw_chain(uint64_t chain)
{
        struct job_descriptor_header *h;
        uint8_t *gen_pay;
        u64 next;
        u64 payload;

        /* Trace descriptor */
        h = fetch_mapped_gpu(chain, sizeof(*h));

        if (!h) {
                panwrap_log("Failed to map the job chain %LX\n\n", chain);
                return;
        }

        panwrap_log("%s job, %d-bit, status %X, incomplete %X, fault %LX, barrier %d, index %hX, dependencies (%hX, %hX)\n",
                        chai_job_type_name(h->job_type),
                        h->job_descriptor_size ? 64 : 32,
                        h->exception_status,
                        h->first_incomplete_task,
                        h->fault_pointer,
                        h->job_barrier,
                        h->job_index,
                        h->job_dependency_index_1,
                        h->job_dependency_index_2);

        payload = chain + sizeof(*h);

        switch (h->job_type) {
        case JOB_TYPE_SET_VALUE: {
                struct payload_set_value *s;

                s = fetch_mapped_gpu(payload, sizeof(*s));
                panwrap_log("set value -> %LX (%LX)\n", s->out, s->unknown);
                relax_mapped_gpu(s);
                break;
        }

        case JOB_TYPE_VERTEX:
        case JOB_TYPE_TILER: {
                struct payload_vertex_tiler32 *v;
                int addr = 0;
                uint64_t *i_shader;
                uint8_t *shader;

                v = fetch_mapped_gpu(payload, sizeof(*v));

                if ((v->shader & 0xFFF00000) == 0x5AB00000) {
                        panwrap_log("Job sabotaged\n");
                        relax_mapped_gpu(v);
                        break;
                }

                /* The shader pointer is aligned to the 128-bit instruction
                 * word; the low bits carry flags.
                 *
                 * TODO: Decode flags.
                 *
                 * TODO: Link with cwabbotts-open-gpu-tools to
                 * disassemble on the fly.
                 */

                i_shader = fetch_mapped_gpu(v->shader, sizeof(u64));

                panwrap_log("%s shader @ %LX (flags %LX)\n",
                        h->job_type == JOB_TYPE_VERTEX ? "Vertex" : "Fragment",
                        *i_shader & ~15,
                        *i_shader & 15);

                shader = fetch_mapped_gpu(*i_shader & ~15, 0x880 - 0x540);
                formatted_hex_dump("s", shader, 0x880 - 0x540);
                relax_mapped_gpu(shader);
                relax_mapped_gpu(i_shader);

                /* Trace attributes based on metadata */
                uint64_t s = v->attribute_meta;

                for (;;) {
                        attribute_meta_t *attr_meta = fetch_mapped_gpu(s, sizeof(attribute_meta_t));

                        if (!attr_meta || !HAS_ATTRIBUTE(*attr_meta)) break;

                        panwrap_log("Attribute %LX (flags %LX)\n",
                                        ATTRIBUTE_NO(*attr_meta),
                                        ATTRIBUTE_FLAGS(*attr_meta));

                        chai_trace_attribute(v->attributes + ATTRIBUTE_NO(*attr_meta) * sizeof(struct attribute_buffer));

                        s += sizeof(attribute_meta_t);

                        relax_mapped_gpu(attr_meta);
                }

                if (h->job_type == JOB_TYPE_TILER)
                        panwrap_log("Drawing in %s\n", chai_gl_mode_name(((uint8_t *) v->block1)[8]));

                assert_gpu_zeroes(v->zeroes, 64);

                if (v->null1 | v->null2 | v->null4)
                        panwrap_log("Null tripped?\n");

                panwrap_log("%cFBD\n", v->fbd & FBD_TYPE ? 'M' : 'S');
                chai_trace_fbd(v->fbd);

                formatted_hex_dump("Block 1", (uint8_t *) v->block1, sizeof(v->block1));

                for (addr = 0; addr < 14; ++addr) {
                        uint32_t address = ((uint32_t *) &(v->zeroes))[addr];
                        uint8_t *buf;
                        size_t sz = 64;

                        /* Structure known. Skip hex dump */
                        if (addr == 2) continue;
                        if (addr == 3) continue;
                        if (addr == 6) continue;
                        if (addr == 10 && h->job_type == JOB_TYPE_VERTEX) continue;
                        if (addr == 11) continue;
                        if (addr == 12) continue;

                        /* Size known exactly but not structure; cull */
                        if (addr == 0) sz = 0x100;
                        if (addr == 1) sz = 0x10;
                        if (addr == 4) sz = 0x40;
                        if (addr == 5) sz = 0x20;
                        if (addr == 7) sz = 0x20;
                        if (addr == 8) sz = 0x20;

                        panwrap_log("Addr %d %X\n", addr, address);

                        if (!address) continue;

                        buf = fetch_mapped_gpu(address, sz);

                        if (!buf) continue;

                        formatted_hex_dump("B", buf, sz);

                        if (addr == 8) {
                                uint32_t sub = *((uint32_t *) buf) & 0xFFFFFFFE;
                                uint8_t *sbuf = fetch_mapped_gpu(sub, 64);

                                panwrap_log("---\n");
                                formatted_hex_dump("S", sbuf, 64);
                                relax_mapped_gpu(sbuf);
                        }

                        if (addr == 1) {
                                uint64_t sub = *((uint64_t *) buf) >> 8;
                                uint8_t *sbuf = fetch_mapped_gpu(sub, 64);

                                panwrap_log("--- %LX\n", sub);
                                formatted_hex_dump("S", sbuf, 64);
                                relax_mapped_gpu(sbuf);
                        }

                        relax_mapped_gpu(buf);
                }

                formatted_hex_dump("Block 2", (uint8_t *) v->block2, sizeof(v->block2));

                relax_mapped_gpu(v);
                break;
        }

        case JOB_TYPE_FRAGMENT: {
                struct payload_fragment *f;

                f = fetch_mapped_gpu(payload, sizeof(*f));

                /* Bit 31 of max_tile_coord is clear on the first frame
                 * and set afterwards.
                 * TODO: Research.
                 */

                panwrap_log("frag %X %X (%d, %d) -> (%d, %d), fbd type %cFBD at %LX (%LX)\n",
                                f->min_tile_coord, f->max_tile_coord,
                                TILE_COORD_X(f->min_tile_coord),
                                TILE_COORD_Y(f->min_tile_coord),
                                TILE_COORD_X(f->max_tile_coord),
                                TILE_COORD_Y(f->max_tile_coord),
                                f->fragment_fbd & FBD_TYPE ? 'M' : 'S',
                                f->fragment_fbd,
                                f->fragment_fbd & FBD_POINTER_MASK);

                chai_trace_fbd(f->fragment_fbd);

                relax_mapped_gpu(f);
                break;
        }

        default: {
                panwrap_log("Dumping payload %LX for job type %s\n",
                                payload,
                                chai_job_type_name(h->job_type));

                gen_pay = fetch_mapped_gpu(payload, 256);
                formatted_hex_dump("pl", gen_pay, 256);
                relax_mapped_gpu(gen_pay);
        }
        }

        next = h->job_descriptor_size
                ? h->next_job._64
                : h->next_job._32;

        relax_mapped_gpu(h);

        /* Traverse the job chain */
        if (next)
                chai_trace_hw_chain(next);
}

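/* Entry point per submitted atom: soft replay jobs are decoded directly;
 * anything else is treated as a hardware job chain. */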
static void chai_trace_atom(const struct mali_jd_atom_v2 *v)
{
        if (v->core_req & MALI_JD_REQ_SOFT_JOB) {
                if (v->core_req & MALI_JD_REQ_SOFT_REPLAY) {
                        struct base_jd_replay_payload *payload;

                        payload = (struct base_jd_replay_payload *)
                                fetch_mapped_gpu(v->jc, sizeof(*payload));

                        panwrap_log("tiler_jc_list = %LX, fragment_jc = %LX,\n\t"
                                "tiler_heap_free = %LX, fragment hierarchy mask = %hX, "
                                "tiler hierarchy mask = %hX, hierarchy def weight %X, "
                                "tiler core_req = %X, fragment core_req = %X\n",
                                payload->tiler_jc_list,
                                payload->fragment_jc,
                                payload->tiler_heap_free,
                                payload->fragment_hierarchy_mask,
                                payload->tiler_hierarchy_mask,
                                payload->hierarchy_default_weight,
                                payload->tiler_core_req,
                                payload->fragment_core_req);

                        relax_mapped_gpu(payload);
                } else {
                        /* TODO: Soft job decoding */
                        panwrap_log("Unknown soft job\n");
                }
        } else {
                chai_trace_hw_chain(v->jc);
        }
}