/*
 *
 * Copyright (C) 2017 Cafe Beverage. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 *
 */

#include "shim.h"

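/* Hex dump helper around panwrap_log_hexdump_trimmed. The first argument is
 * a label for the call site; it is not currently printed. */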
static void formatted_hex_dump(char *array, uint8_t *buffer, size_t s)
{
        if (!buffer) return;

        panwrap_log_hexdump_trimmed(buffer, s, "\t\t");
}

/* Assert that synthesised command stream is bit-identical with trace */

static void assert_gpu_same(uint64_t addr, size_t s, uint8_t *synth)
{
        uint8_t *buffer = fetch_mapped_gpu(addr, s);

        if (!buffer) {
                panwrap_log("Bad allocation in assert %llX\n", addr);
                return;
        }

        for (unsigned int i = 0; i < s; ++i) {
                if (buffer[i] != synth[i]) {
                        panwrap_log("At %llX, expected:\n", addr);
                        formatted_hex_dump("b", synth, s);
                        panwrap_log("Instead got:\n");
                        formatted_hex_dump("b", buffer, s);

                        break;
                }
        }

        relax_mapped_gpu(buffer);
}

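/* Assert that a mapped GPU region reads back as all zeroes */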
static void assert_gpu_zeroes(uint64_t addr, size_t s)
{
        uint8_t *zero = calloc(s, 1);
        assert_gpu_same(addr, s, zero);
        free(zero);
}

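/* Hex dump a region of mapped GPU memory without any interpretation */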
static void quick_dump_gpu(uint64_t addr, size_t s)
{
        uint8_t *buf;

        if (!addr) {
                panwrap_log("Null quick dump\n");
                return;
        }

        buf = fetch_mapped_gpu(addr, s);

        panwrap_log("Quick GPU dump (%llX)\n", addr);

        if (!buf) {
                panwrap_log("Not found\n");
                return;
        }

        formatted_hex_dump("a", buf, s);
        relax_mapped_gpu(buf);
}

#include "chai-notes.h"

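/* Expand to a switch case that returns the stringified enum label */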
#define DEFINE_CASE(label) case label: return #label;

static const char *chai_job_type_name(int type)
{
        switch (type) {
                DEFINE_CASE(JOB_NOT_STARTED)
                DEFINE_CASE(JOB_TYPE_NULL)
                DEFINE_CASE(JOB_TYPE_SET_VALUE)
                DEFINE_CASE(JOB_TYPE_CACHE_FLUSH)
                DEFINE_CASE(JOB_TYPE_COMPUTE)
                DEFINE_CASE(JOB_TYPE_VERTEX)
                DEFINE_CASE(JOB_TYPE_TILER)
                DEFINE_CASE(JOB_TYPE_FUSED)
                DEFINE_CASE(JOB_TYPE_FRAGMENT)

                default:
                        panwrap_log("Requested job type %X\n", type);
                        return "UNKNOWN";
        }
}

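/* Name the draw mode byte found in tiler jobs (GL-style primitive modes) */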
static const char *chai_gl_mode_name(uint8_t b)
{
        switch (b) {
                DEFINE_CASE(CHAI_POINTS)
                DEFINE_CASE(CHAI_LINES)
                DEFINE_CASE(CHAI_TRIANGLES)
                DEFINE_CASE(CHAI_TRIANGLE_STRIP)
                DEFINE_CASE(CHAI_TRIANGLE_FAN)

                default:
                        panwrap_log("Unknown mode %X\n", b);
                        return "GL_UNKNOWN";
        }
}

/* TODO: Figure out what "fbd" means */
/* TODO: Corresponding SFBD decode (don't assume MFBD) */

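/* Dump the structure tentatively decoded as the MFBD (struct tentative_mfbd):
 * log the fields that are understood, hex dump the blocks that are not, and
 * assert that regions expected to be zero really are zero. */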
static void chai_trace_fbd(uint32_t fbd)
{
        struct tentative_mfbd *mfbd = (struct tentative_mfbd *)
                fetch_mapped_gpu((uint64_t) fbd & FBD_POINTER_MASK,
                                 sizeof(struct tentative_mfbd));
        uint8_t *buf;
        uint32_t *buf32;

        if (!mfbd) {
                panwrap_log("Could not map FBD %X\n", fbd);
                return;
        }

        panwrap_log("MFBD @ %X (%X)\n", fbd & FBD_POINTER_MASK, fbd & ~FBD_POINTER_MASK);
        panwrap_log("MFBD flags %X, heap free address %llX\n",
                        mfbd->flags, mfbd->heap_free_address);

        formatted_hex_dump("Block 1", (uint8_t *) mfbd->block1, sizeof(mfbd->block1));

        panwrap_log("unk2\n");
        buf = fetch_mapped_gpu(mfbd->unknown2, 64);
        formatted_hex_dump("B", buf, 64);
        relax_mapped_gpu(buf);

        assert_gpu_zeroes(mfbd->block2[0], 64);
        assert_gpu_zeroes(mfbd->block2[1], 64);
        assert_gpu_zeroes(mfbd->ugaT, 64);
        assert_gpu_zeroes(mfbd->unknown_gpu_address, 64);

        /* Somehow maybe sort of kind of framebufferish?
         * It changes predictably in the same way as the FB.
         * Unclear what exactly it is, though.
         *
         * Where the framebuffer is: 1A 33 00 00
         * This is: 71 B3 03 71 6C 4D 87 46
         * Where the framebuffer is: 1A 33 1A 00
         * This is: AB E4 43 9C E8 D6 D1 25
         *
         * It repeats, too, but in 8-byte units rather than 4.
         *
         * It is a function of the colour painted. But the exact details
         * are elusive.
         *
         * Also, this is an output, not an input.
         * Assuming the framebuffer works as intended, RE may be
         * pointless.
         */

        panwrap_log("ugaT %llX, uga %llX\n", mfbd->ugaT, mfbd->unknown_gpu_address);
        panwrap_log("ugan %llX\n", mfbd->unknown_gpu_addressN);
        buf = fetch_mapped_gpu(mfbd->unknown_gpu_addressN, 64);
        formatted_hex_dump("B", buf, 64);
        relax_mapped_gpu(buf);

        panwrap_log("unk1 %X, b1 %llX, b2 %llX, unk2 %llX, unk3 %llX, blah %llX\n",
                        mfbd->unknown1,
                        mfbd->block2[0],
                        mfbd->block2[1],
                        mfbd->unknown2,
                        mfbd->unknown3,
                        mfbd->blah);

        panwrap_log("Weights [ %X, %X, %X, %X, %X, %X, %X, %X ]\n",
                        mfbd->weights[0], mfbd->weights[1],
                        mfbd->weights[2], mfbd->weights[3],
                        mfbd->weights[4], mfbd->weights[5],
                        mfbd->weights[6], mfbd->weights[7]);

        formatted_hex_dump("Block 3", (uint8_t *) mfbd->block3, sizeof(mfbd->block3));
        panwrap_log("---\n");
        formatted_hex_dump("Block 4", (uint8_t *) mfbd->block4, sizeof(mfbd->block4));

        panwrap_log("--- (seriously though) --- %X\n", mfbd->block3[4]);
        buf32 = fetch_mapped_gpu(mfbd->block3[4], 128);

        if (buf32) {
                formatted_hex_dump("a", (uint8_t *) buf32, 128);

                quick_dump_gpu(buf32[6], 64);
                quick_dump_gpu(buf32[20], 64);
                quick_dump_gpu(buf32[23], 64);
                quick_dump_gpu(buf32[24], 64);
                quick_dump_gpu(buf32[25], 64);
                quick_dump_gpu(buf32[26], 64);
                quick_dump_gpu(buf32[27], 64);
                quick_dump_gpu(buf32[28], 64);
                quick_dump_gpu(buf32[31], 64);

                relax_mapped_gpu(buf32);
        }

        quick_dump_gpu(mfbd->block3[16], 128);

        relax_mapped_gpu(mfbd);
}

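/* Print a vec1..vec4 of floats in a GLSL-style "<x, y, ...>" form */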
static void chai_trace_vecN(float *p, size_t count)
{
        if (count == 1)
                panwrap_log("\t<%f>,\n", p[0]);
        else if (count == 2)
                panwrap_log("\t<%f, %f>,\n", p[0], p[1]);
        else if (count == 3)
                panwrap_log("\t<%f, %f, %f>,\n", p[0], p[1], p[2]);
        else if (count == 4)
                panwrap_log("\t<%f, %f, %f, %f>,\n", p[0], p[1], p[2], p[3]);
        else
                panwrap_log("Cannot print vec%zu\n", count);
}

//#include "shim.c"

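/* Dump an attribute buffer as a list of vectors, then jitter the vertex data
 * in place ("flare"), presumably so the perturbation is visible if the GPU
 * really consumes this buffer. */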
static void chai_trace_attribute(uint64_t address)
{
        uint64_t raw;
        uint64_t flags;
        size_t vertex_count;
        size_t component_count;
        float *v;
        float *p;

        struct attribute_buffer *vb =
                (struct attribute_buffer *) fetch_mapped_gpu(
                                address,
                                sizeof(struct attribute_buffer));
        if (!vb) return;

        vertex_count = vb->total_size / vb->element_size;
        component_count = vb->element_size / sizeof(float);

        raw = vb->elements & ~3;
        flags = vb->elements ^ raw;

        p = v = fetch_mapped_gpu(raw, vb->total_size);

        if (!v) {
                relax_mapped_gpu(vb);
                return;
        }

        panwrap_log("attribute vec%zu mem %llX flag %llX = {\n", component_count, raw, flags);

        for (unsigned int i = 0; i < vertex_count; i++, p += component_count) {
                chai_trace_vecN(p, component_count);

                /* I don't like these verts... let's add some flare! */

                for (size_t c = 0; c < component_count && c < 3; ++c)
                        p[c] += (float) (rand() & 0xFF) / 1024.0f;
        }

        panwrap_log("}\n");

        relax_mapped_gpu(v);
        relax_mapped_gpu(vb);
}

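/* Walk a hardware job chain: decode the job descriptor header, dump the
 * payload according to the job type, and recurse into the next job until
 * the chain ends. */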
static void chai_trace_hw_chain(uint64_t chain)
{
        struct job_descriptor_header *h;
        uint8_t *gen_pay;
        u64 next;
        u64 payload;

        /* Trace descriptor */
        h = fetch_mapped_gpu(chain, sizeof(*h));

        if (!h) {
                panwrap_log("Failed to map the job chain %llX\n\n", chain);
                return;
        }

        panwrap_log("%s job, %d-bit, status %X, incomplete %X, fault %llX, barrier %d, index %hX, dependencies (%hX, %hX)\n",
                        chai_job_type_name(h->job_type),
                        h->job_descriptor_size ? 64 : 32,
                        h->exception_status,
                        h->first_incomplete_task,
                        h->fault_pointer,
                        h->job_barrier,
                        h->job_index,
                        h->job_dependency_index_1,
                        h->job_dependency_index_2);

        payload = chain + sizeof(*h);

        switch (h->job_type) {
        case JOB_TYPE_SET_VALUE: {
                struct payload_set_value *s;

                s = fetch_mapped_gpu(payload, sizeof(*s));

                if (!s) break;

                panwrap_log("set value -> %llX (%llX)\n", s->out, s->unknown);
                relax_mapped_gpu(s);
                break;
        }

        case JOB_TYPE_VERTEX:
        case JOB_TYPE_TILER: {
                struct payload_vertex_tiler32 *v;
                uint64_t *i_shader;
                uint8_t *shader;

                v = fetch_mapped_gpu(payload, sizeof(*v));

                if (!v) break;

                if ((v->shader & 0xFFF00000) == 0x5AB00000) {
                        panwrap_log("Job sabotaged\n");
                        relax_mapped_gpu(v);
                        break;
                }

                /* The shader pointer appears to be aligned to a 128-bit
                 * instruction word, leaving the low four bits free for flags;
                 * mask the flags out to recover the address.
                 *
                 * TODO: Decode flags.
                 *
                 * TODO: Link with cwabbotts-open-gpu-tools to
                 * disassemble on the fly.
                 */

                i_shader = fetch_mapped_gpu(v->shader, sizeof(u64));

                if (i_shader) {
                        panwrap_log("%s shader @ %llX (flags %llX)\n",
                                h->job_type == JOB_TYPE_VERTEX ? "Vertex" : "Fragment",
                                *i_shader & ~15,
                                *i_shader & 15);

                        shader = fetch_mapped_gpu(*i_shader & ~15, 0x880 - 0x540);
                        formatted_hex_dump("s", shader, 0x880 - 0x540);
                        relax_mapped_gpu(shader);
                        relax_mapped_gpu(i_shader);
                }

                /* Trace attributes based on their metadata */
                uint64_t s = v->attribute_meta;

                for (;;) {
                        attribute_meta_t *attr_meta = fetch_mapped_gpu(s, sizeof(attribute_meta_t));

                        if (!attr_meta) break;

                        if (!HAS_ATTRIBUTE(*attr_meta)) {
                                relax_mapped_gpu(attr_meta);
                                break;
                        }

                        panwrap_log("Attribute %llX (flags %llX)\n",
                                        ATTRIBUTE_NO(*attr_meta),
                                        ATTRIBUTE_FLAGS(*attr_meta));

                        chai_trace_attribute(v->attributes + ATTRIBUTE_NO(*attr_meta) * sizeof(struct attribute_buffer));

                        s += sizeof(attribute_meta_t);

                        relax_mapped_gpu(attr_meta);
                }

                if (h->job_type == JOB_TYPE_TILER)
                        panwrap_log("Drawing in %s\n", chai_gl_mode_name(((uint8_t *) v->block1)[8]));

                assert_gpu_zeroes(v->zeroes, 64);

                if (v->null1 | v->null2 | v->null4)
                        panwrap_log("Null tripped?\n");

                panwrap_log("%cFBD\n", v->fbd & FBD_TYPE ? 'M' : 'S');
                chai_trace_fbd(v->fbd);

                formatted_hex_dump("Block 1", (uint8_t *) v->block1, sizeof(v->block1));

                for (int addr = 0; addr < 14; ++addr) {
                        uint32_t address = ((uint32_t *) &(v->zeroes))[addr];
                        uint8_t *buf;
                        size_t sz = 64;

                        /* Structure known. Skip hex dump */
                        if (addr == 2) continue;
                        if (addr == 3) continue;
                        if (addr == 6) continue;
                        if (addr == 10 && h->job_type == JOB_TYPE_VERTEX) continue;
                        if (addr == 11) continue;
                        if (addr == 12) continue;

                        /* Size known exactly but not structure; cull */
                        if (addr == 0) sz = 0x100;
                        if (addr == 1) sz = 0x10;
                        if (addr == 4) sz = 0x40;
                        if (addr == 5) sz = 0x20;
                        if (addr == 7) sz = 0x20;
                        if (addr == 8) sz = 0x20;

                        panwrap_log("Addr %d %X\n", addr, address);

                        if (!address) continue;

                        buf = fetch_mapped_gpu(address, sz);

                        if (!buf) continue;

                        formatted_hex_dump("B", buf, sz);

                        if (addr == 8) {
                                uint32_t sub = *((uint32_t *) buf) & 0xFFFFFFFE;
                                uint8_t *sbuf = fetch_mapped_gpu(sub, 64);

                                panwrap_log("---\n");
                                formatted_hex_dump("S", sbuf, 64);
                                relax_mapped_gpu(sbuf);
                        }

                        if (addr == 1) {
                                uint64_t sub = *((uint64_t *) buf) >> 8;
                                uint8_t *sbuf = fetch_mapped_gpu(sub, 64);

                                panwrap_log("--- %llX\n", sub);
                                formatted_hex_dump("S", sbuf, 64);
                                relax_mapped_gpu(sbuf);
                        }

                        relax_mapped_gpu(buf);
                }

                formatted_hex_dump("Block 2", (uint8_t *) v->block2, sizeof(v->block2));

                relax_mapped_gpu(v);
                break;
        }

        case JOB_TYPE_FRAGMENT: {
                struct payload_fragment *f;

                f = fetch_mapped_gpu(payload, sizeof(*f));

                if (!f) break;

                /* Bit 31 of max_tile_coord is clear on the first frame and
                 * set afterwards.
                 * TODO: Research.
                 */

                panwrap_log("frag %X %X (%d, %d) -> (%d, %d), fbd type %cFBD at %llX (%llX)\n",
                                f->min_tile_coord, f->max_tile_coord,
                                TILE_COORD_X(f->min_tile_coord),
                                TILE_COORD_Y(f->min_tile_coord),
                                TILE_COORD_X(f->max_tile_coord),
                                TILE_COORD_Y(f->max_tile_coord),
                                f->fragment_fbd & FBD_TYPE ? 'M' : 'S',
                                f->fragment_fbd,
                                f->fragment_fbd & FBD_POINTER_MASK);

                chai_trace_fbd(f->fragment_fbd);

                relax_mapped_gpu(f);
                break;
        }

        default: {
                panwrap_log("Dumping payload %llX for job type %s\n",
                                payload,
                                chai_job_type_name(h->job_type));

                gen_pay = fetch_mapped_gpu(payload, 256);
                formatted_hex_dump("pl", gen_pay, 256);
                relax_mapped_gpu(gen_pay);
        }
        }

        next = h->job_descriptor_size
                ? h->next_job._64
                : h->next_job._32;

        relax_mapped_gpu(h);

        /* Traverse the rest of the job chain */
        if (next)
                chai_trace_hw_chain(next);
}

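/* Entry point for tracing a submitted atom: soft jobs (only replay jobs are
 * decoded so far) are handled CPU-side; anything else is treated as a
 * hardware job chain. */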
static void chai_trace_atom(const struct mali_jd_atom_v2 *v)
{
        if (v->core_req & MALI_JD_REQ_SOFT_JOB) {
                if (v->core_req & MALI_JD_REQ_SOFT_REPLAY) {
                        struct base_jd_replay_payload *payload;

                        payload = (struct base_jd_replay_payload *)
                                fetch_mapped_gpu(v->jc, sizeof(*payload));

                        if (!payload) {
                                panwrap_log("Failed to map replay payload %llX\n", v->jc);
                                return;
                        }

                        panwrap_log("tiler_jc_list = %llX, fragment_jc = %llX,\n\t"
                                "tiler_heap_free = %llX, fragment hierarchy mask = %hX, "
                                "tiler hierarchy mask = %hX, hierarchy def weight %X, "
                                "tiler core_req = %X, fragment core_req = %X\n",
                                payload->tiler_jc_list,
                                payload->fragment_jc,
                                payload->tiler_heap_free,
                                payload->fragment_hierarchy_mask,
                                payload->tiler_hierarchy_mask,
                                payload->hierarchy_default_weight,
                                payload->tiler_core_req,
                                payload->fragment_core_req);

                        relax_mapped_gpu(payload);
                } else {
                        /* TODO: Soft job decoding */
                        panwrap_log("Unknown soft job\n");
                }
        } else {
                chai_trace_hw_chain(v->jc);
        }
}