Fix tracer
[chai.git] / src / trace.c
1 /*
2  *
3  * Copyright (C) 2017 Cafe Beverage. All rights reserved.
4  *
5  * This program is free software and is provided to you under the terms of the
6  * GNU General Public License version 2 as published by the Free Software
7  * Foundation, and any use by you of this program is subject to the terms
8  * of such GNU licence.
9  *
10  * A copy of the licence is included with the program, and can also be obtained
11  * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12  * Boston, MA  02110-1301, USA.
13  *
14  */
15
16 #include "shim.h"
17
/*
 * Hex-dump `s` bytes starting at `buffer` by forwarding them to
 * panwrap_log_hexdump_trimmed with "\t\t" as its third argument.
 *
 * A NULL `buffer` is silently ignored. `array` is accepted for the callers'
 * benefit (they pass a short label) but is not used here.
 */
static void formatted_hex_dump(char *array, uint8_t *buffer, size_t s)
{
        (void) array;

        if (buffer)
                panwrap_log_hexdump_trimmed(buffer, s, "\t\t");
}
24
/*
 * Assert that synthesised command stream is bit-identical with trace.
 *
 * Maps `s` bytes of GPU memory at `addr` and compares them byte by byte with
 * `synth`. On the first mismatch both buffers are hex-dumped (expected first,
 * then actual) and the comparison stops. Nothing is logged when they match.
 *
 * A NULL `synth` or a failed mapping is reported as a bad allocation and the
 * function returns without comparing anything.
 */
static void assert_gpu_same(uint64_t addr, size_t s, uint8_t *synth)
{
        uint8_t *buffer;

        /* Nothing to compare against, e.g. the caller's allocation failed */
        if (!synth) {
                panwrap_log("Bad allocation in assert %llX\n", (unsigned long long) addr);
                return;
        }

        buffer = fetch_mapped_gpu(addr, s);

        if (!buffer) {
                /* uint64_t is not necessarily unsigned long long; cast to match %llX */
                panwrap_log("Bad allocation in assert %llX\n", (unsigned long long) addr);
                return;
        }

        /* size_t index so that the bound `s` is never compared against a narrower type */
        for (size_t i = 0; i < s; ++i) {
                if (buffer[i] == synth[i])
                        continue;

                panwrap_log("At %llX, expected:\n", (unsigned long long) addr);
                formatted_hex_dump("b", synth, s);
                panwrap_log("Instead got:\n");
                formatted_hex_dump("b", buffer, s);

                /* One report per region is enough */
                break;
        }

        relax_mapped_gpu(buffer);
}
49
/*
 * Assert that the `s` bytes of GPU memory at `addr` are all zero, by comparing
 * them against a freshly zero-filled host buffer via assert_gpu_same.
 *
 * The address being checked is always printed to stdout first.
 */
static void assert_gpu_zeroes(uint64_t addr, size_t s)
{
        uint8_t *zero = calloc(s, 1);

        /* "%LX" is not a valid integer conversion (L applies to long double only) */
        printf("Zero address %llX\n", (unsigned long long) addr);

        /*
         * calloc(0, 1) may legitimately return NULL, and a zero-length
         * comparison never reads the reference buffer, so only a NULL result
         * for a non-zero size is treated as an allocation failure.
         */
        if (!zero && s != 0) {
                panwrap_log("Bad allocation in assert %llX\n", (unsigned long long) addr);
                return;
        }

        assert_gpu_same(addr, s, zero);
        free(zero);
}
57
/*
 * Map `s` bytes of GPU memory at `addr` and hex-dump them.
 *
 * A zero `addr` is reported as a null dump without attempting to map it.
 * If the mapping fails, "Not found" is logged after the dump header.
 */
static void quick_dump_gpu(uint64_t addr, size_t s)
{
        uint8_t *buf;

        if (!addr) {
                panwrap_log("Null quick dump\n");
                return;
        }

        buf = fetch_mapped_gpu(addr, s);

        /* uint64_t is not necessarily unsigned long long; cast to match %llX */
        panwrap_log("Quick GPU dump (%llX)\n", (unsigned long long) addr);

        if (!buf) {
                panwrap_log("Not found\n");
                return;
        }

        formatted_hex_dump("a", buf, s);
        relax_mapped_gpu(buf);
}
79
80 #include "chai-notes.h"
81
82 #define DEFINE_CASE(label) case label: return #label;
83
84 static char *chai_job_type_name(int type)
85 {
86         switch (type) {
87                 DEFINE_CASE(JOB_NOT_STARTED)
88                 DEFINE_CASE(JOB_TYPE_NULL)
89                 DEFINE_CASE(JOB_TYPE_SET_VALUE)
90                 DEFINE_CASE(JOB_TYPE_CACHE_FLUSH)
91                 DEFINE_CASE(JOB_TYPE_COMPUTE)
92                 DEFINE_CASE(JOB_TYPE_VERTEX)
93                 DEFINE_CASE(JOB_TYPE_TILER)
94                 DEFINE_CASE(JOB_TYPE_FUSED)
95                 DEFINE_CASE(JOB_TYPE_FRAGMENT)
96
97                 default:
98                         panwrap_log("Requested job type %X\n", type);
99                         return "UNKNOWN";
100         }
101 }
102
103 static char* chai_gl_mode_name(uint8_t b) 
104 {
105         switch (b) {
106                 DEFINE_CASE(CHAI_POINTS)
107                 DEFINE_CASE(CHAI_LINES)
108                 DEFINE_CASE(CHAI_TRIANGLES)
109                 DEFINE_CASE(CHAI_TRIANGLE_STRIP)
110                 DEFINE_CASE(CHAI_TRIANGLE_FAN)
111
112                 default:
113                         panwrap_log("Unknown mode %X\n", b);
114                         return "GL_UNKNOWN";
115         }
116 }
117
118 /* TODO: Figure out what "fbd" means */
119 /* TODO: Corresponding SFBD decode (don't assume MFBD) */
120
121 static void chai_trace_fbd(uint32_t fbd)
122 {
123         struct tentative_mfbd *mfbd = (struct tentative_mfbd *) fetch_mapped_gpu((uint64_t) (uint32_t) fbd & FBD_POINTER_MASK, sizeof(struct tentative_mfbd));
124         uint8_t *buf;
125         uint32_t *buf32;
126
127         panwrap_log("MFBD @ %X (%X)\n", fbd & FBD_POINTER_MASK, fbd & ~FBD_POINTER_MASK);
128         panwrap_log("MFBD flags %X, heap free address %llX\n",
129                         mfbd->flags, mfbd->heap_free_address);
130
131         formatted_hex_dump("Block 1", (uint8_t *) mfbd->block1, sizeof(mfbd->block1));
132
133         panwrap_log("unk2\n");
134         buf = fetch_mapped_gpu(mfbd->unknown2, 64);
135         formatted_hex_dump("B", buf, 64);
136         relax_mapped_gpu(buf);
137
138         assert_gpu_zeroes(mfbd->block2[0], 64);
139         assert_gpu_zeroes(mfbd->block2[1], 64);
140         assert_gpu_zeroes(mfbd->ugaT, 64);
141         assert_gpu_zeroes(mfbd->unknown_gpu_address, 64);
142
143         /* Somehow maybe sort of kind of framebufferish?
144          * It changes predictably in the same way as the FB.
145          * Unclear what exactly it is, though.
146          *
147          * Where the framebuffer is: 1A 33 00 00
148          * This is: 71 B3 03 71 6C 4D 87 46
149          * Where the framebuffer is: 1A 33 1A 00
150          * This is: AB E4 43 9C E8 D6 D1 25
151          *
152          * It repeats, too, but everything 8 bytes rather than 4.
153          *
154          * It is a function of the colour painted. But the exact details
155          * are elusive.
156          *
157          * Also, this is an output, not an input.
158          * Assuming the framebuffer works as intended, RE may be
159          * pointless.
160          */
161
162         panwrap_log("ugaT %llX, uga %llX\n", mfbd->ugaT, mfbd->unknown_gpu_address);
163         panwrap_log("ugan %llX\n", mfbd->unknown_gpu_addressN);
164         buf = fetch_mapped_gpu(mfbd->unknown_gpu_addressN, 64);
165         formatted_hex_dump("B", buf, 64);
166         relax_mapped_gpu(buf);
167
168         panwrap_log("unk1 %X, b1 %llX, b2 %llX, unk2 %llX, unk3 %llX, blah %llX\n",
169                         mfbd->unknown1,
170                         mfbd->block2[0],
171                         mfbd->block2[1],
172                         mfbd->unknown2,
173                         mfbd->unknown3,
174                         mfbd->blah);
175
176         panwrap_log("Weights [ %X, %X, %X, %X, %X, %X, %X, %X ]\n",
177                         mfbd->weights[0], mfbd->weights[1],
178                         mfbd->weights[2], mfbd->weights[3],
179                         mfbd->weights[4], mfbd->weights[5],
180                         mfbd->weights[6], mfbd->weights[7]);
181
182         formatted_hex_dump("Block 3", (uint8_t *) mfbd->block3, sizeof(mfbd->block3));
183         panwrap_log("---\n");
184         formatted_hex_dump("Block 4", (uint8_t *) mfbd->block4, sizeof(mfbd->block4));
185
186         panwrap_log("--- (seriously though) --- %X\n", mfbd->block3[4]);
187         buf32 = fetch_mapped_gpu(mfbd->block3[4], 128);
188         
189         if(buf32) {
190                 formatted_hex_dump("a", (uint8_t*) buf32, 128);
191
192                 quick_dump_gpu(buf32[6], 64);
193                 quick_dump_gpu(buf32[20], 64);
194                 quick_dump_gpu(buf32[23], 64);
195                 quick_dump_gpu(buf32[24], 64);
196                 quick_dump_gpu(buf32[25], 64);
197                 quick_dump_gpu(buf32[26], 64);
198                 quick_dump_gpu(buf32[27], 64);
199                 quick_dump_gpu(buf32[28], 64);
200                 quick_dump_gpu(buf32[31], 64);
201
202                 relax_mapped_gpu(buf32);
203         }
204
205         quick_dump_gpu(mfbd->block3[16], 128);
206
207         relax_mapped_gpu(mfbd);
208 }
209
/*
 * Log the first `count` floats of `p` as one angle-bracketed vector line.
 *
 * Only 1 to 4 components are supported; any other `count` logs an error
 * instead and reads nothing from `p`.
 */
static void chai_trace_vecN(float *p, size_t count)
{
        switch (count) {
        case 1:
                panwrap_log("\t<%f>,\n", p[0]);
                break;
        case 2:
                panwrap_log("\t<%f, %f>,\n", p[0], p[1]);
                break;
        case 3:
                panwrap_log("\t<%f, %f, %f>,\n", p[0], p[1], p[2]);
                break;
        case 4:
                panwrap_log("\t<%f, %f, %f, %f>,\n", p[0], p[1], p[2], p[3]);
                break;
        default:
                /* count is a size_t: %zu, not %d */
                panwrap_log("Cannot print vec%zu\n", count);
                break;
        }
}
223
224 //#include "shim.c"
225
226 static void chai_trace_attribute(uint64_t address)
227 {
228         uint64_t raw;
229         uint64_t flags;
230         size_t vertex_count;
231         size_t component_count;
232         float *v;
233         float *p;
234
235         struct attribute_buffer *vb =
236                 (struct attribute_buffer *) fetch_mapped_gpu(
237                                 address,
238                                 sizeof(struct attribute_buffer));
239         if (!vb) return;
240
241         vertex_count = vb->total_size / vb->element_size;
242         component_count = vb->element_size / sizeof(float);
243
244         raw = vb->elements & ~3;
245         flags = vb->elements ^ raw;
246
247         p = v = fetch_mapped_gpu(raw, vb->total_size);
248
249         panwrap_log("attribute vec%d mem%llXflag%llX = {\n", component_count, raw, flags);
250
251         for (unsigned int i = 0; i < vertex_count; i++, p += component_count) {
252                 chai_trace_vecN(p, component_count);
253
254                 /* I don't like these verts... let's add some flare! */
255
256                 p[0] += (float) (rand() & 0xFF) / 1024.0f;
257                 p[1] += (float) (rand() & 0xFF) / 1024.0f;
258                 p[2] += (float) (rand() & 0xFF) / 1024.0f;
259         }
260
261         panwrap_log("}\n");
262
263         relax_mapped_gpu(vb);
264 }
265
266 static void chai_trace_hw_chain(uint64_t chain)
267 {
268         struct job_descriptor_header *h;
269         uint8_t *gen_pay;
270         u64 next;
271         u64 payload;
272
273         /* Trace descriptor */
274         h = fetch_mapped_gpu(chain, sizeof(*h));
275
276         if(!h) {
277                 panwrap_log("Failed to map the job chain %llX\n\n", chain);
278                 return;
279         }
280
281         panwrap_log("%s job, %d-bit, status %X, incomplete %X, fault %llX, barrier %d, index %hX, dependencies (%hX, %hX)\n",
282                         chai_job_type_name(h->job_type),
283                         h->job_descriptor_size ? 64 : 32,
284                         h->exception_status,
285                         h->first_incomplete_task,
286                         h->fault_pointer,
287                         h->job_barrier,
288                         h->job_index,
289                         h->job_dependency_index_1,
290                         h->job_dependency_index_2);
291
292         payload = chain + sizeof(*h);
293
294         switch (h->job_type) {
295         case JOB_TYPE_SET_VALUE: {
296                 struct payload_set_value *s;
297
298                 s = fetch_mapped_gpu(payload, sizeof(*s));
299                 panwrap_log("set value -> %llX (%llX)\n", s->out, s->unknown);
300                 relax_mapped_gpu(s);
301                 break;
302         }
303
304         case JOB_TYPE_VERTEX:
305         case JOB_TYPE_TILER: {
306                 struct payload_vertex_tiler32 *v;
307                 uint64_t *i_shader;
308                 uint8_t *shader;
309
310                 v = fetch_mapped_gpu(payload, sizeof(*v));
311
312                 if ((v->shader & 0xFFF00000) == 0x5AB00000) {
313                         panwrap_log("Job sabotaged\n");
314                         break;
315                 }
316
317                 /* Mask out lower 128-bit (instruction word) for flags.
318                  *
319                  * TODO: Decode flags.
320                  *
321                  * TODO: Link with cwabbotts-open-gpu-tools to
322                  * disassemble on the fly.
323                  */
324
325                 i_shader = fetch_mapped_gpu(v->shader, sizeof(u64));
326
327                 panwrap_log("%s shader @ %llX (flags %llX)\n",
328                         h->job_type == JOB_TYPE_VERTEX ? "Vertex" : "Fragment",
329                         *i_shader & ~15,
330                         *i_shader & 15);
331
332                 shader = fetch_mapped_gpu(*i_shader & ~15, 0x880 - 0x540);
333                 formatted_hex_dump("s", shader, 0x880 - 0x540);
334                 relax_mapped_gpu(shader);
335                 relax_mapped_gpu(i_shader);
336
337                 /* Trace attribute based on metadata */
338                 uint64_t s = v->attribute_meta;
339
340                 for(;;) {
341                         attribute_meta_t *attr_meta = fetch_mapped_gpu(s, sizeof(attribute_meta_t));
342
343                         if(!HAS_ATTRIBUTE(*attr_meta)) break;
344
345                         panwrap_log("Attribute %llX (flags %llX)\n",
346                                         ATTRIBUTE_NO(*attr_meta),
347                                         ATTRIBUTE_FLAGS(*attr_meta));
348
349                         chai_trace_attribute(v->attributes + ATTRIBUTE_NO(*attr_meta) * sizeof(struct attribute_buffer));
350
351                         s += sizeof(attribute_meta_t);
352
353                         relax_mapped_gpu(attr_meta);
354                 }
355
356                 if (h->job_type == JOB_TYPE_TILER)
357                         panwrap_log("Drawing in %s\n", chai_gl_mode_name(((uint8_t *) v->block1)[8]));
358
359                 assert_gpu_zeroes(v->zeroes, 64);
360
361                 if (v->null1 | v->null2 | v->null4)
362                         panwrap_log("Null tripped?\n");
363
364                 panwrap_log("%cFBD\n", v->fbd & FBD_TYPE ? 'M' : 'S');
365                 chai_trace_fbd(v->fbd);
366
367                 formatted_hex_dump("Block 1", (uint8_t *) v->block1, sizeof(v->block1));
368
369                 for (int addr = 0; addr < 14; ++addr) {
370                         uint32_t address = ((uint32_t *) &(v->zeroes))[addr];
371                         uint8_t *buf;
372                         size_t sz = 64;
373
374                         /* Structure known. Skip hex dump */
375                         if (addr == 2) continue;
376                         if (addr == 3) continue;
377                         if (addr == 6) continue;
378                         if (addr == 10 && h->job_type == JOB_TYPE_VERTEX) continue;
379                         if (addr == 11) continue;
380                         if (addr == 12) continue;
381
382                         /* Size known exactly but not structure; cull */
383                         if (addr == 0) sz = 0x100;
384                         if (addr == 1) sz = 0x10;
385                         if (addr == 4) sz = 0x40;
386                         if (addr == 5) sz = 0x20;
387                         if (addr == 7) sz = 0x20;
388                         if (addr == 8) sz = 0x20;
389
390                         panwrap_log("Addr %d %X\n", addr, address);
391
392                         if (!address) continue;
393
394                         buf = fetch_mapped_gpu(address, sz);
395
396                         formatted_hex_dump("B", buf, sz);
397
398                         if (addr == 8) {
399                                 uint32_t sub = *((uint32_t *) buf) & 0xFFFFFFFE;
400                                 uint8_t *sbuf = fetch_mapped_gpu(sub, 64);
401
402                                 panwrap_log("---\n");
403                                 formatted_hex_dump("S", sbuf, 64);
404                                 relax_mapped_gpu(sbuf);
405                         }
406
407                         if (addr == 1) {
408                                 uint64_t sub = *((uint64_t*) buf) >> 8;
409                                 uint8_t *sbuf = fetch_mapped_gpu(sub, 64);
410
411                                 panwrap_log("--- %llX\n", sub);
412                                 formatted_hex_dump("S", sbuf, 64);
413                                 relax_mapped_gpu(sbuf);
414                         }
415
416                         relax_mapped_gpu(buf);
417                 }
418
419                 formatted_hex_dump("Block 2", (uint8_t *) v->block2, sizeof(v->block2));
420
421                 relax_mapped_gpu(v);
422                 break;
423         }
424
425         case JOB_TYPE_FRAGMENT: {
426                 struct payload_fragment *f;
427
428                 f = fetch_mapped_gpu(payload, sizeof(*f));
429
430                 /* Bit 31 of max_tile_coord clear on the first frame.
431                  * Set after.
432                  * TODO: Research.
433                  */
434
435                 panwrap_log("frag %X %X (%d, %d) -> (%d, %d), fbd type %cFBD at %llX (%llX) \n",
436                                 f->min_tile_coord, f->max_tile_coord,
437                                 TILE_COORD_X(f->min_tile_coord),
438                                 TILE_COORD_Y(f->min_tile_coord),
439                                 TILE_COORD_X(f->max_tile_coord),
440                                 TILE_COORD_Y(f->max_tile_coord),
441                                 f->fragment_fbd & FBD_TYPE ? 'M' : 'S',
442                                 f->fragment_fbd,
443                                 f->fragment_fbd & FBD_POINTER_MASK);
444
445                 chai_trace_fbd(f->fragment_fbd);
446
447                 relax_mapped_gpu(f);
448                 break;
449         }
450
451         default: {
452                 panwrap_log("Dumping payload %llX for job type %s\n",
453                                 payload,
454                                 chai_job_type_name(h->job_type));
455
456                 gen_pay = fetch_mapped_gpu(payload, 256);
457                 formatted_hex_dump("pl", gen_pay, 256);
458                 relax_mapped_gpu(gen_pay);
459         }
460         }
461
462         next = h->job_descriptor_size
463                 ? h->next_job._64
464                 : h->next_job._32;
465
466         relax_mapped_gpu(h);
467
468         /* Traverse the job chain */
469         if (next)
470                 chai_trace_hw_chain(next);
471 }
472
473 static void chai_trace_atom(const struct mali_jd_atom_v2 *v)
474 {
475         if (v->core_req & MALI_JD_REQ_SOFT_JOB) {
476                 if (v->core_req & MALI_JD_REQ_SOFT_REPLAY) {
477                         struct mali_jd_replay_payload *payload;
478
479                         payload = (struct mali_jd_replay_payload *)
480                                 fetch_mapped_gpu(v->jc, sizeof(*payload));
481
482                         panwrap_log("tiler_jc_list = %llX, fragment_jc = %llX, \nt "
483                                 "tiler_heap_free = %llX, fragment hierarchy mask = %hX, "
484                                 "tiler hierachy mask = %hX, hierarchy def weight %X, "
485                                 "tiler core_req = %X, fragment core_req = %X",
486                                 payload->tiler_jc_list,
487                                 payload->fragment_jc,
488                                 payload->tiler_heap_free,
489                                 payload->fragment_hierarchy_mask,
490                                 payload->tiler_hierarchy_mask,
491                                 payload->hierarchy_default_weight,
492                                 payload->tiler_core_req,
493                                 payload->fragment_core_req);
494
495                         relax_mapped_gpu(payload);
496                 } else  {
497                         /* TODO: Soft job decoding */
498                         panwrap_log("Unknown soft job\n");
499                 }
500         } else {
501                 chai_trace_hw_chain(v->jc);
502         }
503 }