GNU Linux-libre 6.7.9-gnu
[releases.git] / drivers / media / platform / verisilicon / hantro_h264.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Rockchip RK3288 VPU codec driver
4  *
5  * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
6  *      Hertz Wong <hertz.wong@rock-chips.com>
7  *      Herman Chen <herman.chen@rock-chips.com>
8  *
9  * Copyright (C) 2014 Google, Inc.
10  *      Tomasz Figa <tfiga@chromium.org>
11  */
12
13 #include <linux/types.h>
14 #include <media/v4l2-h264.h>
15 #include <media/v4l2-mem2mem.h>
16
17 #include "hantro.h"
18 #include "hantro_hw.h"
19
20 /* Table sizes: CABAC and POC counts are in u32 units; the scaling list size is in bytes (it dimensions a u8 array). */
21 #define CABAC_INIT_BUFFER_SIZE          (460 * 2)
22 #define POC_BUFFER_SIZE                 34
23 #define SCALING_LIST_SIZE               (6 * 16 + 2 * 64)
24
25 /*
26  * For valid and long-term reference marking, indices are reversed, so bit 31
27  * indicates the status of picture 0.
28  */
29 #define REF_BIT(i)                      BIT(32 - 1 - (i))
30
31 /* Layout of the auxiliary buffer allocated by hantro_h264_dec_init(). */
32 struct hantro_h264_dec_priv_tbl {
33         u32 cabac_table[CABAC_INIT_BUFFER_SIZE]; /* filled once from h264_cabac_table at init */
34         u32 poc[POC_BUFFER_SIZE]; /* top/bottom POC pair per DPB slot, then [32]/[33] for the current picture */
35         u8 scaling_list[SCALING_LIST_SIZE]; /* byte-swapped 4x4 lists followed by the first two 8x8 lists */
36 };
37
38 /*
39  * Constant CABAC table.
40  * From drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_h264d.c
41  * in https://chromium.googlesource.com/chromiumos/third_party/kernel,
42  * chromeos-3.14 branch.
43  */
44 static const u32 h264_cabac_table[] = {
45         0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07330000,
46         0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
47         0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
48         0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
49         0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
50         0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x000b0137,
51         0x0045ef7f, 0xf3660052, 0xf94aeb6b, 0xe57fe17f, 0xe87fee5f, 0xe57feb72,
52         0xe27fef7b, 0xf473f07a, 0xf573f43f, 0xfe44f154, 0xf368fd46, 0xf85df65a,
53         0xe27fff4a, 0xfa61f95b, 0xec7ffc38, 0xfb52f94c, 0xea7df95d, 0xf557fd4d,
54         0xfb47fc3f, 0xfc44f454, 0xf93ef941, 0x083d0538, 0xfe420140, 0x003dfe4e,
55         0x01320734, 0x0a23002c, 0x0b26012d, 0x002e052c, 0x1f110133, 0x07321c13,
56         0x10210e3e, 0xf36cf164, 0xf365f35b, 0xf45ef658, 0xf054f656, 0xf953f357,
57         0xed5e0146, 0x0048fb4a, 0x123bf866, 0xf164005f, 0xfc4b0248, 0xf54bfd47,
58         0x0f2ef345, 0x003e0041, 0x1525f148, 0x09391036, 0x003e0c48, 0x18000f09,
59         0x08190d12, 0x0f090d13, 0x0a250c12, 0x061d1421, 0x0f1e042d, 0x013a003e,
60         0x073d0c26, 0x0b2d0f27, 0x0b2a0d2c, 0x102d0c29, 0x0a311e22, 0x122a0a37,
61         0x1133112e, 0x00591aed, 0x16ef1aef, 0x1ee71cec, 0x21e925e5, 0x21e928e4,
62         0x26ef21f5, 0x28f129fa, 0x26012911, 0x1efa1b03, 0x1a1625f0, 0x23fc26f8,
63         0x26fd2503, 0x26052a00, 0x23102716, 0x0e301b25, 0x153c0c44, 0x0261fd47,
64         0xfa2afb32, 0xfd36fe3e, 0x003a013f, 0xfe48ff4a, 0xf75bfb43, 0xfb1bfd27,
65         0xfe2c002e, 0xf040f844, 0xf64efa4d, 0xf656f45c, 0xf137f63c, 0xfa3efc41,
66         0xf449f84c, 0xf950f758, 0xef6ef561, 0xec54f54f, 0xfa49fc4a, 0xf356f360,
67         0xf561ed75, 0xf84efb21, 0xfc30fe35, 0xfd3ef347, 0xf64ff456, 0xf35af261,
68         0x0000fa5d, 0xfa54f84f, 0x0042ff47, 0x003efe3c, 0xfe3bfb4b, 0xfd3efc3a,
69         0xf742ff4f, 0x00470344, 0x0a2cf93e, 0x0f240e28, 0x101b0c1d, 0x012c1424,
70         0x1220052a, 0x01300a3e, 0x112e0940, 0xf468f561, 0xf060f958, 0xf855f955,
71         0xf755f358, 0x0442fd4d, 0xfd4cfa4c, 0x0a3aff4c, 0xff53f963, 0xf25f025f,
72         0x004cfb4a, 0x0046f54b, 0x01440041, 0xf249033e, 0x043eff44, 0xf34b0b37,
73         0x05400c46, 0x0f060613, 0x07100c0e, 0x120d0d0b, 0x0d0f0f10, 0x0c170d17,
74         0x0f140e1a, 0x0e2c1128, 0x112f1811, 0x15151916, 0x1f1b161d, 0x13230e32,
75         0x0a39073f, 0xfe4dfc52, 0xfd5e0945, 0xf46d24dd, 0x24de20e6, 0x25e22ce0,
76         0x22ee22f1, 0x28f121f9, 0x23fb2100, 0x2602210d, 0x17230d3a, 0x1dfd1a00,
77         0x161e1ff9, 0x23f122fd, 0x220324ff, 0x2205200b, 0x2305220c, 0x270b1e1d,
78         0x221a1d27, 0x13421f15, 0x1f1f1932, 0xef78ec70, 0xee72f555, 0xf15cf259,
79         0xe647f151, 0xf2500044, 0xf246e838, 0xe944e832, 0xf54a17f3, 0x1af328f1,
80         0x31f22c03, 0x2d062c22, 0x21361352, 0xfd4bff17, 0x0122012b, 0x0036fe37,
81         0x003d0140, 0x0044f75c, 0xf26af361, 0xf15af45a, 0xee58f649, 0xf74ff256,
82         0xf649f646, 0xf645fb42, 0xf740fb3a, 0x023b15f6, 0x18f51cf8, 0x1cff1d03,
83         0x1d092314, 0x1d240e43, 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968,
84         0xfa35ff36, 0x07331721, 0x17021500, 0x01090031, 0xdb760539, 0xf34ef541,
85         0x013e0c31, 0xfc491132, 0x1240092b, 0x1d001a43, 0x105a0968, 0xd27fec68,
86         0x0143f34e, 0xf541013e, 0xfa56ef5f, 0xfa3d092d, 0xfd45fa51, 0xf5600637,
87         0x0743fb56, 0x0258003a, 0xfd4cf65e, 0x05360445, 0xfd510058, 0xf943fb4a,
88         0xfc4afb50, 0xf948013a, 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948,
89         0x0d29033e, 0x002dfc4e, 0xfd60e57e, 0xe462e765, 0xe943e452, 0xec5ef053,
90         0xea6eeb5b, 0xee66f35d, 0xe37ff95c, 0xfb59f960, 0xf36cfd2e, 0xff41ff39,
91         0xf75dfd4a, 0xf75cf857, 0xe97e0536, 0x063c063b, 0x0645ff30, 0x0044fc45,
92         0xf858fe55, 0xfa4eff4b, 0xf94d0236, 0x0532fd44, 0x0132062a, 0xfc51013f,
93         0xfc460043, 0x0239fe4c, 0x0b230440, 0x013d0b23, 0x12190c18, 0x0d1d0d24,
94         0xf65df949, 0xfe490d2e, 0x0931f964, 0x09350235, 0x0535fe3d, 0x00380038,
95         0xf33ffb3c, 0xff3e0439, 0xfa450439, 0x0e270433, 0x0d440340, 0x013d093f,
96         0x07321027, 0x052c0434, 0x0b30fb3c, 0xff3b003b, 0x1621052c, 0x0e2bff4e,
97         0x003c0945, 0x0b1c0228, 0x032c0031, 0x002e022c, 0x0233002f, 0x0427023e,
98         0x062e0036, 0x0336023a, 0x043f0633, 0x06390735, 0x06340637, 0x0b2d0e24,
99         0x0835ff52, 0x0737fd4e, 0x0f2e161f, 0xff541907, 0x1ef91c03, 0x1c042000,
100         0x22ff1e06, 0x1e062009, 0x1f131a1b, 0x1a1e2514, 0x1c221146, 0x0143053b,
101         0x0943101e, 0x12201223, 0x161d181f, 0x1726122b, 0x14290b3f, 0x093b0940,
102         0xff5efe59, 0xf76cfa4c, 0xfe2c002d, 0x0034fd40, 0xfe3bfc46, 0xfc4bf852,
103         0xef66f74d, 0x0318002a, 0x00300037, 0xfa3bf947, 0xf453f557, 0xe277013a,
104         0xfd1dff24, 0x0126022b, 0xfa37003a, 0x0040fd4a, 0xf65a0046, 0xfc1d051f,
105         0x072a013b, 0xfe3afd48, 0xfd51f561, 0x003a0805, 0x0a0e0e12, 0x0d1b0228,
106         0x003afd46, 0xfa4ff855, 0x0000f36a, 0xf06af657, 0xeb72ee6e, 0xf262ea6e,
107         0xeb6aee67, 0xeb6be96c, 0xe670f660, 0xf45ffb5b, 0xf75dea5e, 0xfb560943,
108         0xfc50f655, 0xff46073c, 0x093a053d, 0x0c320f32, 0x12311136, 0x0a29072e,
109         0xff330731, 0x08340929, 0x062f0237, 0x0d290a2c, 0x06320535, 0x0d31043f,
110         0x0640fe45, 0xfe3b0646, 0x0a2c091f, 0x0c2b0335, 0x0e220a26, 0xfd340d28,
111         0x1120072c, 0x07260d32, 0x0a391a2b, 0x0e0b0b0e, 0x090b120b, 0x150917fe,
112         0x20f120f1, 0x22eb27e9, 0x2adf29e1, 0x2ee426f4, 0x151d2de8, 0x35d330e6,
113         0x41d52bed, 0x27f61e09, 0x121a141b, 0x0039f252, 0xfb4bed61, 0xdd7d1b00,
114         0x1c001ffc, 0x1b062208, 0x1e0a1816, 0x21131620, 0x1a1f1529, 0x1a2c172f,
115         0x10410e47, 0x083c063f, 0x11411518, 0x17141a17, 0x1b201c17, 0x1c181728,
116         0x18201c1d, 0x172a1339, 0x1635163d, 0x0b560c28, 0x0b330e3b, 0xfc4ff947,
117         0xfb45f746, 0xf842f644, 0xed49f445, 0xf046f143, 0xec3eed46, 0xf042ea41,
118         0xec3f09fe, 0x1af721f7, 0x27f929fe, 0x2d033109, 0x2d1b243b, 0xfa42f923,
119         0xf92af82d, 0xfb30f438, 0xfa3cfb3e, 0xf842f84c, 0xfb55fa51, 0xf64df951,
120         0xef50ee49, 0xfc4af653, 0xf747f743, 0xff3df842, 0xf242003b, 0x023b15f3,
121         0x21f227f9, 0x2efe3302, 0x3c063d11, 0x37222a3e, 0x14f10236, 0x034a14f1,
122         0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331619, 0x22001000, 0xfe090429,
123         0xe3760241, 0xfa47f34f, 0x05340932, 0xfd460a36, 0x1a221316, 0x28003902,
124         0x29241a45, 0xd37ff165, 0xfc4cfa47, 0xf34f0534, 0x0645f35a, 0x0034082b,
125         0xfe45fb52, 0xf660023b, 0x024bfd57, 0xfd640138, 0xfd4afa55, 0x003bfd51,
126         0xf956fb5f, 0xff42ff4d, 0x0146fe56, 0xfb48003d, 0x0029003f, 0x003f003f,
127         0xf7530456, 0x0061f948, 0x0d29033e, 0x0d0f0733, 0x0250d97f, 0xee5bef60,
128         0xe651dd62, 0xe866e961, 0xe577e863, 0xeb6eee66, 0xdc7f0050, 0xfb59f95e,
129         0xfc5c0027, 0x0041f154, 0xdd7ffe49, 0xf468f75b, 0xe17f0337, 0x07380737,
130         0x083dfd35, 0x0044f94a, 0xf758f367, 0xf35bf759, 0xf25cf84c, 0xf457e96e,
131         0xe869f64e, 0xec70ef63, 0xb27fba7f, 0xce7fd27f, 0xfc42fb4e, 0xfc47f848,
132         0x023bff37, 0xf946fa4b, 0xf859de77, 0xfd4b2014, 0x1e16d47f, 0x0036fb3d,
133         0x003aff3c, 0xfd3df843, 0xe754f24a, 0xfb410534, 0x0239003d, 0xf745f546,
134         0x1237fc47, 0x003a073d, 0x09291219, 0x0920052b, 0x092f002c, 0x0033022e,
135         0x1326fc42, 0x0f260c2a, 0x09220059, 0x042d0a1c, 0x0a1f21f5, 0x34d5120f,
136         0x1c0023ea, 0x26e72200, 0x27ee20f4, 0x66a20000, 0x38f121fc, 0x1d0a25fb,
137         0x33e327f7, 0x34de45c6, 0x43c12cfb, 0x200737e3, 0x20010000, 0x1b2421e7,
138         0x22e224e4, 0x26e426e5, 0x22ee23f0, 0x22f220f8, 0x25fa2300, 0x1e0a1c12,
139         0x1a191d29, 0x004b0248, 0x084d0e23, 0x121f1123, 0x151e112d, 0x142a122d,
140         0x1b1a1036, 0x07421038, 0x0b490a43, 0xf674e970, 0xf147f93d, 0x0035fb42,
141         0xf54df750, 0xf754f657, 0xde7feb65, 0xfd27fb35, 0xf93df54b, 0xf14def5b,
142         0xe76be76f, 0xe47af54c, 0xf62cf634, 0xf639f73a, 0xf048f945, 0xfc45fb4a,
143         0xf7560242, 0xf7220120, 0x0b1f0534, 0xfe37fe43, 0x0049f859, 0x03340704,
144         0x0a081108, 0x10130325, 0xff3dfb49, 0xff46fc4e, 0x0000eb7e, 0xe97cec6e,
145         0xe67ee77c, 0xef69e579, 0xe575ef66, 0xe675e574, 0xdf7af65f, 0xf264f85f,
146         0xef6fe472, 0xfa59fe50, 0xfc52f755, 0xf851ff48, 0x05400143, 0x09380045,
147         0x01450745, 0xf945fa43, 0xf04dfe40, 0x023dfa43, 0xfd400239, 0xfd41fd42,
148         0x003e0933, 0xff42fe47, 0xfe4bff46, 0xf7480e3c, 0x1025002f, 0x12230b25,
149         0x0c290a29, 0x02300c29, 0x0d29003b, 0x03321328, 0x03421232, 0x13fa12fa,
150         0x0e001af4, 0x1ff021e7, 0x21ea25e4, 0x27e22ae2, 0x2fd62ddc, 0x31de29ef,
151         0x200945b9, 0x3fc142c0, 0x4db636d9, 0x34dd29f6, 0x240028ff, 0x1e0e1c1a,
152         0x17250c37, 0x0b4125df, 0x27dc28db, 0x26e22edf, 0x2ae228e8, 0x31e326f4,
153         0x28f626fd, 0x2efb1f14, 0x1d1e192c, 0x0c300b31, 0x1a2d1616, 0x17161b15,
154         0x21141a1c, 0x1e181b22, 0x122a1927, 0x12320c46, 0x15360e47, 0x0b531920,
155         0x15311536, 0xfb55fa51, 0xf64df951, 0xef50ee49, 0xfc4af653, 0xf747f743,
156         0xff3df842, 0xf242003b, 0x023b11f6, 0x20f32af7, 0x31fb3500, 0x4003440a,
157         0x421b2f39, 0xfb470018, 0xff24fe2a, 0xfe34f739, 0xfa3ffc41, 0xfc43f952,
158         0xfd51fd4c, 0xf948fa4e, 0xf448f244, 0xfd46fa4c, 0xfb42fb3e, 0x0039fc3d,
159         0xf73c0136, 0x023a11f6, 0x20f32af7, 0x31fb3500, 0x4003440a, 0x421b2f39,
160         0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331d10,
161         0x19000e00, 0xf633fd3e, 0xe5631a10, 0xfc55e866, 0x05390639, 0xef490e39,
162         0x1428140a, 0x1d003600, 0x252a0c61, 0xe07fea75, 0xfe4afc55, 0xe8660539,
163         0xfa5df258, 0xfa2c0437, 0xf559f167, 0xeb741339, 0x143a0454, 0x0660013f,
164         0xfb55f36a, 0x053f064b, 0xfd5aff65, 0x0337fc4f, 0xfe4bf461, 0xf932013c,
165         0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x0722f758,
166         0xec7fdc7f, 0xef5bf25f, 0xe754e756, 0xf459ef5b, 0xe17ff24c, 0xee67f35a,
167         0xdb7f0b50, 0x054c0254, 0x054efa37, 0x043df253, 0xdb7ffb4f, 0xf568f55b,
168         0xe27f0041, 0xfe4f0048, 0xfc5cfa38, 0x0344f847, 0xf362fc56, 0xf458fb52,
169         0xfd48fc43, 0xf848f059, 0xf745ff3b, 0x05420439, 0xfc47fe47, 0x023aff4a,
170         0xfc2cff45, 0x003ef933, 0xfc2ffa2a, 0xfd29fa35, 0x084cf74e, 0xf5530934,
171         0x0043fb5a, 0x0143f148, 0xfb4bf850, 0xeb53eb40, 0xf31fe740, 0xe35e094b,
172         0x113ff84a, 0xfb23fe1b, 0x0d5b0341, 0xf945084d, 0xf642033e, 0xfd44ec51,
173         0x001e0107, 0xfd17eb4a, 0x1042e97c, 0x11252cee, 0x32deea7f, 0x0427002a,
174         0x07220b1d, 0x081f0625, 0x072a0328, 0x08210d2b, 0x0d24042f, 0x0337023a,
175         0x063c082c, 0x0b2c0e2a, 0x07300438, 0x04340d25, 0x0931133a, 0x0a300c2d,
176         0x00451421, 0x083f23ee, 0x21e71cfd, 0x180a1b00, 0x22f234d4, 0x27e81311,
177         0x1f19241d, 0x1821220f, 0x1e141649, 0x1422131f, 0x1b2c1310, 0x0f240f24,
178         0x151c1915, 0x1e141f0c, 0x1b10182a, 0x005d0e38, 0x0f391a26, 0xe87fe873,
179         0xea52f73e, 0x0035003b, 0xf255f359, 0xf35ef55c, 0xe37feb64, 0xf239f443,
180         0xf547f64d, 0xeb55f058, 0xe968f162, 0xdb7ff652, 0xf830f83d, 0xf842f946,
181         0xf24bf64f, 0xf753f45c, 0xee6cfc4f, 0xea45f04b, 0xfe3a013a, 0xf34ef753,
182         0xfc51f363, 0xf351fa26, 0xf33efa3a, 0xfe3bf049, 0xf64cf356, 0xf753f657,
183         0x0000ea7f, 0xe77fe778, 0xe57fed72, 0xe975e776, 0xe675e871, 0xe476e178,
184         0xdb7cf65e, 0xf166f663, 0xf36ace7f, 0xfb5c1139, 0xfb56f35e, 0xf45bfe4d,
185         0x0047ff49, 0x0440f951, 0x05400f39, 0x01430044, 0xf6430144, 0x004d0240,
186         0x0044fb4e, 0x0737053b, 0x02410e36, 0x0f2c053c, 0x0246fe4c, 0xee560c46,
187         0x0540f446, 0x0b370538, 0x00450241, 0xfa4a0536, 0x0736fa4c, 0xf552fe4d,
188         0xfe4d192a, 0x11f310f7, 0x11f41beb, 0x25e229d8, 0x2ad730d1, 0x27e02ed8,
189         0x34cd2ed7, 0x34d92bed, 0x200b3dc9, 0x38d23ece, 0x51bd2dec, 0x23fe1c0f,
190         0x22012701, 0x1e111426, 0x122d0f36, 0x004f24f0, 0x25f225ef, 0x2001220f,
191         0x1d0f1819, 0x22161f10, 0x23121f1c, 0x2129241c, 0x1b2f153e, 0x121f131a,
192         0x24181817, 0x1b10181e, 0x1f1d1629, 0x162a103c, 0x0f340e3c, 0x034ef07b,
193         0x15351638, 0x193d1521, 0x1332113d, 0xfd4ef84a, 0xf748f648, 0xee4bf447,
194         0xf53ffb46, 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc,
195         0x21ff2107, 0x1f0c2517, 0x1f261440, 0xf747f925, 0xf82cf531, 0xf638f43b,
196         0xf83ff743, 0xfa44f64f, 0xfd4ef84a, 0xf748f648, 0xee4bf447, 0xf53ffb46,
197         0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc, 0x21ff2107,
198         0x1f0c2517, 0x1f261440
199 };
200
201 static void
202 assemble_scaling_list(struct hantro_ctx *ctx)
203 {
204         const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
205         const struct v4l2_ctrl_h264_scaling_matrix *scaling = ctrls->scaling;
206         const struct v4l2_ctrl_h264_pps *pps = ctrls->pps;
207         const size_t num_list_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4);
208         const size_t list_len_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4[0]);
209         const size_t list_len_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8[0]);
210         struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
211         u32 *dst = (u32 *)tbl->scaling_list;
212         const u32 *src;
213         int i, j;
214
215         if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
216                 return;
217
218         for (i = 0; i < num_list_4x4; i++) {
219                 src = (u32 *)&scaling->scaling_list_4x4[i];
220                 for (j = 0; j < list_len_4x4 / 4; j++)
221                         *dst++ = swab32(src[j]);
222         }
223
224         /* Only Intra/Inter Y lists */
225         for (i = 0; i < 2; i++) {
226                 src = (u32 *)&scaling->scaling_list_8x8[i];
227                 for (j = 0; j < list_len_8x8 / 4; j++)
228                         *dst++ = swab32(src[j]);
229         }
230 }
231
232 static void prepare_table(struct hantro_ctx *ctx)
233 {
234         const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
235         const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode;
236         const struct v4l2_ctrl_h264_sps *sps = ctrls->sps;
237         struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
238         const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
239         u32 dpb_longterm = 0;
240         u32 dpb_valid = 0;
241         int i;
242
243         for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) {
244                 tbl->poc[i * 2] = dpb[i].top_field_order_cnt;
245                 tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt;
246
247                 if (!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
248                         continue;
249
250                 /*
251                  * Set up bit maps of valid and long term DPBs.
252                  * NOTE: The bits are reversed, i.e. MSb is DPB 0. For frame
253                  * decoding, bit 31 to 15 are used, while for field decoding,
254                  * all bits are used, with bit 31 being a top field, 30 a bottom
255                  * field and so on.
256                  */
257                 if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) {
258                         if (dpb[i].fields & V4L2_H264_TOP_FIELD_REF)
259                                 dpb_valid |= REF_BIT(i * 2);
260
261                         if (dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)
262                                 dpb_valid |= REF_BIT(i * 2 + 1);
263
264                         if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) {
265                                 dpb_longterm |= REF_BIT(i * 2);
266                                 dpb_longterm |= REF_BIT(i * 2 + 1);
267                         }
268                 } else {
269                         dpb_valid |= REF_BIT(i);
270
271                         if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
272                                 dpb_longterm |= REF_BIT(i);
273                 }
274         }
275         ctx->h264_dec.dpb_valid = dpb_valid;
276         ctx->h264_dec.dpb_longterm = dpb_longterm;
277
278         if ((dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) ||
279             !(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)) {
280                 tbl->poc[32] = ctx->h264_dec.cur_poc;
281                 tbl->poc[33] = 0;
282         } else {
283                 tbl->poc[32] = dec_param->top_field_order_cnt;
284                 tbl->poc[33] = dec_param->bottom_field_order_cnt;
285         }
286
287         assemble_scaling_list(ctx);
288 }
289
290 static bool dpb_entry_match(const struct v4l2_h264_dpb_entry *a,
291                             const struct v4l2_h264_dpb_entry *b)
292 {
293         return a->reference_ts == b->reference_ts;
294 }
295
296 static void update_dpb(struct hantro_ctx *ctx)
297 {
298         const struct v4l2_ctrl_h264_decode_params *dec_param;
299         DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, };
300         DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, };
301         unsigned int i, j;
302
303         dec_param = ctx->h264_dec.ctrls.decode;
304
305         /* Disable all entries by default. */
306         for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++)
307                 ctx->h264_dec.dpb[i].flags = 0;
308
309         /* Try to match new DPB entries with existing ones by their POCs. */
310         for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) {
311                 const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
312
313                 if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
314                         continue;
315
316                 /*
317                  * To cut off some comparisons, iterate only on target DPB
318                  * entries which are not used yet.
319                  */
320                 for_each_clear_bit(j, used, ARRAY_SIZE(ctx->h264_dec.dpb)) {
321                         struct v4l2_h264_dpb_entry *cdpb;
322
323                         cdpb = &ctx->h264_dec.dpb[j];
324                         if (!dpb_entry_match(cdpb, ndpb))
325                                 continue;
326
327                         *cdpb = *ndpb;
328                         set_bit(j, used);
329                         break;
330                 }
331
332                 if (j == ARRAY_SIZE(ctx->h264_dec.dpb))
333                         set_bit(i, new);
334         }
335
336         /* For entries that could not be matched, use remaining free slots. */
337         for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) {
338                 const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
339                 struct v4l2_h264_dpb_entry *cdpb;
340
341                 /*
342                  * Both arrays are of the same sizes, so there is no way
343                  * we can end up with no space in target array, unless
344                  * something is buggy.
345                  */
346                 j = find_first_zero_bit(used, ARRAY_SIZE(ctx->h264_dec.dpb));
347                 if (WARN_ON(j >= ARRAY_SIZE(ctx->h264_dec.dpb)))
348                         return;
349
350                 cdpb = &ctx->h264_dec.dpb[j];
351                 *cdpb = *ndpb;
352                 set_bit(j, used);
353         }
354 }
355
356 dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx,
357                                    unsigned int dpb_idx)
358 {
359         struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
360         dma_addr_t dma_addr = 0;
361         s32 cur_poc = ctx->h264_dec.cur_poc;
362         u32 flags;
363
364         if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
365                 dma_addr = hantro_get_ref(ctx, dpb[dpb_idx].reference_ts);
366
367         if (!dma_addr) {
368                 struct vb2_v4l2_buffer *dst_buf;
369                 struct vb2_buffer *buf;
370
371                 /*
372                  * If a DPB entry is unused or invalid, address of current
373                  * destination buffer is returned.
374                  */
375                 dst_buf = hantro_get_dst_buf(ctx);
376                 buf = &dst_buf->vb2_buf;
377                 dma_addr = hantro_get_dec_buf_addr(ctx, buf);
378         }
379
380         flags = dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD ? 0x2 : 0;
381         flags |= abs(dpb[dpb_idx].top_field_order_cnt - cur_poc) <
382                  abs(dpb[dpb_idx].bottom_field_order_cnt - cur_poc) ?
383                  0x1 : 0;
384
385         return dma_addr | flags;
386 }
387
388 u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx)
389 {
390         const struct v4l2_h264_dpb_entry *dpb = &ctx->h264_dec.dpb[dpb_idx];
391
392         if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
393                 return 0;
394         return dpb->frame_num;
395 }
396
397 /*
398  * Removes all references with the same parity as the current picture from the
399  * reference list. The remaining list will have references with the opposite
400  * parity. This is effectively a deduplication of references since each buffer
401  * stores two fields. For this reason, each buffer is found twice in the
402  * reference list.
403  *
404  * This technique has been chosen through trial and error. This simple approach
405  * resulted in the highest conformance score. Note that this method may suffer
406  * worse quality in the case an opposite reference frame has been lost. If this
407  * becomes a problem in the future, it should be possible to add a preprocessing
408  * to identify un-paired fields and avoid removing them.
409  */
410 static void deduplicate_reflist(struct v4l2_h264_reflist_builder *b,
411                                 struct v4l2_h264_reference *reflist)
412 {
413         int write_idx = 0;
414         int i;
415
416         if (b->cur_pic_fields == V4L2_H264_FRAME_REF) {
417                 write_idx = b->num_valid;
418                 goto done;
419         }
420
421         for (i = 0; i < b->num_valid; i++) {
422                 if (!(b->cur_pic_fields == reflist[i].fields)) {
423                         reflist[write_idx++] = reflist[i];
424                         continue;
425                 }
426         }
427
428 done:
429         /* Should not happen unless we have a bug in the reflist builder. */
430         if (WARN_ON(write_idx > 16))
431                 write_idx = 16;
432
433         /* Clear the remaining, some streams fails otherwise */
434         for (; write_idx < 16; write_idx++)
435                 reflist[write_idx].index = 15;
436 }
437
438 int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx)
439 {
440         struct hantro_h264_dec_hw_ctx *h264_ctx = &ctx->h264_dec;
441         struct hantro_h264_dec_ctrls *ctrls = &h264_ctx->ctrls;
442         struct v4l2_h264_reflist_builder reflist_builder;
443
444         hantro_start_prepare_run(ctx);
445
446         ctrls->scaling =
447                 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX);
448         if (WARN_ON(!ctrls->scaling))
449                 return -EINVAL;
450
451         ctrls->decode =
452                 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
453         if (WARN_ON(!ctrls->decode))
454                 return -EINVAL;
455
456         ctrls->sps =
457                 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SPS);
458         if (WARN_ON(!ctrls->sps))
459                 return -EINVAL;
460
461         ctrls->pps =
462                 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_PPS);
463         if (WARN_ON(!ctrls->pps))
464                 return -EINVAL;
465
466         /* Update the DPB with new refs. */
467         update_dpb(ctx);
468
469         /* Build the P/B{0,1} ref lists. */
470         v4l2_h264_init_reflist_builder(&reflist_builder, ctrls->decode,
471                                        ctrls->sps, ctx->h264_dec.dpb);
472         h264_ctx->cur_poc = reflist_builder.cur_pic_order_count;
473
474         /* Prepare data in memory. */
475         prepare_table(ctx);
476
477         v4l2_h264_build_p_ref_list(&reflist_builder, h264_ctx->reflists.p);
478         v4l2_h264_build_b_ref_lists(&reflist_builder, h264_ctx->reflists.b0,
479                                     h264_ctx->reflists.b1);
480
481         /*
482          * Reduce ref lists to at most 16 entries, Hantro hardware will deduce
483          * the actual picture lists in field through the dpb_valid,
484          * dpb_longterm bitmap along with the current frame parity.
485          */
486         if (reflist_builder.cur_pic_fields != V4L2_H264_FRAME_REF) {
487                 deduplicate_reflist(&reflist_builder, h264_ctx->reflists.p);
488                 deduplicate_reflist(&reflist_builder, h264_ctx->reflists.b0);
489                 deduplicate_reflist(&reflist_builder, h264_ctx->reflists.b1);
490         }
491
492         return 0;
493 }
494
495 void hantro_h264_dec_exit(struct hantro_ctx *ctx)
496 {
497         struct hantro_dev *vpu = ctx->dev;
498         struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
499         struct hantro_aux_buf *priv = &h264_dec->priv;
500
501         dma_free_coherent(vpu->dev, priv->size, priv->cpu, priv->dma);
502 }
503
504 int hantro_h264_dec_init(struct hantro_ctx *ctx)
505 {
506         struct hantro_dev *vpu = ctx->dev;
507         struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
508         struct hantro_aux_buf *priv = &h264_dec->priv;
509         struct hantro_h264_dec_priv_tbl *tbl;
510
511         priv->cpu = dma_alloc_coherent(vpu->dev, sizeof(*tbl), &priv->dma,
512                                        GFP_KERNEL);
513         if (!priv->cpu)
514                 return -ENOMEM;
515
516         priv->size = sizeof(*tbl);
517         tbl = priv->cpu;
518         memcpy(tbl->cabac_table, h264_cabac_table, sizeof(tbl->cabac_table));
519
520         return 0;
521 }