73342c925a4b6efb900885e849fa797fdc63782e
[releases.git] / unicode.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/hfsplus/unicode.c
4  *
5  * Copyright (C) 2001
6  * Brad Boyer (flar@allandria.com)
7  * (C) 2003 Ardis Technologies <roman@ardistech.com>
8  *
9  * Handler routines for unicode strings
10  */
11
12 #include <linux/types.h>
13 #include <linux/nls.h>
14 #include "hfsplus_fs.h"
15 #include "hfsplus_raw.h"
16
17 /* Fold the case of a unicode char, given the 16 bit value */
18 /* Returns folded char, or 0 if ignorable */
19 static inline u16 case_fold(u16 c)
20 {
21         u16 tmp;
22
23         tmp = hfsplus_case_fold_table[c >> 8];
24         if (tmp)
25                 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
26         else
27                 tmp = c;
28         return tmp;
29 }
30
31 /* Compare unicode strings, return values like normal strcmp */
32 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
33                        const struct hfsplus_unistr *s2)
34 {
35         u16 len1, len2, c1, c2;
36         const hfsplus_unichr *p1, *p2;
37
38         len1 = be16_to_cpu(s1->length);
39         len2 = be16_to_cpu(s2->length);
40         p1 = s1->unicode;
41         p2 = s2->unicode;
42
43         while (1) {
44                 c1 = c2 = 0;
45
46                 while (len1 && !c1) {
47                         c1 = case_fold(be16_to_cpu(*p1));
48                         p1++;
49                         len1--;
50                 }
51                 while (len2 && !c2) {
52                         c2 = case_fold(be16_to_cpu(*p2));
53                         p2++;
54                         len2--;
55                 }
56
57                 if (c1 != c2)
58                         return (c1 < c2) ? -1 : 1;
59                 if (!c1 && !c2)
60                         return 0;
61         }
62 }
63
64 /* Compare names as a sequence of 16-bit unsigned integers */
65 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
66                    const struct hfsplus_unistr *s2)
67 {
68         u16 len1, len2, c1, c2;
69         const hfsplus_unichr *p1, *p2;
70         int len;
71
72         len1 = be16_to_cpu(s1->length);
73         len2 = be16_to_cpu(s2->length);
74         p1 = s1->unicode;
75         p2 = s2->unicode;
76
77         for (len = min(len1, len2); len > 0; len--) {
78                 c1 = be16_to_cpu(*p1);
79                 c2 = be16_to_cpu(*p2);
80                 if (c1 != c2)
81                         return c1 < c2 ? -1 : 1;
82                 p1++;
83                 p2++;
84         }
85
86         return len1 < len2 ? -1 :
87                len1 > len2 ? 1 : 0;
88 }
89
90
/*
 * Constants for arithmetic Hangul (de)composition.  The *Base values are
 * the first code points of the precomposed syllable block (S) and of the
 * L, V and T jamo ranges used below; the *Count values are the sizes of
 * those ranges.  Hangul_NCount is the number of syllables per L jamo.
 */
enum {
	Hangul_SBase	= 0xac00,
	Hangul_LBase	= 0x1100,
	Hangul_VBase	= 0x1161,
	Hangul_TBase	= 0x11a7,
	Hangul_SCount	= 11172,
	Hangul_LCount	= 19,
	Hangul_VCount	= 21,
	Hangul_TCount	= 28,
	Hangul_NCount	= Hangul_VCount * Hangul_TCount
};
100
101
102 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
103 {
104         int i, s, e;
105
106         s = 1;
107         e = p[1];
108         if (!e || cc < p[s * 2] || cc > p[e * 2])
109                 return NULL;
110         do {
111                 i = (s + e) / 2;
112                 if (cc > p[i * 2])
113                         s = i + 1;
114                 else if (cc < p[i * 2])
115                         e = i - 1;
116                 else
117                         return hfsplus_compose_table + p[i * 2 + 1];
118         } while (s <= e);
119         return NULL;
120 }
121
122 int hfsplus_uni2asc(struct super_block *sb,
123                 const struct hfsplus_unistr *ustr,
124                 char *astr, int *len_p)
125 {
126         const hfsplus_unichr *ip;
127         struct nls_table *nls = HFSPLUS_SB(sb)->nls;
128         u8 *op;
129         u16 cc, c0, c1;
130         u16 *ce1, *ce2;
131         int i, len, ustrlen, res, compose;
132
133         op = astr;
134         ip = ustr->unicode;
135         ustrlen = be16_to_cpu(ustr->length);
136         len = *len_p;
137         ce1 = NULL;
138         compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
139
140         while (ustrlen > 0) {
141                 c0 = be16_to_cpu(*ip++);
142                 ustrlen--;
143                 /* search for single decomposed char */
144                 if (likely(compose))
145                         ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
146                 if (ce1)
147                         cc = ce1[0];
148                 else
149                         cc = 0;
150                 if (cc) {
151                         /* start of a possibly decomposed Hangul char */
152                         if (cc != 0xffff)
153                                 goto done;
154                         if (!ustrlen)
155                                 goto same;
156                         c1 = be16_to_cpu(*ip) - Hangul_VBase;
157                         if (c1 < Hangul_VCount) {
158                                 /* compose the Hangul char */
159                                 cc = (c0 - Hangul_LBase) * Hangul_VCount;
160                                 cc = (cc + c1) * Hangul_TCount;
161                                 cc += Hangul_SBase;
162                                 ip++;
163                                 ustrlen--;
164                                 if (!ustrlen)
165                                         goto done;
166                                 c1 = be16_to_cpu(*ip) - Hangul_TBase;
167                                 if (c1 > 0 && c1 < Hangul_TCount) {
168                                         cc += c1;
169                                         ip++;
170                                         ustrlen--;
171                                 }
172                                 goto done;
173                         }
174                 }
175                 while (1) {
176                         /* main loop for common case of not composed chars */
177                         if (!ustrlen)
178                                 goto same;
179                         c1 = be16_to_cpu(*ip);
180                         if (likely(compose))
181                                 ce1 = hfsplus_compose_lookup(
182                                         hfsplus_compose_table, c1);
183                         if (ce1)
184                                 break;
185                         switch (c0) {
186                         case 0:
187                                 c0 = 0x2400;
188                                 break;
189                         case '/':
190                                 c0 = ':';
191                                 break;
192                         }
193                         res = nls->uni2char(c0, op, len);
194                         if (res < 0) {
195                                 if (res == -ENAMETOOLONG)
196                                         goto out;
197                                 *op = '?';
198                                 res = 1;
199                         }
200                         op += res;
201                         len -= res;
202                         c0 = c1;
203                         ip++;
204                         ustrlen--;
205                 }
206                 ce2 = hfsplus_compose_lookup(ce1, c0);
207                 if (ce2) {
208                         i = 1;
209                         while (i < ustrlen) {
210                                 ce1 = hfsplus_compose_lookup(ce2,
211                                         be16_to_cpu(ip[i]));
212                                 if (!ce1)
213                                         break;
214                                 i++;
215                                 ce2 = ce1;
216                         }
217                         cc = ce2[0];
218                         if (cc) {
219                                 ip += i;
220                                 ustrlen -= i;
221                                 goto done;
222                         }
223                 }
224 same:
225                 switch (c0) {
226                 case 0:
227                         cc = 0x2400;
228                         break;
229                 case '/':
230                         cc = ':';
231                         break;
232                 default:
233                         cc = c0;
234                 }
235 done:
236                 res = nls->uni2char(cc, op, len);
237                 if (res < 0) {
238                         if (res == -ENAMETOOLONG)
239                                 goto out;
240                         *op = '?';
241                         res = 1;
242                 }
243                 op += res;
244                 len -= res;
245         }
246         res = 0;
247 out:
248         *len_p = (char *)op - astr;
249         return res;
250 }
251
252 /*
253  * Convert one or more ASCII characters into a single unicode character.
254  * Returns the number of ASCII characters corresponding to the unicode char.
255  */
256 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
257                               wchar_t *uc)
258 {
259         int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
260         if (size <= 0) {
261                 *uc = '?';
262                 size = 1;
263         }
264         switch (*uc) {
265         case 0x2400:
266                 *uc = 0;
267                 break;
268         case ':':
269                 *uc = '/';
270                 break;
271         }
272         return size;
273 }
274
275 /* Decomposes a non-Hangul unicode character. */
276 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
277 {
278         int off;
279
280         off = hfsplus_decompose_table[(uc >> 12) & 0xf];
281         if (off == 0 || off == 0xffff)
282                 return NULL;
283
284         off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
285         if (!off)
286                 return NULL;
287
288         off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
289         if (!off)
290                 return NULL;
291
292         off = hfsplus_decompose_table[off + (uc & 0xf)];
293         *size = off & 3;
294         if (*size == 0)
295                 return NULL;
296         return hfsplus_decompose_table + (off / 4);
297 }
298
299 /*
300  * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
301  * precomposed Hangul, otherwise return the length of the decomposition.
302  *
303  * This function was adapted from sample code from the Unicode Standard
304  * Annex #15: Unicode Normalization Forms, version 3.2.0.
305  *
306  * Copyright (C) 1991-2018 Unicode, Inc.  All rights reserved.  Distributed
307  * under the Terms of Use in http://www.unicode.org/copyright.html.
308  */
309 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
310 {
311         int index;
312         int l, v, t;
313
314         index = uc - Hangul_SBase;
315         if (index < 0 || index >= Hangul_SCount)
316                 return 0;
317
318         l = Hangul_LBase + index / Hangul_NCount;
319         v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
320         t = Hangul_TBase + index % Hangul_TCount;
321
322         result[0] = l;
323         result[1] = v;
324         if (t != Hangul_TBase) {
325                 result[2] = t;
326                 return 3;
327         }
328         return 2;
329 }
330
331 /* Decomposes a single unicode character. */
332 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
333 {
334         u16 *result;
335
336         /* Hangul is handled separately */
337         result = hangul_buffer;
338         *size = hfsplus_try_decompose_hangul(uc, result);
339         if (*size == 0)
340                 result = hfsplus_decompose_nonhangul(uc, size);
341         return result;
342 }
343
344 int hfsplus_asc2uni(struct super_block *sb,
345                     struct hfsplus_unistr *ustr, int max_unistr_len,
346                     const char *astr, int len)
347 {
348         int size, dsize, decompose;
349         u16 *dstr, outlen = 0;
350         wchar_t c;
351         u16 dhangul[3];
352
353         decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
354         while (outlen < max_unistr_len && len > 0) {
355                 size = asc2unichar(sb, astr, len, &c);
356
357                 if (decompose)
358                         dstr = decompose_unichar(c, &dsize, dhangul);
359                 else
360                         dstr = NULL;
361                 if (dstr) {
362                         if (outlen + dsize > max_unistr_len)
363                                 break;
364                         do {
365                                 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
366                         } while (--dsize > 0);
367                 } else
368                         ustr->unicode[outlen++] = cpu_to_be16(c);
369
370                 astr += size;
371                 len -= size;
372         }
373         ustr->length = cpu_to_be16(outlen);
374         if (len > 0)
375                 return -ENAMETOOLONG;
376         return 0;
377 }
378
379 /*
380  * Hash a string to an integer as appropriate for the HFS+ filesystem.
381  * Composed unicode characters are decomposed and case-folding is performed
382  * if the appropriate bits are (un)set on the superblock.
383  */
384 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
385 {
386         struct super_block *sb = dentry->d_sb;
387         const char *astr;
388         const u16 *dstr;
389         int casefold, decompose, size, len;
390         unsigned long hash;
391         wchar_t c;
392         u16 c2;
393         u16 dhangul[3];
394
395         casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
396         decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
397         hash = init_name_hash(dentry);
398         astr = str->name;
399         len = str->len;
400         while (len > 0) {
401                 int dsize;
402                 size = asc2unichar(sb, astr, len, &c);
403                 astr += size;
404                 len -= size;
405
406                 if (decompose)
407                         dstr = decompose_unichar(c, &dsize, dhangul);
408                 else
409                         dstr = NULL;
410                 if (dstr) {
411                         do {
412                                 c2 = *dstr++;
413                                 if (casefold)
414                                         c2 = case_fold(c2);
415                                 if (!casefold || c2)
416                                         hash = partial_name_hash(c2, hash);
417                         } while (--dsize > 0);
418                 } else {
419                         c2 = c;
420                         if (casefold)
421                                 c2 = case_fold(c2);
422                         if (!casefold || c2)
423                                 hash = partial_name_hash(c2, hash);
424                 }
425         }
426         str->hash = end_name_hash(hash);
427
428         return 0;
429 }
430
431 /*
432  * Compare strings with HFS+ filename ordering.
433  * Composed unicode characters are decomposed and case-folding is performed
434  * if the appropriate bits are (un)set on the superblock.
435  */
436 int hfsplus_compare_dentry(const struct dentry *dentry,
437                 unsigned int len, const char *str, const struct qstr *name)
438 {
439         struct super_block *sb = dentry->d_sb;
440         int casefold, decompose, size;
441         int dsize1, dsize2, len1, len2;
442         const u16 *dstr1, *dstr2;
443         const char *astr1, *astr2;
444         u16 c1, c2;
445         wchar_t c;
446         u16 dhangul_1[3], dhangul_2[3];
447
448         casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
449         decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
450         astr1 = str;
451         len1 = len;
452         astr2 = name->name;
453         len2 = name->len;
454         dsize1 = dsize2 = 0;
455         dstr1 = dstr2 = NULL;
456
457         while (len1 > 0 && len2 > 0) {
458                 if (!dsize1) {
459                         size = asc2unichar(sb, astr1, len1, &c);
460                         astr1 += size;
461                         len1 -= size;
462
463                         if (decompose)
464                                 dstr1 = decompose_unichar(c, &dsize1,
465                                                           dhangul_1);
466                         if (!decompose || !dstr1) {
467                                 c1 = c;
468                                 dstr1 = &c1;
469                                 dsize1 = 1;
470                         }
471                 }
472
473                 if (!dsize2) {
474                         size = asc2unichar(sb, astr2, len2, &c);
475                         astr2 += size;
476                         len2 -= size;
477
478                         if (decompose)
479                                 dstr2 = decompose_unichar(c, &dsize2,
480                                                           dhangul_2);
481                         if (!decompose || !dstr2) {
482                                 c2 = c;
483                                 dstr2 = &c2;
484                                 dsize2 = 1;
485                         }
486                 }
487
488                 c1 = *dstr1;
489                 c2 = *dstr2;
490                 if (casefold) {
491                         c1 = case_fold(c1);
492                         if (!c1) {
493                                 dstr1++;
494                                 dsize1--;
495                                 continue;
496                         }
497                         c2 = case_fold(c2);
498                         if (!c2) {
499                                 dstr2++;
500                                 dsize2--;
501                                 continue;
502                         }
503                 }
504                 if (c1 < c2)
505                         return -1;
506                 else if (c1 > c2)
507                         return 1;
508
509                 dstr1++;
510                 dsize1--;
511                 dstr2++;
512                 dsize2--;
513         }
514
515         if (len1 < len2)
516                 return -1;
517         if (len1 > len2)
518                 return 1;
519         return 0;
520 }