arch/c6x/lib/divremu.S

   1 ;; SPDX-License-Identifier: GPL-2.0-or-later
   2 ;;  Copyright 2011  Free Software Foundation, Inc.
   3 ;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
   4 ;;
   5
   6 #include <linux/linkage.h>
   7
   8         .text
   9 ENTRY(__c6xabi_divremu)
             ;; Unsigned 32-bit division producing quotient and remainder.
             ;;
             ;; In:   A4 = dividend, B4 = divisor, B3 = return address.
             ;; Out:  A4 = quotient, A5 = remainder.
             ;; Also written by this routine: A1, A2, A6, B0, B1, B4.
             ;; (Register roles are read off the code below; presumably they
             ;; follow the C6x EABI helper convention -- confirm against the
             ;; ABI document.)
             ;; No special handling of a zero divisor is visible here.
             ;;
             ;; Each subc is one conditional subtract-and-shift division step:
             ;; if A4 >= B4 then A4 = ((A4 - B4) << 1) + 1, else A4 = A4 << 1.
             ;; Branches take effect after five further execute packets, so the
             ;; packets following each b/ret below still execute.
             ;;
  10         ;; We use a series of up to 31 subc instructions.  First, we find
  11         ;; out how many leading zero bits there are in the divisor.  This
  12         ;; gives us both a shift count for aligning (shifting) the divisor
  13         ;; to the top bit, and the number of times we have to execute subc.
  14
  15         ;; At the end, we have both the remainder and most of the quotient
  16         ;; in A4.  The top bit of the quotient is computed first and is
  17         ;; placed in A2.
  18
  19         ;; Return immediately if the dividend is zero.  Setting B4 to 1
  20         ;; is a trick to allow us to leave the following insns in the jump
  21         ;; delay slot without affecting the result.
  22         mv      .s2x    A4, B1          ; B1 = dividend, used as the zero/non-zero predicate
  23
  24   [b1]  lmbd    .l2     1, B4, B1       ; dividend != 0: B1 = leading zero count of the divisor
  25 ||[!b1] b       .s2     B3      ; RETURN A: dividend == 0, return through B3
  26 ||[!b1] mvk     .d2     1, B4           ; dividend == 0: divisor := 1 (see the trick described above)
  27
  28 ||[!b1] zero    .s1     A5              ; dividend == 0: remainder := 0
  29         mv      .l1x    B1, A6          ; A6 = shift count, kept for unpacking the result at the end
  30 ||      shl     .s2     B4, B1, B4      ; shift the divisor left by its leading zero count
  31
  32         ;; The loop performs a maximum of 28 steps, so we do the
  33         ;; first 3 here.
             ;; Step 1 is done by hand and yields the top quotient bit in A2;
             ;; steps 2 and 3 are the two subc packets that follow.
  34         cmpltu  .l1x    A4, B4, A2      ; A2 = 1 if dividend < aligned divisor (unsigned)
  35   [!A2] sub     .l1x    A4, B4, A4      ; divisor fits: subtract it (predicate uses A2 from cmpltu)
  36 ||      shru    .s2     B4, 1, B4       ; divisor >>= 1, positioned for the subc steps
  37 ||      xor     .s1     1, A2, A2       ; invert A2: it now holds the top quotient bit
  38
  39         shl     .s1     A2, 31, A2      ; park the top quotient bit at bit 31 of A2
  40 || [b1] subc    .l1x    A4,B4,A4        ; division step, only while B1 != 0
  41 || [b1] add     .s2     -1, B1, B1      ; one fewer step remaining
  42    [b1] subc    .l1x    A4,B4,A4
  43 || [b1] add     .s2     -1, B1, B1
  44
  45         ;; RETURN A may happen here (note: must happen before the next branch)
             ;; Loop: each pass issues seven subc steps, every one predicated
             ;; on B1 != 0 and paired with a decrement of B1, so steps beyond
             ;; the required count become no-ops.  B0 = (B1 > 7) is computed
             ;; from the value B1 has on entry to the pass and selects whether
             ;; another pass is taken.
  46 __divremu0:
  47         cmpgt   .l2     B1, 7, B0       ; B0 = more than 7 steps still to do
  48 || [b1] subc    .l1x    A4,B4,A4
  49 || [b1] add     .s2     -1, B1, B1
  50    [b1] subc    .l1x    A4,B4,A4
  51 || [b1] add     .s2     -1, B1, B1
  52 || [b0] b       .s1     __divremu0      ; taken after the next five packets
  53    [b1] subc    .l1x    A4,B4,A4
  54 || [b1] add     .s2     -1, B1, B1
  55    [b1] subc    .l1x    A4,B4,A4
  56 || [b1] add     .s2     -1, B1, B1
  57    [b1] subc    .l1x    A4,B4,A4
  58 || [b1] add     .s2     -1, B1, B1
  59    [b1] subc    .l1x    A4,B4,A4
  60 || [b1] add     .s2     -1, B1, B1
  61    [b1] subc    .l1x    A4,B4,A4
  62 || [b1] add     .s2     -1, B1, B1
  63         ;; loop backwards branch happens here
  64
             ;; Unpack the result.  With n = A6 (the shift count saved above),
             ;; A4 now holds the remainder in its upper bits and the low n
             ;; quotient bits in bits n-1..0.  The five packets after ret run
             ;; in its delay slots.
  65         ret     .s2     B3              ; return through B3
  66 ||      mvk     .s1     32, A1          ; A1 = 32
  67         sub     .l1     A1, A6, A6      ; A6 = 32 - n
  68 ||      extu    .s1     A4, A6, A5      ; A5 = A4 >> n (reads the old A6 = n): the remainder
  69         shl     .s1     A4, A6, A4      ; A4 <<= 32 - n: quotient bits moved to the top, remainder dropped
  70         shru    .s1     A4, 1, A4       ; free bit 31 for the top quotient bit
  71 ||      sub     .l1     A6, 1, A6       ; A6 = 31 - n
  72         or      .l1     A2, A4, A4      ; merge the top quotient bit held in A2
  73         shru    .s1     A4, A6, A4      ; right-justify: A4 = quotient
  74         nop                             ; NOTE(review): follows the five delay-slot packets of ret; presumably padding -- confirm
  75 ENDPROC(__c6xabi_divremu)