Announcement

Collapse

Forum Guidelines

This forum is for finished source code that is working properly. If you have questions about this or any other source code, please post it in one of the Discussion Forums, not here.
See more
See less

cube root function

Collapse
X
 
  • Filter
  • Time
  • Show
Clear All
new posts

  • cube root function

    This is a fast, extended-precision cube root function which may be be useful to PB programmers who require such a thing.

    Just save the code to a file and include it in your source header.

    To invoke the function in a PB procedure use x3rdR(numeric expression).

    If you don't like the function name, use a macro to change it. I wont be offended.

    Any questions or bug reports, please message me.​

    Code:
    'x3rdR include file "FPx3rdR.inc"
    
    #IF NOT %DEF(%x3rdR_I)
    
    MACRO FUNCTION x3rdR(x) 'x may be a numeric variable or expression
        mGxfA = x
       !call FPX3rdR
    END MACRO = mGxfA
    
    FASTPROC FPX3rdR()
       !mov ax, mGxfA[8]     ;ax = sb(x) + biased exponent of x
       !test ax, ax
       !jz JZ_               ;exit if x = 0
    
       'set exponent of x to zero and sign of x to +
       !mov ecx, &h00003fff  ;cx = exponent bias
       !mov mGxfA[8], cx     ;mGxfA = ABS(Significand of x)
       !fld mGxfA            ;st(0) = S
    
       'Newton's-method constants for cube roots
       !fld tbyte ADC_xfOneThird ;st(0) = 1/3, st(1) = S
       !fmul st(1), st(0)    ;st(0) = 1/3, st(1) = S/3
       !fadd st(0), st(0)    ;st(0) = 2/3, st(1) = S/3
    
       'First approximation, A  = S/3 + 2/3 max error is 5.83% at S = 1.999...
       !fld st(0)            ;st(0) = 2/3, st(1) = 2/3, st(2) = S/3
       !fadd st(0), st(2)    ;st(0) = A, st(1) = 2/3, st(2) = S/3
    
       '1st iteration of NM
       !fld st(0)            ;st(0) = A, st(1) = A, st(2) = 2/3, st(3) = S/3
       !fmul st(0), st(2)    ;st(0) = 2*A/3, st(1) = A, st(2) = 2/3, st(3) = S/3
       !fxch st(1)           ;st(0) <-> st(1) --- st(0) = A, st(1) = 2*A/3, st(2) = 2/3, st(3) = S/3
       !fmul st(0), st(0)    ;st(0) = A^2, st(1) = 2*A/3, st(2) = 2/3, st(3) = S/3
       !fdivr st(0), st(3)   ;st(0) = S/(3*A^2), st(1) = 2*A/3, st(2) = 2/3, st(3) = S/3
         !xor ebx, ebx         ;ebx = 0
         !btr ax, 15           ;cf = sb(x): sb(ax) = 0
         !rcr bx, 1            ;sb(bx) = sb(x)
       !faddp st(1), st(0)   ;st(0) = revised approx of S^(1/3) = 2*A/3 + S/(3*A^2), st(1) = 2/3, st(2) = S/3
    
       '2nd iteration
       !fld st(0)
       !fmul st(0), st(2)
       !fxch st(1)
       !fmul st(0), st(0)
       !fdivr st(0), st(3)
         !sub ax, cx           ;ax = exponent of x in 2's complement (E)
         !movsx eax, ax        ;eax = E
         !mov edx, &H55555556  ;edx ~= (2^32)/3
       !faddp st(1), st(0)
    
       '3rd
       !fld st(0)
       !fmul st(0), st(2)
       !fxch st(1)
       !fmul st(0), st(0)
       !fdivr st(0), st(3)
         !imul edx             ;edx = INT(E/3)
       !faddp st(1), st(0)
    
       '4th
       !fld st(0)
       !fmul st(0), st(2)
       !fxch st(1)
       !fmul st(0), st(0)
       !fdivr st(0), st(3)
          !add eax, &h00010000  ;results in cf = 1 if eax = &hffff????
          !adc edx, ecx         ;edx = edx + &h3fff + carry from previous add
          !or edx, ebx          ;sb(edx) = sb(x)
          !shr eax, 24          ;see below at R01
       !faddp st(1), st(0)
    
       'discard NM constants 2/3 and S/3
       !fstp st(1)
       !fstp st(1)           ;st(0) ~= S^(1/3)
    R01:
       !jz J02               ;zero means no remainder (2^0 = 1), so mult at J01 is redundant
       !fld tbyte ptr ADC_3rdR_xf2toOneThird ;st(0) = 2^(1/3), st(1) ~= S^(1/3)
       !sub eax, &h0000055   ;for &h55 factor is 2^(1/3); for &hAA, 2^(2/3)
       !jz J01
       !fmul st(0), st(0)    ;st(0) = 2^(2/3), st(1) = S^(1/3)
    J01:
       !fmulp st(1), st(0)   ;st(0) = S^(1/3) * 2^([1|2]/3)
    J02:
       !fstp mGxfA           ;store this part of result in return variable
       !mov mGxfA[8], dx     ;exponent of answer = INT(E/3) + exponent of st(0) at J02
    JZ_: 'Exit point for x = 0
    
    END FASTPROC
    
    '*** RFT for 3rdroot
    #ALIGN 16
    ASMDATA ADC_xfOneThird
     DW &HAAAB, &HAAAA, &HAAAA, &HAAAA, &H3FFD, 0, 0, 0  '1/3
    END ASMDATA
    
    ASMDATA ADC_3rdR_xf2toOneThird
     DW &H5712, &H6B94, &H17CC, &HA145, &H3FFF  '2^(1/3)
    END ASMDATA
    
    #IF NOT %DEF(%mGVars)
    #ALIGN 16
     GLOBAL mGxfA AS EXT
     GLOBAL mGInt AS INTEGER
     GLOBAL mGLInt AS LONG
     GLOBAL mGxfEst AS EXT
    %mGVars = -1
    #ENDIF
    
    %x3rdR_I = -1
    
    #ENDIF
    
    'end x3rdR include file "FPx3rdR.inc"

  • #2

    Here is a single-precision cube-root function written for computers that support SSE2. This may be better than the above if your requirement is more toward speed than accuracy. To trade off more accuracy for speed, delete the 2nd NM iteration Two iterations almost but not quite reaches 7-digit accuracy, anyway.

    '
    Code:
    'Single precision cube root -- s3rdR()
    
    #IF NOT %DEF(%s3rdR_i)
    
    MACRO FUNCTION s3rdR(x)
       mGsfA = x
       !call FPS3rdR
    END MACRO = mGsfA
    
    FASTPROC FPS3rdR()
    
    PREFIX "!"
       mov eax, mGsfA       'alias x
       test eax, eax        'Test for x = 0 and x<0
       jz JZ_               'if x = 0, return 0
    
       movss xmm2, mGsfA
       movss xmm3, ADC_sfOneThird
    
       'first approximation
       mov ecx, &H7f000000 'ecx = (&h3f8 << 1)
       bt  eax, 31         'cf = sign bit(eax)
       rcr ecx, 1          'cf --> sb(ecx) &h7f >> 1
       sub eax, ecx        'removes sb + exponent bias
       sar eax, 10         'eax = eax/1024
       imul eax, 341       '341/1024 ~= 1/3
       add  eax, ecx       'restore sb and eb in eax
       mov mGsfA, eax
    
       movss xmm0, mGsfA  'xmm0 =  A, xmm2 = x, xmm3 = 1/3
    
       'N-R constants
       mulss xmm2, xmm3   'xmm0 =  A, xmm2 = x/3, xmm3 = 1/3
       addss xmm3, xmm3   'xmm0 =  A, xmm2 = x/3, xmm3 = 2/3
    
       '1st N-R iteration
       movss xmm4, xmm0   'xmm0 =  A, xmm2 = x/3, xmm3 = 2/3, xmm4 = A
       mulss xmm0, xmm3   'xmm0 =  2*A/3, xmm2 = x/3, xmm3 = 2/3, xmm4 = A
       mulss xmm4, xmm4   'xmm0 =  2*A/3, xmm2 = x/3, xmm3 = 2/3, xmm4 = A^2
       rcpss xmm4, xmm4   'xmm0 =  2*A/3, xmm2 = x/3, xmm3 = 2/3, xmm4 = 1/A^2
       mulss xmm4, xmm2   'xmm0 =  2*A/3, xmm2 = x/3, xmm3 = 2/3, xmm4 = x/(3*A^2)
       addss xmm0, xmm4   'xmm0 =  2*A/3 + x/(3*A^2), xmm2 = x/3, xmm3 = 2/3, xmm4 = x/(3*A^2)
    
       '2nd
       movss xmm4, xmm0
       mulss xmm0, xmm3
       mulss xmm4, xmm4
       rcpss xmm4, xmm4
       mulss xmm4, xmm2
       addss xmm0, xmm4
    
       '3rd differs slightly in that it divides x/3 by A^2 directly instead of multiplying by the reciprocal of A^2
       movss xmm4, xmm0   'xmm0 =  A, xmm2 = x/3, xmm3 = 2/3, xmm4 = A
       mulss xmm0, xmm3   'xmm0 =  2*A/3, xmm2 = x/3, xmm3 = 2/3, xmm4 = A
       mulss xmm4, xmm4   'xmm0 =  2*A/3, xmm2 = x/3, xmm3 = 2/3, xmm4 = A^2
       movss xmm5, xmm2   'xmm0 =  2*A/3, xmm2 = x/3, xmm3 = 2/3, xmm4 = A^2, xmm5 = x/3
       divss xmm5, xmm4   'xmm0 =  2*A/3, xmm2 = x/3 * A^2, xmm3 = 2/3, xmm4 = A^2, xmm5 = x/(3*A^2)
       addss xmm0, xmm5   'xmm0 =  2*A/3 + x/(3*A^2), xmm2 = x/3, xmm3 = 2/3, xmm4 = A^2, xmm5 = x/(3*A^2)
    
       movss mGsfA, xmm0
    JZ_:
    
    END PREFIX
    
    END FASTPROC
    
    ASMDATA ADC_sfOneThird  '1/3
     DW &HAAAB, &H3EAA
    END ASMDATA
    
    #IF NOT %DEF(%mGsVars)
     GLOBAL mGsfA, mGsfEst AS SINGLE
     %mGsVars = -1
    #ENDIF
    
    %s3rdR_i = -1
    
    #ENDIF
    
    'end fps3rdR.inc


    Here's another SP version for Windows computers that don't support SSE2.

    Hello?

    '
    Code:
    's3rdRC include file "FPs3rdRC.inc"
    
    #IF NOT %DEF(%s3rdRC_I)
    
    MACRO FUNCTION s3rdRC(x) 'x may be a numeric variable or expression
        mGsfA = x
       !call FPs3rdRC
    END MACRO = mGsfA
    
    FASTPROC FPs3rdRC()
       !mov eax, mGsfA       ;eax = x
       !test eax, eax
       !jz JZ_               ;exit if x = 0
    
       !push esi
       !push edi
    
       'build significand including an explicit 1 before binary fraction
       !shl eax, 8           ;left justify binary fraction bits
       !bts eax, 31          ;put a 1 before the binary point (b.p)
    
       'Newton's-method constants for cube roots
       !mov edx, &h2aaaaaab  ;edx = 1/3
       !mov ecx, edx         ;ecx = 1/3
       !mul edx              ;S * 1/3
       !mov ebx, edx         ;ebx = S/3
       !shr ebx, 2           ;S/3 pre-aligned
    
       'First approximation
       !add edx, ecx         ;edx = A
       !mov eax, edx
       !shl ecx, 2           ;2/3 pre-aligned
       !cmp eax, &h3000000  ;1.25
       !jb J01               ;
       !sub eax, &h03333340  ;0.05 - crude attempt to reduce error in 1st estimate when S > 1.8
    j01:
    
       '1st iteration of Newton method
       !mov edi, eax         ;eax = A, ebx = S/3, ecx = 2/3, edx = ??, edi = A
       !mul ecx              ;eax = A, ebx = S/3, ecx = 2/3, edx = 2*A/3, edi = A
       !mov esi, edx         ;eax = ??, ebx = S/3, ecx = 2/3, edx = 2*A/3, edi = A, esi = 2*A/3
       !mov eax, edi         ;eax = A, ebx = S/3, ecx = 2/3, edx = 2*A/3, edi = A, esi = 2*A/3
       !mul edi              ;eax = ??, ebx = S/3, ecx = 2/3, edx = A^2, edi = A, esi = 2*A/3
       !mov edi, edx         ;eax = ??, ebx = S/3, ecx = 2/3, edx = A^2>>2, edi = A^2>>2, esi = 2*A/3
       !shl edi, 2           ;eax = ??, ebx = S/3, ecx = 2/3, edx = A^2>>2, edi = A^2, esi = 2*A/3
       !mov edx, ebx         ;eax = ??, ebx = S/3, ecx = 2/3, edx = S/3, edi = A^2, esi = 2*A/3
       !div edi              ;eax = S/3*A^2, ebx = S/3, ecx = 2/3, edx = ??, edi = A^2, esi = 2*A/3
       !add eax, esi         ;eax = S/3*A^2 + 2*A/3, ebx = S/3, ecx = 2/3
    
       '2nd iteration of NM
       !mov edi, eax
       !mul ecx
       !mov esi, edx
       !mov eax, edi
       !mul edi
       !mov edi, edx
       !shl edi, 2
       !mov edx, ebx
       !div edi
       !add eax, esi
    
       '3rd iteration
       !mov edi, eax
       !mul ecx
       !mov esi, edx
       !mov eax, edi
       !mul edi
       !mov edi, edx
       !shl edi, 2
       !mov edx, ebx
       !div edi
       !add esi, eax         ;operands here are reversed for last iteration
    
       'divide exponent of x by 3 and multiply the 3rd root of the significand by 2^(1/r),
       'where r is the integer remainder of the exponent division, then set the sign bit of the
       'answer to the same as that of x. Finally, assemble the result as a single float.
       !mov eax, mGsfA        ;to extract exponent and sign
       !xor ebx, ebx
       !btr eax, 31
       !rcr ebx, 1            ;msb of ebx is sign bit of x
       !shr eax, 23           ;right justify exponent field
       !mov edx, &h55555556   ;2^32/3
       !sub eax, &h7f         ;exponent bias for single floats
       !imul edx
       !add eax, &h00010000  ;results in cf = 1 if eax = &hffff????
       !adc edx, &h7f         ;edx = edx + &h7f + carry from previous add
       !mov ecx, edx         ;uhg
       !shr eax, 24          ;isolate the MS byte of remainder
       !mov edx, esi
       !jz J04               ;if z, no mult req'd
       !sub eax, &h0000055   ; z --> 2^(1/3), nz -->2^(2/3)
       !jnz J02
       !mov eax, &h50a28be6  ;2^(1/3)
       !jmp J03
    J02:
       !mov eax, &h6597fa95  ;2^(2/3)
    J03:
       !mul edx
       !shl edx, 2        ;re-align to b.p
    J04:
       !btr edx, 30       ;remove the explicit 1 before the b.p
       !shl ecx, 23       ;
       !shr edx, 7        ;align significand for single float result
       !adc edx, ecx      ;ecx = int(E/3)<<23. also rounds significand according to the state of cf after previous right shifts
       !or  edx, ebx      ;sign bit of x --> sign of answer
       !mov mGsfA, edx    ;store result
    
       !pop edi
       !pop esi
    JZ_: 'Exit point for x = 0
    
    END FASTPROC
    
    #IF NOT %DEF(%mGsVars)
     GLOBAL mGsfA, mGsfEst AS SINGLE
     GLOBAL mGSInt AS DWORD
     %mGsVars = -1
    #ENDIF
    
    %s3rdRC_I = -1
    
    #ENDIF
    
    'end s3rdRC include file "FPs3rdRC.inc"

    Comment

    Working...
    X