Announcement

Collapse
No announcement yet.

Searching with mismatches

Collapse
X
 
  • Filter
  • Time
  • Show
Clear All
new posts

  • Paul Dixon
    replied
    I just acquired a new PC and needed something to test it on so I quickly dabbled with this problem again.

    Earlier in this thread I said I tried XMM registers but they weren't too successful and that multicore CPUs should save time.

    I now have a PC with a multicore CPU and which runs SSE instructions so I reworked the code in post 118 of this thread to take advantage of both.
    The code in post 118 ran on an Athlon XP 2600+ in 205ms for each mismatch target, so for all mismatch targets from 0 to 10 it takes about 2.255 seconds.
    The reworked code below runs on a Phenom II quad core 3GHz CPU in about 210ms but does all mismatch targets from 0 to 10 simultaneously allowing all available CPU cores to be used. That's over 10 times as fast again as "the fastest so far".

    The code is posted below.
    It uses XMM registers for parallel comparisons.
    It does 2 blocks of comparisons at a time because that saves a little on loop overhead.
    It threads the tests so each mismatch target is run in its own thread allowing it to take advantage of multi core CPUs.

    Paul.
    Code:
    'PBCC5 program
    #COMPILE EXE
    #REGISTER NONE
    DEFLNG a-z
    
    #INCLUDE "win32api.inc"
    
    
    TYPE ThreadToken
        search_length   AS LONG
        query_length    AS LONG
        match_length    AS LONG
        mismatch_target AS LONG
        querystring     AS STRING PTR
        searchstring    AS STRING PTR
        TotalMatches    AS LONG
    END TYPE
    
    
    TYPE dquad
        l AS QUAD
        h AS QUAD
    END TYPE
    
    
    
    FUNCTION PBMAIN () AS LONG
    
    LOCAL Token() AS ThreadToken
    LOCAL hThreads() AS LONG
    
    DIM hThreads(0 TO 20)
    DIM Token(0 TO 20)
    LOCAL t1,t2 AS EXT
    LOCAL freq, count0, count1 AS QUAD
    
    
    QueryPerformanceFrequency freq   'Get timer frequency.
                   
    
        REM User defined run parameters
    '   -------------------------------------------------------------------------------------------------
        search_length = 6000: query_length = 5000: match_length = 22: mismatch_target =5
    '   -------------------------------------------------------------------------------------------------
    
        REM Set up sequences and variables, create random search and query sequences
    '   -------------------------------------------------------------------------------------------------
    
        LOCAL searchPtr, queryPtr, a_Ptr, b_Ptr AS BYTE PTR
    
        matches_needed = match_length - mismatch_target
    
        RANDOMIZE 1
    
        query$ = SPACE$(query_length)
        FOR a = 1 TO query_length: MID$(query$, a, 1) = MID$("1248", RND(1, 4), 1): NEXT a
        queryPtr = STRPTR(query$)
    
        FOR a = 1 TO query_length: @queryPtr[a] = @queryPtr[a] -48:NEXT
    
        query$ = query$ +  STRING$(32,CHR$(16))   'add a 32 byte tail to stop overrun when 32 bytes are taken and less bytes are valid
        queryPtr = STRPTR(query$)  'update string pointer because it changed on the above line
    
    
        search$ = SPACE$(search_length)
        FOR a = 1 TO search_length: MID$(search$, a, 1) = MID$("1248", RND(1, 4), 1): NEXT a
        searchPtr = STRPTR(search$)
        FOR a = 1 TO search_length: @searchPtr[a] = @searchPtr[a] - 48: NEXT
    
        search$ = search$ +  STRING$(32,CHR$(16)) 'add a 32 byte tail to stop overrun when 32 bytes are taken and less bytes are valid
        searchPtr = STRPTR(search$)  'update string pointer because it changed on the above line
    
        'now do the searches using threads to take advantage of multiple cores/CPUs
        QueryPerformanceCounter count0   'read the timer at the start of the test
    
        NoofThreads = 0
        FOR mismatch_target = 0 TO 10
            
            INCR NoofThreads
            'for each mismatch target set uop a new thread.
            'The following are the parameters that need to be passed to/from that thread
            Token(mismatch_target).search_length = search_length
            Token(mismatch_target).query_length = query_length
            Token(mismatch_target).match_length = match_length
            Token(mismatch_target).mismatch_target = mismatch_target
            Token(mismatch_target).querystring = VARPTR(query$)
            Token(mismatch_target).searchstring = VARPTR(search$)
    
            'create the thread
            THREAD CREATE ProcessingThread(VARPTR(Token(mismatch_target))), TO hThreads(mismatch_target)
        NEXT
    
        'wait for all threads to finish
        WaitForMultipleObjects(BYVAL NoofThreads, BYVAL VARPTR(hThreads(0)),BYVAL %true, %INFINITE)
    
        QueryPerformanceCounter count1   'read the timer at the end of the test
    
        'print out the results
        FOR r = 0 TO NoofThreads -1
            PRINT "Mismatch target ="r;" Total matches =";Token(r).TotalMatches
        NEXT
    
    PRINT "Total time taken =";FORMAT$(1000*(count1-count0)/freq,"######0.000");"msec"
    
    WAITKEY$
    
    
    END FUNCTION
    
    
    
    THREAD FUNCTION ProcessingThread(BYVAL TokenPointer AS LONG) AS LONG
    LOCAL InputData AS ThreadToken  PTR
    LOCAL r,t AS LONG
    LOCAL xmask1, xmask2 AS dquad
    LOCAL count0, count1 AS QUAD
    
    InputData=TokenPointer
    
            total_matches = 0
    '   -------------------------------------------------------------------------------------------------
    
        REM start timer and begin the search
    '   -------------------------------------------------------------------------------------------------
            TIX k&&
            'generate the masks used for non-multiples of 8 sequence lengths
            Mask0&& = &hffffffffffffffff
            Mask1&& = &hffffffffffffffff
            Mask2&& = &hffffffffffffffff
            Mask3&& = &hffffffffffffffff
    
            SELECT CASE @InputData.match_length
                CASE 1 TO 8
                    SHIFT LEFT mask0&&, 8 * @InputData.match_length
    
                CASE 9 TO 16
                    Mask0&& = &h0
                    SHIFT LEFT mask1&&,8 * (@InputData.match_length -8)
    
                CASE 17 TO 24
                    Mask0&& = &h0
                    Mask1&& = &h0
                    SHIFT LEFT mask2&&,8 * (@InputData.match_length -16)
    
                CASE 25 TO 32
                    Mask0&& = &h0
                    Mask1&& = &h0
                    Mask2&& = &h0
                    SHIFT LEFT mask3&&,8 * (@InputData.match_length -24)
    
            END SELECT
    
    xmask1.l=0
    xmask1.h=&h0
    xmask2.l=&hffff000000000000
    xmask2.h=&hffffffffffffffff
    
    query$ =  @[email protected]
    search$ = @[email protected]
    queryPtr = STRPTR(query$)
    searchPtr = STRPTR(search$)
    
    mismatch_target = @InputData.mismatch_target
    
            alimit = LEN(query$) - @InputData.match_length -32   'take off 32 as there's 32 bytes of padding at the end
    
            FOR b = 1 TO LEN(search$) - @InputData.match_length -32 'take off 32 as there's 32 bytes of padding at the end
    
                b_Ptr =searchPtr + b
    
                !mov esi,b_ptr    'get pointer into register ready for the loop
                !movdqu xmm0,[esi]         'get 32 bytes to compare for this attempt
                !movdqu xmm1,[esi+16]
    
    
             '   !por xmm0,mask0&&        'mask off the bytes that aren't to be included
                !por xmm1,xmask2
    
                !mov edi,queryPtr       'get pointer to other string
                !mov edx,mismatch_target
    
                ' FOR a = LEN(query$) - match_length to 1 step -1
                !mov ecx, alimit
    
                !pxor xmm6,xmm6                 'zero xmm6 for later
    
    #ALIGN 16
                'it's sligtly quicker to do the data 2 sets at a time, one shifted by 1 byte compared to the other
                aloop:
    
                !movdqu xmm4,[edi+ecx]        'get 32 bytes to check
                !movdqu xmm5,[edi+ecx+16]
    
                !movdqu xmm2,[edi+ecx-1]      'get next 32 bytes to check
                !movdqu xmm3,[edi+ecx+15]
    
                !pand xmm4,xmm0               'do the 32 byte ANDs
                !pand xmm5,xmm1
    
                !pand xmm2,xmm0               'do the next 32 byte ANDs
                !pand xmm3,xmm1
    
    
                !pcmpeqb xmm4,xmm6            'do the 32 bytes worth of compare with zero
                !pcmpeqb xmm5,xmm6
    
                !pcmpeqb xmm2,xmm6            'do the next 32 bytes worth of compare with zero
                !pcmpeqb xmm3,xmm6
    
    
                !paddb xmm4,xmm5              'add up how many mis-matched
                !psadbw xmm4,xmm6
    
                !paddb xmm2,xmm3              'add up how many mis-matched in next 32 bytes
                !psadbw xmm2,xmm6
    
    
                !movdqa xmm7,xmm4             'Sum the 1st 32 byte matches
                !psrldq xmm7,8
                !paddw xmm4,xmm7
    
                '!pextrw eax,xmm4,0           'get the answer into a CPU register, it's negative so -4 means 4 mismatches
                !movd eax,xmm4               'faster than pextrw
    
                !add al,dl 'mismatch_target   'if this overflows, condition is met and carry is set
                !adc total_matches,0          'add carry into total to count matches
    
    
    
                !movdqa xmm7,xmm2             'sum the 2nd 32 byte matches
                !psrldq xmm7,8
                !paddw xmm2,xmm7
    
                '!pextrw eax,xmm4,0           'get the answer into a CPU register, it's negative so -4 means 4 mismatches
                !movd eax,xmm2               'faster than pextrw
    
                !add al,dl 'mismatch_target  'if this overflows, condition is met and carry is set
                !adc total_matches,0         'add carry into total to count matches
    
    
                ' NEXT a
                !sub ecx,2                   '2 because we're ding 2 sets at a time for speed
                                             'this does mean the searc string MUST be a multple of 2 in length!
                !jnz short aloop
    
            NEXT b
    
    
        @InputData.TotalMatches = total_matches    'return the result
    
    END FUNCTION

    Leave a comment:


  • John Montenigro
    replied
    Thread change

    For anyone coming to this thread late (like me), this should serve as notice that the discussion has been moved to:

    http://www.powerbasic.com/support/pb...ad.php?t=42020

    This should be the last post in this thread...

    Leave a comment:


  • Rodney Hicks
    replied
    Well, I was referring to the ages before I used computers where I was taught to do them in the BEDMAS order, and there are still many of us around that think that way.

    Specifically, in the case we are discussing what I was defining was the length of the string minus one less than the segment length. Thus having to add something kind of doesn't make sense. Yet, it is right!! All is good.

    Leave a comment:


  • Jeff Blakeney
    replied
    Rodney, I wouldn't call that a "feature". It is the way math works even when doing it on paper. Addition and subtraction can appear in any order just like multiplication and division can appear in any order. When you start mixing addition/subtraction with multiplication/division is when you need to start worry about order because multiplication/division is done first.

    1 + 2 - 3 = (1 + 2) - 3
    1 * 2 / 3 = (1 * 2) / 3
    1 + 2 * 3 <> (1 + 2) * 3

    I think PB's only variance from the standard order of precedence is with its additional operators such as integer division operator (\} and modulo. This means that 4 / 2 * 2 <> 4 \ 2 * 2 as the former gives a result of 4 and the latter a result of 1. I haven't actually tried this in PB but the order of precedence is listed as this:

    Code:
    B   parentheses ( )
    E   exponentiation (^)
        negation (-)
    DM  multiplication (*),  floating point division (/)
        integer division (\)
        modulo (MOD)
    AS  addition (+), subtraction (-)
        et al
    I added the BEDMAS characters so that you can see PB is following it with some addtions. I see that another problem may lie in the negation being higher than multiplication/division. After all, negation is simply a short form for "-1 *" but I don't think it will ever be a problem as 2 * -5 should still give the same result as 2 * -1 * 5, -5 * 2 and -1 * 5 * 2.

    By the way, just as a note of trivia brackets are the square ones "[]", parentheses are the round ones "()" and braces are the squiggly ones "{}". Many people use the wrong terms for these symbols.

    Leave a comment:


  • Rodney Hicks
    replied
    You're right Peter!
    I was forgetting about that particular feature. I follow the old BEDMAS order because it's a standard in other programs that I use and I never think to alter it.

    Leave a comment:


  • Peter Simmonds
    replied
    sorry to contradict you but both:

    Code:
    (search_length - match_length) + 1
    (query_length - match_length) + 1
    
    and
    
    search_length - (match_length -1)
    query_length - (match_length -1)
    are arithmetically equivalent to what I originally posted:

    Code:
    search_length - match_length + 1
    query_length - match_length + 1
    Evaluation proceeds from left to right (for operators of equal precedence, as + and - are), so the first two numbers are subtracted, before the third term is added. And of course you can modify that with parentheses, but it's not necessary here.

    This is from the helpfile:
    To handle operations of the same priority, PowerBASIC proceeds from left to right. For example, in the expression 4 - 3 + 6, the subtraction (4 - 3) is performed before the addition (3 + 6), producing the intermediate expression 1 + 6.
    Try it!

    Code:
    #COMPILE EXE
    #DIM ALL 
    
    FUNCTION PBMAIN () AS LONG
    
        PRINT 50 - 10 + 1
        PRINT (50 - 10) + 1
        PRINT 50 - (10 - 1)
        WAITKEY$
    
    END FUNCTION
    Peter

    Leave a comment:


  • Rodney Hicks
    replied
    No, should be
    (search_length - match_length) + 1
    (query_length - match_length) + 1
    or
    search_length - (match_length -1)
    query_length - (match_length -1)

    Leave a comment:


  • Peter Simmonds
    replied
    Hi

    Yes, should be:

    search_length - match_length + 1
    query_length - match_length + 1

    I suppose. Sorry!

    Leave a comment:


  • Rodney Hicks
    replied
    FWIW or
    In the code in post #120 several thousand comparisons are not made because of these two expressions in the FOR/NEXT loops:
    Code:
    search_length - match_length
    query_length - match_length
    The final 28 (match_length) characters of the search string never get compared to the thousands of match segments and the final 28 characters of the match string never get compared to the thousands of search segments.
    That error has been in several posts previous to that.

    Leave a comment:


  • Gösta H. Lovgren-2
    replied
    Really not trying to be a pita here but ...
    ... Only Peter could say for sure.
    And Peter says
    ... then it is obviously hard to know a priori whether counts are correct or not ...
    ... suggests that they represent the correct answer.
    That doesn't souind very "sure" to me. I spent a pretty fair amount of time trying (nearly) every code posted using identical strings and search parameters and there were varying results. Now if I made an error in doing that which caused the variance in results, fair enough. But if I didn't, it "suggests" at least one of the routines is not reliable.

    ============================================
    "It's not the size of the dog in the fight,
    it's the size of the fight in the dog."
    Mark Twain (1835-1910)
    ============================================

    Leave a comment:


  • Peter Simmonds
    replied
    Hi

    Sorry just saw that the thread had re-activated again!

    Because the search strings are long and generated from random sequences of bases or numbers, then it is obviously hard to know a priori whether counts are correct or not. On the other hand, the observation of identical results using a variety of search window widths, numbers of tolerated mismatches and search and query string lengths between the original program and the final, corrected machine code version posted by Paul (post 114) suggests that they represent the correct answer. They are based on quite different methods for matching and counting mismatches, while the outer loops that generate the search windos in the query$ and search$ are pretty standard and straightforward.

    Peter

    Leave a comment:


  • Paul Dixon
    replied
    Gösta,
    maybe I assumed too much but when Peter posted "The original BASIC version produces.." I assumed that was his original version which he was happy with and worked but which he needed speeding up. Since the post in 118 now produces identical results to the original I'd take it that the problem is solved. Only Peter could say for sure.

    Paul.

    Leave a comment:


  • Gösta H. Lovgren-2
    replied
    Originally posted by Paul Dixon View Post
    Michael,

    The original poster gave a list of expected results in post 110:
    http://www.powerbasic.com/support/pb...&postcount=110

    The fastest code so far, in post 118, produces results which match exactly the figures given in post 110.

    Paul.
    What was given in 110 was a comparison between two methods, not expected results or even if either was giving accurate results. For all we know one was just taking longer to give the same wrong answer. It's the same for ALL the code used here. We don't have real life data to test on.

    ============================================
    "To put the world in order,
    we must first put the nation in order;
    To put the nation in order,
    we must first put the family in order;
    To put the family in order,
    we must first cultivate our personal lives;
    and set our hearts right."
    Confucius
    ============================================

    Leave a comment:


  • Paul Dixon
    replied
    Michael,
    Did anyone bother to PREDICT the correct answer before embarking on coding it? I thought not.)
    The original poster gave a list of expected results in post 110:
    http://www.powerbasic.com/support/pb...&postcount=110

    The fastest code so far, in post 118, produces results which match exactly the figures given in post 110.

    Paul.

    Leave a comment:


  • Rodney Hicks
    replied
    When it was established that we weren't worrying about the unknowns, accuracy became an unknown as well.
    Since the code developed doesn't consider unknowns, what happens when the process discovers an unknown? Does it bog down while it tries to sort itself out or is there a hitherto unmentioned second algorithm that handles the unknown?
    How much is this issue going to slow down the process?

    Michael's assessment is valid even though we were given a very good example on how to speed things up.

    Leave a comment:


  • Michael Mattias
    replied
    ... found 86 matches...Then ....it found less matches..... And Then .... It finds NO matches.
    You may call me old fashioned for saying so, but Correct Slowly beats the snot out of Wrong Quickly.

    (Did anyone bother to PREDICT the correct answer before embarking on coding it? I thought not.)

    Leave a comment:


  • Gösta H. Lovgren-2
    replied
    Houston, I have a problem.

    Using the exact same strings and exact same search parameters as in all the other tests found 86 matches (up to Post 80 code). Then in Post 111 code (Paul's assembler example) it found less matches. Hmmm... Okay, stuff happens. My bad somewhere I'm sure. Oh well, ... just move on.

    And Then .....And Then .....And Then ..... (Old R&R reference for you younguns.) Having a light bulb moment in the midst of an otherwise pleasant dream, I theorized a can't fail much faster revolutionary aha method for a faster algo to match DNA.

    And here's the problem. It finds NO matches.

    '
    Code:
    #Compile Exe
    '#Debug Display On' increases time as much as 50% longer when on 
    DefLng a-z
    Macro pf =  Print #1, 
    '        
    'we know in this example there must be at least 1 3 letter matching sequence in each match (28 \ 9)
    'ex 1111111111111111111111111111
    'ex 1110101011101010111011101110 '0 spaced as far apart as possible
    'ex    1 2 3   4 5 6   7   8   9
    Function PBMain () As Long
      #Register None   
      Register b As Long   'used second most
      Register c As Long   'used most
    '
    'DNA_Source_Strings.Inc - for testing consistency
    '
    ' *******************************************************
    '
      Randomize 1
           Open "Source_Strings.txt" For Input  As #1  '- For testing consistency
      Input #1, Query$ 'Sequence_to_be_Searched$
      Input #1, Search$ 
      Close #1
        ln = 50
        Query_Check$ = Space$(5) & Left$(query$, ln)
          Search_Check$ = Space$(5) & Left$(Search$, ln)
     'now replace with values
        Replace Any "ACGT" With "1248" In Query$
        Replace Any "ACGT" With "1248" In Search$
     '
      'find longest matching sequence in first 100 chars (long improbable match)
      'doesn't improve speed as I thought it might
    '   For ctr = 100 To 1 Step -1
    '       s$ = Mid$(Query$, 1, ctr) 'get snippet to look for
    '       i = InStr(Search$, s$)    'look for it
    '       If i Then 'eureka!
    '          search$ = Mid$(Search$, i) + Left$(Search$, i-1) 'move first match to beginning and put not matching portion on end to preserve length
    '          Exit For
    '       End If
    '   Next ctr
    '
        q_Check$ = Space$(5) & Left$(Query$, ln)
          s_Check$ = Space$(5) & Left$(Search$, ln)
     '
        Query_length =  Len(Query$)  
        search_length = Len(Search$)
        match_length = 28 '22
        mismatch_target = 9 '5
    '
    '
    ' *******************************************************
    ' At this point strings are identical to previous tests
      matches_needed = match_length - mismatch_target
      MisMatches_Allowed = mismatch_target
      '
    ' Mid$(query$, 10, match_length) = Search$ 'testing for matches, ensure at least 1 match 'didn't find it
       start_time! = Timer
       ' the time to assign arrays is insignificant, 1 or 2 hundredths of a second overall
        ln = Search_Length
        If ln < query_length Then ln = query_length 'find longest for arrays so no over-runs
         ln = ln * 2 ' make arrays plenty long JIC - no penalties involved
       Dim s(1 To ln) As Byte
         For a = 1 To search_length: s(a) = Val(Mid$(search$, a, 1)): Next a
            For b = a To ln: s(b) = 9: Next b 'ensure no matches with q()
       Dim q(1 To ln) As Byte
         For a = 1 To query_length: q(a) = Val(Mid$(query$, a, 1)): Next a
            For b = a To ln: q(b) = 0: Next b 'ensure no matches with s()
     '
    '  Test_Print = 1     
    '**  
      If Test_Print Then 
         'for checking in display
         Dim lmm(50)
        lm$ = Space$(50)
    '**
        For ctr = 1 To 50
    '**
           If s(ctr) = q(ctr) Then 
              Mid$(lm$, ctr) = "|"
              lmm(ctr) = ctr
           End If   
    '**
        Next ln 
    '**    
        lm$ = Space$(5) & lm$ 
        
       Open "Testing.txt" For Output As #1 
        pf Time$                                    
        pf Query_Check$: pf q_Check$
        pf lm$
        pf Search_Check$: pf S_Check$      
        pf "  at ";
    '**
       For ctr = 1 To UBound(lmm())
         If lmm(ctr) Then pf Str$(lmm(ctr));
       Next ctr
    '**            
       pf " "
        
      End If 
    '**  
    '************* comparing here
       start_time! = Timer
       For a = 1 To search_length - match_length   'piece to search                                    
         If Test_Print Then  Pf Using$(" ####}  # # = ##  Total Matches ##,###", a+1, q(a+1), s(a+1), q(a+1) = s(a+1), qs_TTl )
    '**    
         For b = 0 To query_length - match_length ' 
           Reset mm_ctr, scan_total, Matched_flag, Match_ctr 'set to 0
           ab = a + b  ' save an add in inner loop (Thanks, Paul)
           If Test_Print Then pf  "  Compares  (ab+c)  a  b  c  q() s()   Score   MisMatches  Matched  "
    '**  
           For c = 0 To match_length - 1
       '      abc = ab + c ' save an add in inner loop - actually adds .3 second
             If mm_ctr  > MisMatches_Allowed Then  Exit For  'more mismatches than allowed
             Incr Compares_Made  'count total checks  'time taken is insignificant, 1 or 2 hundredths of a second overall
             Incr scan_total
              
             'If q(abc) <> s(abc) Then Incr mm_ctr Else Incr match_ctr 'adds 3 tenths to routine this way
             If q(ab + c) <> s(ab + c) Then Incr mm_ctr Else Incr match_ctr 'no match for this letter but count matches
               
             If scan_total - mm_ctr => Matches_Needed Then Matched_Flag = 1
                                         '"  Compares  (ab+c)  a  b  c  q() s()   Score   MisMatches  Matched  "
             If Test_Print Then pf  Using$("###,###,###}   ##  ## ## ##   #   #      ##        ##        ##", _
                                           Compares_Made, ab+c, _
                                                               a, b, c, q(ab+c), _
                                                                           s(ab+c), _
                                                                               q(ab+c)=s(ab+c), _
                                                                                         mm_ctr, _
                                                                                                   Match_ctr)
               
          Next c
     '**
           If Test_Print Then pf " "
           Total_Matches = Total_Matches + Matched_Flag
           If Matched_Flag = 0 Then Incr Not_Matched   'only adds couple tenths
         Next b
    '**                           
         If Test_Print And a = 5 Then Reset Test_Print  'enough don't need 100's millions lines
       Next a                                      
    '**   
       Close #1
    '
    ' *******************************************************
    '
     Type_Run$ = "Post 120 - Gösta - Here is a shot using byte arrays,"
       Open "DNA_Testing_Results01.txt" For Append As #1 
          'Print #1, Query_Check$: Print #1, q_Check$
          'Print #1, Search_Check$: Print #1, s_Check$
          Print #1, Type_Run$
          Row$ = "###,###,###  ##.## Seconds  QL = ##,###  SL = ##,###  found #, Matches #, NotMatched MatchLen = ##  Mismatches allowed = ##"
          Print #1, Using$(Row$, _
                  Compares_Made, _
                               Timer - start_time!,_
                                              query_length, _
                                                            search_length, _
                                                                         Total_Matches, _              
                                                                                         Not_Matched, _
                                                                                                      match_length, _
                                                                                                                       MisMatches_Allowed)
          Print #1, " " : Close #1 
    '
    ' *******************************************************
    '
      ? "The end"
        
        
    '  ****************************  -----                              
    End Function
    '
    Unrem Test_Print and look at the files created in a text editor to see what I mean. Either I'm doing something radically critically stupid wrong here OR the algo in the other posts isn't really correct. I have spent hours (and hours and hours) trying all kinds of stuff but nada. Not a single match

    Oh, and by the way, it's a nifty way to test inner loop optimization stuff too.

    ============================
    "Just living is not enough.
    One must have sunshine,
    freedom,
    and a little flower."
    Hans Christian Anderson
    ============================

    Leave a comment:


  • Rodney Hicks
    replied
    Gösta

    BTW Rod, did you ever run down what was causing the computer to freeze running your matching program?
    No, not yet. I've been swamped with family adjustment issues(sounds serious! ) and haven't had time to work on it.

    Leave a comment:


  • Paul Dixon
    replied
    Peter,
    My last version of the code follows. It's the same as the one above but with mismatch_target held in a register.
    I don't have much more time to spend on this.
    The latest program runs in 205ms here (Athlon XP 2600+) and 120ms on a more modern PC (AthlonX2 6000+ dual core but just using 1 core).

    I quickly tried an XMM version expecting the speed to improve significantly but it gave poor results, so that would need looking into in more detail. It's possible that the limit is the rate at which the data can be read from memory in which case a different strategy might be needed to improve performance of the code further with XMM but I'm sure it can be done.

    If you get stuck into ASM programming then you'll find you only use a fraction of the available opcodes to do most things so don't be put off by the large number of opcodes. It's definitely worth learning if you need to do a lot of this sort of programming.


    Gösta
    I had a quick look at your program but I couldn't immediately see why it gives different results for my code. It's probably something the the manipulation of the strings before the code runs.

    Paul.
    Code:
    'PBCC5 program
    #COMPILE EXE
    #REGISTER NONE
    DEFLNG a-z
    
    %CPUfreq = 1918000000
    
    FUNCTION PBMAIN () AS LONG
    
        REM User defined run parameters
    '   -------------------------------------------------------------------------------------------------
        search_length = 6000: query_length = 5000: match_length = 22: mismatch_target =5
    '   -------------------------------------------------------------------------------------------------
    
        REM Set up sequences and variables, create random search and query sequences
    '   -------------------------------------------------------------------------------------------------
    
        LOCAL searchPtr, queryPtr, a_Ptr, b_Ptr AS BYTE PTR
        matches_needed = match_length - mismatch_target
    
        RANDOMIZE 1
    
        query$ = SPACE$(query_length)
        FOR a = 1 TO query_length: MID$(query$, a, 1) = MID$("1248", RND(1, 4), 1): NEXT a
        queryPtr = STRPTR(query$)
    
        FOR a = 1 TO query_length: @queryPtr[a] = @queryPtr[a] -48:NEXT
    
        query$ = query$ +  STRING$(32,CHR$(16))   'add a 32 byte tail to stop overrun when 32 bytes are taken and less bytes are valid
        queryPtr = STRPTR(query$)  'update string pointer because it changed on the above line
    
    
        search$ = SPACE$(search_length)
        FOR a = 1 TO search_length: MID$(search$, a, 1) = MID$("1248", RND(1, 4), 1): NEXT a
        searchPtr = STRPTR(search$)
        FOR a = 1 TO search_length: @searchPtr[a] = @searchPtr[a] - 48: NEXT
    
        search$ = search$ +  STRING$(32,CHR$(16)) 'add a 32 byte tail to stop overrun when 32 bytes are taken and less bytes are valid
        searchPtr = STRPTR(search$)  'update string pointer because it changed on the above line
    
    
        FOR mismatch_target = 0 TO 10
            PRINT "mismatch_target =";mismatch_target;
    
            total_matches = 0
    '   -------------------------------------------------------------------------------------------------
    
        REM start timer and begin the search
    '   -------------------------------------------------------------------------------------------------
            start_time! = TIMER
            TIX k&&
            'generate the masks used for non-multiples of 8 sequence lengths
            Mask0&& = &hffffffffffffffff
            Mask1&& = &hffffffffffffffff
            Mask2&& = &hffffffffffffffff
            Mask3&& = &hffffffffffffffff
    
            SELECT CASE match_length
                CASE 1 TO 8
                    SHIFT LEFT mask0&&, 8 * match_length
    
                CASE 9 TO 16
                    Mask0&& = &h0
                    SHIFT LEFT mask1&&,8 * (match_length -8)
    
                CASE 17 TO 24
                    Mask0&& = &h0
                    Mask1&& = &h0
                    SHIFT LEFT mask2&&,8 * (match_length -16)
    
                CASE 25 TO 32
                    Mask0&& = &h0
                    Mask1&& = &h0
                    Mask2&& = &h0
                    SHIFT LEFT mask3&&,8 * (match_length -24)
    
            END SELECT
    
            alimit = LEN(query$) - match_length -32   'take off 32 as there's 32 bytes of padding at the end
    
            FOR b = 1 TO LEN(search$) - match_length -32 'take off 32 as there's 32 bytes of padding at the end
    
                b_Ptr = searchPtr + b
    
                !mov esi,b_ptr    'get pointer into register ready for the loop
                !movq mm0,[esi]         'get 32 bytes to compare for this attempt
                !movq mm1,[esi+8]
                !movq mm2,[esi+16]
                !movq mm3,[esi+24]
    
             '   !por mm0,mask0&&        'mask off the bytes that aren't to be included
                !por mm1,mask1&&
                !por mm2,mask2&&
                !por mm3,mask3&&
    
                !mov edi,queryPtr       'get pointer to other string
    
                !mov edx,mismatch_target
    
                ' FOR a = LEN(query$) - match_length to 1 step -1
                !mov ecx, alimit
    
    #ALIGN 16
                aloop:
    
                !movq mm4,[edi+ecx]         'get 32 bytes to check
                !movq mm5,[edi+ecx+8]
                !movq mm6,[edi+ecx+16]
                !movq mm7,[edi+ecx+24]
    
                !pand mm4,mm0               'do the 32 byte ANDs
                !pxor mm0,mm0               'clear mm0 after use so we have a zero register
                !pand mm5,mm1
                !pand mm6,mm2
                !pand mm7,mm3
    
                !pcmpeqb mm4,mm0            'do the 32 bytes worth of compare with zero
                !pcmpeqb mm5,mm0
                !pcmpeqb mm6,mm0
                !pcmpeqb mm7,mm0
    
                !paddb mm4,mm5              'add up how many mis-matched
                !paddb mm4,mm6
                !paddb mm4,mm7
                !psadbw mm4,mm0
                !movq mm0,[esi]             'reload the low 8 bytes
    
    
                !pextrw eax,mm4,0           'get the answer into a CPU register, it's negative so -4 means 4 mismatches
    
                !add al,dl 'mismatch_target     'if this overflows, condition is met and carry is set
                !adc total_matches,0        'add carry into total to count matches
    
                ' NEXT a
                !dec ecx                     'a loop
                !jnz aloop
                !emms                        'restore FPU state after using MMX registers before using FPU again
    
            NEXT b
    
    
        REM search finished, print out time and match results
    '   -------------------------------------------------------------------------------------------------
    
            TIX END k&&
            PRINT "time=";FORMAT$(k&&/%CPUfreq*1000,"###,###,###");"ms";" total_matches=";total_matches
    
        NEXT
    
        WAITKEY$
    
    END FUNCTION

    Leave a comment:


  • Gösta H. Lovgren-2
    replied
    [quote=Rodney Hicks;327504]FWIW
    http://news.bbc.co.uk/2/hi/science/n...stm&#91;/quote]
    Barcodes for DNA, a natural fit seems like.

    BTW Rod, did you ever run down what was causing the computer to freeze running your matching program?

    ==============================================
    Do not consider painful what is good for you.
    Euripides
    ==============================================

    Leave a comment:

Working...
X