Announcement

Collapse
No announcement yet.

Opinion

Collapse
X
 
  • Filter
  • Time
  • Show
Clear All
new posts

  • Opinion

    Do you think the following PBCC5 code would be faster if files are parsed for the search string as they are encountered instead of waiting until all the directories are found?

    Code:
    #COMPILE EXE
    #BREAK ON
    DEFLNG A - Z
    
    %Normal   = &B000000
    
    FUNCTION PBMAIN()
    ' Searches every accessible drive for files whose extension is listed in
    ' Extns$ and whose text contains SrchStr$ (case-insensitive), then prints
    ' the elapsed time and the list of matching files.
    '
    ' The work is done in three stages:
    '   1. Probe drive letters A..Z with CHDRIVE; each one that succeeds
    '      becomes a root entry in Drvs$().
    '   2. Walk Drvs$() as a growing work list: every entry whose attribute
    '      has bit 16 set is appended as a further directory to visit.
    '   3. For every collected directory, open each file with a listed
    '      extension and read it line by line until the search string is seen.
    '
    ' Undeclared names (I, J, K, M, T, ErCount, B$) are implicit variables;
    ' the numeric ones are LONG because of DEFLNG A - Z.
    
    LOCAL SrchStr   AS STRING
    LOCAL Extns     AS STRING
    LOCAL CurDrv    AS STRING * 1
    LOCAL Curntdir  AS STRING
    LOCAL A         AS STRING
    
    LOCAL Drvs()    AS STRING
    LOCAL ExtnAry() AS STRING
    LOCAL SrchRes() AS STRING
    
    LOCAL Sz  AS LONG
    
    DIM SrchRes$(1 TO 1)
    
    SrchStr$ = "postmessage"
    Extns$ = ".txt,.bas,.inc,.bi,.rc,.rtf,.htm,.php"
    CurntDir$ = CURDIR$
    ' CurDrv is a one-character fixed string, so this keeps only the drive letter
    CurDrv = CurntDir$
    Sz = 1
    
    ' ---- Stage 1: find usable drive letters --------------------------------
    J = 0
    FOR I = ASC("A") TO ASC("Z")
      A$ = CHR$(I)
      ON ERROR GOTO DrvErr
      ' A failing CHDRIVE jumps to DrvErr, which resumes at ErClear and so
      ' skips the three lines that would have recorded the drive
      CHDRIVE A$
      INCR J
      REDIM PRESERVE Drvs$(1 TO J)
      Drvs$(J) = A$ + ":\"
    
      ErClear: '
    
    NEXT I
    
    GOTO ParseExtns
    
    DrvErr: '
    ERRCLEAR
    RESUME ErClear
    
    ParseExtns: '
    
    ON ERROR GOTO 0
    
    ' Probing changed the current drive; go back to where we started
    CHDRIVE CurDrv$
    CHDIR CurntDir$
    
    ' Everything is compared in upper case to make the search case-insensitive
    Extns$ = UCASE$(Extns$)
    SrchStr$ = UCASE$(SrchStr$)
    M = PARSECOUNT(Extns$, ",")
    REDIM ExtnAry(1 TO M)
    PARSE Extns$, ExtnAry$(), ","
    t = TIMER
    I = 1
    J = UBOUND(Drvs$)
    
    ' ---- Stage 2: collect every directory ----------------------------------
    ' I is the directory currently being listed, J the last used slot of
    ' Drvs$(). Sub-directories are appended at J, so the loop ends once I has
    ' caught up with J.
    DO
      IF I > J THEN EXIT DO
    
      ErCount = 0
    
      Access_Clr1: '
    
      ON ERROR GOTO Access_Err1
    
      ' Attribute mask 22 = 2 + 4 + 16
      A$ = DIR$(Drvs$(I) + "*.*", 22)
    
      ' Reached both by normal fall-through (ERR = 0) and by an error jump
      Access_Err1: '
    
      IF ERR THEN
        IF ErCount > 1 THEN EXIT IF
        IF ERR = 70 THEN
          ' Error 70: report it, try to reset the attributes and retry the
          ' listing if SETATTR itself did not fail
          INCR ErCount
          PRINT Drvs$(I) + A$
          PRINT ERR, ERROR$, ErCount, "error1"
          ERRCLEAR
          SETATTR Drvs$(I) + A$, %NORMAL
          IF ISFALSE ERR THEN RESUME Access_Clr1
        END IF
      END IF
    
      ON ERROR GOTO 0
    
      ' Nothing found in this directory: move on to the next one
      IF A$ = "" THEN
        INCR I
        IF I > J THEN EXIT DO
        ITERATE DO
      END IF
    
      K = GETATTR(Drvs$(I) + A$)
    
      ' Bit 16 set: queue the entry as a directory still to be listed
      IF (K AND 16) = 16 THEN
        INCR J
        REDIM PRESERVE Drvs$(1 TO J)
        Drvs$(J) = Drvs$(I) + A$ + "\"
      END IF
    
      ' Remaining entries of the same directory
      DO
    
        ErCount = 0
    
        Access_Clr2: '
    
        ON ERROR GOTO Access_Err2
    
        A$ = DIR$(NEXT)
    
        ' Reached both by normal fall-through (ERR = 0) and by an error jump
        Access_Err2: '
    
        IF ERR THEN
          IF ErCount > 1 THEN EXIT IF
          IF ERR = 70 THEN
            INCR ErCount
            PRINT Drvs$(I) + A$
            PRINT ERR, ERROR$, ErCount, "error2"
            ERRCLEAR
            SETATTR Drvs$(I) + A$, %NORMAL
            IF (ISFALSE ERR) AND (ErCount < 2) THEN
              RESUME Access_Clr3
            ELSE
              ON ERROR GOTO 0
            END IF
          END IF
        END IF
    
        Access_Clr3: '
        ErCount = 0
        ON ERROR GOTO 0
    
        ' End of this directory: advance I and return to the outer loop
        IF A$ = "" THEN
          INCR I
          EXIT DO
        END IF
    
        K = GETATTR(Drvs$(I) + A$)
    
        IF (K AND 16) = 16 THEN
          INCR J
          REDIM PRESERVE Drvs$(1 TO J)
          Drvs$(J) = Drvs$(I) + A$ + "\"
        END IF
      LOOP
    LOOP
    
    ' ---- Stage 3: search the files -----------------------------------------
    ' Sz now counts the matches stored in SrchRes$()
    Sz = 0
    
    FOR I = 1 TO J
      A$ = DIR$(Drvs$(I) + "*.*")
      IF A$ = "" THEN ITERATE FOR
      K_loop: '
      A$ = UCASE$(A$)
      FOR K = 1 TO UBOUND(ExtnAry$)
        B$ = ExtnAry$(K)
        ' The name must END with the extension. The previous substring test
        ' (INSTR from the right) also accepted names such as FOO.BIN for
        ' ".BI" or A.TXT.BAK for ".TXT". Empty list entries never match.
        IF (LEN(B$) > 0) AND (RIGHT$(A$, LEN(B$)) = B$) THEN
          A$ = Drvs$(I) + A$
          ON ERROR GOTO Open_Err
          OPEN A$ FOR INPUT AS #1
          DO WHILE NOT EOF(1)
            LINE INPUT #1, B$
            B$ = UCASE$(B$)
            IF INSTR(B$, SrchStr$) THEN
              ' One hit is enough: record the file and stop reading it
              INCR Sz
              REDIM PRESERVE SrchRes$(1 TO Sz)
              SrchRes$(Sz) = A$
              EXIT DO
            END IF
          LOOP
          ' Reached both after a normal read and by an error jump
          Open_Err: '
          IF ERR THEN
            PRINT ERR, ERROR$, "Open error"
            PRINT A$
            ERRCLEAR
            ON ERROR GOTO 0
          END IF
          CLOSE #1
          ' A$ now holds the full path, so no further extension is tried
          EXIT FOR
        END IF
      NEXT K
      A$ = DIR$(NEXT)
      IF A$ = "" THEN
        EXIT IF
      ELSE
        GOTO K_loop
      END IF
    NEXT I
    PRINT TIMER - T
    PRINT "done"
    WAITKEY$
    FOR I = 1 TO Sz
      PRINT SrchRes$(I)
    NEXT I
    PRINT "Files found = "; Sz
    PRINT "end"
    WAITKEY$
    
    END FUNCTION
    
    SUB Ck_File(A$, Fdir$, ExAry$(), Srch$, SRes$(), Sz, Flg)
    ' Checks a single file for a search string and, on a hit, stores the
    ' file's full path in the result array.
    '
    '   A$       - file name without path
    '   Fdir$    - directory prefix; it is concatenated directly with A$
    '   ExAry$() - accepted extensions. They are compared with UCASE$(A$), so
    '              the entries have to be upper case already
    '   Srch$    - text to look for; lines are upper-cased before comparing
    '   SRes$()  - result array, re-dimensioned to (1 TO Sz) when a hit is
    '              stored
    '   Sz       - slot that receives the next hit (so it must be >= 1 on
    '              entry); incremented after a hit has been stored
    '   Flg      - zero: any occurrence counts. Non-zero: an occurrence only
    '              counts when it is delimited on both sides by one of the
    '              characters in Allowed$ or by the start/end of the line;
    '              the accepted occurrence is printed with its delimiters.
    '
    ' Only the first accepted occurrence is recorded; reading stops there.
    ' A file that cannot be opened is skipped.
    
    LOCAL ExtSize AS LONG
    LOCAL SrcLen  AS LONG
    LOCAL hFile   AS LONG
    LOCAL Hit     AS LONG
    LOCAL LeadOk  AS LONG
    LOCAL TailOk  AS LONG
    
    LOCAL Src     AS STRING
    LOCAL B       AS STRING
    LOCAL Allowed AS STRING
    
    ExtSize = UBOUND(ExAry$)
    Src$ = UCASE$(Srch$)
    B$ = UCASE$(A$)
    
    ' The name must END with one of the extensions. A plain substring test
    ' would also accept FOO.BIN for ".BI" or A.TXT.BAK for ".TXT".
    FOR I = 1 TO ExtSize
      IF LEN(ExAry$(I)) THEN
        IF RIGHT$(B$, LEN(ExAry$(I))) = ExAry$(I) THEN EXIT FOR
      END IF
    NEXT I
    
    ' The loop ran to completion, so no extension matched
    IF I > ExtSize THEN EXIT SUB
    
    Allowed$ = " " + "(" + "="
    
    SrcLen = LEN(Src$)
    
    ' Use a free file number instead of a fixed #1 so a file the caller has
    ' open is not disturbed
    hFile = FREEFILE
    ERRCLEAR
    OPEN Fdir$ + A$ FOR INPUT AS #hFile
    IF ERR THEN EXIT SUB
    
    DO WHILE NOT EOF(hFile)
      LINE INPUT #hFile, B$
      B$ = UCASE$(B$)
    
      ' Examine every occurrence in the line, not just the first one, until
      ' one is accepted
      Hit = 0
      I = INSTR(B$, Src$)
      DO WHILE I > 0
        IF Flg = 0 THEN
          Hit = I
          EXIT DO
        END IF
    
        ' Character in front of the occurrence; the start of the line counts
        ' as a delimiter
        LeadOk = -1
        IF I > 1 THEN
          IF INSTR(Allowed$, MID$(B$, I - 1, 1)) = 0 THEN LeadOk = 0
        END IF
    
        ' Character behind the occurrence; the end of the line counts as a
        ' delimiter
        TailOk = -1
        IF I + SrcLen <= LEN(B$) THEN
          IF INSTR(Allowed$, MID$(B$, I + SrcLen, 1)) = 0 THEN TailOk = 0
        END IF
    
        IF LeadOk AND TailOk THEN
          ' Show the occurrence together with its surrounding delimiters
          IF I > 1 THEN
            PRINT MID$(B$, I - 1, SrcLen + 2)
          ELSE
            PRINT LEFT$(B$, SrcLen + 1)
          END IF
          Hit = I
          EXIT DO
        END IF
    
        I = INSTR(I + 1, B$, Src$)
      LOOP
    
      IF Hit THEN
        REDIM PRESERVE Sres$(1 TO Sz)
        Sres$(Sz) = Fdir$ + A$
        INCR Sz
        EXIT DO
      END IF
    LOOP
    CLOSE #hFile
    END SUB
    Walt Decker

  • #2
    I can't tell from that code, but if the program ends as soon as the string is found "n" times, of course it will be faster to search each file as found.

    If n=1 and the string is found in the first file, why bother getting a list of files you will never search?

    Personally, I wouldn't even bother make arrays of drives, folders or files... I'd just do it all 'on the fly.' Making a list is a waste of time.
    Michael Mattias
    Tal Systems (retired)
    Port Washington WI USA
    [email protected]
    http://www.talsystems.com

    Comment


    • #3
      Walt,
      if you want to search lots of drives for files containing some string as fast as possible, then:

      1) Use a separate thread for each drive.

      Drives are slow and clunky mechanical things; the CPU can search lots of drives at a time with no noticeable loss in speed. If each drive is searched in its own thread then Windows will usefully allocate the CPU time to a drive that needs it instead of wasting lots of time waiting for the drive head to move track and the data on the disk to rotate under the head.


      2) Use another thread to search the file for the string while the main thread for that drive looks for the next file

      Same principle as 1), the data can be scanned while the next file is being looked up.



      3) Read much bigger chunks in than 1 line at a time

      Once the drive head is in position, you should read as much as possible, as quickly as possible. If you make lots of small, 1 line reads then the file system overhead becomes large.

      Paul.
      Last edited by Paul Dixon; 21 Feb 2009, 06:03 PM.

      Comment


      • #4
        Michael, the object is to find all files that contain the search string, not just the first file.

        Paul, these are good suggestions; however, I'm a little leery about reading larger chunks. Reading in larger chunks brings up the possibility that the search string may be split so a search would fail; unless, of course, I read in the entire file.

        Thanks for the suggestions.
        Walt Decker

        Comment


        • #5
          Walt,
          then read in the entire file. The size of chunk I was thinking of was 1MByte to 10MByte. That'll cover most files in one read anyway. For files >100MByte you may need to split them up and take a bit of care but there aren't many of those around and it's not difficult to do.

          Paul.

          Comment


          • #6
            Thanks, Paul. I'll try your suggestions; however, I think, unless the file is <= 64k, I'll break them up. I can find the last space character and prepend from that to the end of the string to the next read.
            Walt Decker

            Comment


            • #7
              If you need to read more than one chunk, just overlap them by the length of the search string - 1, and you'll be sure you won't miss it.

              Bye!
              -- The universe tends toward maximum irony. Don't push it.

              File Extension Seeker - Metasearch engine for file extensions / file types
              Online TrID file identifier | TrIDLib - Identify thousands of file formats

              Comment


              • #8
                Bob Scott has posted a nice "disk utility" program which does mass searches of files for tokens. I use the EXE all the time (even have a desktop shortcut). I "think" he posted the source.....
                Michael Mattias
                Tal Systems (retired)
                Port Washington WI USA
                [email protected]
                http://www.talsystems.com

                Comment


                • #9
                  >the object is to find all files that contain the search string, not just the first file.

                  Then it's six of one, half a dozen of the other. If all files have to be searched, the same amount of work has to be done and it really does not matter in what order. The multiple-thread thing looks really interesting, however.

                  You might also want to look at the Microsoft Indexing services provider for use with ADO.. that might get you your lists of drives and files a bit quicker if indexing is in use on the target machine. (That is an untried, untested and unmitigated guess).

                  MCM
                  Michael Mattias
                  Tal Systems (retired)
                  Port Washington WI USA
                  [email protected]
                  http://www.talsystems.com

                  Comment


                  • #10
                    I recently read about a search strategy that could help speed up string searches. It might even have been posted in these forums, but I haven't found it yet.

                    Typical searches pick a starting point in the target and begin to compare that character with the first of the match string. If they match, then the next character of the target is compared to the next of the match string, and so on. If they match, great, you're done. But if there's a mismatch, you have to start over, using a new starting point in the target string.

                    Depending on the author, some searches pick up the next comparison at the character following the first target. Given some thought, however, this is obviously inefficient, as you've already seen that character and maybe more after it. So, some programmers will advance the pointer to the target string by the number of characters that were discarded in the first mismatch. This results in "leap-frogging" over chunks of the string. There's an obvious gain in speed when you're not going back over the same characters repeatedly.

                    However, this could still be improved. And here's what captured my admiration in the author's approach - if we want to spend the least time, we need to search the largest chunks, and do the fewest comparisons.
                    What's the largest chunk? The match string.
                    How can we do the fewest comparisons? (And here it comes!) Compare the first character of the match string against the first character in the target string, AND THEN -- compare the LAST character of the match string against the LEN(match string) character in the target string.
                    If the first and last characters do NOT match, advance the search to the next chunk of the target string, starting at the LAST char + 1.


                    This strategy may sound odd at first, but it makes complete sense, and I personally think it's nothing short of brilliant; an excellent example of examining the problem domain and finding an optimal solution based on algorithmic efficiency, rather than on efficiency of programmatic flow.

                    If I'm not mistaken, the article/post observed that a lot of searches are done on sorted strings, where the differences, if any, are toward the right end of the string (in L-to-R languages). So why not find out immediately if they're mismatched, rather than finding out after lots of false matches?

                    Well, if you don't find that helpful, I hope you at least find it interesting!
                    Last edited by John Montenigro; 22 Feb 2009, 12:26 PM. Reason: removed an inaccuracy regarding "working inward"

                    Comment


                    • #11
                      John,
                      sounds like you're talking about the Boyer-Moore algorithm.
                      http://www.powerbasic.com/support/pb...ead.php?t=4004

                      It only has an advantage when both the string being searched and the string being searched for are big.

                      Paul.

                      Comment


                      • #12
                        Yup, that's the thread, Paul! Thanks for the link.

                        Comment


                        • #13
                          Thanks for the input guys. I really appreciate it.

                          This was first developed for an experiment I'm doing in PBWIN that concerns the use of only one global variable. Data is being passed between processes/windows either as pointers during the call to the process or as user-defined messages to callback procedures.

                          The threaded approach is really interesting and can be easily implemented in PBWIN without using globals; however, I'm not sure how that can be implemented in PBCC. I'll have to play with it a little.

                          Marco, thanks for the overlap suggestion. I hadn't thought of that.
                          Walt Decker

                          Comment


                          • #14
                            Just reading in bigger chunks of code reduced the search time from over 10 minutes to slightly over 4 minutes. That's not bad considering I have 5 drives to read.
                            Walt Decker

                            Comment


                            • #15
                              Well, here's new code using a separate thread for each drive. While in the thread it also searches files. It will accept multiple file types (extensions) and multiple file parameters. Doing it this way cut the time down to under 4 minutes.

                              Further suggestions are appreciated.

                              Code:
                              #COMPILE EXE
                              #BREAK ON
                              #DEBUG DISPLAY ON
                              DEFLNG A - Z
                              
                              ' Per-thread control block shared between PBMAIN and one gDrvs thread.
                              ' PBMAIN passes the address of one element to the thread it creates.
                              TYPE ThreadStruct
                                tHnd    AS DWORD   ' thread handle, filled in by THREAD CREATE in PBMAIN
                                TID     AS LONG    ' 1-based index PBMAIN assigns to the thread
                                Done    AS LONG    ' PBMAIN polls this; non-zero means results can be collected
                                pSrch   AS DWORD   ' VARPTR of the search-string list
                                pExtn   AS DWORD   ' VARPTR of the extension list
                                pDrv    AS DWORD   ' VARPTR of the drive root string for this thread
                                pAry    AS DWORD   ' PBMAIN reads result strings through this as a STRING POINTER
                                AryL    AS LONG    ' PBMAIN starts reading at index AryL - 1 - presumably the array's lower bound
                                AryU    AS LONG    ' PBMAIN treats this as the number of result strings to copy
                                tClose  AS DWORD   ' set non-zero by PBMAIN once the results have been collected
                              END TYPE
                              
                              %Normal   = &B000000
                              
                              FUNCTION PBMAIN () AS LONG
                              ' Starts one gDrvs search thread per accessible drive, polls the
                              ' threads until each has reported completion, merges their result
                              ' strings into one array and prints the totals and the merged list.
                              
                              STATIC tThread() AS ThreadStruct
                              
                              LOCAL pRes      AS STRING POINTER
                              
                              LOCAL nDrives   AS LONG
                              LOCAL nClosed   AS LONG
                              LOCAL nFound    AS LONG
                              LOCAL iSlot     AS LONG
                              LOCAL iSrc      AS LONG
                              LOCAL iDst      AS LONG
                              LOCAL iThr      AS LONG
                              LOCAL iDrv      AS LONG
                              LOCAL CloseMark AS LONG
                              LOCAL Elapsed   AS LONG
                              LOCAL Dummy     AS LONG
                              
                              LOCAL Hits()    AS STRING
                              LOCAL Roots()   AS STRING
                              
                              LOCAL Needles   AS STRING
                              LOCAL ExtList   AS STRING
                              LOCAL DrvLtr    AS STRING
                              LOCAL HomeDir   AS STRING
                              LOCAL HomeDrv   AS STRING * 1
                              
                              ' Remember where we are; HomeDrv keeps only the first character
                              HomeDir$ = CURDIR$
                              HomeDrv$ = HomeDir$
                              
                              Needles$ = "postmessage,%WM_DLG"
                              ExtList$ = ".txt,.bas,.inc,.bi,.rtf,.php,.htm"
                              
                              ' A drive letter is usable when CHDRIVE to it does not raise an error
                              nDrives = 0
                              FOR iDrv = ASC("A") TO ASC("Z")
                                DrvLtr$ = CHR$(iDrv)
                                TRY
                                  CHDRIVE DrvLtr$
                                  INCR nDrives
                                  REDIM PRESERVE Roots$(1 TO nDrives)
                                  Roots$(nDrives) = DrvLtr$ + ":\"
                                CATCH
                                  ' drive not available: nothing to record
                                END TRY
                              NEXT iDrv
                              
                              CHDRIVE HomeDrv$
                              CHDIR HomeDir$
                              
                              ' One control block per drive, holding the addresses of the strings
                              ' the thread reads
                              FOR iDrv = 1 TO nDrives
                                REDIM PRESERVE tThread(1 TO iDrv)
                                tThread(iDrv).TID = iDrv
                                tThread(iDrv).pDrv = VARPTR(Roots$(iDrv))
                                tThread(iDrv).pSrch = VARPTR(Needles$)
                                tThread(iDrv).pExtn = VARPTR(ExtList$)
                              NEXT iDrv
                              
                              ' The control array has its final size now, so element addresses can
                              ' be handed to the threads
                              Elapsed = TIMER
                              FOR iDrv = 1 TO nDrives
                                THREAD CREATE gDrvs(VARPTR(tThread(iDrv))) TO tThread(iDrv).tHnd
                                SLEEP 0
                              NEXT iDrv
                              
                              ' Value written to tClose for a collected thread
                              CloseMark = nDrives + 1
                              
                              ' Poll until every thread has been collected and closed
                              iSlot = 1
                              DO
                                FOR iThr = 1 TO nDrives
                                  IF (tThread(iThr).Done <> 0) AND (tThread(iThr).tClose = 0) THEN
                                    IF tThread(iThr).AryU >= 1 THEN
                                      ' Append this thread's strings behind those already merged
                                      nFound = nFound + tThread(iThr).AryU
                                      pRes = tThread(iThr).pAry
                                      REDIM PRESERVE Hits(1 TO nFound)
                                      iSrc = tThread(iThr).AryL - 1
                                      FOR iDst = iSlot TO nFound
                                        Hits$(iDst) = @pRes[iSrc]
                                        INCR iSrc
                                      NEXT iDst
                                      iSlot = nFound + 1
                                    END IF
                              
                                    tThread(iThr).tClose = CloseMark
                                    THREAD RESUME tThread(iThr).tHnd TO Dummy
                                    THREAD CLOSE tThread(iThr).tHnd TO Dummy
                                    INCR nClosed
                                  END IF
                                  SLEEP 0
                                NEXT iThr
                              LOOP UNTIL nClosed >= nDrives
                              
                              Elapsed = TIMER - Elapsed
                              PRINT
                              PRINT "tCount = "; nClosed
                              PRINT "ubd = "; nFound, "Time = "; Elapsed / 60
                              SLEEP 1000
                              WAITKEY$
                              FOR iDst = 1 TO nFound
                                PRINT Hits$(iDst)
                              NEXT iDst
                              
                              WAITKEY$
                              
                              END FUNCTION
                              
                              THREAD FUNCTION gDrvs(BYVAL Addrs AS DWORD) AS LONG
                              
                              LOCAL trd_ptr   AS ThreadStruct POINTER
                              LOCAL Str_ptr   AS STRING POINTER
                              
                              LOCAL Tid       AS LONG
                              LOCAL DrvCount  AS LONG
                              LOCAL THndl     AS LONG
                              LOCAL FileFound AS LONG
                              LOCAL NumExtns  AS LONG
                              LOCAL FileLen   AS LONG
                              LOCAL FileLoc   AS LONG
                              LOCAL FileOpen  AS LONG
                              LOCAL Er        AS LONG
                              LOCAL ErCount   AS LONG
                              LOCAL SrchCount AS LONG
                              
                              LOCAL TredNum   AS STRING
                              LOCAL A         AS STRING
                              LOCAL B         AS STRING
                              LOCAL Q         AS STRING
                              LOCAL SrchStr   AS STRING
                              LOCAL ExtnStr   AS STRING
                              
                              LOCAL Drv()     AS STRING
                              LOCAL FileFnd() AS STRING
                              LOCAL Filextn() AS STRING
                              LOCAL Sstr()    AS STRING
                              
                              DIM Drv$(1 TO 1)
                              
                              trd_ptr = Addrs
                              Tid = @trd_ptr.TID
                              
                              Str_Ptr = @trd_Ptr.pDrv
                              A$ = @Str_Ptr
                              
                              Str_Ptr = @trd_ptr.pSrch
                              SrchStr$ = @Str_Ptr
                              SrchStr$ = UCASE$(SrchStr$)
                              
                              Str_Ptr = @trd_ptr.pExtn
                              ExtnStr$ = @Str_Ptr
                              ExtnStr$ = UCASE$(ExtnStr$)
                              
                              IF ExtnStr$ = "" THEN
                                ExtnStr$ = "*.*"
                              END IF
                              
                              IF SrchStr$ = "" THEN GOTO Parse_Extns
                              
                              SrchCount = PARSECOUNT(SrchStr$, ",")
                              REDIM Sstr$(1 TO SrchCount)
                              PARSE SrchStr$, Sstr$(), ","
                              
                              Parse_Extns: '
                              
                              NumExtns = PARSECOUNT(ExtnStr$, ",")
                              REDIM Filextn$(1 TO NumExtns)
                              PARSE ExtnStr$, Filextn$(), ","
                              
                              
                              Drv$(1) = A$
                              TredNum$ = "Thread = " + STR$(Tid)
                              I = 1
                              DrvCount = 1
                              
                              SLEEP 0
                              
                              DO
                                Er = 0
                                TRY
                                  A$ = DIR$(Drv$(I) + "*.*", 22)
                                  CATCH
                                    Er = ERR
                                    ERRCLEAR
                                    IF Er = 70 THEN
                                      TRY
                                        SETATTR Drv$(I), %NORMAL
                                        CATCH
                                          Er = ERR
                                          ERRCLEAR
                                      END TRY
                                    END IF
                                END TRY
                              
                                IF (Er <> 0) AND (A$ <>"") THEN
                                  Er = 0
                                  GOTO Next_Loop
                                END IF
                                IF (A$ = "") AND (I < DrvCount) THEN GOTO End_Loop
                                IF A$ = "" THEN EXIT DO
                                Er = 0
                              
                                TRY
                                  J = GETATTR(Drv$(I) + A$)
                                  CATCH
                                    Er = ERR
                                    ERRCLEAR
                                    IF Er = 70 THEN
                                      TRY
                                        SETATTR Drv$(I) + A$, %NORMAL
                                        CATCH
                                          Er = ERR
                                          ERRCLEAR
                                      END TRY
                                    END IF
                                END TRY
                              
                                IF Er<> 0 THEN GOTO Next_Loop
                              
                                IF (J AND 16) = 16 THEN
                                  INCR DrvCount
                                  REDIM PRESERVE Drv$(1 TO DrvCount)
                                  Drv$(DrvCount) = Drv$(I) + A$ + "\"
                                ELSEIF (J = 0) OR (J = 128) THEN
                                  A$ = UCASE$(Drv$(I) + A$)
                                  GOSUB FindFile
                                ELSEIF ((J AND 1) = 1) OR ((J AND 2) = 2) OR ((J AND 32) = 32) THEN
                                  A$ = UCASE$(Drv$(I) + A$)
                                  GOSUB FindFile
                                END IF
                              
                                Next_Loop: '
                              
                                Er = 0
                              
                                  DO
                              
                                    TRY
                              
                                      A$ = DIR$(NEXT)
                              
                                      CATCH
                                        Er = ERR
                                        ERRCLEAR
                                        IF Er = 70 THEN
                                          TRY
                                            SETATTR Drv$(I) + A$, %NORMAL
                              
                                            CATCH
                                              Er = ERR
                                              ERRCLEAR
                                          END TRY
                                        END IF
                                    END TRY
                              
                                    IF (Er <> 0) THEN GOTO Next_Loop_end
                              
                                    IF A$ = "" THEN EXIT DO
                              
                                    Get_Attr: '
                              
                                    TRY
                                      J = GETATTR(Drv$(I) + A$)
                                      CATCH
                                        Er = ERR
                                        ERRCLEAR
                                        IF Er = 70 THEN
                                          TRY
                                          SETATTR Drv$(I) + A$, %NORMAL
                                          CATCH
                                            Er= ERR
                                            ERRCLEAR
                                          END TRY
                                        END IF
                                    END TRY
                              
                                    IF Er <> 0 THEN GOTO Next_Loop_End
                                    IF (J AND 16) = 16 THEN
                                      INCR DrvCount
                                      REDIM PRESERVE Drv$(1 TO DrvCount)
                                      Drv$(DrvCount) = Drv$(I) + A$ + "\"
                                    ELSEIF (J = 0) OR (J = 128) THEN
                                      A$ = UCASE$(Drv$(I) + A$)
                                      GOSUB FindFile
                                    ELSEIF ((J AND 1) = 1) OR ((J AND 2) = 2) OR ((J AND 32) = 32) THEN
                                      A$ = UCASE$(Drv$(I) + A$)
                                      GOSUB FindFile
                                    END IF
                              
                                    Next_Loop_End: '
                                    Er = 0
                                  LOOP
                              
                                  End_Loop: '
                              
                                  INCR I
                                  IF I > DrvCount THEN EXIT DO
                                  SLEEP 0
                              
                              LOOP
                              
                              GOTO Skip_Dirs: '
                              
                              FindFile: '
                              
                              ' GOSUB subroutine: decide whether the file named in A$ should be recorded
                              ' in FileFnd$().
                              '
                              ' Reads   : A$ (file name), NumExtns / Filextn$() (extension filter),
                              '           SrchCount / SStr$() (search terms), SrchStr$ (its length is used
                              '           as the overlap between successive chunks).
                              ' Updates : FileFound and FileFnd$() when the file qualifies.
                              ' Scratch : B$, J, Er, FileOpen, FileLen, FileLoc are overwritten.
                              '
                              ' A file qualifies when its extension passes the filter and either no
                              ' search terms are defined (SrchCount = 0) or at least one SStr$() entry
                              ' occurs in the upper-cased file contents.
                              ' NOTE(review): SStr$() entries are presumably already upper case, since
                              ' the file data is upper-cased before INSTR - confirm against the caller.
                              
                              ' Locate the last "." in the name; names without one are never matched.
                              J = INSTR(-1, A$, ".")
                              
                              IF J = 0 THEN RETURN
                              
                              B$ = MID$(A$, J)
                              
                              ' A single "*.*" filter accepts every extension. Open_File performs the
                              ' SrchCount = 0 check itself, so no separate copy of it is needed here.
                              IF (NumExtns = 1) AND (Filextn$(1) = "*.*") THEN GOTO Open_File
                              
                              FOR J = 1 TO NumExtns
                                IF B$ = Filextn$(J) THEN EXIT FOR
                              NEXT J
                              ' The loop ran to completion, so the extension is not in the filter list.
                              IF J > NumExtns THEN RETURN
                              
                              Open_File: '
                              
                              ' With no search terms, matching the extension filter is sufficient.
                              IF SrchCount = 0 THEN
                                INCR FileFound
                                REDIM PRESERVE FileFnd$(1 TO FileFound)
                                FileFnd$(FileFound) = A$
                                RETURN
                              END IF
                              
                              Er = 0
                              FileOpen = FREEFILE
                              
                              TRY
                                OPEN A$ FOR BINARY AS #FileOpen
                                CATCH
                                  Er = ERR
                                  ERRCLEAR
                              END TRY
                              
                              ' Error 70 (permission denied): reset the attributes and try the OPEN
                              ' again. Previously Er was cleared after SETATTR without re-opening the
                              ' file, so the code below ran against a handle that was never opened.
                              IF Er = 70 THEN
                                Er = 0
                                TRY
                                  SETATTR A$, %NORMAL
                                  OPEN A$ FOR BINARY AS #FileOpen
                                  CATCH
                                    Er = ERR
                                    ERRCLEAR
                                END TRY
                              END IF
                              
                              IF Er <> 0 THEN
                                CLOSE #FileOpen
                                RETURN
                              END IF
                              
                              Er = 0
                              TRY
                                FileLen = LOF(#FileOpen)
                                CATCH
                                  Er = ERR
                                  ERRCLEAR
                              END TRY
                              
                              IF Er THEN
                                CLOSE #FileOpen
                                RETURN
                              END IF
                              
                              ' The file is scanned in chunks of at most 64000 bytes; GET reads exactly
                              ' LEN(B$) bytes, so B$ is pre-sized to the amount wanted.
                              IF FileLen <= 64000 THEN
                                B$ = SPACE$(FileLen)
                              ELSE
                                B$ = SPACE$(64000)
                              END IF
                              
                              FileLoc = 1
                              
                              Continue_File_Read: '
                              
                              Er = 0
                              TRY
                                GET #FileOpen, FileLoc, B$
                                B$ = UCASE$(B$)
                                CATCH
                                  Er = ERR
                                  ERRCLEAR
                              END TRY
                              
                              IF Er THEN
                                CLOSE #FileOpen
                                RETURN
                              END IF
                              
                              ' The first search term found in this chunk records the file and stops.
                              FOR J = 1 TO SrchCount
                                IF INSTR(B$, SStr$(J)) <> 0 THEN
                                  INCR FileFound
                                  REDIM PRESERVE FileFnd$(1 TO FileFound)
                                  FileFnd$(FileFound) = A$
                                  GOTO End_File_Search
                                END IF
                              NEXT J
                              
                              ' SEEK returns the 1-based position of the next unread byte.
                              FileLoc = SEEK(#FileOpen)
                              
                              ' "<=" (not "<"): when FileLoc = FileLen one byte is still unread.
                              IF FileLoc <= FileLen THEN
                                ' Step back so a term straddling the chunk boundary is still seen.
                                FileLoc = FileLoc - LEN(SrchStr$) - 1
                                ' Bytes FileLoc..FileLen inclusive remain, hence the "+ 1"; without
                                ' it the final byte of the file was never searched.
                                J = FileLen - FileLoc + 1
                                IF J <= 64000 THEN
                                  B$ = SPACE$(J)
                                ELSE
                                  B$ = SPACE$(64000)
                                END IF
                                GOTO Continue_File_Read
                              END IF
                              
                              End_File_Search: '
                              
                              CLOSE #FileOpen
                              RETURN
                              
                              Skip_Dirs: '
                              
                              @Trd_ptr.Done = Tid
                              IF FileFound = 0 THEN
                                @trd_ptr.pAry = 0
                                @trd_ptr.AryL = -1
                                @trd_ptr.AryU = -1
                              ELSE
                                @trd_ptr.pAry = VARPTR(FileFnd$(1))
                                @trd_ptr.AryL = 1
                                @trd_ptr.AryU = FileFound
                              END IF
                              
                              THndl = @trd_Ptr.tHnd
                              
                              Suspend_Thread: '
                              
                              THREAD SUSPEND THndl TO I
                              
                              IF @trd_ptr.tClose = 0 THEN GOTO Suspend_Thread
                              
                              END FUNCTION
                              Last edited by Walt Decker; 25 Feb 2009, 09:47 PM.
                              Walt Decker

                              Comment


                              • #16
                                Code:
                                Thread_Done: '
                                J = 1
                                DO
                                  FOR M = 1 TO DrvCnt
                                 ...
                                  NEXT
                                LOOP
                                Kind of the long way here... also inefficient (not terribly material here since program is doing nothing else).

                                Code:
                                  ' Start one worker thread per drive, then block until all have ended.
                                  REDIM hThread (DriveCount-1)
                                  FOR M = 0 TO DriveCount-1 
                                     THREAD CREATE    ThreadFunction  (M+1) TO hThread(M) 
                                     ' Sleep/Wait on ready event 
                                   NEXT
                                   ' Third argument %TRUE = wait for ALL handles; the array passed must be
                                   ' the same hThread() that THREAD CREATE filled in above.
                                   WaitForMultipleObjects  DriveCount, BYVAL VARPTR(hThread(0)), %TRUE, %INFINITE
                                   ' when you get here, all thread functions have completed. 
                                  '  close your thread handles and pop a cold one.
                                MCM
                                Michael Mattias
                                Tal Systems (retired)
                                Port Washington WI USA
                                [email protected]
                                http://www.talsystems.com

                                Comment


                                • #17
                                  Michael, I thought about using "WaitForMultipleObjects", but since the data is stored in local arrays I figured it would be too late to extract the data.

                                  I also thought about using threaded arrays, since they are global, but I'm not sure whether a threaded array has a life beyond the life of the thread.
                                  Walt Decker

                                  Comment


                                  • #18
                                    since the data is stored in local arrays ...threaded arrays, since they are global
                                    ????

                                    The management of your application data is independent of your management of the program's thread handles.

                                    Bottom line, there is no such thing as "automatic thread specific application data" other than as you define by using variables LOCAL to the thread function, THREADED variables, or using the Windows' Thread Local Storage features.

                                    MCM
                                    Michael Mattias
                                    Tal Systems (retired)
                                    Port Washington WI USA
                                    [email protected]
                                    http://www.talsystems.com

                                    Comment


                                    • #19
                                      Walt,
                                      Doing it this way cut the time down to under 4 minutes.

                                      Further suggestions are appreciated.
                                      Suggestion 1, make sure your antivirus is turned off.

                                      From just over 4 minutes to just under 4 minutes is not much of an improvement; I'd have expected more.
                                      If I do a similar search using Windows search I can scan my C drive in 4m52s and my D drive in 2m50s, giving a total of 7m42s. But if I set off 2 searches at the same time in 2 different windows, both finish in 5m54s, about 25% faster. That demonstrates that we're along the right lines in searching different drives in different threads.
                                      I'd expect a bigger improvement if the drives held equal amounts of data, since for the last 2 minutes of the 5m54s only the C drive is being searched, the D drive having already finished.

                                      If I get time over the weekend I'll look into doing it in PB and see how fast it gets.

                                      It might be useful to look at the Performance tab in Task Manager (press Ctrl-Alt-Del) while your search is taking place, and you'll see how much idle time the CPU has.

                                      Paul.
                                      Last edited by Paul Dixon; 1 Mar 2009, 07:22 AM.

                                      Comment

                                      Working...
                                      X