Announcement

Collapse
No announcement yet.

how to write an incremental backup application?

Collapse
X
 
  • Filter
  • Time
  • Show
Clear All
new posts

  • #21
    seem to be having trouble editing my last post!
    here's slightly more tested code
    Code:
    #PBFORMS CREATED V1.51
    #COMPILE EXE
    #DIM ALL
    
    #PBFORMS BEGIN INCLUDES
    #IF NOT %DEF(%WINAPI)
        #INCLUDE "WIN32API.INC"
    #ENDIF
    #PBFORMS END INCLUDES
    
    #PBFORMS BEGIN CONSTANTS
    %IDD_DIALOG1     =  101
    %IDC_LABEL1      = 1001
    %IDC_LABEL2      = 1002
    %IDC_LISTBOX1    = 1003
    %IDC_CURRFILE_TB = 1005
    %IDC_lastfile_tb = 1004
    %IDC_BUTTON1     = 1007
    %IDC_LABEL4      = 1008
    %IDC_MINCHUNK_TB = 1009
    %IDC_F1SIZE_TB   = 1010
    %IDC_LABEL3      = 1011
    %IDC_F2SIZE_TB   = 1012
    %IDC_LABEL5      = 1013
    #PBFORMS END CONSTANTS
    
    #PBFORMS DECLARATIONS
    
    GLOBAL gsLast, gsCurr AS STRING
    GLOBAL ghDlg AS DWORD
    GLOBAL gminchunksize AS LONG
    GLOBAL gstack() AS LONG
    GLOBAL gstackptr AS LONG
    '-----------------------------------------------------------------------------------
    ' Pop the top value off the global LONG stack into l.
    ' An empty stack yields 0 (callers get no separate underflow signal).
    ' NOTE(review): pop/push are not called anywhere in this listing — TODO confirm they are still needed.
    SUB pop ( BYREF l AS LONG)
        IF gstackptr > 0 THEN
            DECR gstackptr
            l = gstack(gstackptr)
        ELSE
            l = 0
        END IF
    END SUB
    '-----------------------------------------------------------------------------------
    ' Push l onto the global LONG stack.
    ' BUG FIX: the stack array is dimensioned elsewhere (0 TO 31 in
    ' WM_INITDIALOG); the original wrote unconditionally, so the 33rd
    ' push corrupted memory.  An overflowing push is now ignored.
    SUB push ( BYVAL l AS LONG)
        IF gstackptr <= UBOUND(gstack) THEN
            gstack(gstackptr) = l
            INCR gstackptr
        END IF
    END SUB
    '-----------------------------------------------------------------------------------
    ' try to find chunks of gsCurr delimited by llow & lhigh, in gsLast
    ' Scan the first lhigh bytes of gsCurr in fixed chunks of
    ' gminchunksize bytes, looking each chunk up in gsLast with INSTR.
    ' Each hit is reported to the listbox as (curr pos, last pos, len)
    ' and recorded in glptrs().  Trailing bytes that do not fill a whole
    ' chunk are not scanned.
    FUNCTION compare ( lhigh AS LONG ) AS LONG
        LOCAL i, n AS LONG
        LOCAL s AS STRING

        ' BUG FIX: guard against a blank/zero chunk-size textbox
        ' (divide-by-zero) and use integer division "\".  The original
        ' "/" is floating-point division whose result is ROUNDED on
        ' assignment to a LONG, so n could round up and the final
        ' iteration would read past lhigh.
        IF gminchunksize < 1 THEN EXIT FUNCTION
        n = lhigh \ gminchunksize
        IF n < 1 THEN EXIT FUNCTION                     ' input smaller than one chunk
        REDIM glptrs ( 0 TO n -1, 2) AS LONG
        FOR i = 0 TO n-1
            glptrs(i, 0) = 1 + (i * gminchunksize)                                ' pos in current file
            s = MID$(gsCurr,i*gminchunksize +1 , gminchunksize )
            glptrs(i, 1) = INSTR(gsLast, s)                                       ' pos in last known file (0 = no match)
            glptrs(i, 2) = gminchunksize                                          ' chunksize
            IF glptrs(i, 1) <> 0 THEN
                LISTBOX ADD ghDlg, %IDC_LISTBOX1, "curr: " + STR$(glptrs(i, 0)) + " last: " + STR$(glptrs(i, 1)) + " len: " + STR$(glptrs(i, 2))
            END IF
        NEXT

    END FUNCTION
    
    '-----------------------------------------------------------------------------------
    ' Dialog callback.  On the "start" button it loads both files into
    ' the global strings gsCurr/gsLast and runs the chunk comparison.
    CALLBACK FUNCTION ShowDIALOG1Proc()
        STATIC sLastFile, sCurrFile AS STRING
        LOCAL s AS STRING
        LOCAL l AS LONG

        SELECT CASE AS LONG CBMSG
            CASE %WM_INITDIALOG
                ghDlg = CBHNDL
                DIM gstack(0 TO 31) AS GLOBAL LONG

            CASE %WM_COMMAND
                SELECT CASE AS LONG CBCTL
                    CASE %IDC_BUTTON1
                        IF CBCTLMSG = %BN_CLICKED OR CBCTLMSG = 1 THEN
                            LISTBOX RESET CBHNDL, %IDC_LISTBOX1
                            CONTROL GET TEXT CBHNDL, %IDC_LASTFILE_TB TO sLastFile
                            CONTROL GET TEXT CBHNDL, %IDC_CURRFILE_TB TO sCurrFile
                            CONTROL GET TEXT CBHNDL, %IDC_MINCHUNK_TB TO s
                            gMinChunksize = VAL(TRIM$(s))
                            OPEN sCurrFile FOR BINARY AS #1
                            l = LOF(1)
                            gsCurr = STRING$(l,0)
                            ' BUG FIX: the original wrote the CURRENT file's size into
                            ' BOTH size boxes (F2SIZE got STR$(l)).  Per the layout,
                            ' F2SIZE sits on the current-file row and F1SIZE on the
                            ' last-file row — TODO confirm against the dialog layout.
                            CONTROL SET TEXT CBHNDL, %IDC_F2SIZE_TB, STR$(l)
                            GET #1, 1, gsCurr
                            CLOSE #1
                            OPEN sLastfile FOR BINARY AS #1
                            gsLast = STRING$(LOF(1),0)
                            CONTROL SET TEXT CBHNDL, %IDC_F1SIZE_TB, STR$(LOF(1))
                            GET #1, 1, gsLast
                            CLOSE #1
                            compare ( l)
                        END IF

                END SELECT
        END SELECT
    END FUNCTION
    
    '----------------------------------------------------------------------
    ' Build and show the main dialog.  The #PBFORMS markers delimit
    ' designer-maintained code: edit the layout through PBForms rather
    ' than by hand, or the designer will overwrite manual changes.
    FUNCTION ShowDIALOG1(BYVAL hParent AS DWORD) AS LONG
        LOCAL lRslt AS LONG

    #PBFORMS BEGIN DIALOG %IDD_DIALOG1->->
        LOCAL hDlg  AS DWORD

        DIALOG NEW hParent, "file differences by Sancho Panza's ***", 33, 81, 528, 121, %WS_POPUP OR %WS_BORDER OR %WS_DLGFRAME OR _
            %WS_SYSMENU OR %WS_CLIPSIBLINGS OR %WS_VISIBLE OR %DS_MODALFRAME OR %DS_3DLOOK OR %DS_NOFAILCREATE OR %DS_SETFONT, _
            %WS_EX_CONTROLPARENT OR %WS_EX_LEFT OR %WS_EX_LTRREADING OR %WS_EX_RIGHTSCROLLBAR, TO hDlg
        CONTROL ADD LABEL,   hDlg, %IDC_LABEL1, "name of last known file", 5, 5, 85, 15, %WS_CHILD OR %WS_VISIBLE OR %SS_LEFT OR _
            %SS_CENTERIMAGE, %WS_EX_LEFT OR %WS_EX_LTRREADING
        CONTROL ADD LABEL,   hDlg, %IDC_LABEL2, "name of current file", 5, 40, 85, 15, %WS_CHILD OR %WS_VISIBLE OR %SS_LEFT OR _
            %SS_CENTERIMAGE, %WS_EX_LEFT OR %WS_EX_LTRREADING
        ' listbox receives the match results from compare()
        CONTROL ADD LISTBOX, hDlg, %IDC_LISTBOX1, , 180, 15, 340, 100, %WS_CHILD OR %WS_VISIBLE OR %WS_TABSTOP OR %WS_HSCROLL OR _
            %WS_VSCROLL OR %LBS_HASSTRINGS, %WS_EX_CLIENTEDGE OR %WS_EX_LEFT OR %WS_EX_LTRREADING OR %WS_EX_RIGHTSCROLLBAR
        CONTROL ADD TEXTBOX, hDlg, %IDC_lastfile_tb, "1.txt", 5, 20, 60, 15
        CONTROL ADD TEXTBOX, hDlg, %IDC_CURRFILE_TB, "2.txt", 5, 55, 60, 15
        ' %ES_NUMBER restricts the chunk-size box to digits
        CONTROL ADD TEXTBOX, hDlg, %IDC_MINCHUNK_TB, "64", 5, 90, 25, 15, %WS_CHILD OR %WS_VISIBLE OR %WS_TABSTOP OR %ES_LEFT OR _
            %ES_AUTOHSCROLL OR %ES_NUMBER, %WS_EX_CLIENTEDGE OR %WS_EX_LEFT OR %WS_EX_LTRREADING OR %WS_EX_RIGHTSCROLLBAR
        CONTROL ADD LABEL,   hDlg, %IDC_LABEL4, "minimum chunk size", 5, 75, 85, 15
        CONTROL ADD BUTTON,  hDlg, %IDC_BUTTON1, "start", 125, 105, 45, 15
        ' read-only boxes that display the two file sizes
        CONTROL ADD TEXTBOX, hDlg, %IDC_F1SIZE_TB, "", 70, 20, 80, 15, %WS_CHILD OR %WS_VISIBLE OR %WS_TABSTOP OR %ES_LEFT OR _
            %ES_AUTOHSCROLL OR %ES_READONLY, %WS_EX_CLIENTEDGE OR %WS_EX_LEFT OR %WS_EX_LTRREADING OR %WS_EX_RIGHTSCROLLBAR
        CONTROL ADD LABEL,   hDlg, %IDC_LABEL3, "bytes", 150, 20, 20, 15, %WS_CHILD OR %WS_VISIBLE OR %SS_LEFT OR %SS_CENTERIMAGE, _
            %WS_EX_LEFT OR %WS_EX_LTRREADING
        CONTROL ADD TEXTBOX, hDlg, %IDC_F2SIZE_TB, "", 70, 55, 80, 15, %WS_CHILD OR %WS_VISIBLE OR %WS_TABSTOP OR %ES_LEFT OR _
            %ES_AUTOHSCROLL OR %ES_READONLY, %WS_EX_CLIENTEDGE OR %WS_EX_LEFT OR %WS_EX_LTRREADING OR %WS_EX_RIGHTSCROLLBAR
        CONTROL ADD LABEL,   hDlg, %IDC_LABEL5, "bytes", 150, 55, 20, 15, %WS_CHILD OR %WS_VISIBLE OR %SS_LEFT OR %SS_CENTERIMAGE, _
            %WS_EX_LEFT OR %WS_EX_LTRREADING
    #PBFORMS END DIALOG

        ' run modally; all interaction is handled in ShowDIALOG1Proc
        DIALOG SHOW MODAL hDlg, CALL ShowDIALOG1Proc TO lRslt

    #PBFORMS BEGIN CLEANUP %IDD_DIALOG1
    #PBFORMS END CLEANUP

        FUNCTION = lRslt
    END FUNCTION
    
    '==================================================================================================================================
    ' Program entry point: show the comparison dialog on the desktop.
    FUNCTION PBMAIN()
        CALL ShowDIALOG1(%HWND_DESKTOP)
    END FUNCTION

    Comment


    • #22
      Chris, I thought it might be good to 1st test for the general extent of changes in the modified file, then further test only within that range. Here's an idea to quickly determine the edited range:

      Code:
      #COMPILE EXE
      #DIM ALL
      %STRINGLEN = 20000000
      %ARRAYsIZE = %STRINGLEN \ 4 -1
      
      ' Demo: find the beginning and end of a single contiguous edit in a
      ' large string by overlaying LONG arrays on the string buffers and
      ' comparing DWORD-by-DWORD — forward for the first difference,
      ' backward (with the arrays aligned to the string ends) for the last.
      FUNCTION PBMAIN () AS LONG

          LOCAL fileOrig, fileMod AS STRING, ii AS LONG

          ? "First make some test strings (representing files)..."
      '    OPEN "c:\tRndfile1.dat" FOR BINARY AS #1
      '    OPEN "c:\tRndfile2.dat" FOR BINARY AS #2
          fileOrig = SPACE$(%STRINGLEN)
          DIM fileOrigArr(%ARRAYsIZE) AS LONG AT STRPTR(fileOrig)
          FOR ii = 0 TO %ARRAYsIZE    'fill string w/ data
             fileOrigArr(ii) = RND * &h7fffffff
          NEXT
          fileMod = fileOrig
          
          ? "Ok, got two supposedly equal strings, so let's for proof of concept verify it DWORD way..."
          DIM fileModArr(%ARRAYsIZE) AS LONG AT STRPTR(fileMod)
          FOR ii = 0 TO %ARRAYsIZE    'verify they're =
             IF fileOrigArr(ii) <> fileModArr(ii) THEN
                ? "Error--some major glitch" & STR$(ii)
                ? HEX$(fileOrigArr(ii)) & $CRLF & HEX$(fileModArr(ii))
                EXIT FUNCTION
             END IF
          NEXT
      
          ? "And try the easy PB way for double verify..."
          IF fileMod <> fileOrig THEN ? "Error--some major glitch"
      
          ? "Strings match--as we expected. Now modify the fileMod string..."
          fileMod = LEFT$(fileMod, 8000000) & "Let's add a few characters here...& a few numbers 1234556778900" & _
                    MID$(fileMod, 8000001)
      '              PUT #1,, fileOrig       'for testing you can uncomment these lines
      '              PUT #2,, fileMod
      '              CLOSE
      
          ? "Modification complete. Let's find the beginning and end of the modification now..."
          REDIM fileModArr(LEN(fileMod) \ 4) AS LONG AT STRPTR(fileMod) '1st get the beginning change point
          ' BUG FIX: bound the scan by %ARRAYsIZE, the upper bound of
          ' fileOrigArr.  The original bound, LEN(fileMod) \ 4, indexes past
          ' the end of fileOrigArr (and the last element reads past the end
          ' of fileMod's buffer) whenever no difference is found earlier.
          FOR ii = 0 TO %ARRAYsIZE
             IF fileOrigArr(ii) <> fileModArr(ii) THEN
                ? "Found beginning of difference at DWORD #" & STR$(ii + 1) & $CRLF & _
                  "Original file had: " & HEX$(fileOrigArr(ii)) & $CRLF & _
                  "Changed file has: " & HEX$(fileModArr(ii))
                  EXIT FOR
             END IF
          NEXT
      
          ? "Now find the end of change by reading the files backwards"
                                               'redim the changed-file-array to exactly line up with its last byte...
          REDIM fileModArr(%ARRAYsIZE) AS LONG AT STRPTR(fileMod) + LEN(fileMod) - (%ARRAYsIZE + 1) * 4
          FOR ii = %ARRAYsIZE TO 0 STEP -1     'now get end of change by reading backwards
             IF fileOrigArr(ii) <> fileModArr(ii) THEN
                ? "Found end of difference at DWORD #" & STR$(ii + 2) & $CRLF & _
                  "Original file had: " & HEX$(fileOrigArr(ii)) & $CRLF & _
                  "Changed file has: " & HEX$(fileModArr(ii))
                  EXIT FOR
             END IF
          NEXT
          ? "done"
      END FUNCTION

      Comment


      • #23
        This program may do it. Sounds like every write request at the system level is archived for selected files. Would like to see how this is done, too.
        http://www.acertant.com/web/versomatic/default.htm

        Comment


        • #24
          Originally posted by John Gleason View Post
          1st test for the general extent of changes in the modified file
          John, that's an interesting technique, to work inwards from the ends of the file looking for matching chunks, but it depends upon the files being in the same sequence, and we can't make that assumption.

          Using the array as I did above is not brilliant as more searches are required than are theoretically necessary, and some matching sequences will be incompletely identified, but it does show that it is pretty easy to identify data in the current file which matches data in the reconstructed previous generation. Even using this crude technique will result in a considerably smaller (than the current file size) deltas for typical masterfiles where data are not very volatile.

          Using a binary chop approach (like I tried in the first example, but the muse was not on me) would be a lot more efficient, as we will look for bigger matching chunks first. I will try again with this.

          There is a problem here with spurious matches, because they will happen as chunk sizes diminish (chunk size 1 is almost certain to result in in a spurious match, for example) due to chopping and they will prevent smaller chunks at the ends of bigger ones from being amalgamated into the bigger chunks, so I will have to work around that one.

          What we end up with is a delta file. The file structure will be something like:
          • identifier block - date, time, path of file being backed up, etc
          • descriptor block - array of descriptors, length, offset, file index
          • unmatched data block - the data from the current file which could not be matched in the reconstructed previous generation.

          Comment


          • #25
            Originally posted by Mike Doty View Post
            This program may do it...
            That's an interesting one!

            Comment


            • #26
              Originally posted by Chris Holbrook View Post
              Using a binary chop approach ...would be a lot more efficient, as we will look for bigger matching chunks first. I will try again with this.
              See code below.
              Originally posted by Chris Holbrook View Post
              Using a binary chop approach ...would be a lot more efficient, as we will look for bigger matching chunks first. I will try again with this.
              See code below. Also please note the length of the (DDT) source file - 100 lines, one of the reasons I am staying with PB.

              In testing I used (amongst other test data) a SQLite database of 11Mb where the previous and current versions differed by one table. The result was that about 500K of data was not matched. This would have resulted in a tiny delta (the whole point of the application), and roughly corresponded to the increased size of the "current" version of the database over the "previous". So that's "Proof of Concept"!

              Code:
              #COMPILE EXE
              #DIM ALL
              #INCLUDE "WIN32API.INC"
              %IDD_DIALOG1     =  101
              %IDC_LABEL1      = 1001
              %IDC_LABEL2      = 1002
              %IDC_LISTBOX1    = 1003
              %IDC_CURRFILE_TB = 1005
              %IDC_lastfile_tb = 1004
              %IDC_BUTTON1     = 1007
              %IDC_LABEL4      = 1008
              %IDC_F1SIZE_TB   = 1010
              %IDC_LABEL3      = 1011
              %IDC_F2SIZE_TB   = 1012
              %IDC_LABEL5      = 1013
              
              GLOBAL gsLast, gsCurr AS STRING
              GLOBAL ghDlg AS DWORD
              GLOBAL gbytesmatched AS LONG
              '-----------------------------------------------------------------------------------
              ' Recursive binary chop: look for the slice of gsCurr spanning
              ' [llow, lhigh) in gsLast.  On a match, report (curr pos, last
              ' pos, length) to the listbox and accumulate gbytesmatched;
              ' otherwise split the slice in half and try each half.
              SUB chop (llow AS LONG, lhigh AS LONG)
                  LOCAL p AS LONG
                  LOCAL half AS LONG
                  LOCAL s AS STRING

                  IF lhigh - llow < 1 THEN EXIT SUB     ' empty slice: nothing to match
                  s = MID$(gsCurr, llow, lhigh-llow)
                  p = INSTR(gsLast, s)
                  IF p = 0 THEN
                      ' BUG FIX: use integer division "\" (the original "/"
                      ' is floating-point with rounding on assignment), and
                      ' stop splitting when the slice is a single byte.  The
                      ' original recursed with chop(llow, lhigh) again for an
                      ' unmatched single byte — unbounded recursion and a
                      ' stack overflow on dissimilar files.
                      half = (lhigh - llow) \ 2
                      IF half > 0 THEN
                          chop (llow, llow + half - 1)
                          chop (llow + half, lhigh)
                      END IF
                  ELSE
                      gbytesmatched = gbytesmatched + lhigh-llow
                      LISTBOX ADD ghDlg, %IDC_LISTBOX1, "curr: " + STR$(llow) + " last: " + STR$(p) + " len: " + STR$(lhigh-llow)
                  END IF
              END SUB
              '-----------------------------------------------------------------------------------
              ' Dialog callback.  On the "start" button it loads both files
              ' into gsCurr/gsLast, resets the match counter and runs the
              ' recursive binary-chop comparison.
              CALLBACK FUNCTION ShowDIALOG1Proc()
                  STATIC sLastFile, sCurrFile AS STRING
                  LOCAL s AS STRING
                  LOCAL l AS LONG
              
                  SELECT CASE AS LONG CBMSG
                      CASE %WM_INITDIALOG
                          ghDlg = CBHNDL
                      CASE %WM_COMMAND
                          SELECT CASE AS LONG CBCTL
                              CASE %IDC_BUTTON1
                                  IF CBCTLMSG = %BN_CLICKED OR CBCTLMSG = 1 THEN
                                      LISTBOX RESET CBHNDL, %IDC_LISTBOX1
                                      CONTROL GET TEXT CBHNDL, %IDC_LASTFILE_TB TO sLastFile
                                      CONTROL GET TEXT CBHNDL, %IDC_CURRFILE_TB TO sCurrFile
                                      OPEN sCurrFile FOR BINARY AS #1
                                      l = LOF(1)
                                      gsCurr = STRING$(l,0)
                                      ' BUG FIX: the original wrote the CURRENT file's size
                                      ' into BOTH size boxes (F2SIZE got STR$(l)).  Per the
                                      ' layout, F2SIZE sits on the current-file row and
                                      ' F1SIZE on the last-file row — TODO confirm.
                                      CONTROL SET TEXT CBHNDL, %IDC_F2SIZE_TB, STR$(l)
                                      GET #1, 1, gsCurr
                                      CLOSE #1
                                      OPEN sLastfile FOR BINARY AS #1
                                      gsLast = STRING$(LOF(1),0)
                                      CONTROL SET TEXT CBHNDL, %IDC_F1SIZE_TB, STR$(LOF(1))
                                      GET #1, 1, gsLast
                                      CLOSE #1
                                      gbytesmatched = 0
                                      chop (1,l)
                                      LISTBOX ADD CBHNDL, %IDC_LISTBOX1, "total matched: " + STR$(gbytesmatched)
                                  END IF
                          END SELECT
                  END SELECT
              END FUNCTION
              '----------------------------------------------------------------------
              ' Build and show the main dialog (hand-maintained DDT version,
              ' no PBForms markers), then run it modally with
              ' ShowDIALOG1Proc as the message handler.
              FUNCTION ShowDIALOG1(BYVAL hParent AS DWORD) AS LONG
                  LOCAL lRslt AS LONG
                  LOCAL hDlg  AS DWORD
              
                  DIALOG NEW hParent, "file differences by Sancho Panza's ***", 33, 81, 528, 121, %WS_POPUP OR %WS_BORDER OR %WS_DLGFRAME OR _
                      %WS_SYSMENU OR %WS_CLIPSIBLINGS OR %WS_VISIBLE OR %DS_MODALFRAME OR %DS_3DLOOK OR %DS_NOFAILCREATE OR %DS_SETFONT, _
                      %WS_EX_CONTROLPARENT OR %WS_EX_LEFT OR %WS_EX_LTRREADING OR %WS_EX_RIGHTSCROLLBAR, TO hDlg
                  CONTROL ADD LABEL,   hDlg, %IDC_LABEL1, "name of last known file", 5, 5, 85, 15, %WS_CHILD OR %WS_VISIBLE OR %SS_LEFT OR _
                      %SS_CENTERIMAGE, %WS_EX_LEFT OR %WS_EX_LTRREADING
                  CONTROL ADD LABEL,   hDlg, %IDC_LABEL2, "name of current file", 5, 40, 85, 15, %WS_CHILD OR %WS_VISIBLE OR %SS_LEFT OR _
                      %SS_CENTERIMAGE, %WS_EX_LEFT OR %WS_EX_LTRREADING
                  ' listbox receives the match results from chop()
                  CONTROL ADD LISTBOX, hDlg, %IDC_LISTBOX1, , 180, 15, 340, 100, %WS_CHILD OR %WS_VISIBLE OR %WS_TABSTOP OR %WS_HSCROLL OR _
                      %WS_VSCROLL OR %LBS_HASSTRINGS, %WS_EX_CLIENTEDGE OR %WS_EX_LEFT OR %WS_EX_LTRREADING OR %WS_EX_RIGHTSCROLLBAR
                  CONTROL ADD TEXTBOX, hDlg, %IDC_lastfile_tb, "enter name of previous version of file", 5, 20, 90, 15
                  CONTROL ADD TEXTBOX, hDlg, %IDC_CURRFILE_TB, "enter name of current version of file", 5, 55, 90, 15
                  CONTROL ADD BUTTON,  hDlg, %IDC_BUTTON1, "start", 125, 105, 45, 15
                  ' read-only boxes that display the two file sizes
                  CONTROL ADD TEXTBOX, hDlg, %IDC_F1SIZE_TB, "", 100, 20, 50, 15, %WS_CHILD OR %WS_VISIBLE OR %WS_TABSTOP OR %ES_LEFT OR _
                      %ES_AUTOHSCROLL OR %ES_READONLY, %WS_EX_CLIENTEDGE OR %WS_EX_LEFT OR %WS_EX_LTRREADING OR %WS_EX_RIGHTSCROLLBAR
                  CONTROL ADD LABEL,   hDlg, %IDC_LABEL3, "bytes", 150, 20, 20, 15, %WS_CHILD OR %WS_VISIBLE OR %SS_LEFT OR %SS_CENTERIMAGE, _
                      %WS_EX_LEFT OR %WS_EX_LTRREADING
                  CONTROL ADD TEXTBOX, hDlg, %IDC_F2SIZE_TB, "", 100, 55, 50, 15, %WS_CHILD OR %WS_VISIBLE OR %WS_TABSTOP OR %ES_LEFT OR _
                      %ES_AUTOHSCROLL OR %ES_READONLY, %WS_EX_CLIENTEDGE OR %WS_EX_LEFT OR %WS_EX_LTRREADING OR %WS_EX_RIGHTSCROLLBAR
                  CONTROL ADD LABEL,   hDlg, %IDC_LABEL5, "bytes", 150, 55, 20, 15, %WS_CHILD OR %WS_VISIBLE OR %SS_LEFT OR %SS_CENTERIMAGE, _
                      %WS_EX_LEFT OR %WS_EX_LTRREADING
                  DIALOG SHOW MODAL hDlg, CALL ShowDIALOG1Proc TO lRslt
                  FUNCTION = lRslt
              END FUNCTION
              '==================================================================================================================================
              ' Program entry point: show the comparison dialog on the desktop.
              FUNCTION PBMAIN()
                  CALL ShowDIALOG1(%HWND_DESKTOP)
              END FUNCTION

              Comment


              • #27
                i never heard of this BackupPc but wanted to post a fyi on it.

                http://backuppc.sourceforge.net/

                http://wiki.contribs.org/BackupPC

                i do use smeserver ver 7.?
                p purvis

                Comment


                • #28
                  Time for my 2.5 cents worth. I majored in backup methods during my stint in systems support.

                  First, under no circumstances would I think it justifiable to write my own backup software solution, especially if I were contemplating using compression or encryption methods. A failed backup is useless, and I would insist on a thoroughly tested and highly recommended product for the job.

                  Second, Windows is restrictive when it comes to letting you access certain files at certain times, and any joe-hickey programmer is going to be faced with some major problems about getting access to all files, and writing a backup in real time with applications running and new entries being made on the fly. You need a product that can deal with that.

                  Third, you have to pay close attention to the device you are using for your backup. As MCM stated, tape sucks big time. So do floppies, CDs, DVDs, and just about anything other than hard drives. And for critical backup situations, hard drives also suck unless set up in a RAID configuration that ensures data integrity and survivability. For survivability, that means archiving off site as well.

                  So, from my perspective and experience, this topic is simply off the mark.

                  Comment


                  • #29
                    FWIW: I back up to two machines on my home network. Here's my incremental backup:
                    Code:
                    xcopy d:\*.* "\\Machine1\masterbackup\*.*" /a/v/s
                    xcopy d:\*.* "\\Machine2\masterbackup\*.*" /m/v/s
                    In the first one, the /A switch copies only those with the archive bit set but doesn't reset the bit.

                    In the second one, the /M copies and resets the archive bit.
                    There are no atheists in a fox hole or the morning of a math test.
                    If my flag offends you, I'll help you pack.

                    Comment


                    • #30
                      Originally posted by Donald Darden View Post
                      Time for my 2.5 cents worth. I majored in backup methods during my stint in systems support.

                      First, under no circumstances would I think it justifiable to write my own backup software solution, especially if I were contemplating using compuression or encryption methods. A failed backup is useless, and I would insist on a thoroughly tested and highly recommended product for the job.

                      Second, Windows is restrictive when it comes to letting you access certain files at certain times, and any joe-hickey programmer is going to be faced with some major problems about getting access to all files, and writing a backup in real time with applications running and new entries being made on the fly. You need a product that can deal with that.

                      Third, you have to pay close attention to the device you are using for your backup. As MCM stated, tape sucks big time. So do floppies, CDs, DVDs, and just about anything other than hard drives. And for critical backup situations, hard drives also suck unless set up in a RAID configuration that ensures data integrity and survivable. For survivability, that means archiving off site as well.

                      So, from my perspective and experience, this topic is simply off the mark.
                      Sorry, Joe. I credited MCM with putting the dump on using tapes, when you were the one that pointed out how bad they are. Another big problem with tapes (besides how unreliable they are, and time consuming to use), is that they really do not work well for partial restores. A good backup program should let you select specific folders and files for restores, but tapes lack the ability to precisely reposition within an archive for this purpose. Tapes are also subject to high wear rates because their surfaces are in touch with many surfaces, whereas hard drives avoid having the heads actually touch the surface, so hard drives only tend to die due to excessive heat and wear on the motor and bearings. Magnetic tapes are stretched by the wrap/unwrap around two spools, and subject to flexing as they wind around surfaces and are rubbed against the head. The bonding glue that keeps the magnetic oxide attached to the tape breaks down over time, causing the head and other surfaces to collect sticky iron oxide coatings, which add more abrasive action to the wear on the tapes. The only thing going for tape is that it was once the only high-capacity storage media available, and that it is relatively cheap. But for backup purposes, reliability and other factors are of far greater importance.

                      Comment


                      • #31
                        A good backup program should let you select specific folders and files for restores, but tapes lack the ability to precisely reposition within an archive for this purpose
                        ???
                        Microsoft Backup lets you do selective restore. So does BackupExec, which I used on prior system.

                        (Hard to believe an 'included with Windows' Microsoft utility is actually useful in the Real World, huh? But it's true in this case!)
                        Michael Mattias
                        Tal Systems (retired)
                        Port Washington WI USA
                        [email protected]
                        http://www.talsystems.com

                        Comment


                        • #32
                          Originally posted by Donald Darden View Post
                          First, under no circumstances would I think it justifiable to write my own backup software solution, especially if I were contemplating using compuression or encryption methods. A failed backup is useless, and I would insist on a thoroughly tested and highly recommended product for the job.
                          I'll try to bear that in mind.

                          Joe Hickey-Programmer, BSc (Windmills)

                          Comment


                          • #33
                            Years ago I use to use Drive Image from PartitionMagic for company backups and Jaz drives for company backups. They had about three compression settings that you could specify, and I tried using the highest, figuring the added time for compression would not hurt because the process was somewhat slow to begin with, and the drives only offered 1GB storage capacity (we were using 2 GB hard drives in the 386s that we had).

                            The backups appeared to be fine, at first. But as we added more applications and data, we hit a critical point where one backup failed to restore properly. Somehow the compression algorithm caused the restore to think it was done when only about 40 percent of the restore was finished. The same software and data on other PCs showed the same problem. I reduced the compression ratio to the lowest setting, backed up a PC that was intact, and was able to restore it completely to the affected machine.

                            From then on, I stuck with the lowest setting. I sometimes debated whether I could have used the midrange setting instead, but I wanted to distance myself from the setting that had caused the problem, and the lowest setting was adequate for getting the job done.

                            It's a fact that higher compression increases the risk of data failure, and that is not good when it comes to backups. I've also found that despite the reputed error correction capabilities attributed to CDs and DVDs, these are far more prone to failure than hard drives are. And the labor of switching disks, their slow burn speed, and having to make new labels or write on the surface to identify them are all time consuming. There is another problem with removable storage is that you can have compatibility issues between disks burned and read on different drives. The problem is not as bad as it use to be, but like I said, a failed backup is useless. Actually, it is far worse than useless, because it gives you the false impression that you are covered against a possible failure.

                            As system support and performing some of the duties of system administrator, my real concern was to protect my machines and their contents by whatever means were possible. Now how would I explain to someone that I decided to trust my own skills as a programmer to write a backup process, where there are commercial-grade products ready and able to do the job? Even if I trust myself to do a good job, why should anyone else accept my judgment in this matter?

                            I had to use compression algorithms (PKZip) to compress and store data onto the hard drives, because with just 2 GB of storage, I had to squeeze every byte of space that I could from what I had. I spent months developing and refining my programs, and the data was tested and retested to make sure that the algorithms did not alter the contents. That meant I had to write the data to disk, compress it into a smaller file, extract it from the smaller file, and compare it byte for byte with the uncompressed data before I could delete the uncompressed version. Slow, but really essential if you cannot afford to compromise your data by possible errors in handling. So I well understood the need to handle compressed data and maintain storage, but again, I depended heavily on other backup solutions for long term and offsite storage.

                            Comment


                            • #34
                              postscript

                              Performance of the incremental backup approach which I have outlined above is interesting, because unlike compression backups, the elapsed time depends on the number and size of matching chunks found, and the minimum chunk size. Therefore if there are no matches, all searches will terminate when the minimum chunk size is reached. This is very time consuming and leads to a slow backup - the number of searches is dictated by the target file size and the search duration, by the size of the base file. However, when the files mostly match, performance is very good and the delta is tiny, as would be expected. The restore time is very fast in either case.

                              Comment


                              • #35
                                Testable version attached

                                The attached zip file contains a testable version exe for comments. I have to say that there is absolutely no warranty and it is used entirely at your own risk. I would really appreciate comments on it, my skin is thick and my shoulders are broad - at least, metaphorically.

                                To summarise, the application creates a delta file by comparing two files, the base file and the file to back up. The delta contains differences and metadata which enables the file which is backed up to be restored by referring to the base file. The application handles the restore, too.

                                There is no compression of the delta. So if there are few differences, the delta will be small, and if the files are very different, then the delta will be large. The delta can be smaller than the size of the file differences because if there are repeating blocks of data, only a single copy is needed.

                                There is no encryption of data. There is a checksum calculation to make reasonably sure that a delta does not get added to the wrong base file, and to check that the recreated file is "just like" the backed up file.

                                The run time depends on the file size, the correspondence between the two files, and the amount of memory in your PC. Because the method looks for matching data, in dissimilar files this can take a long time! Equally, if your PC has less memory than your goldfish, it could be coffee time.

                                Where would you use this (subject to successful testing), as a backup solution? In situations where large files with a relatively small rate of change were encountered.

                                Where else would you use it (subject to successful testing)? To synchronise remote copies of large files without downloading the whole file.
                                Attached Files

                                Comment


                                • #36
                                  Beyond Compare

                                  I got rid of tape backup long ago and quit doing backups to CDROM also, both unreliable and slow.

                                  I backup to an external USB hard drive using Beyond Compare, by scootersoftware.com. BC is awesome for comparing folders and files also.

                                  I use DriveImage for image backups of the C: drive on my Windows ME PC, but since PowerQuest was bought by Symantec, I didn't upgrade.

                                  I guess I should get an image backup of my XP PCs, but I've only had to recover once since I bought them.

                                  Comment

                                  Working...
                                  X