Announcement

Collapse

Forum Guidelines

This forum is for finished source code that is working properly. If you have questions about this or any other source code, please post it in one of the Discussion Forums, not here.
See more
See less

RLE Compression for PBCC / PBDLL

Collapse
X
  • Filter
  • Time
  • Show
Clear All
new posts

  • RLE Compression for PBCC / PBDLL

    Code:
    '***************************************************************
    ' Name        : Compression, uncompression using RLE-algorithm
    ' Description : Compresses strings, most effective on bitmap files
    ' (C) 97 Jouni Vuorio - Finnland
    ' adapted for pbcc 5.0 by Ralph Berger
    
    
    $INCLUDE "win32api.inc"
    
    ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
    ' rle compression
    ' not too fast - if you need more speed try using strptr
    ' in for x - loop
    '
    FUNCTION compress(BYVAL text1 AS STRING) AS STRING
        LOCAL x       AS INTEGER
        LOCAL fchar   AS INTEGER
        LOCAL flag    AS LONG
        LOCAL maxlen  AS INTEGER
        LOCAL test    AS INTEGER
        LOCAL START   AS LONG
        LOCAL ENDE    AS LONG
        LOCAL tt      AS LONG
        LOCAL procstr AS STRING
    
        ''''' count characters used
        DIM cUsed(0:255) AS LONG
        START = STRPTR(text1)
        ENDE  = START + LEN(TEXT1)
        FOR tt = START TO ENDE
            INCR cUsed(PEEK(tt))
        NEXT tt
    
        ''''' unused char is startmark for rle
        FOR fchar = 255 TO 0 STEP -1
           IF cUsed(fchar) = 0 THEN EXIT
        NEXT fchar
    
        ''''' no unused char -> no compression
        IF fchar < 0 THEN
    '       PRINT "cannot compress"
           FUNCTION = text1
           EXIT FUNCTION
        END IF
    
        procstr = text1
        ''''' loop through all chars
        FOR x = 0 TO 255
            IF cUsed(x) < 4 THEN ITERATE   ''' not interesting, skip to next char
            maxlen = MIN(136, cUsed(x))    ''' rle stores up to 256 repeats
                         ''' Range 5 to 255    try to match with your recordlen to tune up
                         
            FOR test = maxlen TO 4 STEP -1                    ''' try to find
               flag = INSTR(procStr,REPEAT$( test , CHR$(x))) ''' drill down search lenght
               IF flag = 0 THEN ITERATE
                                                              ''' start rle compression
               REPLACE MID$(procStr,flag,test) _              ''' found char * len
               WITH CHR$(fchar,test,x) _                      ''' RLE sign,len,char
               IN procStr
            NEXT test
        NEXT x
    
        FUNCTION = CHR$(255,255,fchar,255) + procStr
    END FUNCTION
    
    
    
    ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
    ' decompress rle encoded strings
    ' this one should be fast enough
    '
    FUNCTION uncompress(text1 AS STRING) AS STRING
    
        LOCAL fchar   AS STRING, _
              procStr AS STRING, _
              flag    AS LONG
    
        ''' check for magic header
        IF ASC(MID$(TEXT1,1,1)) <> 255 OR _
           ASC(MID$(TEXT1,2,1)) <> 255 OR _
           ASC(MID$(TEXT1,4,1)) <> 255 THEN
    '       messagebox 0,"No Magic Header ","decompress",%MB_OK
           FUNCTION = text1
           EXIT FUNCTION
        ELSE
           fChar   = MID$(TEXT1,3,1)
           procStr = MID$(TEXT1,5)
        END IF
    
        ''' replace rle entries with real values
        DO
           flag = INSTR(procStr,fChar)
           IF flag = 0 THEN
              EXIT LOOP
           ELSE
              REPLACE MID$(procStr,flag,3) _                                         ''' RLE sign,len,char
               WITH REPEAT$(ASC(MID$(procStr,flag+1,1)), MID$(procStr, flag+2,1)) _  ''' create char * len
               IN procStr
           END IF
        LOOP
    
        FUNCTION = procStr
    END FUNCTION
    
    
    
    
    FUNCTION pbmain()
       LOCAL ins AS STRING
       LOCAL new AS STRING
       LOCAL ff AS LONG
       LOCAL I AS LONG
    
       ff = FREEFILE
       OPEN "huge.dbf" FOR BINARY SHARED AS #ff '..........select a filename
       GET$ #ff, LOF(FF), ins
       CLOSE #ff
    
       PRINT LEN(ins)
       PRINT TIMER
       neu = compress(ins)
       PRINT TIMER
       PRINT LEN(neu)
       neu = uncompress(neu)
       PRINT TIMER
       PRINT LEN(neu)
       PRINT NEU = ins
       
       ''''' suspicious ? check all bytes
       'FOR i= 1 TO LEN(ins)
       '   IF MID$(ins,i,1) <> MID$(new,i,1) THEN
       '      PRINT i, MID$(ins,i,1) , MID$(new,i,1)
       '   END IF
       'NEXT i
       EXIT FUNCTION
    
    
    END FUNCTION









  • #2
    Hi E.B. ,

    you are right with your sample string the compressed string will be the same as the original string. RLE encoding only replaces repeats of the same chraracter with a special sign followed by the number of repeats. Try a more realistic sample like a bitmap or a partially filled Array.

    RLE is the most simply file compression. To get a string like yours compressed you should look for a LZ compression routine.

    Best rgds

    Ralph Berger

    EMAIL From: E.B. Knoppert
    Betreff: Errors with RLE (1)

    I'm affraid i can't get it to work.
    The returned string is equal to provided string.
    Would you look at it?

    Thanks,
    E.B. Knoppert

    Code:
     
    $IF %DEF(%pb_dll16)
     
    FUNCTION WinMain (BYVAL hCurInstance  AS INTEGER, _
                      BYVAL hPrevInstance AS INTEGER, _
                      lpCmdLine           AS ASCIIZ, _
                      BYVAL nCmdShow      AS INTEGER) AS INTEGER
     
    
    $Else    '32Bit;
     
    FUNCTION WinMain (BYVAL hCurInstance  AS LONG, _
                      BYVAL hPrevInstance AS LONG, _
                      lpszCmdLine         AS ASCIIZ PTR, _
                      BYVAL nCmdShow      AS LONG) EXPORT AS LONG
     
    
    $ENDIF
     
    
        Dim T As String
        Dim C As String
     
        T = "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
        T = T & T
        T = T & T
        T = T & T
     
        C = RLECompress(T)
     
        Msgbox Str$( Len( T )) &", " & Str$( Len( C ) ) & ", " & C
     
        FUNCTION = 1    'Success
     
    End Function
     
    ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
    ' rle compression
    ' not too fast - if you need more speed try using strptr
    ' in for x - loop
    '
    FUNCTION RLECompress(BYVAL text1 AS STRING) AS STRING
     
        LOCAL x       AS INTEGER
        LOCAL fchar   AS INTEGER
        LOCAL flag    AS LONG
        LOCAL maxlen  AS INTEGER
        LOCAL test    AS INTEGER
        LOCAL START   AS LONG
        LOCAL ENDE    AS LONG
        LOCAL tt      AS LONG
        LOCAL procstr AS STRING
     
        ''''' count characters used
        DIM cUsed(0:255) AS LONG
     
        START = STRPTR(text1)
        ENDE  = START + LEN(TEXT1)
     
        FOR tt = START TO ENDE
     
            INCR cUsed( PEEK( tt ) )
     
        NEXT tt
     
        ''''' unused char is startmark for rle
        FOR fchar = 255 TO 0 STEP -1
     
           IF cUsed(fchar) = 0 THEN EXIT
     
        NEXT fchar
     
        ''''' no unused char -> no compression
        IF fchar < 0 THEN
     
           Msgbox "cannot compress"
     
           FUNCTION = text1
     
           EXIT FUNCTION
     
        END IF
     
        procstr = text1
     
        ''''' loop through all chars
        FOR x = 0 TO 255
     
            IF cUsed(x) < 4 THEN ITERATE   ''' not interesting, skip to next char
     
            maxlen = MIN(136, cUsed(x))    ''' rle stores up to 256 repeats
                         ''' Range 5 to 255    try to match with your recordlen to tune up
                         
            FOR test = maxlen TO 4 STEP -1                    ''' try to find
     
               flag = INSTR(procStr,REPEAT$( test , CHR$(x))) ''' drill down search lenght
     
               IF flag = 0 THEN ITERATE
                                                              ''' start rle compression
               REPLACE MID$(procStr,flag,test) _              ''' found char * len
               WITH CHR$(fchar,test,x) _                      ''' RLE sign,len,char
               IN procStr
     
            NEXT test
     
        NEXT x
     
        FUNCTION = CHR$(255,255,fchar,255) + procStr
     
    END FUNCTION
     
    ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
    ' decompress rle encoded strings
    ' this one should be fast enough
    '
    FUNCTION RLEDeCompress(text1 AS STRING) AS STRING
     
        LOCAL fchar   AS STRING, _
              procStr AS STRING, _
              flag    AS LONG
     
        ''' check for magic header
        IF ASC(MID$(TEXT1,1,1)) <> 255 OR _
           ASC(MID$(TEXT1,2,1)) <> 255 OR _
           ASC(MID$(TEXT1,4,1)) <> 255 THEN
     
    '       messagebox 0,"No Magic Header ","decompress",%MB_OK
     
           FUNCTION = text1
           EXIT FUNCTION
     
        ELSE
     
           fChar   = MID$(TEXT1,3,1)
           procStr = MID$(TEXT1,5)
     
        END IF
     
        ''' replace rle entries with real values
        DO
     
           flag = INSTR(procStr,fChar)
     
           IF flag = 0 THEN
     
              EXIT LOOP
     
           ELSE
     
              REPLACE MID$(procStr,flag,3) _                                         ''' RLE sign,len,char
              WITH REPEAT$(ASC(MID$(procStr,flag+1,1)), MID$(procStr, flag+2,1)) _  ''' create char * len
              IN procStr
     
           END IF
     
    LOOP
     
    FUNCTION = procStr
     
    END FUNCTION

    Comment


    • #3
      ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
      ' rle compression
      ' not too fast - if you need more speed try using strptr
      ' in for x - loop
      ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
      ' (in the first version there is a problem - in large fields of equal chars
      ' it sometimes cautions a mistake (control char on a place of third char)
      ' normally is sequention controlChar_numberOfChars_Char and bad sequention
      ' was in several files controlChar_numberOfChars_controlChar !!)
      ' Compression is now faster (not enough!)
      '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
      FUNCTION RLECompress(BYVAL text1 AS STRING) AS STRING

      LOCAL x AS INTEGER
      LOCAL fchar AS INTEGER
      LOCAL flag AS LONG
      LOCAL maxlen AS INTEGER
      LOCAL test AS INTEGER
      LOCAL START AS LONG
      LOCAL ENDE AS LONG
      LOCAL tt AS LONG
      LOCAL procstr AS STRING
      LOCAL prevChr AS BYTE
      LOCAL aktChr AS BYTE
      LOCAL countChr AS LONG
      LOCAL maxChr AS LONG
      LOCAL flagChr AS BYTE
      LOCAL START2 AS LONG

      ''''' count characters used
      DIM cUsed(0:255) AS LONG
      DIM cSmysl(0:255) AS LONG
      rem in cSmysl field are characters with min.one repetition

      START = STRPTR(text1)
      ENDE = START + LEN(TEXT1)

      maxChr=0
      countChr=1
      FOR tt = START TO ENDE
      aktChr=PEEK( tt )
      INCR cUsed(aktChr)
      IF tt=START THEN
      prevChr=aktChr
      flagChr=0
      ELSE
      IF prevChr=aktChr THEN
      rem Is that a sequention or only one char?
      INCR countChr
      maxChr=MAX(maxChr,countChr)
      rem Maximum repeated chars
      ELSE
      countChr=1
      flagChr=0
      END IF
      prevChr=aktChr
      IF flagChr=0 AND countChr>3 THEN
      flagChr=1
      INCR cSmysl(aktChr)
      rem aktChr is in the file with repetition min.4 chars
      END IF
      END IF
      NEXT tt
      ''''' no unused char -> no compression
      maxChr=MIN(maxChr,255) 'maxChr can be max 255 - type overflow (byte)
      IF maxChr < 4 THEN
      rem no usefull repetition
      FUNCTION = text1
      EXIT FUNCTION
      END IF

      ''''' unused char is startmark for rle
      FOR fchar = 255 TO 0 STEP -1
      IF cUsed(fchar) = 0 THEN EXIT FOR
      NEXT fchar

      ''''' no unused char -> no compression
      IF fchar < 0 THEN
      FUNCTION = text1
      EXIT FUNCTION
      END IF

      procstr = text1

      ''''' loop through all chars
      FOR x = 0 TO 255
      IF cUsed(x) < 4 THEN ITERATE FOR ''' not interesting, skip to next char
      IF cSmysl(x)=0 THEN ITERATE FOR 'not interesting (group of chars < 4), skip to next char
      maxlen = MAX(MIN(maxChr, cUsed(x)), 5)''' Range 5 to max length of repetition
      FOR test = maxlen TO 4 STEP -1
      flag = INSTR(procStr,REPEAT$( test , CHR$(x))) ''' drill down search lenght
      IF flag = 0 THEN ITERATE FOR
      ''' start rle compression
      WHILE flag<>0
      START2=STRPTR(procStr)

      IF PEEK(START2+flag-3)=fchar THEN
      rem IF MID$(procStr,flag-2,1)=CHR$(fchar) THEN 'slower alternative
      rem Is position of flag the third position of previous controlChars ? - I must skip it or shift it to right
      IF test>4 THEN
      INCR flag
      testnov&=test-1
      rem I shift it to right
      ELSE
      rem I skip it
      ITERATE FOR
      END IF
      ELSE
      testnov&=test
      END IF
      procStr=LEFT$(procStr,flag-1)+CHR$(fchar,testnov&,x)+MID$(procstr,flag+testnov&)
      flag = INSTR( flag+2, procStr, REPEAT$( testnov& , CHR$(x)))
      rem I find next "test" sequention
      WEND
      REM next code is potentialy dangerous - no control about left third byte (fchar)
      rem function REPLACE is "blind" to control chars in a text
      REM REPLACE MID$(procStr,flag,test) _ ''' found char * len
      REM WITH CHR$(fchar,test,x) _ ''' RLE sign,len,char
      REM IN procStr
      NEXT test
      NEXT x
      FUNCTION = CHR$(255,255,fchar,255) + procStr
      END FUNCTION

      ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
      ' decompress rle encoded strings
      ' this one should be fast enough
      '
      FUNCTION RLEDeCompress(text1 AS STRING) AS STRING

      LOCAL fchar AS STRING, _
      procStr AS STRING, _
      flag AS LONG

      ''' check for magic header
      IF ASC(MID$(TEXT1,1,1)) <> 255 OR _
      ASC(MID$(TEXT1,2,1)) <> 255 OR _
      ASC(MID$(TEXT1,4,1)) <> 255 THEN
      REM PRINT "It is not RLE file"
      FUNCTION = text1
      EXIT FUNCTION
      ELSE
      fChar = MID$(TEXT1,3,1)
      procStr = MID$(TEXT1,5)
      END IF

      ''' replace rle entries with real values
      DO
      flag = INSTR(procStr,fChar)
      IF flag = 0 THEN
      EXIT LOOP
      ELSE
      rem without a previous control in Compression is this function dangerous
      rem (sequention controlChar_numberOfChars_controlChar is a suicide for a file )
      REPLACE MID$(procStr,flag,3) _ ''' RLE sign,len,char
      WITH REPEAT$(ASC(MID$(procStr,flag+1,1)), MID$(procStr, flag+2,1)) _ ''' create char * len
      IN procStr
      END IF
      LOOP

      FUNCTION = procStr

      END FUNCTION


      ------------------
      petrschreiber@gmail.com

      Comment

      Working...
      X