Announcement

Collapse

Forum Guidelines

This forum is for finished source code that is working properly. If you have questions about this or any other source code, please post it in one of the Discussion Forums, not here.
See more
See less

REGEXPR and REGREPL demo

Collapse
X
 
  • Filter
  • Time
  • Show
Clear All
new posts

  • REGEXPR and REGREPL demo

    This is the PD code included with my article, Regular Expressions: Do They Live Up to Their Potential? which appeared in the Augus 1999 issue of BASICally Speaking
    Code:
     
    $IF 0
    '  FILE: Regexpression.bas FOR PB/CC 2.0
    '  9.17.99  For BASICally Speaking article.
    '  Author: Michael Mattias Racine WI USA
    '  Released to the public domain by the author
    $ENDIF
    #REGISTER NONE
    #DIM ALL
    #DEBUG ERROR ON
    #OPTION VERSION4
    $INCLUDE "WIN32API.INC"
    
    DECLARE FUNCTION LoadFile (FileName AS STRING) AS STRING
    
    FUNCTION WinMain (BYVAL hCurInstance  AS LONG, _
                      BYVAL hPrevInstance AS LONG, _
                      lpszCmdLine         AS ASCIIZ PTR, _
                      BYVAL nCmdShow      AS LONG) EXPORT AS LONG
    
       DIM ThisFile AS ASCIIZ * %Max_Path
       DIM Stat AS LONG
       DIM J$
       Stat = GetModuleFileName (hCurInstance, ThisFile, SIZEOF(ThisFile) )
       PRINT "Hello World from Program " & ThisFile
       J$ = WAITKEY$
       CALL Reg1
       J$ = WAITKEY$
    END FUNCTION
    
    SUB Reg1
       DIM TheFile AS STRING, FileData AS STRING ' , H AS LONG
       DIM I AS LONG
       DIM J$
       ' REGEXPR, REREPL variables
       DIM Mask AS STRING, posvar AS LONG, lenvar AS LONG, startvar AS LONG
       DIM Mask2 AS STRING
       DIM TheMain AS STRING               ' 'MAIN' IS AN UNDOCUMENTED KEYWORD
       DIM TheWord AS STRING
       DIM NewMain AS STRING
       DIM ReplaceMask AS STRING
    
    '  ****    REGEXPR demos   *****
    '   GOSUB PbHelpFile                        ' Tested 9/29/99 OK
    '   GOSUB PhoneNumbers                      ' tested 9/29/99 OK
    '   GOSUB AlternateSpelling                 ' tested 9/29/99 OK
    '   GOSUB FindEachWordInString              ' tested 9/29/99 OK
    '   GOSUB FindAllLabels_InSourceCode        ' tested 9/29/99 OK
    '   GOSUB ThreewayAlternation               ' tested 9/29/99 OK
    '   GOSUB RegExprLineInput                  ' tested 9/29/99 OK
    '   GOSUB FindAllProceduresInSourceCode     ' tested 9/29/99 OK
    
    ' ****  REGREPL demos     *****
    '   GOSUB CompressSpaces                    ' tested 9/29/99 OK
    '   GOSUB ReplaceEachWordInStringWithPlural ' tested 9/29/99 OK
    '   GOSUB ParseCSVFile                      ' tested 9/29/99 OK
       GOSUB CreateDeclareFile                 ' tested 9/30/99 OK
    
        EXIT SUB
    
    PBHelpFile:
       TheMain = "Send your comments to BASICally Speaking at [email protected]"
       Mask    = "[a-z_.-][email protected][a-z_.-]+
       REGEXPR Mask IN TheMain TO PosVar, LenVar
       PRINT "REGEXPR returns:" & MID$(TheMain, PosVar, lenVar)
       WAITKEY$
         RETURN
    
    AlternateSpelling:
      Mask = "Colo[u]?r"      ' a 'u' in the indicated position is optional
      TheMain = " The color of the leaves is more brilliant than the colour of the television."
      GOSUB RegExprLoop
       RETURN
    
    CompressSpaces:
       TheMain = " The color   of the    leaves is more brilliant than the colour of     the television."
       Mask    = "\x20[\x20]+"          ' find space followed by one or more spaces
       ReplaceMask = "\x20"             ' replace with a single space.
       PosVar = 1 :  StartVar = 1
       PRINT "TheMain Begins as " & $DQ & TheMain & $DQ
       WHILE StartVar
          REGREPL Mask IN TheMain WITH ReplaceMask AT StartVar TO Posvar, NewMain
          StartVar = PosVar
          TheMain = NewMain
       WEND
       PRINT "TheMain Converted to "  & $DQ & NewMain & $DQ
        RETURN
    
    RegExprLoop:
    ' INPUT: TheMain, Mask.  OUTPUT: multiple REGEXPR results
      PosVar = 1:   StartVar = 1
      WHILE PosVar
        REGEXPR Mask IN TheMain AT StartVar TO PosVar,Lenvar
        IF PosVar THEN
             TheWord = MID$(TheMain, Posvar, LenVar)
             PRINT TheWord
             StartVar = PosVar + LenVar
        END IF
      WEND
        RETURN
    
    RegExprLineInput:
       TheFile = "Regexpression.BAS"
       FileData = LoadFile (TheFile)
       PRINT "Size of file is " & STR$(LEN(FileData))
       WAITKEY$: StartVar = 1: PosVar   = 1
       Mask = "\n"                       ' \n = newline
       WHILE PosVar
          REGEXPR Mask IN FileData AT StartVar TO PosVar, LenVar
          IF PosVar THEN
             TheWord = MID$(FileData, StartVar, PosVar - StartVar-1)  ' NOTE: must remove the newline character
             PRINT "x";TheWord;"x"
             StartVar = PosVar + LenVar
          END IF
       WEND
          RETURN
    
    PhoneNumbers:
     Mask = "\([0-9][0-9][0-9]\)-[0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]"
     TheMain = " call me at (900)-123-4567"
     REGEXPR Mask IN TheMain TO PosVar,LenVar
     IF PosVar THEN PRINT MID$(TheMain, PosVar, LenVar)
        RETURN
    
    ThreewayAlternation:
      TheMain = " Two is too many to do the job "
      Mask = "(to\b)|(too\b)|(two\b)"     ' works fine 2, 9, 18 found
    '  Mask = "(to)|(too)|(two)\b"         ' did not find too(9), but did find to (18) and Two (2)  BUGBUGBUG
    '  Mask = "((to)|(too)|(two))\b"        ' found Two (2) and to (18) but not too (9)
    '  Mask = "[(to)|(too)|(two)]\b"       ' found single 'o' chararters at 4, 11, 19 and 22
    '  Mask = "[(to)|(too)|(two)]+\b"      ' found Two(2), too(9), to (18) and 'o' at 22 (in 'do')
       GOSUB RegExprLoop
         RETURN
    
    ReplaceEachWordInStringWithplural:
       TheMain = " Now is the time for    all good men" & $CRLF & "to come to the aid of his party."
       'Find each word in a string      WORKS  9/17/99 even with embedded spaces, CRLF is an undocumented word boundary
       LET mask = "([a-z]+)\b"        ' \b = word boundary
       LET ReplaceMask = "\01s"       ' for each word, append an 's'
       StartVar = 1: PosVar =1
       WHILE Posvar
          REGREPL Mask IN TheMain WITH ReplaceMask AT Startvar TO Posvar, NewMain
          TheMain = NewMain
          REGEXPR Mask IN TheMain AT StartVar TO Posvar, LenVar
          IF PosVar THEN
             TheWord = MID$(TheMain, posvar,Lenvar): PRINT TheWord
             StartVar = PosVar + Lenvar + 1
          END IF
       WEND
       PRINT NewMain
         RETURN
    
    FindEachWordInString:
       TheMain = " Now is the time for    all good men" & $CRLF & "to come to the aid of his party."
       LET mask = "[a-z]+\b"        ' \b = word boundary. CRLF is (undocumented) word boundary
       GOSUB RegExprLoop
         RETURN
    
    FindAllLabels_InSourceCode:
      TheFile = "Regexpression.bas"
      TheMain = LoadFile(TheFile)
      Mask = "\b[a-z0-9._]+:[\x20]*\r"   ' \n does not work, because CR precedes LF at end-of-line
      GOSUB RegExprLoop
        RETURN
    
    ParseCSVFile:
    ' file in:  "Value","value", "value",crlf, "value", "value", "value", Crlf.....
      FileData = $DQ & "Smith" & $DQ & "," & $DQ & "John"   & $DQ & "," & $DQ & "555-1234" & $DQ & $CRLF & _
                 $DQ & "Doe"   & $DQ & "," & $DQ & "Jane"   & $DQ & "," & $DQ & "555-6789" & $DQ & $CRLF & _
                 $DQ & "Brown" & $DQ & "," & $DQ & "Willie" & $DQ & "," & $DQ & "555-8765" & $DQ & $CRLF
    
      DIM WordMask AS STRING
      WordMask = "[a-z0-9-]+"      ' a 'standard' word does not allow the dash, so we create out own
    
      Mask = "\q(" & WordMask & ")\q"
      ReplaceMask = "\01"
      PRINT FileData
      PRINT "*** END OF FILEDATA OF LEN=" & STR$(LEN(FileData)) & "***"
    
      J$ = WAITKEY$
      PRINT:  PRINT
      PosVar = 1: StartVar = 1
      WHILE Posvar
          REGEXPR Mask IN FileData AT Startvar TO PosVar, LenVar   ' find items within quotes
          IF PosVar THEN
            REGREPL Mask IN FileData WITH ReplaceMask AT Startvar TO Posvar, NewMain   ' posvar is AFTER the replacement
            FileData = NewMain
           ' for demo only, get the value  using RegExpr again, just looking for words
            REGEXPR WordMask IN FileData AT StartVar TO posvar, lenVar
            TheWord = MID$(FileData, posvar,Lenvar)
            PRINT TheWord, LEN(TheWord)
            StartVar = PosVar + LenVar       ' reset the start var to follow the word
         END IF                              ' if we even found the item to replace
      WEND
      PRINT Filedata                        ' show the modified file
         RETURN
    
    FindAllProceduresInSourceCode:
      TheFile = "Regexpression.bas"
      TheMain = LoadFile(TheFile)
      Mask = "^[\x20]*((SUB)|(FUNCTION))[\x20]+[a-z]+"  ' DOES WHAT I WANT IT TO SUB|FUNCTION procName
       GOSUB RegExprLoop
         RETURN
    
    
    CreateDeclareFile:
    
      TheFile = "Regexpression.bas"
      FileData = LoadFile(TheFile)
      Mask = "^[\x20]*((SUB)|(FUNCTION))[\x20]+([a-z]+)([a-z0-9_\*(\) ,]*$)"   ' gets all, but only the
          ' first line of SUB/FUNCTIONs which use _ continuation to next line
      ReplaceMask = "DECLARE \01 \04 \05"
      PosVar = 1
      PRINT "START OUTPUT for Mask = " & mask
      WHILE PosVar
            REGEXPR Mask IN FileData AT StartVar TO PosVar, LenVar        ' extract "SUB subname" as its own string
            TheWord = MID$(FileData,Posvar,Lenvar)
            REGREPL Mask IN TheWord WITH ReplaceMask AT 1 TO I,NewMain    ' create the "DECLARE SUB subname parameters" string
            PRINT NewMain
            StartVar = PosVar + LenVar + 1                                ' reset pointer
      WEND
         RETURN
    
    
    END SUB
    
      FUNCTION   XXX  AS LONG
         ' dummy function, used in FindAllProceduresInSourceCode; indented on purpose
      END FUNCTION
    
    FUNCTION ManyParameters (A AS SINGLE, B AS DOUBLE, C AS SECURITY_ATTRIBUTES, Z_A AS STRING * 8) AS LONG
       ' dummy function for demo
    END FUNCTION
    
    
    FUNCTION LoadFile (FileName AS STRING) AS STRING
      DIM H AS LONG, FData AS STRING
      H = FREEFILE
      OPEN FileName FOR BINARY AS H BASE = 0
      GET$ H, LOF(H), FData
      CLOSE H
      FUNCTION = FData
    END FUNCTION
    ------------------
    Michael Mattias
    Racine WI USA
    [email protected]
    Michael Mattias
    Tal Systems Inc. (retired)
    Racine WI USA
    [email protected]
    http://www.talsystems.com
Working...
X