Announcement

Collapse
No announcement yet.

Text capitalisation algo.

Collapse
X
  • Filter
  • Time
  • Show
Clear All
new posts

  • Text capitalisation algo.

    This was a test piece that someone may find useful. It capitalises the leading character of each word in a piece of text. Its use is limited, but some titles are written this way, with every word capitalised.

    Code:
    ' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
    
     FUNCTION capitalise(src$) as STRING
    
      ' Capitalises, in place, the first character of every word in src$ and
      ' returns the modified string.
      '
      ' A "word" is a run of characters whose ttable entry is non-zero:
      '   1 = lower case a-z, 2 = upper case A-Z, 0 = anything else.
      ' Class 0 characters (digits, punctuation, white space ...) separate
      ' words. Only the leading character of a word is changed; the remaining
      ' characters keep whatever case they had.
      '
      ' Upper case letters are class 2 so that a word which already starts with
      ' a capital is recognised as a word. With A-Z left as class 0 the capital
      ' acted as a separator and "This" came out as "THis".
      '
      ' The scan stops at the first zero byte, so text following an embedded
      ' CHR$(0) is not processed.
    
        #REGISTER NONE
    
        LOCAL pstr as DWORD
    
        pstr = StrPtr(src$)
    
      PREFIX "!"
        mov edx, pstr                           ; edx = address of the string data
        test edx, edx
        jz bye                                  ; no data address, nothing to scan
        lea esi, ttable                         ; esi = base of the 256 entry class table
        sub edx, 1                              ; step back so the loop can pre-increment
    
      pre:
        xor edi, edi                            ; edi = 0 : the next word character starts a word
      lbl:
        add edx, 1
        movzx eax, BYTE PTR [edx]               ; eax = current character
        test eax, eax
        jz bye                                  ; exit on terminator
        movzx ecx, BYTE PTR [esi+eax]           ; ecx = class of the character (0, 1 or 2)
        test ecx, ecx
        jz pre                                  ; separator : re-arm the flag
        cmp edi, 1
        je lbl                                  ; already inside a word, leave the character alone
        cmp ecx, 2
        je nxt                                  ; leading character is already upper case
        sub BYTE PTR [edx], 32                  ; lower -> upper, ASCII "a" - "A" = 32
      nxt:
        mov edi, 1                              ; now inside a word
        jmp lbl
    
      bye:
      END PREFIX
    
        FUNCTION = src$
        Exit FUNCTION
    
      PREFIX "!"
        ttable:
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          db 0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2    ; upper case A-O
          db 2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0    ; upper case P-Z
          db 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1    ; lower case a-o
          db 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0    ; lower case p-z
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
      END PREFIX
    
     End FUNCTION
    
    ' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
    hutch at movsd dot com
    The MASM Forum

    www.masm32.com

  • #2
    What is your definition of "word" in this context?

    Is it the same as the PB one used for MCASE$?

    i.e. "A word is considered to be a consecutive series of letters"

    or the one in REGEXPR:

    "one or more characters that include an alphabetic character (A-Z or a-z), a numeric character (0-9), and an underscore. For example, "abc_123" is considered a single word and "abc-123" is considered two words."

    or some other definition of word?

    Comment


    • #3
      Hi Stuart,

      What constitutes a word is determined by the characters in the table. This version is still not reliable; I have a zero-terminated version that works fine, but this one requires more work to get right. It would be easier to convert the source to lower case first, but I am trying to avoid that approach as it requires a rescan of the data. This approach is useful in that you can simply modify the character table to change its characteristics. This is the zero-terminated string version that is testing OK so far.
      Code:
      ' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
      
       FUNCTION capitalise(ByVal pstr as DWORD) as DWORD
      
        ' Capitalises, in place, the first character of every word in the zero
        ' terminated string at address pstr.
        '
        ' pstr    : address of the first character; 0 is accepted and ignored.
        ' returns : pstr, unchanged, so the call can be nested in an expression.
        '
        ' A "word" is a run of characters whose ttable entry is non-zero:
        '   1 = lower case a-z, 2 = upper case A-Z, 0 = anything else.
        ' Class 0 characters separate words. Only the leading character of a
        ' word is changed; the rest of the word keeps its existing case.
      
          #REGISTER NONE
      
          FUNCTION = pstr
      
        PREFIX "!"
          mov edx, pstr                           ; edx = address of the string
          test edx, edx
          jz cap_done                             ; null address, nothing to scan
          lea esi, ttable                         ; esi = base of the 256 entry class table
          sub edx, 1                              ; step back so the loop can pre-increment
      
        cap_reset:
          xor edi, edi                            ; edi = 0 : the next word character starts a word
        cap_next:
          add edx, 1
          movzx eax, BYTE PTR [edx]               ; eax = current character
          test eax, eax
          jz cap_done                             ; exit on terminator
          movzx ecx, BYTE PTR [esi+eax]           ; ecx = class of the character (0, 1 or 2)
          test ecx, ecx
          jz cap_reset                            ; separator : re-arm the flag
          cmp edi, 1
          je cap_next                             ; already inside a word, leave the character alone
          cmp ecx, 2
          je cap_mark                             ; leading character is already upper case
          sub BYTE PTR [edx], 32                  ; lower -> upper, ASCII "a" - "A" = 32
        cap_mark:
          mov edi, 1                              ; now inside a word
          jmp cap_next
      
        cap_done:
        END PREFIX
      
          Exit FUNCTION
      
        PREFIX "!"
          ttable:
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            db 0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2    ; upper case A-O
            db 2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0    ; upper case P-Z
            db 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1    ; lower case a-o
            db 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0    ; lower case p-z
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
            db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
        END PREFIX
      
       End FUNCTION
      
      ' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
      hutch at movsd dot com
      The MASM Forum

      www.masm32.com

      Comment


      • #4
        This is a better and more reliable version.

        Code:
        ' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
        
            #compile exe "cap.exe"
            #include "\basic\include\win32api.inc"
            #include "..\macros.inc"
            #include "..\szprocs.bas"
        
        ' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
        
         FUNCTION PBmain as LONG
        
          ' Demo driver: runs capitalise over three differently stored strings
          ' and prints each result with conout. localstr, conout and pause are
          ' not defined in this listing (presumably they come from the included
          ' macros.inc / szprocs.bas).
        
            LOCAL azstr as STRINGZ * 128
            LOCAL ptxt  as DWORD
            LOCAL verse as STRING
        
          ' 1) address handed back by the localstr macro
            ptxt = localstr("This is a capitalisation test"+$CRLF,32)
            capitalise ptxt
            conout ptxt
        
          ' the same verse is used for both remaining tests
            verse = "Cannons to right of us, cannons to left of us, " + _
                    "cannons in front of us, volleyed and thundered"+$CRLF
        
          ' 2) fixed size zero terminated string, addressed with VarPtr
            azstr = verse
            ptxt = VarPtr(azstr)
            capitalise ptxt
            conout ptxt
        
          ' 3) basic dynamic string, addressed with StrPtr
            bas$ = verse
            ptxt = StrPtr(bas$)
            capitalise ptxt
            conout ptxt
        
            pause
        
         End FUNCTION
        
        ' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
        
         FUNCTION capitalise(ByVal pstr as DWORD) as DWORD
        
          ' Capitalises, in place, the first character of every word in the zero
          ' terminated string at address pstr.
          '
          ' pstr    : address of the first character; 0 is accepted and ignored.
          ' returns : pstr, unchanged, so the call can be nested in an expression.
          '
          ' A "word" is a run of characters whose ttable entry is non-zero:
          '   1 = lower case a-z, 2 = upper case A-Z, 0 = anything else.
          ' Class 0 characters separate words. Only the leading character of a
          ' word is changed; the rest of the word keeps its existing case.
          ' Changing the table changes what counts as a word character.
        
            #REGISTER NONE
        
            FUNCTION = pstr
        
          PREFIX "!"
            mov edx, pstr                           ; edx = address of the string
            test edx, edx
            jz cap_done                             ; null address, nothing to scan
            lea esi, ttable                         ; esi = base of the 256 entry class table
            sub edx, 1                              ; step back so the loop can pre-increment
        
          cap_reset:
            xor edi, edi                            ; edi = 0 : the next word character starts a word
          cap_next:
            add edx, 1
            movzx eax, BYTE PTR [edx]               ; eax = current character
            test eax, eax
            jz cap_done                             ; exit on terminator
            movzx ecx, BYTE PTR [esi+eax]           ; ecx = class of the character (0, 1 or 2)
            test ecx, ecx
            jz cap_reset                            ; separator : re-arm the flag
            cmp edi, 1
            je cap_next                             ; already inside a word, leave the character alone
            cmp ecx, 2
            je cap_mark                             ; leading character is already upper case
            sub BYTE PTR [edx], 32                  ; lower -> upper, ASCII "a" - "A" = 32
          cap_mark:
            mov edi, 1                              ; now inside a word
            jmp cap_next
        
          cap_done:
          END PREFIX
        
            Exit FUNCTION
        
          PREFIX "!"
            ttable:
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
              db 0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2    ; upper case A-O
              db 2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0    ; upper case P-Z
              db 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1    ; lower case a-o
              db 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0    ; lower case p-z
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
              db 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
          END PREFIX
        
         End FUNCTION
        
        ' ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
        hutch at movsd dot com
        The MASM Forum

        www.masm32.com

        Comment

        Working...
        X