TIP: Click on subject to list as thread! ANSI
echo: power_bas
to: GARY PRICE
from: BRIAN MCLAUGHLIN
date: 1995-10-26 08:27:00
subject: Hashing in PowerBASIC 3/4

>>> Continued from previous message
Q3.3    Okay, how about a real working example of hashing in QuickBASIC,
        Quinn?  Theory is fine for CompSci freaks, but I'm a coffee and
        pizza programmer, not an egghead.
A3.3    I mentioned that one perfect use of hashing is for word distribution
        counters.  Here is one from Rich Geldreich that has been tweaked
        by me to account for some things that Rich did not know then
        about hashing table sizes.
S5.0    WORDHASH.BAS [F210S05.BAS]
'WORDHASH.BAS v1.10 By Rich Geldreich 1992
' Modified by Quinn Tyler Jackson for demonstrative purposes.
'
'Uses hashing to quickly tally up the frequency of all of the words in a
'text file. (This program assumes that words are seperated by either tab
'or space characters. Also, all words are converted to uppercase before
'the search.)
'
DEFINT A-Z
DECLARE SUB Show.Counts ()
DECLARE SUB Process.Line (A$)
DECLARE SUB UpdateFreq (A$, KeyIndex)
CONST TRUE = -1, FALSE = 0
DIM SHARED TableSize
Main:
 FileName$ = COMMAND$
 CLS
 LOCATE 1, 1
 PRINT "WORDHASH.BAS By Rich Geldreich 1992"
 PRINT "     Tweaked by Quinn Tyler Jackson 1993"
 OPEN FileName$ FOR INPUT AS #1 LEN = 16384
' In Rich's original version, the TableSize was set at 7000.  My version
' guesses at how large the table needs to be based on this:
' There are 5.5 characters in the average word.  Therefore, divide the
' text file length by 5.5.  For safety, assume that as many as
' half of those will be unique.  In normal text, half the words are in the
' hundred most common list, so this plays it pretty safe!  It will die
' if you take a file that is over about 50% unique words, however!  This
' is for NORMAL text files, not word dictionaries, where all entries are
' unique!
'
'SPLICE IN FROM EARLIER SAMPLE 4.0 IN THIS FAQ
'           VVVVVVVVVVVVV
TableSize = funFirstPrime(LOF(1) * .09)
REDIM SHARED WordTable$(TableSize)
REDIM SHARED Counts(TableSize)
DIM SHARED New.Words
 DO UNTIL EOF(1)
     LINE INPUT #1, A$
     Process.Line A$
     N = N + 1
     LOCATE 3, 1: PRINT N; "lines processed,"; New.Words; "words found"
 LOOP
SUB Process.Line (A$)
    ASEG = SSEG(A$) 'QuickBASIC 4.5 users change this to VARSEG(A$)
    AOFS& = SADD(A$)
    DEF SEG = ASEG + AOFS& \ 16
    AAddress = AOFS& AND 15
    Astart = AAddress
    AEndAddress = AAddress + LEN(A$)
    'get a word
    GOSUB GetAWord
    'update the frequency of the word until there aren't any words left
    DO WHILE Word$  ""
        UpdateFreq Word$, KeyIndex
        GOSUB GetAWord
    LOOP
    EXIT SUB
GetAWord:
    Word$ = ""
    'find a character
    P = PEEK(AAddress)
    DO WHILE (P = 32 OR P = 9) AND AAddress  AEndAddress
        AAddress = AAddress + 1
        P = PEEK(AAddress)
    LOOP
    'if not at end of string then find a space
    IF AAddress  AEndAddress THEN
        KeyIndex = 0
        GOSUB UpdateKeyIndex
        'remember where the character started
        WordStart = AAddress
        AAddress = AAddress + 1
        P = PEEK(AAddress)
        GOSUB UpdateKeyIndex
        'find the leading space
        DO UNTIL (P = 32 OR P = 9) OR AAddress = AEndAddress
            AAddress = AAddress + 1
            P = PEEK(AAddress)
            GOSUB UpdateKeyIndex
        LOOP
        KeyIndex = KeyIndex - L
        'make the word
   Word$ = UCASE$(MID$(A$, WordStart - Astart + 1, AAddress - WordStart))
    END IF
RETURN
UpdateKeyIndex:
    IF P >= 97 AND P <= 122 THEN
        L = P - 32
        KeyIndex = KeyIndex + L
    ELSE
        L = P
        KeyIndex = KeyIndex + L
    END IF
RETURN
END SUB
SUB UpdateFreq (A$, KeyIndex)
STATIC collisions
    'adjust the keyindex so its within the table
    KeyIndex = KeyIndex MOD TableSize
    'calculate an offset for retries
    IF KeyIndex = 0 THEN
        Offset = 1
    ELSE
        Offset = TableSize - KeyIndex
    END IF
    'main loop of hashing
    DO
        'is this entry empty?
        IF WordTable$(KeyIndex) = "" THEN
            'add this entry to the hash table
            WordTable$(KeyIndex) = A$
            New.Words = New.Words + 1
            IF New.Words = TableSize THEN
                BEEP
>>> Continued to next message
 * SLMR 2.1a * MAXLIB For PB v1.2 - Access arrays and files in EMS/XMS!
--- WILDMAIL!/WC v4.12 
---------------
* Origin: Com-Dat BBS - Hillsboro, OR. HST DS (1:105/314.0)

SOURCE: echomail via exec-pc

Email questions or comments to sysop@ipingthereforeiam.com
All parts of this website painstakingly hand-crafted in the U.S.A.!
IPTIA BBS/MUD/Terminal/Game Server List, © 2025 IPTIA Consulting™.