A.2.2   Producing the Data Table Con’t
A.2.2.2   Listing of TABBUILD

PROGRAM TABBUILD (TEXTFILE, OUTPUT,

{ total table and one for each character }

T_TABLE, B_TABLE, L_TABLE, N_TABLE,

J_TABLE, R_TABLE, S_TABLE, NA_TABLE,

{ ditto for indexes }

T_INDEX, B_INDEX, L_INDEX, N_INDEX,

J_INDEX, R_INDEX, S_INDEX, NA_INDEX);

CONST WLENGTH = 20; { maximum word length }

TYPE TOKEN_TYPE = PACKED ARRAY [1..WLENGTH] OF CHAR;

PCH2 = PACKED ARRAY [1..2] OF CHAR;

PCH3 = PACKED ARRAY [1..3] OF CHAR;

REFLINK = ^REFREC;

{ record for each word token encountered }

REFREC = RECORD

SEQNO : INTEGER;

ID : PCH2;

SECTNO : PCH2;

PAGENO : PCH3;

LINENO : INTEGER;

NEXTREF : REFLINK { i.e. a linked list }

END;

PTR = ^TREE_NODE;

{ record for each word type encountered }

TREE_NODE = RECORD

WORD : TOKEN_TYPE;

COUNT : INTEGER;

LENGTH : INTEGER;

LPTR, RPTR : PTR;

FIRSTREF : REFLINK; { ptr to a refrec }

LASTREF : REFLINK

END;

INDEX_RECORD = RECORD

KEYWORD : [KEY] TOKEN_TYPE;

TABRECNO : INTEGER;

END;

INDEX = FILE OF INDEX_RECORD;

BRANCHES = (LEFT, RIGHT);

 

 

VAR TEXTFILE : TEXT; { input text }

{ output tables }

B_TABLE, L_TABLE, N_TABLE : TEXT;

J_TABLE, R_TABLE, S_TABLE : TEXT;

T_TABLE, NA_TABLE, NULL_TABLE : TEXT;

{ output indexes }

B_INDEX, L_INDEX, N_INDEX : INDEX;

J_INDEX, R_INDEX, S_INDEX : INDEX;

T_INDEX, NA_INDEX, NULL_INDEX : INDEX;

{ total counts }

B_COUNT, L_COUNT, N_COUNT : INTEGER;

J_COUNT, R_COUNT, S_COUNT : INTEGER;

T_COUNT, NA_COUNT, NULL_COUNT : INTEGER;

{ current output records numbers for use with indexes }

B_CURRECNO, L_CURRECNO, N_CURRECNO : INTEGER;

J_CURRECNO, R_CURRECNO, S_CURRECNO : INTEGER;

T_CURRECNO, NA_CURRECNO, NULL_CURRECNO : INTEGER;

LENGTH : INTEGER;

LINENO : INTEGER;

SEQNO : INTEGER; { unique sequence no. for all tokens }

NSTCH : SET OF CHAR;

CHCODE : INTEGER;

CURRENT_TOKEN : TOKEN_TYPE;

BLANK_TOKEN : TOKEN_TYPE; { blank token }

{ pointers to the roots of trees }

B_F_ENTRY, L_F_ENTRY, N_F_ENTRY : PTR;

J_F_ENTRY, R_F_ENTRY, S_F_ENTRY : PTR;

T_F_ENTRY, NA_F_ENTRY, NULL_F_ENTRY : PTR;

ENTRY : PTR;

REF : REFLINK;

SC, OLDSC : PCH2; { current and previous speech codes }

PREV_WORD_AT_EOLN : BOOLEAN;

SECTNO : PCH2;

PAGENO : PCH3;

 

 

VALUE BLANK_TOKEN := (WLENGTH OF ' ');

{ characters not to be stored either as part of tokens,

or as tokens in their own right }

NSTCH := ['.','-',',','"','''',';',

':','!','?','(',')',' '];

 

 

{**************************************************************}

 

 

PROCEDURE INITIALIZE;

 

{ FUNCTION: Sets up initial variables

CALLED BY: Tabbuild

CALLS: Nothing }

 

BEGIN { Initialize }

NSTCH := NSTCH + [CHR(12)]; { add in form feed char }

SEQNO := 0;

PREV_WORD_AT_EOLN := FALSE;

{ blank out total counts }

T_COUNT := 0;

B_COUNT := 0;

L_COUNT := 0;

N_COUNT := 0;

J_COUNT := 0;

R_COUNT := 0;

S_COUNT := 0;

NA_COUNT := 0;

{ set pointers to start of each tree to NIL }

T_F_ENTRY := NIL;

B_F_ENTRY := NIL;

L_F_ENTRY := NIL;

N_F_ENTRY := NIL;

J_F_ENTRY := NIL;

R_F_ENTRY := NIL;

S_F_ENTRY := NIL;

NA_F_ENTRY := NIL;

SC := ' '; { blank out speech code };

OLDSC := ' ';

LINENO := 0;

PAGENO := '***';

{ set output record numbers for each index to 0 }

T_CURRECNO := 0;

B_CURRECNO := 0;

L_CURRECNO := 0;

N_CURRECNO := 0;

J_CURRECNO := 0;

R_CURRECNO := 0;

S_CURRECNO := 0;

NA_CURRECNO := 0;

{ open textfile to be processed }

OPEN (FILE_VARIABLE := TEXTFILE,

FILE_NAME := 'TEXTFILE',

HISTORY := READONLY,

ACCESS_METHOD := SEQUENTIAL,

RECORD_TYPE := VARIABLE,

ORGANIZATION := SEQUENTIAL);

RESET (TEXTFILE);

END; { Initialize }

 

{----------------------------------------------------}

 

PROCEDURE SPACE (VAR OUTFILE : TEXT);

{ Current output file }

 

{ FUNCTION: Writes a space in the current output file

CALLED BY: Write_entry, Write_Refline

CALLS: Nothing }

 

BEGIN { Space }

WRITE (OUTFILE, ' ')

END; { Space }

 

{----------------------------------------------------}

 

PROCEDURE GET_TOKEN (VAR NEW_TOKEN : TOKEN_TYPE);

{ Token into which word is put }

 

{ FUNCTION: Gets a single word from the input file

CALLED BY: Tabbuild

CALLS: Nothing }

 

VAR POSN : INTEGER;

CH : CHAR;

 

BEGIN { Get_Token}

{ initialize }

NEW_TOKEN := BLANK_TOKEN;

POSN := 1;

{ check conditions associated with changing lines }

IF PREV_WORD_AT_EOLN THEN { previous token at end of line }

BEGIN

PREV_WORD_AT_EOLN := FALSE;

LINENO := LINENO + 1; { so increment line number }

END;

{ process token }

REPEAT { until ch = ' ' or eof }

READ (TEXTFILE, CH);

{ check current character }

IF CH IN NSTCH THEN { process any punctuation correctly }

BEGIN

IF CH = '"' THEN { ignore ( and do not store) all other

NSTCH characters }

IF SC = ' ' THEN

SC := OLDSC { resume current speech code }

ELSE

BEGIN { save current speech code }

OLDSC := SC;

SC := ' ';

END;

END

ELSE { part of word }

BEGIN

NEW_TOKEN[POSN] := CH; { place current ch in array }

POSN := POSN + 1;

END;

{ if at end of current line, set global flag for next

invocation of GET_TOKEN }

IF EOLN (TEXTFILE) THEN PREV_WORD_AT_EOLN := TRUE;

UNTIL (CH = ' ') OR EOF (TEXTFILE);

LENGTH := POSN - 1;

END; { Get_Token }

 

{------------------------------------------------}

 

PROCEDURE NEW_ENTRY (NEW_WORD : TOKEN_TYPE;

{ New word to be inserted }

BRANCH : BRANCHES;

{ Where this node will be linked

relative to its parent }

PREVNODE : PTR;

{ Pointer to parent of this node }

VAR CUR_F_ENTRY : PTR);

{ Root of this tree }

 

{ FUNCTION: Creates and fills a new entry

CALLED BY: Putintree

CALLS: Nothing }

BEGIN { New_Entry }

NEW (ENTRY); { create a new record }

ENTRY^.WORD := NEW_WORD;

ENTRY^.COUNT := 1;

ENTRY^.LENGTH := LENGTH;

ENTRY^.LPTR := NIL;

ENTRY^.RPTR := NIL;

NEW (REF);

ENTRY^.FIRSTREF := REF;

REF^.SEQNO := SEQNO;

REF^.ID := SC;

REF^.SECTNO := SECTNO;

REF^.PAGENO := PAGENO;

REF^.LINENO := LINENO;

REF^.NEXTREF := NIL;

ENTRY^.LASTREF := REF; { set pointer to end of list }

IF CUR_F_ENTRY = NIL THEN

CUR_F_ENTRY := ENTRY { if this is first node }

ELSE

IF BRANCH = LEFT THEN

PREVNODE^.LPTR := ENTRY

ELSE { BRANCH = RIGHT }

PREVNODE^.RPTR := ENTRY

END; { New_entry }

 

{-------------------------------------------}

 

FUNCTION SEARCH (SEARCH_WORD : TOKEN_TYPE;

{ Word to be searched for }

VAR BRANCH : BRANCHES;

{ Gets set to branch search went down}

VAR LINK,

{ Pointer to current node in tree }

PREVNODE : PTR)

{ Pointer to node last searched }

: BOOLEAN; { Function returns boolean }

 

{ FUNCTION: Checks to see if a word is already in the tree

CALLED BY: Putintree

CALLS: Nothing }

 

VAR FOUND : BOOLEAN;

BEGIN { Search }

FOUND := FALSE;

REPEAT

{ until next node to be searched doen't exist or word found }

IF SEARCH_WORD < LINK^.WORD THEN { before }

BEGIN

BRANCH := LEFT;

PREVNODE := LINK;

LINK := LINK^.LPTR;

END

ELSE

IF SEARCH_WORD > LINK^.WORD THEN { after }

BEGIN

BRANCH := RIGHT;

PREVNODE := LINK;

LINK := LINK^.RPTR;

END

ELSE { found }

FOUND := TRUE;

UNTIL (LINK = NIL) OR (FOUND = TRUE);

SEARCH := FOUND;

END; { Search }

 

{--------------------------------------------------}

 

PROCEDURE PUTINTREE (NEW_WORD : TOKEN_TYPE;

{ Word to be put in tree }

VAR CUR_F_ENTRY : PTR;

{ Start of current tree }

VAR CUR_COUNT : INTEGER);

{ Total word count }

 

{ FUNCTION: Puts a word in tree

CALLED BY: Select_Tree

CALLS: Search, New_Entry }

 

VAR LINK, PREVNODE : PTR;

BRANCH : BRANCHES;

CH : CHAR;

FOUND : BOOLEAN;

 

BEGIN { Putintree }

CUR_COUNT := CUR_COUNT + 1;

SEQNO := SEQNO + 1;

BRANCH := LEFT; { actual value doesn't matter }

{ uncapitalise first letters }

CHCODE := ORD (NEW_WORD[1]);

IF (CHCODE > 64) AND (CHCODE < 91) THEN

NEW_WORD[1] := CHR (CHCODE + 32);

IF CUR_F_ENTRY = NIL THEN { no nodes in tree }

NEW_ENTRY(NEW_WORD, BRANCH, NIL, CUR_F_ENTRY)

{ left value irrelevant }

ELSE

BEGIN { tree must be searched }

LINK := CUR_F_ENTRY;

FOUND := SEARCH (NEW_WORD, BRANCH, LINK, PREVNODE);

IF FOUND THEN { word found }

BEGIN

LINK^.COUNT := LINK^.COUNT + 1; { increment count }

NEW (REF); { create new reference }

LINK^.LASTREF^.NEXTREF := REF; { link into reflist }

LINK^.LASTREF := REF; { make lastref point to curr. ref }

REF^.SEQNO := SEQNO;

REF^.ID := SC; { set current speech code }

REF^.SECTNO := SECTNO;

REF^.PAGENO := PAGENO;

REF^.LINENO := LINENO;

REF^.NEXTREF := NIL;

END

ELSE

NEW_ENTRY (NEW_WORD, BRANCH, PREVNODE, CUR_F_ENTRY);

END

END; { Putintree }

 

{---------------------------------------------}

 

PROCEDURE SELECT_TREE (NEW_WORD : TOKEN_TYPE);

{ Word to be put in tree }

 

{ FUNCTION: Selects tree for each word

CALLED BY: Main

CALLS: Putintree }

 

BEGIN

{ put into overall tree }

PUTINTREE (NEW_WORD, T_F_ENTRY, T_COUNT);

{ then put into separate trees }

IF SC = 'B ' THEN

PUTINTREE (NEW_WORD, B_F_ENTRY, B_COUNT)

ELSE

IF SC = 'L ' THEN

PUTINTREE (NEW_WORD, L_F_ENTRY, L_COUNT)

ELSE

IF SC = 'N ' THEN

PUTINTREE (NEW_WORD, N_F_ENTRY, N_COUNT)

ELSE

IF SC = 'J ' THEN

PUTINTREE (NEW_WORD, J_F_ENTRY, J_COUNT)

ELSE

IF SC = 'R ' THEN

PUTINTREE (NEW_WORD, R_F_ENTRY, R_COUNT)

ELSE

IF SC = 'S ' THEN

PUTINTREE (NEW_WORD, S_F_ENTRY, S_COUNT)

ELSE

IF SC = 'NA' THEN

PUTINTREE (NEW_WORD, NA_F_ENTRY, NA_COUNT)

ELSE

IF SC = ' ' THEN

PUTINTREE (NEW_WORD, NULL_F_ENTRY, NULL_COUNT);

END;

 

{--------------------------------------------}

 

PROCEDURE PROCESS_TOKEN (NEW_TOKEN : TOKEN_TYPE);

{ Input token }

 

{ FUNCTION: Filters out special tokens

CALLED BY: Main

CALLS: Select_Tree }

 

VAR TEMPNO : PCH3;

CH, FIRST_CHAR : CHAR;

TINDEX, PINDEX : INTEGER;

 

BEGIN

FIRST_CHAR := NEW_TOKEN[1];

CASE FIRST_CHAR OF

'@' : { section number token }

BEGIN { set new section no. }

SECTNO[1] := NEW_TOKEN[2];

SECTNO[2] := NEW_TOKEN[3];

LINENO := LINENO - 1; { ignore line with chap. no. }

END;

'*' : { page number token }

BEGIN { set new page no. }

TEMPNO[1] := NEW_TOKEN[2];

TEMPNO[2] := NEW_TOKEN[3];

TEMPNO[3] := NEW_TOKEN[4];

{ right justify pageno }

TINDEX := 3;

REPEAT

CH := TEMPNO[TINDEX];

IF CH = ' ' THEN TINDEX := TINDEX - 1;

UNTIL CH <> ' ';

PINDEX := 3;

PAGENO := ' ';

REPEAT

PAGENO[PINDEX] := TEMPNO[TINDEX];

PINDEX := PINDEX - 1;

TINDEX := TINDEX - 1;

UNTIL TINDEX = 0;

LINENO := 0; { ignore line containing page no. }

END;

'#' : { speech code token }

BEGIN { set new speech code }

SC[1] := NEW_TOKEN[2];

SC[2] := NEW_TOKEN[3];

END;

' ' : { blank - do nothing };

OTHERWISE SELECT_TREE (NEW_TOKEN) { word to be put into tree }

END; { Case }

END; { Process_Token }

 

{--------------------------------------------}

 

PROCEDURE INDEX_ENTRY (KEYWORD : TOKEN_type;

{ Keyword for current index entry }

VAR CUR_INDEX : INDEX;

{ Current index file }

NOFRECS : INTEGER;

{ Number of records for this entry }

VAR CUR_CURRECNO : INTEGER);

{ Current record number in current

table output file }

 

{ FUNCTION: Write an index entry for the current table record

CALLED BY: Write_Entry

CALLS: Nothing }

 

VAR CUR_ENTRY : INDEX_RECORD;

 

BEGIN

CUR_ENTRY.KEYWORD := KEYWORD;

CUR_ENTRY.TABRECNO := CUR_CURRECNO;

WRITE (CUR_INDEX, CUR_ENTRY);

CUR_CURRECNO := CUR_CURRECNO + NOFRECS;

END;

 

{---------------------------------------------}

 

PROCEDURE WRITE_ENTRY (CUR_TREE : PTR;

{ Current tree for output }

VAR CUR_TABLE : TEXT;

{ Table to be created }

VAR CUR_INDEX : INDEX;

{ Index to be created }

VAR CUR_CURRECNO : INTEGER;

{ Current output record }

CUR_COUNT : INTEGER);

{ Count of total number of

tokens for this table}

 

{ FUNCTION: Write a single tree node

CALLED BY: Do_Strip

CALLS: Index_Entry }

VAR FREQ : REAL;

NOFRECS : INTEGER;

CURREF : REFLINK;

REFS_WRITTEN : INTEGER;

 

BEGIN { Write_entry }

NOFRECS := 0;

REFS_WRITTEN := 0;

WRITE (CUR_TABLE, '*'); { start of type marker }

WRITE (CUR_TABLE, CUR_TREE^.WORD:WLENGTH);

WRITE (CUR_TABLE, CUR_TREE^.COUNT:5); { count for whole text }

SPACE (CUR_TABLE); { frequency for whole text }

FREQ := CUR_TREE^.COUNT / CUR_COUNT;

WRITE (CUR_TABLE, FREQ:10:6);

SPACE (CUR_TABLE);

WRITE (CUR_TABLE, CUR_TREE^.LENGTH:2);

WRITELN (CUR_TABLE); { first line contains main info only }

NOFRECS := NOFRECS + 1;

CURREF := CUR_TREE^.FIRSTREF;

REPEAT { until end of ref. list }

WRITE (CUR_TABLE, CURREF^.SEQNO:5);

SPACE (CUR_TABLE);

WRITE (CUR_TABLE, CURREF^.ID);

SPACE (CUR_TABLE);

WRITE (CUR_TABLE, CURREF^.SECTNO);

SPACE (CUR_TABLE);

WRITE (CUR_TABLE, CURREF^.PAGENO);

SPACE (CUR_TABLE);

WRITE (CUR_TABLE, CURREF^.LINENO:2);

SPACE (CUR_TABLE);

REFS_WRITTEN := REFS_WRITTEN + 1;

CURREF := CURREF^.NEXTREF;

IF CURREF = NIL THEN { last reference }

WRITE (CUR_TABLE, '$') { $ to terminate list of refs }

ELSE { separate references }

WRITE (CUR_TABLE, '/'); { use / as ref separator }

IF (REFS_WRITTEN MOD 2) = 0 THEN { at end of current line }

BEGIN

WRITELN (CUR_TABLE); { start new line if 2 refs written }

NOFRECS := NOFRECS + 1;

END

UNTIL CURREF = NIL;

IF (REFS_WRITTEN MOD 2) = 1 THEN

BEGIN

WRITELN (CUR_TABLE, ' '); { pad table }

NOFRECS := NOFRECS + 1

END;

INDEX_ENTRY (CUR_TREE^.WORD, CUR_INDEX, NOFRECS, CUR_CURRECNO);

END; { Write_entry }

 

{---------------------------------------------}

 

PROCEDURE DO_STRIP (CUR_TREE : PTR;

{ Tree to be stripped }

VAR CUR_TABLE : TEXT;

{ Table for output }

VAR CUR_INDEX : INDEX;

{ Index for this table }

VAR CUR_CURRECNO,

{ Current output record number }

CUR_COUNT : INTEGER);

{ Total count for this tree }

 

{ FUNCTION: Recursively Strip Tree

CALLED BY: StripTree

CALLS: Itself, Write_Entry }

BEGIN { Do_Strip }

IF CUR_TREE <> NIL THEN

BEGIN

DO_STRIP(CUR_TREE^.LPTR, CUR_TABLE, CUR_INDEX,

CUR_CURRECNO, CUR_COUNT); { left branch }

WRITE_ENTRY (CUR_TREE, CUR_TABLE, CUR_INDEX,

CUR_CURRECNO, CUR_COUNT); { current node }

DO_STRIP(CUR_TREE^.RPTR, CUR_TABLE, CUR_INDEX,

CUR_CURRECNO, CUR_COUNT); { left branch }

END;

END; { Do_Strip }

 

{-------------------------------------------}

 

PROCEDURE STRIPTREE (CUR_TREE : PTR;

{ Tree to be stripped }

VAR CUR_TABLE : TEXT;

{ Table for output }

VAR CUR_INDEX : INDEX;

{ Index for this table }

VAR CUR_CURRECNO,

{ Current output record number }

CUR_COUNT : INTEGER);

{ Total count for this tree }

 

{ FUNCTION: Strip a tree into a table and index it

CALLED BY: Strip_All_Trees

CALLS: Do_Strip }

BEGIN { Striptree }

OPEN (FILE_VARIABLE := CUR_TABLE,

HISTORY := NEW,

RECORD_LENGTH := 40,

ACCESS_METHOD := SEQUENTIAL,

RECORD_TYPE := FIXED,

ORGANIZATION := SEQUENTIAL);

REWRITE (CUR_TABLE);

OPEN (FILE_VARIABLE := CUR_INDEX,

HISTORY := NEW,

ACCESS_METHOD := KEYED,

RECORD_TYPE := FIXED,

ORGANIZATION := INDEXED);

REWRITE (CUR_INDEX);

DO_STRIP(CUR_TREE, CUR_TABLE, CUR_INDEX,

CUR_CURRECNO, CUR_COUNT);

CLOSE (CUR_TABLE);

CLOSE (CUR_INDEX);

END; { Striptree }

 

{--------------------------------------}

 

PROCEDURE STRIP_ALL_TREES;

 

{ FUNCTION: Make calls to strip all trees

CALLED BY: Main

CALLS: Striptree }

 

BEGIN

STRIPTREE (T_F_ENTRY, T_TABLE, T_INDEX, T_CURRECNO, T_COUNT);

STRIPTREE (B_F_ENTRY, B_TABLE, B_INDEX, B_CURRECNO, B_COUNT);

STRIPTREE (L_F_ENTRY, L_TABLE, L_INDEX, L_CURRECNO, L_COUNT);;

STRIPTREE (N_F_ENTRY, N_TABLE, N_INDEX, N_CURRECNO, N_COUNT);

STRIPTREE (J_F_ENTRY, J_TABLE, J_INDEX, J_CURRECNO, J_COUNT);

STRIPTREE (R_F_ENTRY, R_TABLE, R_INDEX, R_CURRECNO, R_COUNT);

STRIPTREE (S_F_ENTRY, S_TABLE, S_INDEX, S_CURRECNO, S_COUNT);

STRIPTREE (NA_F_ENTRY, NA_TABLE, NA_INDEX, NA_CURRECNO,

NA_COUNT);

END;

 

 

{******************************************}

 

 

BEGIN { Tabbuild }

INITIALIZE;

REPEAT { until end of text }

GET_TOKEN (CURRENT_TOKEN); { get a token }

PROCESS_TOKEN (CURRENT_TOKEN)

UNTIL EOF (TEXTFILE);

STRIP_ALL_TREES;

END.{ Tabbuild }

 

Previous Next
[ Back to Site Index / Roadmap ] [ Table of Contents ] [ E-mail Author ]

©Andrew Treloar, 2015. W: http://andrew.treloar.net/ E: andrew.treloar@gmail.com

Last modified: Monday, 18-Sep-2017 03:30:02 AEST