(*-------------------------------------------------------------------------------------------------------*) PROGRAM Scanner (Input,Output,Source,List); (* --------------------------------------------------------- *) (* constants of scanner *) (* --------------------------------------------------------- *) CONST Identifierlaenge = 28; AnzahlSchluesselworte = 18; esc : CHAR = #$18; CR : CHAR = #$0D; CtrlC : CHAR = #$03; CPMeof : CHAR = #$1A; Liston : BOOLEAN = FALSE; OnLine : BOOLEAN = TRUE; idcharacters : SET OF CHAR = ['0'..'9','A'..'Z','_']; Characterset : SET OF CHAR = [#$1B, ' '..'z']; HexCharacter : SET OF CHAR = ['0'..'9','A'..'F']; (* --------------------------------------------------------- *) (* types of scanner *) (* --------------------------------------------------------- *) TYPE (* enum type with basic symbols *) (* RESERVED words are written in CAPITALS *) symbol = ( (* symbols for special characters, operators *) (* and expressions: *) null, odd, times, divsy, modsy, plus, minus, eql, neq, lss, leq, gtr, geq, comma, rparent, THENsy, lparent, becomes, (* symbols for the constants: *) CharCon, IntCon, HexCon, (* symbols, which could stay at the end of a *) (* statement or a declaration: *) ELSEsy, ENDsy, FIsy, ODsy, INTsy, CHARsy, semicolon, period, eofsy, (* symbols, which could stay at the beginning *) (* of a statement or a declaration: *) MODULEsy, Identifier, BEGINsy, CONSTsy, VARsy, PROCsy, DOsy, IFsy, EXITsy, USEsy, READsy, WRITEsy); T_Name = STRING[Identifierlaenge]; (* type of variable (identifier) name *) STR255 = STRING[255]; Cardinal = 0..MAXINT; (* --------------------------------------------------------- *) (* variables of scanner *) (* --------------------------------------------------------- *) VAR SwT : ARRAY [1..AnzahlSchluesselworte] OF RECORD s : T_Name; Nr : INTEGER; END; (* table of key words *) sym : Symbol; (* sym gives output of lexical analysis *) (* to parser *) idname : T_Name; (* name of identifier *) source, list : TEXT; (* source- and listfile *) intval, (* Value of a found integer constant *) ichar, (* counter for working position at input buffer *) SatzEnde, (* pointer to last character of input buffer *) errcount (* count of errors *) : INTEGER; Charval, (* Value of a found character constant *) ch, (* character to process *) ctrl (* control character *) : CHAR; Satz : STRING[255]; (* input buffer *) lesen, Nochmal, noerr, NoCode, LiesCrt, Neu : BOOLEAN; (* --------------------------------------------------------- *) (* procedures and functions of scanner *) (* --------------------------------------------------------- *) PROCEDURE Markiere(n:Cardinal); (* marks error in source code *) VAR i : INTEGER; BEGIN WRITELN(Satz); FOR i := 1 TO ichar - 1 DO WRITE ('.'); WRITELN('^'); END (* Markiere *); PROCEDURE Error(n:Cardinal;x:STR255); (* reports detectet errors and locks flags NOERR and NOCODE *) BEGIN noerr := FALSE; NoCode := TRUE; Markiere(n); WRITELN(x); WRITELN('missing number ',n:3); errcount := errcount + 1; IF errcount > 38 THEN BEGIN WRITELN('more than 38 blocks - Emergency stop!'); HALT; END; END (* Error *); PROCEDURE ZeigeSatz; (* Writes the record, which was read from disk, to screen *) (* and possibly to list file . *) BEGIN WRITELN; WRITELN(Satz); WRITELN; WRITELN('--------------------------------'); IF liston THEN BEGIN WRITELN(List); WRITELN(List,Satz); WRITELN(List); WRITELN(List,'--------------------------------'); END; END (* ZeigeSatz *); PROCEDURE LesCRT; (* fills buffer from terminal *) VAR ch : CHAR; BEGIN WRITELN; WRITELN('Input record - quit with (ESC)'); WRITELN; Satz := ''; REPEAT READ (KBD,ch); WRITE(ch); IF ch IN Characterset THEN Satz := Satz + ch; UNTIL (ch = CR) OR (ch = Esc); WRITELN; WRITELN(' -------------------------------'); END (* LesCRT *); PROCEDURE FuellePuffer; (* fills character buffer "Satz" with one line of source code *) BEGIN IF NOT (LiesCrt) AND EOF(Source) THEN BEGIN Satz[1] := CPMeof ; Satz[0] := CHR (1); ichar := 1; END ELSE BEGIN IF LiesCrt THEN LesCRT ELSE READLN (Source,Satz); Satz := Satz + ' '; IF NOT (LiesCrt) AND (Online) THEN ZeigeSatz; SatzEnde := Length(Satz); ichar := 1; END; END (* FuellePuffer *); PROCEDURE HoleZeichen; (* fetchs valid character from buffer "Satz" *) BEGIN REPEAT IF ichar > SatzEnde THEN FuellePuffer; ch := Satz[ichar]; ichar := ichar + 1; UNTIL (ORD(ch) > 31) OR (ch = CPMeof) OR (ch = Esc); ch := UpCase(ch); (*WRITELN('Fetch char : ORD(ch) = ',ORD(ch)); *) END (* HoleZeichen *); PROCEDURE GetCh; (* get next character from source code *) VAR i : INTEGER; BEGIN IF lesen THEN BEGIN HoleZeichen; (* WRITELN('Getch : ch = ',ch, ' ORD(ch) = ',ORD(ch)); *) END ELSE (* read = false : *) (* Neede, if next char always had been read in scanner *) (* (operators <=, etc). *) lesen := TRUE; END (* GetCh *); PROCEDURE GetIdentifier; (* read identifier and insert into symbol table *) VAR i, j, k : INTEGER; BEGIN (* get identifier name *) i := 0; idname := ''; WHILE ch IN idcharacters DO BEGIN (* entire identifier will be read, but only chars will be stored *) IF i < IdentifierLaenge + 1 THEN BEGIN i:=i+1; idname := Concat(idname,ch); END; getch; END; Writeln('GetIdentifier : Name = ',idname); lesen := FALSE; (* search identifier in table of key words *) (* search algorithm non sequential (binaer) *) i := 1; j := AnzahlSchluesselworte; REPEAT k := (i + j) DIV 2; IF idname <= SwT[k].s THEN j := k - 1; IF idname >= SwT[k].s THEN i := k + 1; UNTIL i > j; IF (i - 1) > j THEN BEGIN (* here : explicite type assignement *) (* of integer number SwT[k].Nr *) (* to enum type symbol. *) (* (non standard Pascal) *) sym := SYMBOL (SwT[k].Nr); END ELSE sym := identifier; (* define symbol *) END (* GetIdentifier *); PROCEDURE GetNumber; (* read int number as sequence of digits (max. 5 digits), *) (* return value in INTVAL or report error *) VAR st : STRING[5]; i : INTEGER; BEGIN sym := intcon; intval := 0; i := 0; REPEAT i := i + 1; IF i < 6 THEN st[i] := ch; GetCh; UNTIL NOT (ch IN ['0'..'9']); lesen := FALSE; IF i > 5 THEN BEGIN i := 5; Error(51,'Error in integer constant : more than 5 digits'); END; st[0] := CHR(i); VAL(st,intval,i); IF i <> 0 THEN Error(52,'Illegal char in integer number'); END (* GetNumber *); PROCEDURE CharacterKonstante; (* read exactly one char as character constant and return *) (* the value (the char) in CHARVAL or report error *) (* The constant should not be longer than the line. *) BEGIN sym := Charcon; GetCh; IF ch IN CharacterSet THEN Charval := ch ELSE Error(53,'Error in character constant'); GetCh; IF ch <> '''' THEN BEGIN Error(54,'Apostrophe expected'); (* "Wrong line end (Blank)" discard and delete charval *) IF charval = ' ' THEN charval := CHR(0); lesen := FALSE; END; END (* CharacterKonstante *); FUNCTION HexWert(ch:CHAR;VAR Wert:INTEGER):BOOLEAN; (* Check, if char ch is a hex number *) (* (i.e.: in 0,..,9,A,..,F) and if, return value in WERT *) VAR i : INTEGER; BEGIN Wert := 0; HexWert := FALSE; IF ch IN Hexcharacter THEN BEGIN HexWert := TRUE; i := ORD(ch); IF (47 < i) AND (i < 57) THEN Wert := i - 48 ELSE IF (64 < i) AND (i < 71) THEN Wert := i - 55; END; END (* HexWert *); PROCEDURE HexaWerte; (* read two byte hex number and convert into char *) VAR HighByte, LowByte : INTEGER; BEGIN sym := Hexcon; Charval := CHR(0); HighByte := 0; LowByte := 0; GetCh; IF NOT HexWert(ch,HighByte) THEN BEGIN Error(55,'Error in hex number : high byte wrong'); END; GetCh; IF HexWert(ch,LowByte) THEN Charval := CHR(HighByte * 16 + LowByte) ELSE BEGIN Error(56,'Error in hex number : low byte wrong'); lesen := FALSE; END; END (* HexaWerte *); PROCEDURE Kommentar; (* process comments by skipping *) VAR exit : BOOLEAN; BEGIN exit := FALSE; REPEAT REPEAT GetCh; UNTIL ch = '*'; GetCh; exit := ch = ')'; UNTIL exit; IF OnLine THEN WRITELN('***** Comment *****'); Nochmal := TRUE; END (* Kommentar *); PROCEDURE KlammerAuf; (* decide, if Symbol LPARENT or comment is available *) BEGIN GetCh; IF ch = '*' THEN Kommentar ELSE BEGIN sym := lparent; lesen := FALSE; END; END (* KlammerAuf *); PROCEDURE Groesser; (* decide, if operator ">" or ">=" is available *) BEGIN GetCh; IF ch = '=' THEN sym := geq ELSE BEGIN sym := gtr; lesen := FALSE; END; END (* Groesser *); PROCEDURE Kleiner; (* decide, if operator "<", "<>" or "<=" is available*) BEGIN GetCh; IF ch = '=' THEN sym := leq ELSE IF ch = '>' THEN sym := neq ELSE BEGIN sym := lss; lesen := FALSE; END; END (* Kleiner *); PROCEDURE SonderZeichen; (* Process special symbols. *) (* The explicite assignment of special symbol to sym: *) (* sym := Null *) (* could be omitted, because sym will be initialised *) (* with zero at entry into scanner. *) BEGIN IF OnLine THEN WRITELN('Sonderzeichen ORD(ch) = ',INTEGER(ch):2); IF liston THEN WRITELN(List,'Special symbol ORD(ch) = ',ORD(ch):2); END (* SonderZeichen *); PROCEDURE DoppelPunkt; (* Decide, if assignment or colon is available *) VAR ch2, ch3 : CHAR; BEGIN ch2 := ch; GetCh; IF ch = '=' THEN sym := becomes ELSE BEGIN lesen := FALSE; ch3 := ch; ch := ch2; Sonderzeichen; ch := ch3; END; END (* DoppelPunkt *); PROCEDURE InitScanner; (* Initialising scanner, tables, variables and files *) FUNCTION ParamCount:integer ; (* only needed for Turbo-Pascal 2.0 und 1.0 *) VAR b : byte absolute $80; BEGIN ParamCount:=0; IF b>0 THEN ParamCount:=1; END (* ParamCount *); FUNCTION ParamStr(i:integer):str255; (* only needed for Turbo-Pascal 2.0 und 3.0 *) VAR s : str255 absolute $80; BEGIN delete(s,1,1); ParamStr := s; END (* ParamStr *); BEGIN (* InitScanner *) ch := ' '; Ctrl := ' '; sym := Null; intval := 0; Charval := ' '; errcount := 0; Satz := ''; lesen := TRUE; NoCode := FALSE; LiesCrt := FALSE; IF (ParamCount = 9) OR (Neu) THEN BEGIN (* define input channel : *) WRITELN('Input from terminal (T) or file (D)?'); READ(KBD,ch); ch := UpCase(ch); LiesCrt := ch = 'T'; IF LiesCrt THEN BEGIN WRITELN('Input from terminal.'); WRITELN('Process line after = TEST.SRC'); READLN(Satz); IF Satz = '' THEN satz := 'TEST.SRC' ELSE satz := satz + '.SRC'; ASSIGN(Source,Satz); RESET (Source); END (* IF LiesCrt *) END (* IF ParamCount = 8 *) ELSE BEGIN Satz := ParamStr(1) + '.SRC'; ASSIGN(Source,Satz); RESET (Source); END; Satz := ''; WRITELN('Please press key after each analysis'); IF liston THEN BEGIN ASSIGN(list,'TEST.LST'); REWRITE(list); END; Fuellepuffer; IF NOT (LiesCrt) AND (OnLine) THEN ZeigeSatz; SwT[ 1].s := 'BEGIN'; SwT[ 1].NR := 32; SwT[ 2].s := 'CHAR'; SwT[ 2].NR := 26; SwT[ 3].s := 'CONST'; SwT[ 3].NR := 33; SwT[ 4].s := 'DO'; SwT[ 4].NR := 36; SwT[ 5].s := 'ELSE'; SwT[ 5].NR := 21; SwT[ 6].s := 'END'; SwT[ 6].NR := 22; SwT[ 7].s := 'EXIT'; SwT[ 7].NR := 38; SwT[ 8].s := 'FI'; SwT[ 8].NR := 23; SwT[ 9].s := 'IF'; SwT[ 9].NR := 37; SwT[10].s := 'INT'; SwT[10].NR := 25; SwT[11].s := 'MODULE'; SwT[11].NR := 38; SwT[12].s := 'OD'; SwT[12].NR := 24; SwT[13].s := 'PROC'; SwT[13].NR := 35; SwT[14].s := 'READ'; SwT[14].NR := 48; SwT[15].s := 'THEN'; SwT[15].NR := 15; SwT[16].s := 'USE'; SwT[16].NR := 39; SwT[17].s := 'VAR'; SwT[17].NR := 34; SwT[18].s := 'WRITE'; SwT[18].NR := 41; END (* InitScanner *); PROCEDURE SymName(i:INTEGER); (* These procedure is for test purposes only. *) (* Will be called by PROMPT with the number of symbol and *) (* writes name of symbol to screen. *) (* Can be omitted later. *) CONST AnzKw = 41; kw : ARRAY[0..AnzKw] OF STRING[10] = ('null ', 'odd ', 'times ', 'divsy ', 'modsy ', 'plus ', 'minus ', 'eql ', 'neq ', 'lss ', 'leg ', 'gtr ', 'geq ', 'comma ', 'rparent ', 'THENsy ', 'lparent ', 'becomes ', 'CharCon ', 'IntCon ', 'HexCon ', 'ELSEsy ', 'ENDsy ', 'FIsy ', 'ODsy ', 'INTsy ', 'CHARsy ', 'semicolon ', 'period ', 'eofsy ', 'MODULEsy ', 'Identifier', 'BEGINsy ', 'CONSTsy ', 'VARsy ', 'PROCsy ', 'DOsy ', 'IFsy ', 'EXITsy ', 'USEsy ', 'READsy ', 'WRITEsy '); BEGIN WRITE ('sym = '); IF liston THEN BEGIN WRITELN(List); WRITE (List,'sym = '); END; IF i <= AnzKw THEN WRITE(kw[i]) ELSE BEGIN WRITE ('**** Symnam : ORD(sy*) to big : value = ',i); IF liston THEN WRITE(list,'**** Symname : ORD(sy*) to big : value = ',i); END; WRITE(' ORD(sy*) = ',i:2,' '); IF liston THEN BEGIN WRITE(list,' ORD(sy*) = ',i:2,' '); END; END (* SymName *); PROCEDURE Display; (* Salutation *) BEGIN ClrScr; WRITELN('+---------------------------------------------------+'); WRITELN('! !'); WRITELN('! scanner started - enter text! !'); WRITELN('! exit with !'); WRITELN('! !'); WRITELN('+---------------------------------------------------+'); WRITELN; END (* Display *); PROCEDURE ByeBye; BEGIN ClrScr; WRITELN('-------------------------------------'); WRITELN('> EOF (Source) reached. <'); WRITELN('> <'); WRITELN('> Program scanner terminated normal <'); WRITELN('> <'); WRITELN('-------------------------------------'); END (* ByeBye *); PROCEDURE Prompt; (* These procedure is for test purposes only. *) (* Will be called by preliminary main program of scanner *) (* Used to trace analysing process. *) (* Can be omitted later. *) BEGIN Symname(ORD(sym)); IF sym = identifier THEN BEGIN WRITELN ('Name = ',idname); IF liston THEN WRITELN (list,'Name = ',idname) END ELSE IF (sym = intcon) THEN BEGIN WRITELN ('Value = ',intval); IF liston THEN WRITELN (LIST,'Value = ',intval); END ELSE IF sym = Charcon THEN BEGIN WRITELN('Value = ',Charval:2,' ASCII : ',ORD(Charval):2); IF liston THEN WRITELN (LIST,'Value = ',Charval:2,' ASCII : ',ORD(Charval):2); END ELSE IF sym = Hexcon THEN BEGIN WRITELN('Char = ',Charval,' Value = ',ORD(Charval)); IF liston THEN WRITELN (LIST,'Value = ',Charval); END ELSE WRITELN; WRITELN ('--------------------------------'); IF liston THEN BEGIN WRITELN(List); WRITELN(List,'--------------------------------'); END; END (* Prompt *); PROCEDURE Scanner; (* This is the part where the real lexical analysis is running *) VAR ch2 : CHAR; exit,bool,incl : BOOLEAN; chval,ch2val : INTEGER; i,j,k : INTEGER; istringval : INTEGER; BEGIN REPEAT sym := Null; Nochmal := FALSE; GetCh; WHILE ch = ' ' DO getch; CASE ch OF 'A'..'Z' : GetIdentifier; '0'..'9' : getnumber; '''' : CharacterKonstante; '$' : HexaWerte; '(' : KlammerAuf; '>' : Groesser; '<' : Kleiner; ':' : DoppelPunkt; '!' : sym := WRITEsy; '?' : sym := READsy; ')' : sym := rparent; ';' : sym := semicolon; '.' : sym := period; ',' : sym := comma; '=' : sym := eql; '/' : sym := divsy; '#' : sym := modsy; '@' : sym := odd; '*' : sym := times; '+' : sym := plus; '-' : sym := minus; #$1A : sym := eofsy; ELSE SonderZeichen; END (* Case *); UNTIL NOT Nochmal; END (* Scanner *); PROCEDURE GetSym; (* control routine for procedure SCANNER with dialog. *) (* Can be omitted later, if parser subroutine is included in *) (* main program and SCANNER is renamed *) BEGIN Scanner; IF OnLine THEN BEGIN Prompt; IF NOT LiesCrt THEN BEGIN READ (KBD,ctrl); END; END; END (* Getsym *); PROCEDURE Analyse; (* Analyse allows multiple start of scanner. *) (* Very useful during test phase. *) (* Can be omitted after test phase. *) BEGIN InitScanner; REPEAT GetSym; UNTIL (ch = CPMeof) OR (ch = Esc) OR (ctrl = Esc); WRITELN('Further analysis (j/n)?'); READ(KBD,Ch) ; Ch := UpCase (Ch); neu := (ch <> 'N'); END (* Analyse *); (* --------------------------------------------------------- *) (* main program of scanner for test run *) (* --------------------------------------------------------- *) BEGIN neu := FALSE; REPEAT Display; Analyse; UNTIL ch IN ['N','n']; ByeBye; END.