/*----------------------------------------------------------------------------*/
/*                                                                            */
/* Copyright (c) 2004-2009 William Data Systems Ltd. and Geoff Stevens.       */
/* All rights reserved.                                                       */
/*                                                                            */
/* This program and the accompanying materials are made available under       */
/* the terms of the Common Public License v1.0 which accompanies this         */
/* distribution. A copy is also available at the following address:           */
/* http://www.opensource.org/licenses/cpl1.0.php                              */
/*                                                                            */
/* Redistribution and use in source and binary forms, with or without         */
/* modification, are permitted provided that the following conditions         */
/* are met:                                                                   */
/*                                                                            */
/* Redistributions of source code must retain the above copyright             */
/* notice, this list of conditions and the following disclaimer.              */
/*                                                                            */
/* Redistributions in binary form must reproduce the above copyright          */
/* notice, this list of conditions and the following disclaimer in the        */
/* documentation and/or other materials provided with the distribution.       */
/*                                                                            */
/* Neither the name or trademarks of William Data Systems nor the names       */
/* of its contributors may be used to endorse or promote products             */
/* derived from this software without specific prior written permission.      */
/*                                                                            */
/* DISCLAIMER                                                                 */
/*                                                                            */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS        */
/* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT          */
/* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR      */
/* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT       */
/* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,      */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT           */
/* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,      */
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      */
/* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT        */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE      */
/* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.       */
/*                                                                            */
/*----------------------------------------------------------------------------*/

-- flyweight token
::CLASS token public
::ATTRIBUTE symbol   -- the text of the token
::ATTRIBUTE type     -- token type
::ATTRIBUTE index    -- position in list
::ATTRIBUTE user     -- for user classification

-- create a token; the user attribute starts out as .nil
-- @param symbol - string representation of the tokenized symbol
-- @param type   - type of the tokenized symbol
-- @param index  - index of the tokenized symbol in a tokenlist
::METHOD init
  use strict arg symbol, type, index
  self~symbol = symbol
  self~type   = type
  self~index  = index
  self~user   = .nil

-- list of tokens
-- retains state during navigation
::CLASS tokenlist public
::ATTRIBUTE current  -- index of the current token in navigation
::ATTRIBUTE tokens   -- queue of tokens

-- create a token list positioned before its first token
-- @param tokens - queue of token objects
::METHOD init
  use strict arg tokens
  self~tokens  = tokens
  -- 0 means "before the first token", so getnext and getnextnonblank
  -- can be used without a preceding getfirst
  self~current = 0

-- get token by index
-- does not change the navigation position
-- @param index - index of token to retrieve
-- @return a token
::METHOD gettoken
  expose tokens
  use arg index
  return tokens~at(index)

-- get first token and reset navigation to it
-- @return first token
::METHOD getfirst
  expose tokens current
  current = 1
  return tokens~at(current)

-- advance navigation by one and get that token
-- @return next token
::METHOD getnext
  expose tokens current
  current = current + 1
  return tokens~at(current)

-- advance navigation to the next token whose symbol contains
-- at least one blank-delimited word
-- when no such token remains, navigation is reset to 0
-- @return next non blank token or .nil
::METHOD getnextnonblank
  expose tokens current
  current = current + 1
  do j = current to tokens~items
    candidate = tokens~at(j)
    if candidate~symbol~words > 0
    then do
      current = j
      return candidate
    end
  end
  current = 0
  return .nil


-- return list of subtokens
-- note the result is a *new* tokenlist with its own navigation
-- state; it holds the same token objects as the tokenlist called
-- @param starttoke - index of first token to return (default 1)
-- @param endtoke   - index of last token to return (default last token)
-- @return - list of tokens
::METHOD subtokens
  expose tokens
  use arg starttoke = 1, endtoke = (tokens~items)
  subtokens = .queue~new
  do j = starttoke to endtoke
    subtokens~queue(tokens~at(j))
  end
  return .tokenlist~new(subtokens)

-- return list of tokens
-- note this is the list in the tokenlist called
-- @return - list of tokens
::METHOD gettokens
  expose tokens
  return tokens

-- glue tokens together
-- @param starttoke - index of first token to return (default 1)
-- @param endtoke   - index of last token to return (default last token)
-- @return - string of assembled tokens
::METHOD detokenize
  expose tokens
  use arg starttoke = 1, endtoke = (tokens~items)

  flat = ''
  do j = starttoke to endtoke
    -- append the token's text, not the token object itself
    flat = flat || tokens~at(j)~symbol
  end
  return flat

-- abstract tokenizer
-- Tokenizer lookahead sizes are calculated in the setup
-- method, for use in the toke method. This is thus an ll(n)
-- tokenizer, where n is programmable.
::CLASS tokenizer public

::ATTRIBUTE delims      -- directory from delimiter text to delimiter name
::ATTRIBUTE lookaheads  -- list of lookahead lengths, descending

-- process a list of arrays of delimiter/name pairs
-- into a list of lookaheads sorted into descending
-- order, and a directory of delimiters indexed from
-- delimiter to name
-- each lookahead is a delimiter length plus one, i.e. the parse
-- column just past a delimiter of that length; every distinct
-- length appears once
-- zero-length delimiters are ignored: they would match without
-- consuming any input and toke would never terminate
-- @param tokelist - list of two-element arrays
-- @return two element array of
-- @return   array of lookaheads, descending
-- @return   directory from delimiter to name
::METHOD setup CLASS
  use arg tokelist

  delims = .directory~new
  lal = ''      -- blank-delimited list of the distinct delimiter lengths
  longest = 0   -- longest delimiter length seen
  do pair over tokelist
    l = pair[1]~length
    if l = 0 then iterate
    delims[pair[1]] = pair[2]
    if lal~wordpos(l) = 0
    then do
      lal = lal l
      longest = max(longest, l)
    end
  end

  -- count down from the longest length so the order is numerically
  -- descending without depending on how a comparator collates the
  -- lengths as strings
  lookaheads = .array~new
  do l = longest to 1 by -1
    if lal~wordpos(l) > 0
    then lookaheads~append(l + 1)
  end

  return .array~of(lookaheads, delims)

-- initialize tokenizer
-- @param tokelist - list of two-element arrays
::METHOD init
  use arg tokelist
  array = self~class~setup(tokelist)
  self~lookaheads = array[1]
  self~delims = array[2]


-- tokenize a string
-- delimiters are tried longest first at the head of the remaining
-- input; characters that start no delimiter accumulate into a token
-- of type 'symbol'
-- each token's index is the character position in line at which
-- the token starts
-- NOTE: the loop and symbol tests use the non-strict <> comparison,
-- which ignores leading and trailing blanks, so scanning stops once
-- only blanks remain and an all-blank pending symbol is discarded
-- @param line - string to tokenize
-- @return tokenlist
::METHOD toke
  use arg line

  lookaheads = self~lookaheads
  delims = self~delims
  tkl = .tokenlist~new(.queue~new)

  pending = ''   -- text of the symbol being accumulated
  index = 1      -- character position of the head of line
  start = 1      -- character position where pending began

  do label top while line <> ''

    do col over lookaheads
      -- lah is the first col-1 characters, rest is what follows them
      parse var line 1 lah =(col) rest
      if delims~hasIndex(lah)
      then do
        -- a delimiter ends any symbol collected so far
        if pending <> ''
        then do
          tkl~tokens~queue(.token~new(pending, 'symbol', start))
          pending = ''
        end
        line = rest
        tkl~tokens~queue(.token~new(lah, delims[lah], index))
        index = index + lah~length
        start = index
        iterate top
      end
    end

    -- no delimiter here: move one character into the pending symbol
    parse var line 1 c 2 line
    pending = pending || c
    index = index + 1

  end

  if pending <> ''
  then tkl~tokens~queue(.token~new(pending, 'symbol', start))

  return tkl
|
|
Generated on 31 Aug 2010 05:20:36 for RexxLiterate by
0.0.1
|