Class: Rhales::RueFormatParser
- Inherits:
-
Object
- Object
- Rhales::RueFormatParser
show all
- Defined in:
- lib/rhales/parsers/rue_format_parser.rb
Overview
Hand-rolled recursive descent parser for .rue files
This parser implements .rue file parsing rules in Ruby code and produces
an Abstract Syntax Tree (AST) for .rue file processing. It handles:
- Section-based parsing: <data>, <template>, <logic>
- Attribute extraction from section tags
- Delegation to HandlebarsParser for template content
- Validation of required sections
Note: This class is a parser implementation, not a formal grammar definition.
A formal grammar would be written in BNF/EBNF notation, while this class
contains the actual parsing logic written in Ruby.
File format structure:
rue_file := section+
section := ‘<’ tag_name attributes? ‘>’ content ‘</’ tag_name ‘>’
tag_name := ‘data’ | ‘template’ | ‘logic’
attributes := attribute+
attribute := key ‘=’ quoted_value
content := (text | handlebars_expression)*
handlebars_expression := ‘{{’ expression ‘}}’
Defined Under Namespace
Classes: Location, Node, ParseError
Constant Summary
collapse
- REQUIRES_ONE_OF_SECTIONS =
%w[data template].freeze
- KNOWN_SECTIONS =
%w[data template logic].freeze
- ALL_SECTIONS =
KNOWN_SECTIONS.freeze
/<!--.*?-->/m
Instance Attribute Summary collapse
Instance Method Summary
collapse
Constructor Details
#initialize(content, file_path = nil) ⇒ RueFormatParser
Returns a new instance of RueFormatParser.
71
72
73
74
75
76
77
78
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 71
# Sets up a parser over the given .rue source text.
#
# @param content [String] raw file contents; run through preprocess_content
#   before being stored as the text the cursor walks over
# @param file_path [String, nil] optional path of the source file, stored as-is
def initialize(content, file_path = nil)
  @file_path = file_path
  @content = preprocess_content(content)
  # The cursor starts at offset 0, which is line 1 / column 1.
  @position, @line, @column = 0, 1, 1
  @ast = nil
end
|
Instance Attribute Details
#ast ⇒ Object
Returns the value of attribute ast.
86
87
88
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 86
# Reader for @ast. The value is nil after construction and is replaced by
# the result of parse_rue_file when parse! runs.
def ast
@ast
end
|
Instance Method Details
#advance ⇒ Object
283
284
285
286
287
288
289
290
291
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 283
# Moves the cursor one character forward and keeps @line/@column in step
# with @position.
#
# @return [Integer] the new value of @position
def advance
  # Look at the character being stepped over before the offset changes.
  stepping_over_newline = current_char == "\n"
  @line += 1 if stepping_over_newline
  @column = stepping_over_newline ? 1 : @column + 1
  @position += 1
end
|
#advance_to_position(target_position) ⇒ Object
Helper that advances the cursor (offset, line and column tracking) up to a specific offset
216
217
218
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 216
# Steps the cursor forward with advance until it reaches target_position or
# the end of the content, whichever comes first. Never moves backwards.
#
# @param target_position [Integer] offset into @content to stop at
def advance_to_position(target_position)
  advance until at_end? || @position >= target_position
end
|
#at_end? ⇒ Boolean
293
294
295
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 293
# @return [Boolean] true once the cursor offset is at or beyond the end of
#   @content
def at_end?
  @content.length <= @position
end
|
#consume(expected) ⇒ Object
262
263
264
265
266
267
268
269
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 262
# Consumes +expected+ if the content at the cursor starts with it.
#
# @param expected [String] literal text to match at the current offset
# @return [Boolean] true when the text matched and the cursor was advanced
#   past it; false (cursor untouched) otherwise
def consume(expected)
  return false unless peek_string?(expected)

  # One advance per character so line/column tracking stays accurate.
  expected.each_char { advance }
  true
end
|
#current_char ⇒ Object
271
272
273
274
275
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 271
# @return [String] the character under the cursor, or "\0" as a sentinel
#   when the cursor is at the end of the content
def current_char
  at_end? ? "\0" : @content[@position]
end
|
#current_location ⇒ Object
305
306
307
308
309
310
311
312
313
314
315
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 305
# Builds a zero-width Location at the cursor: start and end share the same
# line, column and offset taken from current_position.
#
# @return [Location]
def current_location
  line, column, offset = current_position.values_at(:line, :column, :offset)
  Location.new(
    start_line: line,
    start_column: column,
    end_line: line,
    end_column: column,
    start_offset: offset,
    end_offset: offset,
  )
end
|
#current_position ⇒ Object
301
302
303
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 301
# Snapshot of the cursor state.
#
# @return [Hash{Symbol => Integer}] :line, :column and :offset as currently
#   tracked in @line, @column and @position
def current_position
  {
    line: @line,
    column: @column,
    offset: @position,
  }
end
|
#parse! ⇒ Object
80
81
82
83
84
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 80
# Parses the content into an AST, stores it in @ast, then validates it.
# Errors raised by parse_rue_file or validate_ast! propagate to the caller.
#
# @return [self] the parser, so calls can be chained
def parse!
  tap do
    @ast = parse_rue_file
    validate_ast!
  end
end
|
#parse_attributes ⇒ Object
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 160
# Parses zero or more key="value" pairs of an opening tag, stopping with the
# cursor on the closing '>' (which is left for the caller) or at end of input.
# A repeated attribute name keeps the last value seen.
#
# @return [Hash{String => String}] attribute names mapped to their values
# @raise [ParseError] via parse_error when '=' is missing, or from
#   parse_identifier / parse_quoted_string on malformed input
def parse_attributes
  attributes = {}
  until at_end? || current_char == '>'
    skip_whitespace
    # Only whitespace separated us from the end of the tag.
    break if current_char == '>'

    name = parse_identifier
    skip_whitespace
    parse_error("Expected '=' after attribute name") unless consume('=')
    skip_whitespace
    attributes[name] = parse_quoted_string
    skip_whitespace
  end
  attributes
end
|
#parse_error(message) ⇒ Object
339
340
341
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 339
# Raises a ParseError tagged with the cursor's current line, column and offset.
#
# @param message [String] description of what went wrong
# @raise [ParseError] always
def parse_error(message)
  failure = ParseError.new(message, line: @line, column: @column, offset: @position)
  raise failure
end
|
#parse_identifier ⇒ Object
242
243
244
245
246
247
248
249
250
251
252
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 242
# Reads a run of ASCII letters, digits and underscores starting at the cursor.
#
# @return [String] the identifier text; the cursor is left just after it
# @raise [ParseError] via parse_error when no identifier character is present
def parse_identifier
  origin = @position
  advance until at_end? || !current_char.match?(/[a-zA-Z0-9_]/)
  parse_error('Expected identifier') if @position == origin
  @content[origin...@position]
end
|
#parse_quoted_string ⇒ Object
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 220
# Parses a single- or double-quoted attribute value starting at the cursor.
# No escape sequences are interpreted: the value runs up to the next
# occurrence of the quote character that opened it.
#
# @return [String] the text between the quotes (may be empty); the cursor is
#   left just after the closing quote
# @raise [ParseError] via parse_error when the cursor is not on a quote
#   character, or when the closing quote is missing
def parse_quoted_string
  quote_char = current_char
  parse_error('Expected quoted string') unless ['"', "'"].include?(quote_char)

  # Step over the opening quote. This must be its own statement: written as
  # `advance value = []` it passes an argument to the zero-arity advance.
  advance
  value_start = @position
  advance until at_end? || current_char == quote_char
  value = @content[value_start...@position]
  consume(quote_char) || parse_error('Unterminated quoted string')
  value
end
|
#parse_rue_file ⇒ Object
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 98
# Parses every section in the content, skipping whitespace between them.
#
# @return [Node] a :rue_file node whose children are the section nodes in
#   source order; its location is the cursor position after the last section
# @raise [ParseError] when the content contains no sections at all
def parse_rue_file
  sections = []
  skip_whitespace
  until at_end?
    sections << parse_section
    skip_whitespace
  end
  parse_error('Empty .rue file') if sections.empty?
  Node.new(:rue_file, current_location, children: sections)
end
|
#parse_section ⇒ Object
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 115
# Parses one complete section: opening tag with attributes, body, and the
# matching closing tag.
#
# @return [Node] a :section node whose value hash holds :tag (String),
#   :attributes (Hash) and :content (Array of child nodes); its location
#   spans from the opening '<' to just after the closing tag
# @raise [ParseError] via parse_error when any delimiter is missing
def parse_section
  opened_at = current_position

  parse_error("Expected '<' to start section") unless consume('<')
  tag_name = parse_tag_name
  attributes = parse_attributes
  parse_error("Expected '>' to close opening tag") unless consume('>')

  content = parse_section_content(tag_name)
  closing_tag = "</#{tag_name}>"
  parse_error("Expected '#{closing_tag}' to close section") unless consume(closing_tag)

  closed_at = current_position
  span = Location.new(
    start_line: opened_at[:line],
    start_column: opened_at[:column],
    end_line: closed_at[:line],
    end_column: closed_at[:column],
    start_offset: opened_at[:offset],
    end_offset: closed_at[:offset],
  )
  Node.new(:section, span, value: { tag: tag_name, attributes: attributes, content: content })
end
|
#parse_section_content(tag_name) ⇒ Object
Uses StringScanner to parse “content” in <section>content</section>
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 185
# Reads the body of a section: everything from the cursor up to (but not
# including) the first "</tag_name>", located with a StringScanner. The
# cursor is advanced to the start of that closing tag.
#
# @param tag_name [String] name of the section whose closing tag ends the body
# @return [Array] child nodes: the HandlebarsParser AST children for a
#   'template' section; otherwise a single :text Node, or [] for an empty body
# @raise [ParseError] via parse_error when the closing tag is not found
def parse_section_content(tag_name)
  closing_tag = "</#{tag_name}>"
  body_start = @position
  scanner = StringScanner.new(@content[body_start..])

  # The lookahead is zero-width, so scan_until returns exactly the text that
  # precedes the closing tag ("" for an empty body, nil when it is missing).
  raw_content = scanner.scan_until(/(?=#{Regexp.escape(closing_tag)})/)
  parse_error("Expected '#{closing_tag}' to close section") if raw_content.nil?

  advance_to_position(body_start + scanner.charpos)

  if tag_name == 'template'
    template_parser = HandlebarsParser.new(raw_content)
    template_parser.parse!
    return template_parser.ast.children
  end

  return [] if raw_content.empty?

  [Node.new(:text, current_location, value: raw_content)]
end
|
#parse_tag_name ⇒ Object
148
149
150
151
152
153
154
155
156
157
158
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 148
# Reads the tag name (ASCII letters, digits, underscores) at the cursor.
#
# @return [String] the tag name; the cursor is left just after it
# @raise [ParseError] via parse_error when no name character is present
def parse_tag_name
  origin = @position
  advance until at_end? || !current_char.match?(/[a-zA-Z0-9_]/)
  parse_error('Expected tag name') if @position == origin
  @content[origin...@position]
end
|
#peek_char ⇒ Object
277
278
279
280
281
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 277
# Looks one character past the cursor without moving it.
#
# @return [String] the character after the current one, or "\0" when there
#   is none
def peek_char
  next_index = @position + 1
  next_index < @content.length ? @content[next_index] : "\0"
end
|
#peek_closing_tag?(tag_name) ⇒ Boolean
254
255
256
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 254
# @param tag_name [String] section name to build the closing tag from
# @return [Boolean] true when "</tag_name>" starts at the cursor
def peek_closing_tag?(tag_name)
  closing_tag = "</#{tag_name}>"
  peek_string?(closing_tag)
end
|
#peek_string?(string) ⇒ Boolean
258
259
260
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 258
# Checks whether the content at the cursor starts with +string+, without
# moving the cursor.
#
# @param string [String] literal text to compare against
# @return [Boolean]
def peek_string?(string)
  upcoming = @content[@position, string.length]
  upcoming == string
end
|
#preprocess_content(content) ⇒ Object
Preprocess content to strip XML/HTML comments outside of sections
Uses Ruby 3.4+ pattern matching for robust, secure parsing
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 345
# Removes XML/HTML comments that sit outside any section while leaving all
# other text, including comments inside a section, untouched.
#
# @param content [String] raw .rue source
# @return [String] the source with top-level comments stripped
def preprocess_content(content)
  inside_section = false
  kept = tokenize_content(content).each_with_object([]) do |token, parts|
    case token
    in { type: :comment } unless inside_section
      # Comment between sections: dropped from the output.
      next
    in { type: :section_start | :section_end => boundary }
      inside_section = boundary == :section_start
      parts << token[:content]
    in { type: :comment | :text, content: body }
      parts << body
    end
  end
  kept.join
end
|
#sections ⇒ Object
88
89
90
91
92
93
94
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 88
# Indexes the parsed section nodes by tag name.
#
# @return [Hash{String => Node}] tag name => section node; empty when nothing
#   has been parsed yet. If a tag occurs more than once the last node wins.
def sections
  return {} unless @ast

  @ast.children.to_h { |section_node| [section_node.value[:tag], section_node] }
end
|
#skip_whitespace ⇒ Object
297
298
299
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 297
# Advances the cursor past any run of whitespace characters (as matched by
# the regexp class \s); does nothing when the cursor is on a non-whitespace
# character or at the end of the content.
def skip_whitespace
  advance until at_end? || !current_char.match?(/\s/)
end
|
#tokenize_content(content) ⇒ Object
Tokenize content into structured tokens for pattern matching
Uses StringScanner for better performance and cleaner code
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 374
# Splits raw .rue source into a flat list of tokens using a StringScanner.
#
# Each token is a Hash with :type (:comment, :section_start, :section_end or
# :text) and :content (the exact matched text). Concatenating every :content
# in order reproduces the input.
#
# @param content [String] raw .rue source
# @return [Array<Hash>] tokens in source order
def tokenize_content(content)
  scanner = StringScanner.new(content)
  # Tried in this order at every position; the first pattern that matches wins.
  matchers = {
    comment: /<!--.*?-->/m,
    section_start: /<(data|template|logic)(\s[^>]*)?>/m,
    section_end: %r{</(data|template|logic)>}m,
    text: /[^<]+/,
  }

  tokens = []
  until scanner.eos?
    type, = matchers.find { |_type, pattern| scanner.scan(pattern) }
    tokens << if type
                { type: type, content: scanner.matched }
              else
                # A '<' that starts none of the recognised constructs: emit
                # it as one character of text so the scanner keeps moving.
                { type: :text, content: scanner.getch }
              end
  end
  tokens
end
|
#validate_ast! ⇒ Object
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
|
# File 'lib/rhales/parsers/rue_format_parser.rb', line 317
# Checks the parsed sections against the file-level rules, in this order:
# at least one of REQUIRES_ONE_OF_SECTIONS is present, no tag appears more
# than once, and every tag is listed in KNOWN_SECTIONS.
#
# @return [nil] when the AST is valid
# @raise [ParseError] for the first rule that is violated (reported at
#   line 1, column 1)
def validate_ast!
  tags = @ast.children.map { |node| node.value[:tag] }

  if (REQUIRES_ONE_OF_SECTIONS & tags).empty?
    raise ParseError.new("Must have at least one of: #{REQUIRES_ONE_OF_SECTIONS.join(', ')}", line: 1, column: 1)
  end

  # tally keeps first-occurrence order, so duplicates are listed in source order.
  repeated = tags.tally.select { |_tag, occurrences| occurrences > 1 }.keys
  raise ParseError.new("Duplicate sections: #{repeated.join(', ')}", line: 1, column: 1) if repeated.any?

  unrecognized = tags - KNOWN_SECTIONS
  raise ParseError.new("Unknown sections: #{unrecognized.join(', ')}", line: 1, column: 1) if unrecognized.any?
end
|