Class: Rhales::RueFormatParser

Inherits:
Object
  • Object
show all
Defined in:
lib/rhales/parsers/rue_format_parser.rb

Overview

Hand-rolled recursive descent parser for .rue files

This parser implements .rue file parsing rules in Ruby code and produces an Abstract Syntax Tree (AST) for .rue file processing. It handles:

  • Section-based parsing: <data>, <template>, <logic>
  • Attribute extraction from section tags
  • Delegation to HandlebarsParser for template content
  • Validation of required sections

Note: This class is a parser implementation, not a formal grammar definition. A formal grammar would be written in BNF/EBNF notation, while this class contains the actual parsing logic written in Ruby.

File format structure:

  rue_file              := section+
  section               := '<' tag_name attributes? '>' content '</' tag_name '>'
  tag_name              := 'data' | 'template' | 'logic'
  attributes            := attribute+
  attribute             := key '=' quoted_value
  content               := (text | handlebars_expression)*
  handlebars_expression := '{{' expression '}}'

Defined Under Namespace

Classes: Location, Node, ParseError

Constant Summary collapse

REQUIRES_ONE_OF_SECTIONS =
%w[data template].freeze
KNOWN_SECTIONS =
%w[data template logic].freeze
ALL_SECTIONS =
KNOWN_SECTIONS.freeze
COMMENT_REGEX =

Regular expression to match HTML/XML comments outside of sections

/<!--.*?-->/m

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(content, file_path = nil) ⇒ RueFormatParser

Returns a new instance of RueFormatParser.



71
72
73
74
75
76
77
78
# File 'lib/rhales/parsers/rue_format_parser.rb', line 71

# Sets up a parser for a single .rue source string.
#
# @param content [String] raw .rue source; it is run through
#   preprocess_content and the result is what the parser reads
# @param file_path [String, nil] kept verbatim in @file_path
def initialize(content, file_path = nil)
  @file_path = file_path
  @content   = preprocess_content(content)

  # Cursor starts on the first character: offset 0, line 1, column 1.
  @position, @line, @column = 0, 1, 1

  # No tree exists until parse! assigns one.
  @ast = nil
end

Instance Attribute Details

#astObject (readonly)

Returns the value of attribute ast.



86
87
88
# File 'lib/rhales/parsers/rue_format_parser.rb', line 86

# Reader for @ast: nil after initialize, the parsed tree once parse! has
# assigned it.
def ast = @ast

Instance Method Details

#advanceObject (private)



283
284
285
286
287
288
289
290
291
# File 'lib/rhales/parsers/rue_format_parser.rb', line 283

# Steps the cursor one character forward and keeps @line/@column in sync.
# When the character being left is a newline, the line counter goes up and
# the column resets to 1; any other character only bumps the column.
#
# @return [Integer] the new value of @position
def advance
  case current_char
  when "\n"
    @line += 1
    @column = 1
  else
    @column += 1
  end

  @position += 1
end

#advance_to_position(target_position) ⇒ Object (private)

Advances position tracking (offset, line and column) one character at a time until the given offset is reached.



216
217
218
# File 'lib/rhales/parsers/rue_format_parser.rb', line 216

# Walks the cursor forward with advance (so line/column bookkeeping stays
# correct) until it reaches target_position or the input runs out.
# Does nothing when the cursor is already at or past the target.
#
# @param target_position [Integer] offset to stop at
# @return [nil]
def advance_to_position(target_position)
  advance until @position >= target_position || at_end?
end

#at_end?Boolean (private)

Returns:

  • (Boolean)


293
294
295
# File 'lib/rhales/parsers/rue_format_parser.rb', line 293

# True once the cursor offset has reached or passed the length of @content.
#
# @return [Boolean]
def at_end?
  @content.length <= @position
end

#consume(expected) ⇒ Object (private)



262
263
264
265
266
267
268
269
# File 'lib/rhales/parsers/rue_format_parser.rb', line 262

# Consumes `expected` if the input at the cursor starts with it.
#
# @param expected [String] literal text to match at the cursor
# @return [Boolean] true when the text matched and the cursor was advanced
#   past it, false when nothing matched (the cursor is left untouched)
def consume(expected)
  return false unless peek_string?(expected)

  # Go through advance one character at a time so line/column stay accurate.
  expected.length.times { advance }
  true
end

#current_charObject (private)



271
272
273
274
275
# File 'lib/rhales/parsers/rue_format_parser.rb', line 271

# Character under the cursor, or the "\0" sentinel once the input is
# exhausted.
#
# @return [String] a one-character string
def current_char
  at_end? ? "\0" : @content[@position]
end

#current_locationObject (private)



305
306
307
308
309
310
311
312
313
314
315
# File 'lib/rhales/parsers/rue_format_parser.rb', line 305

# Builds a zero-width Location at the cursor: start and end share the same
# line, column and offset taken from current_position.
#
# @return [Location]
def current_location
  line, column, offset = current_position.values_at(:line, :column, :offset)

  Location.new(
    start_line: line,
    start_column: column,
    end_line: line,
    end_column: column,
    start_offset: offset,
    end_offset: offset,
  )
end

#current_positionObject (private)



301
302
303
# File 'lib/rhales/parsers/rue_format_parser.rb', line 301

# Snapshot of the cursor as a hash with :line, :column and :offset keys
# (in that order), read from @line, @column and @position.
#
# @return [Hash{Symbol => Integer}]
def current_position
  %i[line column offset].zip([@line, @column, @position]).to_h
end

#parse!Object



80
81
82
83
84
# File 'lib/rhales/parsers/rue_format_parser.rb', line 80

# Parses the preprocessed content into @ast and then validates the result.
# Errors raised by parse_rue_file or validate_ast! propagate to the caller.
#
# @return [self] so calls can be chained
def parse!
  tap do
    @ast = parse_rue_file
    # Validation reads @ast, so it has to run after the assignment.
    validate_ast!
  end
end

#parse_attributesObject (private)



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/rhales/parsers/rue_format_parser.rb', line 160

# Reads `name="value"` pairs from the cursor up to (not including) the '>'
# that ends the opening tag, or up to end of input.
#
# A repeated attribute name keeps the last value seen. Problems in the
# input are reported through parse_error (missing '=') or by the helpers
# parse_identifier / parse_quoted_string.
#
# @return [Hash{String => String}] attribute names mapped to their values
def parse_attributes
  parsed = {}

  until at_end? || current_char == '>'
    skip_whitespace
    # Only whitespace was left before the end of the tag.
    break if current_char == '>'

    key = parse_identifier
    skip_whitespace

    parse_error("Expected '=' after attribute name") unless consume('=')
    skip_whitespace

    parsed[key] = parse_quoted_string
    skip_whitespace
  end

  parsed
end

#parse_error(message) ⇒ Object (private)

Raises:



339
340
341
# File 'lib/rhales/parsers/rue_format_parser.rb', line 339

# Raises a ParseError for the current cursor position.
#
# @param message [String] description of what went wrong
# @raise [ParseError] always; carries @line, @column and @position as
#   the line:, column: and offset: keyword arguments
def parse_error(message)
  where = { line: @line, column: @column, offset: @position }
  raise ParseError.new(message, **where)
end

#parse_identifierObject (private)



242
243
244
245
246
247
248
249
250
251
252
# File 'lib/rhales/parsers/rue_format_parser.rb', line 242

# Reads a run of ASCII letters, digits and underscores starting at the
# cursor and leaves the cursor on the first character after it.
#
# @return [String] the identifier text
# @raise [ParseError] via parse_error when no identifier character is found
def parse_identifier
  first = @position

  advance until at_end? || !current_char.match?(/[a-zA-Z0-9_]/)

  # The cursor did not move: there was nothing to read.
  parse_error('Expected identifier') if @position == first

  @content[first...@position]
end

#parse_quoted_stringObject (private)



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/rhales/parsers/rue_format_parser.rb', line 220

# Reads a single- or double-quoted string at the cursor and returns the
# text between the quotes. There is no escape handling: the first
# occurrence of the opening quote character ends the string.
#
# @return [String] the unquoted value
# @raise [ParseError] via parse_error when the cursor is not on a quote,
#   or when the input ends before the closing quote
def parse_quoted_string
  delimiter = current_char
  parse_error('Expected quoted string') unless delimiter == '"' || delimiter == "'"

  advance # step over the opening quote
  chars = []

  # Walking character by character is fine here: attribute values in
  # section tags are typically short, so StringScanner would be overkill.
  until at_end? || current_char == delimiter
    chars << current_char
    advance
  end

  consume(delimiter) || parse_error('Unterminated quoted string')

  chars.join
end

#parse_rue_fileObject (private)



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/rhales/parsers/rue_format_parser.rb', line 98

# Parses the whole input as a sequence of sections separated by optional
# whitespace and wraps them in a :rue_file node.
#
# The root node's location is the cursor position once parsing is done
# (current_location is taken after the last section has been read).
#
# @return [Node] :rue_file node whose children are the section nodes
# @raise [ParseError] when the input holds no section at all, or from
#   parse_section for malformed sections
def parse_rue_file
  sections = []

  loop do
    skip_whitespace
    break if at_end?

    sections << parse_section
  end

  # Report through the shared helper so every parse failure is built the
  # same way (message plus current line/column/offset).
  parse_error('Empty .rue file') if sections.empty?

  Node.new(:rue_file, current_location, children: sections)
end

#parse_sectionObject (private)



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/rhales/parsers/rue_format_parser.rb', line 115

# Parses one `<tag attrs>content</tag>` section starting at the cursor.
#
# @return [Node] :section node whose value hash holds :tag (String),
#   :attributes (Hash) and :content (whatever parse_section_content
#   returned); its location spans from the opening '<' to just past the
#   closing tag
# @raise [ParseError] via parse_error when '<', '>' or the closing tag is
#   missing, or from the helper parsers
def parse_section
  opened_at = current_position

  # Opening tag: '<' name attributes '>'
  parse_error("Expected '<' to start section") unless consume('<')
  tag_name   = parse_tag_name
  attributes = parse_attributes
  parse_error("Expected '>' to close opening tag") unless consume('>')

  # Body: everything up to the matching closing tag
  content = parse_section_content(tag_name)

  # Closing tag
  parse_error("Expected '</#{tag_name}>' to close section") unless consume("</#{tag_name}>")

  closed_at = current_position
  span = Location.new(
    start_line: opened_at[:line],
    start_column: opened_at[:column],
    end_line: closed_at[:line],
    end_column: closed_at[:column],
    start_offset: opened_at[:offset],
    end_offset: closed_at[:offset],
  )

  Node.new(:section, span, value: { tag: tag_name, attributes: attributes, content: content })
end

#parse_section_content(tag_name) ⇒ Object (private)

Uses StringScanner to parse “content” in <section>content</section>



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'lib/rhales/parsers/rue_format_parser.rb', line 185

# Reads the body of a section: everything from the cursor up to (but not
# including) the first `</tag_name>` that follows. The cursor is left on
# the closing tag.
#
# @param tag_name [String] name of the section being parsed
# @return [Array] for 'template', the children of the AST produced by
#   HandlebarsParser; for any other tag, a single :text node holding the
#   raw body, or an empty array when the body is empty
# @raise [ParseError] via parse_error when no closing tag is found
def parse_section_content(tag_name)
  origin     = @position
  terminator = "</#{tag_name}>"

  # Scan a copy of the remaining input; the lookahead stops the scanner
  # right before the closing tag without consuming it.
  scanner = StringScanner.new(@content[origin..])
  found   = scanner.scan_until(/(?=#{Regexp.escape(terminator)})/)
  return parse_error("Expected '#{terminator}' to close section") unless found

  # charpos counts characters, so it doubles as the body length.
  span = scanner.charpos
  body = @content[origin, span]

  # Move the real cursor (with line/column bookkeeping) to the closing tag.
  advance_to_position(origin + span)

  # Template bodies are handed to the Handlebars parser.
  return HandlebarsParser.new(body).tap(&:parse!).ast.children if tag_name == 'template'

  # data and logic bodies stay as plain text. Note the text node's location
  # is taken after the cursor has moved to the end of the body.
  return [] if body.empty?

  [Node.new(:text, current_location, value: body)]
end

#parse_tag_nameObject (private)



148
149
150
151
152
153
154
155
156
157
158
# File 'lib/rhales/parsers/rue_format_parser.rb', line 148

# Reads the tag name (ASCII letters, digits, underscores) that follows the
# '<' of a section and leaves the cursor on the first character after it.
#
# @return [String] the tag name
# @raise [ParseError] via parse_error when no name character is found
def parse_tag_name
  first = @position

  advance until at_end? || !current_char.match?(/[a-zA-Z0-9_]/)

  # An unmoved cursor means there was no name to read.
  parse_error('Expected tag name') if @position == first

  @content[first...@position]
end

#peek_charObject (private)



277
278
279
280
281
# File 'lib/rhales/parsers/rue_format_parser.rb', line 277

# Looks at the character one past the cursor without moving.
#
# @return [String] that character, or "\0" when the index after the
#   cursor is at or beyond the end of the input
def peek_char
  lookahead = @position + 1
  lookahead < @content.length ? @content[lookahead] : "\0"
end

#peek_closing_tag?(tag_name) ⇒ Boolean (private)

Returns:

  • (Boolean)


254
255
256
# File 'lib/rhales/parsers/rue_format_parser.rb', line 254

# Checks, without moving the cursor, whether `</tag_name>` starts at the
# cursor.
#
# @param tag_name [String] tag name to look for
# @return [Boolean]
def peek_closing_tag?(tag_name)
  closing_tag = "</#{tag_name}>"
  peek_string?(closing_tag)
end

#peek_string?(string) ⇒ Boolean (private)

Returns:

  • (Boolean)


258
259
260
# File 'lib/rhales/parsers/rue_format_parser.rb', line 258

# Checks, without moving the cursor, whether the input at the cursor
# starts with `string`.
#
# @param string [String] literal text to compare against
# @return [Boolean]
def peek_string?(string)
  # Slice exactly as many characters as we need to compare; the slice is
  # nil when the cursor is beyond the input, which compares unequal.
  window = @content[@position, string.length]
  window == string
end

#preprocess_content(content) ⇒ Object (private)

Preprocesses content by stripping XML/HTML comments that appear outside of sections; comments inside a section are kept. Uses Ruby's `case`/`in` pattern matching to classify the tokens produced by tokenize_content.



345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
# File 'lib/rhales/parsers/rue_format_parser.rb', line 345

# Strips XML/HTML comments that sit outside of any section and returns the
# rest of the input unchanged. Comments inside a section are preserved.
#
# Tokens come from tokenize_content. A :section_start token turns the
# "inside a section" flag on and a :section_end token turns it off; both
# are always kept.
#
# @param content [String] raw .rue source
# @return [String] the source with out-of-section comments removed
# @raise [NoMatchingPatternError] if a token has a type other than
#   :comment, :text, :section_start or :section_end (there is no fallback
#   branch)
def preprocess_content(content)
  inside_section = false

  # filter_map drops the nil produced for skipped comments. The pattern
  # binds the token text to `text` so the `content` argument is never
  # overwritten by a pattern variable.
  kept = tokenize_content(content).filter_map do |token|
    case token
    in { type: :comment } unless inside_section
      nil
    in { type: :section_start }
      inside_section = true
      token[:content]
    in { type: :section_end }
      inside_section = false
      token[:content]
    in { type: :comment | :text, content: text }
      text
    end
  end

  kept.join
end

#sectionsObject



88
89
90
91
92
93
94
# File 'lib/rhales/parsers/rue_format_parser.rb', line 88

# Indexes the parsed section nodes by tag name.
#
# @return [Hash{String => Node}] tag name mapped to its section node; an
#   empty hash when nothing has been parsed yet. If a tag occurs more than
#   once, the last node wins.
def sections
  @ast ? @ast.children.to_h { |node| [node.value[:tag], node] } : {}
end

#skip_whitespaceObject (private)



297
298
299
# File 'lib/rhales/parsers/rue_format_parser.rb', line 297

# Moves the cursor past any run of whitespace characters (as matched by
# /\s/), stopping at the first other character or at end of input.
#
# @return [nil]
def skip_whitespace
  advance until at_end? || !current_char.match?(/\s/)
end

#tokenize_content(content) ⇒ Object (private)

Tokenizes content into structured tokens (hashes with :type and :content) for pattern matching. Uses StringScanner for better performance and cleaner code.



374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
# File 'lib/rhales/parsers/rue_format_parser.rb', line 374

# Splits raw .rue source into a flat list of tokens.
#
# Each token is a hash with :type and :content, where :type is one of
#   :comment       - a complete `<!-- ... -->` comment (COMMENT_REGEX)
#   :section_start - an opening tag of a known section, attributes included
#   :section_end   - a closing tag of a known section
#   :text          - a run of non-'<' characters, or a single '<' that did
#                    not start any of the above
# Concatenating every token's :content reproduces the input.
#
# The comment pattern and the section names come from COMMENT_REGEX and
# KNOWN_SECTIONS so this method cannot drift from the class constants.
#
# @param content [String] raw .rue source
# @return [Array<Hash>] tokens in input order
def tokenize_content(content)
  # Built once per call, outside the scan loop.
  section_name = Regexp.union(KNOWN_SECTIONS)
  opening_tag  = /<#{section_name}(\s[^>]*)?>/m
  closing_tag  = %r{</#{section_name}>}

  scanner = StringScanner.new(content)
  tokens  = []

  until scanner.eos?
    token =
      if scanner.scan(COMMENT_REGEX)
        # Non-greedy match, so each comment becomes its own token.
        { type: :comment, content: scanner.matched }
      elsif scanner.scan(opening_tag)
        { type: :section_start, content: scanner.matched }
      elsif scanner.scan(closing_tag)
        { type: :section_end, content: scanner.matched }
      elsif scanner.scan(/[^<]+/)
        # Consolidate runs of ordinary characters into one token.
        { type: :text, content: scanner.matched }
      else
        # A '<' that starts none of the patterns above: emit it alone and
        # carry on scanning after it.
        { type: :text, content: scanner.getch }
      end

    tokens << token
  end

  tokens
end

#validate_ast!Object (private)



317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
# File 'lib/rhales/parsers/rue_format_parser.rb', line 317

# Checks the section tags found in @ast against the file-format rules,
# in this order:
#   1. at least one of REQUIRES_ONE_OF_SECTIONS must be present
#   2. no tag may occur more than once
#   3. every tag must be listed in KNOWN_SECTIONS
# The first rule that fails raises; the error position is always reported
# as line 1, column 1.
#
# @return [nil] when all checks pass
# @raise [ParseError] describing the first failed rule
def validate_ast!
  tags = @ast.children.map { |node| node.value[:tag] }

  # Rule 1: a required section must be there.
  if (REQUIRES_ONE_OF_SECTIONS & tags).empty?
    raise ParseError.new("Must have at least one of: #{REQUIRES_ONE_OF_SECTIONS.join(', ')}", line: 1, column: 1)
  end

  # Rule 2: count every tag in a single pass; tally keeps first-seen order,
  # so duplicates are listed in the order they first appear.
  repeated = tags.tally.select { |_tag, count| count > 1 }.keys
  if repeated.any?
    raise ParseError.new("Duplicate sections: #{repeated.join(', ')}", line: 1, column: 1)
  end

  # Rule 3: nothing outside the known tag list.
  unknown = tags - KNOWN_SECTIONS
  if unknown.any?
    raise ParseError.new("Unknown sections: #{unknown.join(', ')}", line: 1, column: 1)
  end
end