--- test-original.zig 2025-01-31 20:54:17.051725647 -0800
+++ test.zig 2025-01-31 20:59:58.435542190 -0800
@@ -95,32 +95,32 @@
 /// A helper function equivalent to the Rust impl T { is_keyword }.
 pub fn isKeyword(tok: T) bool {
     return switch (tok) {
-        T.KwIf,
-        T.KwThen,
-        T.KwElse,
-        T.KwWhen,
-        T.KwIs,
-        T.KwAs,
-        T.KwDbg,
-        T.KwExpect,
-        T.KwCrash,
-        T.KwHas,
-        T.KwWhere,
-        T.KwImplements,
-        T.KwExposes,
-        T.KwImports,
-        T.KwWith,
-        T.KwGenerates,
-        T.KwPackage,
-        T.KwPackages,
-        T.KwRequires,
-        T.KwProvides,
-        T.KwTo,
-        T.KwInterface,
-        T.KwApp,
-        T.KwPlatform,
-        T.KwHosted,
-        T.KwExpectFx,
+        .KwIf,
+        .KwThen,
+        .KwElse,
+        .KwWhen,
+        .KwIs,
+        .KwAs,
+        .KwDbg,
+        .KwExpect,
+        .KwCrash,
+        .KwHas,
+        .KwWhere,
+        .KwImplements,
+        .KwExposes,
+        .KwImports,
+        .KwWith,
+        .KwGenerates,
+        .KwPackage,
+        .KwPackages,
+        .KwRequires,
+        .KwProvides,
+        .KwTo,
+        .KwInterface,
+        .KwApp,
+        .KwPlatform,
+        .KwHosted,
+        .KwExpectFx,
         => true,
         else => false,
     };
@@ -161,29 +161,33 @@
     indent: Indent,
 };
 
+pub const Token = struct {
+    kind: T,
+    offset: u32,
+    length: u32,
+};
+
 /// The buffer that accumulates tokens.
 pub const TokenizedBuffer = struct {
     allocator: std.mem.Allocator,
-    kinds: std.ArrayList(T),
-    offsets: std.ArrayList(u32),
-    lengths: std.ArrayList(u32),
+    tokens: std.MultiArrayList(Token),
     lines: std.ArrayList(Line),
 
     pub fn init(allocator: std.mem.Allocator) !TokenizedBuffer {
         return TokenizedBuffer{
             .allocator = allocator,
-            .kinds = std.ArrayList(T).init(allocator),
-            .offsets = std.ArrayList(u32).init(allocator),
-            .lengths = std.ArrayList(u32).init(allocator),
+            .tokens = .empty,
             .lines = std.ArrayList(Line).init(allocator),
         };
     }
 
     /// Pushes a token with the given kind, token offset, and length.
     pub fn pushToken(self: *TokenizedBuffer, kind: T, tok_offset: usize, tok_length: usize) !void {
-        try self.kinds.append(kind);
-        try self.offsets.append(@intCast(tok_offset));
-        try self.lengths.append(@intCast(tok_length));
+        try self.tokens.append(self.allocator, .{
+            .kind = kind,
+            .offset = @intCast(tok_offset),
+            .length = @intCast(tok_length),
+        });
     }
 
     /// Returns the offset of the token at index `idx`.
@@ -764,8 +768,8 @@
             // Whitespace & control characters
             0...32, '#' => {
                 if (self.cursor.chompTrivia()) |indent| {
-                    try self.output.lines.append(Line{
-                        .token_start = @intCast(self.output.kinds.items.len),
+                    try self.output.lines.append(.{
+                        .token_start = @intCast(self.output.tokens.len),
                         .indent = indent,
                     });
                 }
@@ -1128,13 +1132,12 @@
             }
         }
 
-        if (self.output.lines.items[self.output.lines.items.len - 1].token_start ==
-            self.output.kinds.items.len)
-        {
+        if (self.output.lines.items[self.output.lines.items.len - 1].token_start == self.output.tokens.len) {
            _ = self.output.lines.pop();
        }
 
-        try self.output.offsets.append(@intCast(self.cursor.pos));
+        // No replacement is needed for the old trailing-offset sentinel:
+        // each Token now carries its own length explicitly.
    }
 
    /// Optionally add a NoSpace token based on the next character.
@@ -1149,7 +1152,7 @@
 
    /// Finishes tokenization and returns (messages, output buffer).
    pub fn finish(self: Tokenizer) TokenOutput {
-        return TokenOutput{
+        return .{
            .tokens = self.output,
            .messages = self.cursor.messages,
            .message_count = self.cursor.message_count,
@@ -1170,7 +1173,7 @@
    const result = tokenizer.finish();
    var stdout = std.io.getStdOut().writer();
    try stdout.print("Tokens:\n", .{});
-    for (result.tokens.kinds.items) |tok| {
+    for (result.tokens.tokens.items(.kind)) |tok| {
        try stdout.print(" {s}\n", .{@tagName(tok)});
    }
    if (result.message_count != 0) {
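
For reference, below is a minimal standalone sketch of the std.MultiArrayList
pattern this patch migrates to. The Tag enum and sample values are illustrative
stand-ins, not taken from the patched file; only the container calls
(append, items, deinit) mirror what the patch uses:

const std = @import("std");

const Tag = enum { KwIf, Ident };

const Token = struct {
    kind: Tag,
    offset: u32,
    length: u32,
};

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // MultiArrayList stores each struct field in its own contiguous column
    // (struct-of-arrays), so it replaces the three parallel ArrayLists while
    // keeping the cache-friendly per-field iteration they provided.
    // `.{}` is used here instead of `.empty` so the sketch also compiles on
    // Zig versions that predate decl literals.
    var tokens: std.MultiArrayList(Token) = .{};
    defer tokens.deinit(allocator);

    try tokens.append(allocator, .{ .kind = .KwIf, .offset = 0, .length = 2 });
    try tokens.append(allocator, .{ .kind = .Ident, .offset = 3, .length = 3 });

    // items(.kind) yields a plain []Tag backed by the kind column; this is
    // the same call the patched main loop uses to print token names.
    for (tokens.items(.kind)) |k| {
        std.debug.print("{s}\n", .{@tagName(k)});
    }
}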